summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r300/r500_fragprog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/r500_fragprog.c')
-rw-r--r--src/mesa/drivers/dri/r300/r500_fragprog.c207
1 files changed, 139 insertions, 68 deletions
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index 5d72ec2784f..7b18efa69d6 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -27,11 +27,9 @@
#include "r500_fragprog.h"
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
+#include "radeon_nqssadce.h"
+#include "radeon_program_alu.h"
+
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
@@ -41,7 +39,7 @@ static void reset_srcreg(struct prog_src_register* reg)
*
*/
static GLboolean transform_TEX(
- struct radeon_program_transform_context* context,
+ struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
{
struct r500_fragment_program_compiler *compiler =
@@ -58,12 +56,11 @@ static GLboolean transform_TEX(
/* ARB_shadow & EXT_shadow_funcs */
if (inst.Opcode != OPCODE_KIL &&
- compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
- context->dest->NumInstructions, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = inst.DstReg.File;
@@ -75,70 +72,72 @@ static GLboolean transform_TEX(
}
inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonCompilerAllocateTemporary(context->compiler);
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
}
- tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
- context->dest->NumInstructions, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
if (inst.Opcode != OPCODE_KIL &&
- compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
-
- tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
- context->dest->NumInstructions, 2);
-
- tgt[0].Opcode = OPCODE_MAD;
- tgt[0].DstReg = inst.DstReg;
- tgt[0].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[0].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[0].SrcReg[0].Index = inst.DstReg.Index;
+ int rcptemp = radeonFindFreeTemporary(t);
+
+ tgt = radeonAppendInstructions(t->Program, 3);
+
+ tgt[0].Opcode = OPCODE_RCP;
+ tgt[0].DstReg.File = PROGRAM_TEMPORARY;
+ tgt[0].DstReg.Index = rcptemp;
+ tgt[0].DstReg.WriteMask = WRITEMASK_W;
+ tgt[0].SrcReg[0] = inst.SrcReg[0];
+ tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ tgt[1].Opcode = OPCODE_MAD;
+ tgt[1].DstReg = inst.DstReg;
+ tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
+ tgt[1].SrcReg[0] = inst.SrcReg[0];
+ tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[1].Index = rcptemp;
+ tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[2].Index = inst.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
- tgt[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
- tgt[0].SrcReg[1].File = PROGRAM_BUILTIN;
- tgt[0].SrcReg[1].Swizzle = SWIZZLE_1111;
- tgt[0].SrcReg[2] = inst.SrcReg[0];
- tgt[0].SrcReg[2].Swizzle = SWIZZLE_ZZZZ;
+ tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[0].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
+ tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
else
- tgt[0].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
+ tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
- tgt[1].Opcode = OPCODE_CMP;
- tgt[1].DstReg = orig_inst->DstReg;
- tgt[1].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[0].Index = tgt[0].DstReg.Index;
- tgt[1].SrcReg[1].File = PROGRAM_BUILTIN;
- tgt[1].SrcReg[2].File = PROGRAM_BUILTIN;
+ tgt[2].Opcode = OPCODE_CMP;
+ tgt[2].DstReg = orig_inst->DstReg;
+ tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
+ tgt[2].SrcReg[1].File = PROGRAM_BUILTIN;
+ tgt[2].SrcReg[2].File = PROGRAM_BUILTIN;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_1111;
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_0000;
+ tgt[2].SrcReg[1].Swizzle = SWIZZLE_1111;
+ tgt[2].SrcReg[2].Swizzle = SWIZZLE_0000;
} else {
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_0000;
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_1111;
+ tgt[2].SrcReg[1].Swizzle = SWIZZLE_0000;
+ tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111;
}
} else if (destredirect) {
- tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
- context->dest->NumInstructions, 1);
+ tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MAD;
+ tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
tgt->SrcReg[0].Index = inst.DstReg.Index;
- tgt->SrcReg[1].File = PROGRAM_BUILTIN;
- tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
- tgt->SrcReg[2].File = PROGRAM_BUILTIN;
- tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
}
return GL_TRUE;
@@ -179,9 +178,10 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
- GLuint tempregi = radeonCompilerAllocateTemporary(&compiler->compiler);
+ GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
- fpi = radeonClauseInsertInstructions(&compiler->compiler, &compiler->compiler.Clauses[0], 0, 3);
+ _mesa_insert_instructions(compiler->program, 0, 3);
+ fpi = compiler->program->Instructions;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
@@ -213,7 +213,7 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
i++;
/* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens);
+ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
fpi[i].Opcode = OPCODE_MAD;
@@ -238,7 +238,7 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
i++;
- for (; i < compiler->compiler.Clauses[0].NumInstructions; ++i) {
+ for (; i < compiler->program->NumInstructions; ++i) {
int reg;
for (reg = 0; reg < 3; reg++) {
if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
@@ -251,6 +251,57 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
}
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+ s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
+
+static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
+{
+ GLuint relevant;
+ int i;
+
+ if (reg.Abs)
+ return GL_TRUE;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Implement a non-native swizzle. This function assumes that
+ * is_native_swizzle returned true.
+ */
+static void nqssadce_build_swizzle(struct nqssadce_state *s,
+ struct prog_dst_register dst, struct prog_src_register src)
+{
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(s->Program, s->IP, 2);
+ inst = s->Program->Instructions + s->IP;
+
+ inst[0].Opcode = OPCODE_MOV;
+ inst[0].DstReg = dst;
+ inst[0].DstReg.WriteMask &= src.NegateBase;
+ inst[0].SrcReg[0] = src;
+
+ inst[1].Opcode = OPCODE_MOV;
+ inst[1].DstReg = dst;
+ inst[1].DstReg.WriteMask &= ~src.NegateBase;
+ inst[1].SrcReg[0] = src;
+
+ s->IP += 2;
+}
+
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
@@ -310,36 +361,57 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
compiler.r300 = r300;
compiler.fp = fp;
compiler.code = &fp->code;
+ compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
- radeonCompilerInit(&compiler.compiler, r300->radeon.glCtx, &fp->mesa_program.Base);
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: Initial program:\n");
+ _mesa_print_program(compiler.program);
+ }
insert_WPOS_trailer(&compiler);
- struct radeon_program_transformation transformations[1] = {
- { &transform_TEX, &compiler }
+ struct radeon_program_transformation transformations[3] = {
+ { &transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(r300->radeon.glCtx, compiler.program,
+ 3, transformations);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ }
+
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &is_native_swizzle,
+ .BuildSwizzle = &nqssadce_build_swizzle,
+ .RewriteDepthOut = GL_TRUE
};
- radeonClauseLocalTransform(&compiler.compiler,
- &compiler.compiler.Clauses[0],
- 1, transformations);
+ radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler state after transformations:\n");
- radeonCompilerDump(&compiler.compiler);
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ _mesa_print_program(compiler.program);
}
fp->translated = r500FragmentProgramEmit(&compiler);
- radeonCompilerCleanup(&compiler.compiler);
+ /* Subtle: Rescue any parameters that have been added during transformations */
+ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
+ fp->mesa_program.Base.Parameters = compiler.program->Parameters;
+ compiler.program->Parameters = 0;
+
+ _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
if (RADEON_DEBUG & DEBUG_PIXEL) {
- fprintf(stderr, "Mesa program:\n");
- fprintf(stderr, "-------------\n");
- _mesa_print_program(&fp->mesa_program.Base);
- fflush(stdout);
- if (fp->translated)
+ if (fp->translated) {
+ _mesa_printf("Machine-readable code:\n");
dump_program(&fp->code);
+ }
}
}
@@ -458,9 +530,8 @@ static void dump_program(struct r500_fragment_program_code *code)
if (code->const_nr) {
fprintf(stderr, "--------\nConstants:\n");
for (n = 0; n < code->const_nr; n++) {
- fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n,
- code->constant[n][0], code->constant[n][1], code->constant[n][2],
- code->constant[n][3]);
+ fprintf(stderr, "Constant %d: %i[%i]\n", n,
+ code->constant[n].File, code->constant[n].Index);
}
fprintf(stderr, "--------\n");
}