summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorNicolai Haehnle <[email protected]>2008-07-06 16:58:51 +0200
committerNicolai Haehnle <[email protected]>2008-07-12 09:36:02 +0200
commit7904c9fad4c2cb2a4153258a9e86e530a0330a78 (patch)
tree67494991c06e8c2e4e3ac76c03bb7701d49775eb /src
parentb0ef353b4696672ecaea4b370b612bbb482880ca (diff)
r500_fragprog: Transform trigonometric functions in first pass
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/r300/r500_fragprog.c7
-rw-r--r--src/mesa/drivers/dri/r300/r500_fragprog_emit.c91
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_alu.c52
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_alu.h5
4 files changed, 65 insertions, 90 deletions
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index 1cdb065354b..9bb92d3ba41 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -318,12 +318,13 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
insert_WPOS_trailer(&compiler);
- struct radeon_program_transformation transformations[2] = {
+ struct radeon_program_transformation transformations[3] = {
{ &transform_TEX, &compiler },
- { &radeonTransformALU, 0 }
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigScale, 0 }
};
radeonLocalTransform(r300->radeon.glCtx, compiler.program,
- 2, transformations);
+ 3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after all transformations:\n");
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
index 8c900941c4d..4f658039536 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
@@ -156,17 +156,6 @@ struct r500_pfs_compile_state {
#define R500_WRITEMASK_AB 0xC
#define R500_WRITEMASK_ARGB 0xF
-/* 1/(2pi), needed for quick modulus in trig insts
- * Thanks to glisse for pointing out how to do it! */
-static const GLfloat RCP_2PI[] = {0.15915494309189535,
- 0.15915494309189535,
- 0.15915494309189535,
- 0.15915494309189535};
-
-static const GLfloat LIT[] = {127.999999,
- 127.999999,
- 127.999999,
- -127.999999};
static const struct prog_dst_register dstreg_template = {
.File = PROGRAM_TEMPORARY,
@@ -476,12 +465,6 @@ static int emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alph
return _helper_emit_alu(cs, rgbop, alphaop, dst.File, dst.Index, dst.WriteMask);
}
-static int emit_alu_temp(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, int dst, int writemask)
-{
- return _helper_emit_alu(cs, rgbop, alphaop,
- PROGRAM_TEMPORARY, dst - cs->compiler->code->temp_reg_offset, writemask);
-}
-
/**
* Set an instruction's source 0 (both RGB and ALPHA) to the given hardware index.
*/
@@ -612,56 +595,6 @@ static int emit_sop(struct r500_pfs_compile_state *cs,
}
-/**
- * Emit trigonometric function COS, SIN, SCS
- */
-static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi)
-{
- int ip;
- struct prog_dst_register temp = dstreg_template;
- temp.Index = get_temp(cs, 0);
- temp.WriteMask = WRITEMASK_W;
-
- struct prog_src_register srcreg;
- GLuint constant_swizzle;
-
- srcreg.File = PROGRAM_CONSTANT;
- srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
- RCP_2PI, 4, &constant_swizzle);
- srcreg.Swizzle = constant_swizzle;
-
- /* temp = Input*(1/2pi) */
- ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp);
- set_src0(cs, ip, fpi->SrcReg[0]);
- set_src1(cs, ip, srcreg);
- set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0]));
- set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, make_alpha_swizzle(srcreg));
- set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
-
- /* temp = frac(dst) */
- ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, temp);
- set_src0_direct(cs, ip, temp.Index);
- set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W);
-
- /* Dest = trig(temp) */
- if (fpi->Opcode == OPCODE_COS) {
- emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W);
- } else if (fpi->Opcode == OPCODE_SIN) {
- emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W);
- } else if (fpi->Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = fpi->DstReg;
-
- if (fpi->DstReg.WriteMask & WRITEMASK_X) {
- moddst.WriteMask = WRITEMASK_X;
- emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W);
- }
- if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
- moddst.WriteMask = WRITEMASK_Y;
- emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W);
- }
- }
-}
-
static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) {
PROG_CODE;
GLuint src[3], dest = 0;
@@ -693,7 +626,8 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
set_argC_reg(cs, ip, 0, fpi->SrcReg[0]);
break;
case OPCODE_COS:
- emit_trig(cs, fpi);
+ src[0] = make_src(cs, fpi->SrcReg[0]);
+ emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
case OPCODE_DP3:
ip = emit_alu(cs, R500_ALU_RGBA_OP_DP3, R500_ALPHA_OP_DP, fpi->DstReg);
@@ -713,21 +647,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
src[0] = make_src(cs, fpi->SrcReg[0]);
emit_sop(cs, R500_ALPHA_OP_EX2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
- case OPCODE_FLR:
- dest = get_temp(cs, 0);
- ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, dest, WRITEMASK_XYZW);
- set_src0(cs, ip, fpi->SrcReg[0]);
- set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
-
- ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
- set_src0(cs, ip, fpi->SrcReg[0]);
- set_src1_direct(cs, ip, dest);
- set_argA_reg(cs, ip, 0, fpi->SrcReg[1]);
- set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE);
- set_argC(cs, ip, 1,
- R500_SWIZ_RGB_RGB|(R500_SWIZ_MOD_NEG<<9),
- SWIZZLE_W|(R500_SWIZ_MOD_NEG<<3));
- break;
case OPCODE_FRC:
ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, fpi->DstReg);
set_src0(cs, ip, fpi->SrcReg[0]);
@@ -787,11 +706,9 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
emit_sop(cs, R500_ALPHA_OP_RSQ, fpi->DstReg, src[0],
(make_sop_swizzle(fpi->SrcReg[0]) | (R500_SWIZ_MOD_ABS<<3)) & ~(R500_SWIZ_MOD_NEG<<3));
break;
- case OPCODE_SCS:
- emit_trig(cs, fpi);
- break;
case OPCODE_SIN:
- emit_trig(cs, fpi);
+ src[0] = make_src(cs, fpi->SrcReg[0]);
+ emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
break;
case OPCODE_KIL:
case OPCODE_TEX:
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c
index fa6a67f0c19..8daa94c7268 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c
@@ -556,3 +556,55 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
return GL_TRUE;
}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_COS &&
+ inst->Opcode != OPCODE_SIN &&
+ inst->Opcode != OPCODE_SCS)
+ return GL_FALSE;
+
+ static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
+ GLuint temp;
+ GLuint constant;
+ GLuint constant_swizzle;
+
+ temp = radeonFindFreeTemporary(t);
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+
+ emit2(t->Program, OPCODE_MUL, dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
+ emit1(t->Program, OPCODE_FRC, dstregtmpmask(temp, WRITEMASK_W),
+ srcreg(PROGRAM_TEMPORARY, temp));
+
+ if (inst->Opcode == OPCODE_COS) {
+ emit1(t->Program, OPCODE_COS, inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->Opcode == OPCODE_SIN) {
+ emit1(t->Program, OPCODE_SIN, inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->Opcode == OPCODE_SCS) {
+ struct prog_dst_register moddst = inst->DstReg;
+
+ if (inst->DstReg.WriteMask & WRITEMASK_X) {
+ moddst.WriteMask = WRITEMASK_X;
+ emit1(t->Program, OPCODE_COS, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ }
+ if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+ moddst.WriteMask = WRITEMASK_Y;
+ emit1(t->Program, OPCODE_SIN, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ }
+ }
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h
index 3fe6153fd82..ea9d5bb669c 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h
@@ -40,4 +40,9 @@ GLboolean radeonTransformTrigSimple(
struct prog_instruction*,
void*);
+GLboolean radeonTransformTrigScale(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
#endif /* __RADEON_PROGRAM_ALU_H_ */