From ebd05a798e34f99bfa35c18803de47662e9e4840 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 18 Apr 2010 20:49:50 +0200 Subject: r300/compiler: optimize CMP for vertex shaders a bit --- .../drivers/dri/r300/compiler/radeon_program_alu.c | 29 +++++++--------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index f5b7d57eab7..fced31d6cb9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -511,37 +511,26 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and three extra slots, + * The following sequence consumes two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * * SLT tmp0, src0, 0.0 - * SGE tmp1, src0, 0.0 - * MUL tmp0, tmp0, src1 - * MAD dst, src2, tmp1, tmp0 + * LRP dst, tmp0, src1, src2 * - * Yes, I know, I'm a mad scientist. ~ C. */ + * Yes, I know, I'm a mad scientist. ~ C. & M. */ int tempreg0 = rc_find_free_temporary(c); - int tempreg1 = rc_find_free_temporary(c); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dstreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[0], builtin_zero); - /* SGE tmp1, src0, 0.0 */ - emit2(c, inst->Prev, RC_OPCODE_SGE, 0, - dstreg(RC_FILE_TEMPORARY, tempreg1), - inst->U.I.SrcReg[0], builtin_zero); - - /* MUL tmp0, tmp0, src1 */ - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]); - - /* MAD dst, src2, tmp1, tmp0 */ - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, - inst->U.I.DstReg, - inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0)); + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } -- cgit v1.2.3