diff options
author | Marek Olšák <[email protected]> | 2012-08-27 07:38:15 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2012-08-27 14:35:18 +0200 |
commit | 7f0fcf17c342dcb788c2182b20973c48806ee498 (patch) | |
tree | a41fbe9647bca0a778b1e2a7e8bdb9f1bb4b6ad7 | |
parent | f402acdbe244e5de9b2b616e0a908f5d1416ce89 (diff) |
r300g: implement TRUNC correctly
This fixes some integer division tests.
4 files changed, 42 insertions, 1 deletion
```diff
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index 9bcb3c990ad..916baa23608 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -343,6 +343,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
 		.IsComponentwise = 1
 	},
 	{
+		.Opcode = RC_OPCODE_TRUNC,
+		.Name = "TRUNC",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
 		.Opcode = RC_OPCODE_XPD,
 		.Name = "XPD",
 		.NumSrcRegs = 2,
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index 9c4b456168a..0a70901a82f 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -175,6 +175,9 @@ typedef enum {
 	/** vec4 instruction: dst.c = src0.c */
 	RC_OPCODE_SWZ,
 
+	/** vec4 instruction: dst.c = (abs(src0.c) - fract(abs(src0.c))) * sgn(src0.c) */
+	RC_OPCODE_TRUNC,
+
 	/** special instruction, see ARB_fragment_program */
 	RC_OPCODE_XPD,
 
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index b3da311498b..f4ee86de5d0 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -321,6 +321,24 @@ static void transform_FLR(struct radeon_compiler* c,
 	rc_remove_instruction(inst);
 }
 
+static void transform_TRUNC(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Definition of trunc:
+	 *   trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
+	 *
+	 * The multiplication by sgn(x) can be simplified using CMP:
+	 *   y * sgn(x) = (x < 0 ? -y : y)
+	 */
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]),
+	      negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
+	      negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
+	rc_remove_instruction(inst);
+}
+
 /**
  * Definition of LIT (from ARB_fragment_program):
  *
@@ -666,6 +684,7 @@ int radeonTransformALU(
 	case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
 	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
 	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+	case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1;
 	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
 	default:
 		return 0;
@@ -866,6 +885,17 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
 	rc_remove_instruction(inst);
 }
 
+static void transform_vertex_TRUNC(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_instruction *next = inst->Next;
+
+	/* next->Prev is removed after each transformation and replaced
+	 * by a new instruction. */
+	transform_TRUNC(c, next->Prev);
+	transform_r300_vertex_CMP(c, next->Prev);
+}
+
 /**
  * For use with rc_local_transform, this transforms non-native ALU
  * instructions of the r300 up to r500 vertex engine.
@@ -904,6 +934,7 @@ int r300_transform_vertex_alu(
 	case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
 	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
 	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+	case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
 	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
 	default:
 		return 0;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 4cb08b5836b..a0587b42174 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -114,7 +114,7 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
      /* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */
      /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
-        case TGSI_OPCODE_TRUNC: return RC_OPCODE_FLR;
+        case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC;
      /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
      /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
      /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
```