aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300/compiler/radeon_program_alu.c
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2012-08-27 07:38:15 +0200
committerMarek Olšák <[email protected]>2012-08-27 14:35:18 +0200
commit7f0fcf17c342dcb788c2182b20973c48806ee498 (patch)
treea41fbe9647bca0a778b1e2a7e8bdb9f1bb4b6ad7 /src/gallium/drivers/r300/compiler/radeon_program_alu.c
parentf402acdbe244e5de9b2b616e0a908f5d1416ce89 (diff)
r300g: implement TRUNC correctly
This fixes some integer division tests.
Diffstat (limited to 'src/gallium/drivers/r300/compiler/radeon_program_alu.c')
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_program_alu.c31
1 files changed, 31 insertions, 0 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index b3da311498b..f4ee86de5d0 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -321,6 +321,24 @@ static void transform_FLR(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_TRUNC(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Lower a TRUNC instruction into an FRC, ADD, CMP sequence and
+ * remove the original instruction.
+ *
+ * Definition of trunc:
+ * trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
+ *
+ * The multiplication by sgn(x) can be simplified using CMP:
+ * y * sgn(x) = (x < 0 ? -y : y)
+ *
+ * With x = inst->U.I.SrcReg[0] the emitted instructions are:
+ * FRC dst, |x| (dst = fract(abs(x)))
+ * ADD dst, |x|, -dst (dst = y = abs(x) - fract(abs(x)))
+ * CMP DstReg, x, -dst, dst (x < 0 ? -y : y)
+ *
+ * dst comes from try_to_reuse_dst() and is read back below as a
+ * temporary register (RC_FILE_TEMPORARY, dst.Index).
+ *
+ * Every emit call is handed the current inst->Prev as its position
+ * argument; presumably each new instruction is linked directly in
+ * front of inst, which keeps the FRC, ADD, CMP order. TODO confirm
+ * against emit1/emit2/emit3.
+ *
+ * Only the CMP is given &inst->U.I (FRC and ADD get 0); presumably
+ * so the final result inherits modifiers of the original
+ * instruction. TODO confirm in emit3.
+ */
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]),
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
+ rc_remove_instruction(inst);
+}
+
/**
* Definition of LIT (from ARB_fragment_program):
*
@@ -666,6 +684,7 @@ int radeonTransformALU(
case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
@@ -866,6 +885,17 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_vertex_TRUNC(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_instruction *next = inst->Next;
+
+ /* Vertex-program lowering of TRUNC: run the generic transform_TRUNC
+ * first, then run transform_r300_vertex_CMP on whatever instruction
+ * is found at next->Prev afterwards (presumably the CMP that
+ * transform_TRUNC emitted last; verify against the emit helpers).
+ *
+ * next->Prev is removed after each transformation and replaced
+ * by a new instruction. That is why inst->Next is saved before the
+ * first call and next->Prev is re-read for every call: inst itself
+ * is removed by transform_TRUNC and must not be used again. */
+ transform_TRUNC(c, next->Prev);
+ transform_r300_vertex_CMP(c, next->Prev);
+}
+
/**
* For use with rc_local_transform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -904,6 +934,7 @@ int r300_transform_vertex_alu(
case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;