summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Roland Scheidegger <[email protected]> 2013-08-13 18:59:35 +0200
committer: Roland Scheidegger <[email protected]> 2013-08-15 00:40:14 +0200
commit: 7727fbb7c5d64348994bce6682e681d6181a91e9 (patch)
tree: 8b1d3b15c899e0f54f0ae9388f17507f3e786e17
parent: 72874d2352ed3c2cf7baf7fb455f112c24ce77b3 (diff)
r600/radeonsi: implement new float comparison instructions
Also use ordered comparisons for old cmp instructions.

Tested-by: Michel Dänzer <[email protected]>
Reviewed-by: Tom Stellard <[email protected]>
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c | 18
-rw-r--r-- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 49
2 files changed, 48 insertions, 19 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 37298ccdcf2..fb766c491cd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5743,11 +5743,10 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
@@ -5936,11 +5935,10 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 7a47746ce1e..8ff9abd5ed9 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -850,18 +850,16 @@ static void emit_cmp(
LLVMRealPredicate pred;
LLVMValueRef cond;
- /* XXX I'm not sure whether to do unordered or ordered comparisons,
- * but llvmpipe uses unordered comparisons, so for consistency we use
- * unordered. (The authors of llvmpipe aren't sure about using
- * unordered vs ordered comparisons either.
+ /* Use ordered for everything but NE (which is usual for
+ * float comparisons)
*/
switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break;
- case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break;
- case TGSI_OPCODE_SLE: pred = LLVMRealULE; break;
- case TGSI_OPCODE_SLT: pred = LLVMRealULT; break;
+ case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
+ case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
+ case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
+ case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
- case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break;
+ case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
default: assert(!"unknown instruction"); pred = 0; break;
}
@@ -872,6 +870,35 @@ static void emit_cmp(
cond, bld_base->base.one, bld_base->base.zero, "");
}
+static void emit_fcmp( /* emit hook installed for the TGSI FSEQ/FSGE/FSLT/FSNE opcodes */
+ const struct lp_build_tgsi_action *action, /* not referenced in the body */
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ LLVMRealPredicate pred;
+
+ /* Use ordered predicates (false when an operand is NaN) for everything
+ * but NE, which is unordered (true when an operand is NaN); this is usual for float comparisons.
+ */
+ switch (emit_data->inst->Instruction.Opcode) {
+ case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
+ case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
+ case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
+ case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
+ default: assert(!"unknown instruction"); pred = 0; break; /* pred = 0 keeps it initialized if the assert is compiled out */
+ }
+
+ LLVMValueRef v = LLVMBuildFCmp(builder, pred, /* compare the first source operand with the second */
+ emit_data->args[0], emit_data->args[1],"");
+
+ v = LLVMBuildSExtOrBitCast(builder, v,
+ LLVMInt32TypeInContext(context), ""); /* widen the compare result to i32; assuming scalar operands (i1 result) this is a sign extension: true -> all bits set, false -> 0 -- TODO confirm */
+
+ emit_data->output[emit_data->chan] = v; /* result for the current channel, left as an integer value (no select between one and zero here) */
+}
+
static void emit_not(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1236,6 +1263,10 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
+ bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
+ bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
+ bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
+ bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;