gallium: Disambiguate TGSI_OPCODE_IF.

The TGSI_OPCODE_IF condition had two possible interpretations:

- src.x != 0.0f
  - Mesa state tracker when PIPE_SHADER_CAP_INTEGERS was false for either vertex or fragment shaders
  - gallivm/llvmpipe
  - postprocess
  - vl state tracker
  - vega state tracker
  - most old drivers
  - old internal state trackers
  - many graw examples

- src.x != 0U
  - Mesa state tracker when PIPE_SHADER_CAP_INTEGERS was true for both vertex and fragment shaders
  - tgsi_exec/softpipe
  - r600
  - radeonsi
  - nv50

Drivers that use the draw module were also a mess (because Mesa would emit float IFs, but the draw module supports native integers, so it would interpret the IF argument as an integer...)

This sort of works if the source argument is limited to float +0.0f or +1.0f, or integer 0, but would fail if the source is float -0.0f, or an integer in the float NaN range. It could also fail if the source is integer 1 and the hardware flushes denormalized numbers to zero.

But with this change there are now two opcodes, IF and UIF, with clear meaning.

Drivers that do not support native integers do not need to worry about UIF. However, for backwards compatibility with old state trackers and examples, it is advisable that native-integer-capable drivers also support the float IF opcode.

I tried to implement this for r600 and radeonsi based on the surrounding code. I couldn't do this for nouveau, so I just shunted IF/UIF together, which matches the current behavior.

Reviewed-by: Roland Scheidegger <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>

v2:
- Incorporate Roland's feedback.
- Fix r600_shader.c merge conflict.
- Fix typo in radeon, spotted by Michel Dänzer.
- Incorporate Christoph Bumiller's patch to handle TGSI_OPCODE_IF(float) properly in nv50/ir.
author: José Fonseca <[email protected]> 2013-04-17 10:47:03 +0100
committer: José Fonseca <[email protected]> 2013-04-17 10:54:08 +0100
commit: 50b3fc6204a28881f625605f988cb0866ae6a6a5 (patch)
tree: c198f26065d7e2d21858a33d4014205b90f96011 /src/gallium/drivers
parent: f61b7da80e238892b0832ec12b11589fba946b47 (diff)
5 files changed, 55 insertions, 18 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 68976914240..d8abccd15af 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -242,6 +242,7 @@ unsigned int Instruction::srcMask(unsigned int s) const
    case TGSI_OPCODE_SCS:
       return 0x1;
    case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
       return 0x1;
    case TGSI_OPCODE_LIT:
       return 0xb;
@@ -385,6 +386,7 @@ static nv50_ir::TexTarget translateTexture(uint tex)
 nv50_ir::DataType Instruction::inferSrcType() const
 {
    switch (getOpcode()) {
+   case TGSI_OPCODE_UIF:
    case TGSI_OPCODE_AND:
    case TGSI_OPCODE_OR:
    case TGSI_OPCODE_XOR:
@@ -2430,6 +2432,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
       break;
    case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
    {
       BasicBlock *ifBB = new BasicBlock(func);
 
@@ -2437,7 +2440,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       condBBs.push(bb);
       joinBBs.push(bb);
 
-      mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
+      mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
 
       setPosition(ifBB, true);
    }
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index 20f76f81ded..03086e379c0 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -1011,7 +1011,7 @@ NV50LoweringPreSSA::checkPredicate(Instruction *insn)
       return;
    cdst = bld.getSSA(1, FILE_FLAGS);
 
-   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, cdst, bld.loadImm(NULL, 0), pred);
+   bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, bld.loadImm(NULL, 0), pred);
 
    insn->setPredicate(insn->cc, cdst);
 }
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index 4d1d37281bc..7676185f03f 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1490,7 +1490,7 @@ NVC0LoweringPass::checkPredicate(Instruction *insn)
    // CAUTION: don't use pdst->getInsn, the definition might not be unique,
    //  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
 
-   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
+   bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, bld.mkImm(0), pred);
 
    insn->setPredicate(insn->cc, pdst);
 }
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 7185c8e78f0..5ac8c095183 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5728,7 +5728,7 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
 }
 #endif
 
-static int tgsi_if(struct r600_shader_ctx *ctx)
+static int emit_if(struct r600_shader_ctx *ctx, int opcode)
 {
 	int alu_type = CF_OP_ALU_PUSH_BEFORE;
 
@@ -5742,7 +5742,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
 		alu_type = CF_OP_ALU;
 	}
 
-	emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, alu_type);
+	emit_logic_pred(ctx, opcode, alu_type);
 
 	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
 
@@ -5752,6 +5752,16 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+	return emit_if(ctx, ALU_OP2_PRED_SETNE);
+}
+
+static int tgsi_uif(struct r600_shader_ctx *ctx)
+{
+	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
+}
+
 static int tgsi_else(struct r600_shader_ctx *ctx)
 {
 	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
@@ -6003,8 +6013,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_TXL,	0, FETCH_OP_SAMPLE_L, tgsi_tex},
 	{TGSI_OPCODE_BRK,	0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 	{TGSI_OPCODE_IF,	0, ALU_OP0_NOP, tgsi_if},
-	/* gap */
-	{75,			0, ALU_OP0_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_UIF,	0, ALU_OP0_NOP, tgsi_uif},
 	{76,			0, ALU_OP0_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ELSE,	0, ALU_OP0_NOP, tgsi_else},
 	{TGSI_OPCODE_ENDIF,	0, ALU_OP0_NOP, tgsi_endif},
@@ -6197,8 +6206,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_TXL,	0, FETCH_OP_SAMPLE_L, tgsi_tex},
 	{TGSI_OPCODE_BRK,	0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 	{TGSI_OPCODE_IF,	0, ALU_OP0_NOP, tgsi_if},
-	/* gap */
-	{75,			0, ALU_OP0_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_UIF,	0, ALU_OP0_NOP, tgsi_uif},
 	{76,			0, ALU_OP0_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ELSE,	0, ALU_OP0_NOP, tgsi_else},
 	{TGSI_OPCODE_ENDIF,	0, ALU_OP0_NOP, tgsi_endif},
@@ -6391,8 +6399,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_TXL,	0, FETCH_OP_SAMPLE_L, tgsi_tex},
 	{TGSI_OPCODE_BRK,	0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 	{TGSI_OPCODE_IF,	0, ALU_OP0_NOP, tgsi_if},
-	/* gap */
-	{75,			0, ALU_OP0_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_UIF,	0, ALU_OP0_NOP, tgsi_uif},
 	{76,			0, ALU_OP0_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ELSE,	0, ALU_OP0_NOP, tgsi_else},
 	{TGSI_OPCODE_ENDIF,	0, ALU_OP0_NOP, tgsi_endif},
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 314c9634fa7..0629b89a8e5 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -515,20 +515,16 @@ static void endloop_emit(
 	ctx->loop_depth--;
 }
 
-static void if_emit(
+static void if_cond_emit(
 	const struct lp_build_tgsi_action * action,
 	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
+	struct lp_build_emit_data * emit_data,
+	LLVMValueRef cond)
 {
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
-	LLVMValueRef cond;
 	LLVMBasicBlockRef if_block, else_block, endif_block;
 
-	cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
-	        bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
-			bld_base->int_bld.zero, "");
-
 	endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
 						ctx->main_fn, "ENDIF");
 	if_block = LLVMInsertBasicBlockInContext(gallivm->context,
@@ -545,6 +541,36 @@ static void if_emit(
 	ctx->branch[ctx->branch_depth - 1].has_else = 0;
 }
 
+static void if_emit(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMValueRef cond;
+
+	cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
+			emit_data->args[0],
+			bld_base->base.zero, "");
+
+	if_cond_emit(action, bld_base, emit_data, cond);
+}
+
+static void uif_emit(
+	const struct lp_build_tgsi_action * action,
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMValueRef cond;
+
+	cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+	        bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
+			bld_base->int_bld.zero, "");
+
+	if_cond_emit(action, bld_base, emit_data, cond);
+}
+
 static void kil_emit(
 	const struct lp_build_tgsi_action * action,
 	struct lp_build_tgsi_context * bld_base,
@@ -1209,6 +1235,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
 	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
 	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
+	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
 	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
 	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
author	José Fonseca <[email protected]>	2013-04-17 10:47:03 +0100
committer	José Fonseca <[email protected]>	2013-04-17 10:54:08 +0100
commit	50b3fc6204a28881f625605f988cb0866ae6a6a5 (patch)
tree	c198f26065d7e2d21858a33d4014205b90f96011 /src/gallium/drivers
parent	f61b7da80e238892b0832ec12b11589fba946b47 (diff)