diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 37 |
1 files changed, 34 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 123ff5d86b0..18e905b275b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -459,6 +459,8 @@ static void emit_bfi(const struct lp_build_tgsi_action *action, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef bfi_args[3]; + LLVMValueRef bfi_sm5; + LLVMValueRef cond; // Calculate the bitmask: (((1 << src3) - 1) << src2 bfi_args[0] = LLVMBuildShl(builder, @@ -478,11 +480,40 @@ static void emit_bfi(const struct lp_build_tgsi_action *action, * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) * Use the right-hand side, which the LLVM backend can convert to V_BFI. */ - emit_data->output[emit_data->chan] = + bfi_sm5 = LLVMBuildXor(builder, bfi_args[2], LLVMBuildAnd(builder, bfi_args[0], LLVMBuildXor(builder, bfi_args[1], bfi_args[2], ""), ""), ""); + + /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend + * uses the convenient V_BFI lowering for the above, which follows SM5 + * and disagrees with GLSL semantics when bits (src3) is 32. + */ + cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3], + lp_build_const_int32(gallivm, 32), ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, ""); +} + +static void emit_bfe(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bfe_sm5; + LLVMValueRef cond; + + bfe_sm5 = lp_build_intrinsic(builder, action->intr_name, + emit_data->dst_type, emit_data->args, + emit_data->arg_count, LLVMReadNoneAttribute); + + /* Correct for GLSL semantics. */ + cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2], + lp_build_const_int32(gallivm, 32), ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, ""); } /* this is ffs in C */ @@ -783,7 +814,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp; bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs; - bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe; bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32"; bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int; @@ -835,7 +866,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32"; bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; - bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe; bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32"; bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int; |