diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 31 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 4 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 27 |
4 files changed, 38 insertions, 54 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index e54806a33fe..ea238fa006e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1774,3 +1774,34 @@ void ac_lds_store(struct ac_llvm_context *ctx,
 	ac_build_indexed_store(ctx, ctx->lds,
 			       dw_addr, value);
 }
+
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
+			 LLVMTypeRef dst_type,
+			 LLVMValueRef src0)
+{
+	LLVMValueRef params[2] = {
+		src0,
+
+		/* Passing 1 as the second cttz operand means ffs(x=0) = undef, so LLVM won't
+		 * add special code to check for x=0. The reason is that
+		 * the LLVM behavior for x=0 is different from what we
+		 * need here. However, LLVM also assumes that ffs(x) is
+		 * in [0, 31], but GLSL expects that ffs(0) = -1, so
+		 * a conditional assignment to handle 0 is still required.
+		 *
+		 * The hardware already implements the correct behavior.
+		 */
+		LLVMConstInt(ctx->i1, 1, false),
+	};
+
+	LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+					      params, 2,
+					      AC_FUNC_ATTR_READNONE);
+
+	/* TODO: We need an intrinsic to skip this conditional. */
+	/* Check for zero: cttz(0) is undef here, so select the GLSL result -1. */
+	return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
+							   LLVMIntEQ, src0,
+							   ctx->i32_0, ""),
+			       LLVMConstInt(ctx->i32, -1, 0), lsb, "");
+}
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index b47d51acebb..f7906198277 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -297,6 +297,10 @@ LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
 			 LLVMValueRef dw_addr);
 void ac_lds_store(struct ac_llvm_context *ctx,
 		  LLVMValueRef dw_addr, LLVMValueRef value);
+
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
+			 LLVMTypeRef dst_type,
+			 LLVMValueRef src0);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f78f4863a7e..01677558d86 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1227,34 +1227,6 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
 	return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
 }
 
-static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
-				  LLVMValueRef src0)
-{
-	LLVMValueRef params[2] = {
-		src0,
-
-		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
-		 * add special code to check for x=0. The reason is that
-		 * the LLVM behavior for x=0 is different from what we
-		 * need here.
-		 *
-		 * The hardware already implements the correct behavior.
-		 */
-		LLVMConstInt(ctx->i1, 1, false),
-	};
-
-	LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
-					      params, 2,
-					      AC_FUNC_ATTR_READNONE);
-
-	/* TODO: We need an intrinsic to skip this conditional. */
-	/* Check for zero: */
-	return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
-							   LLVMIntEQ, src0,
-							   ctx->i32_0, ""),
-			       LLVMConstInt(ctx->i32, -1, 0), lsb, "");
-}
-
 static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
 				   LLVMValueRef src0)
 {
@@ -1895,7 +1867,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_find_lsb:
 		src[0] = ac_to_integer(&ctx->ac, src[0]);
-		result = emit_find_lsb(&ctx->ac, src[0]);
+		result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
 		break;
 	case nir_op_ufind_msb:
 		src[0] = ac_to_integer(&ctx->ac, src[0]);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 913b6c32959..a27586b3fff 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -535,31 +535,8 @@ static void emit_lsb(const struct lp_build_tgsi_action *action,
 		     struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef args[2] = {
-		emit_data->args[0],
-
-		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
-		 * add special code to check for x=0. The reason is that
-		 * the LLVM behavior for x=0 is different from what we
-		 * need here. However, LLVM also assumes that ffs(x) is
-		 * in [0, 31], but GLSL expects that ffs(0) = -1, so
-		 * a conditional assignment to handle 0 is still required.
-		 */
-		LLVMConstInt(ctx->i1, 1, 0)
-	};
-
-	LLVMValueRef lsb =
-		lp_build_intrinsic(ctx->ac.builder, "llvm.cttz.i32",
-				emit_data->dst_type, args, ARRAY_SIZE(args),
-				LP_FUNC_ATTR_READNONE);
-
-	/* TODO: We need an intrinsic to skip this conditional. */
-	/* Check for zero: */
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(ctx->ac.builder,
-				LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, args[0],
-					      ctx->i32_0, ""),
-				LLVMConstInt(ctx->i32, -1, 0), lsb, "");
+
+	emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]);
 }
 
 /* Find the last bit set. */
|