diff options
author | Marek Olšák <[email protected]> | 2015-02-28 14:31:45 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2015-03-16 14:58:19 +0100 |
commit | b5f19db9766ac54d78b8087b0433011f908ebd2c (patch) | |
tree | 2ebc5ae2c4f5018c8a829560702f0f6e6c9c9c35 | |
parent | d3723c614fb42c22e4e87fe8151bbb36462b425a (diff) |
radeonsi: implement TGSI_OPCODE_BFI (v2)
v2: Don't use the intrinsics, the shader backend can recognize these
patterns and generates optimal code automatically.
Reviewed-by: Tom Stellard <[email protected]>
-rw-r--r-- | docs/GL3.txt | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 34 |
2 files changed, 35 insertions, 1 deletions
diff --git a/docs/GL3.txt b/docs/GL3.txt index 267740a7bb2..b2951492c23 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -102,7 +102,7 @@ GL 4.0, GLSL 4.00: - Dynamically uniform UBO array indices DONE (r600) - Implicit signed -> unsigned conversions DONE - Fused multiply-add DONE () - - Packing/bitfield/conversion functions DONE (r600) + - Packing/bitfield/conversion functions DONE (r600, radeonsi) - Enhanced textureGather DONE (r600, radeonsi) - Geometry shader instancing DONE (r600) - Geometry shader multiple streams DONE () diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 0034b56bc56..d89e2b42eeb 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1234,6 +1234,39 @@ build_tgsi_intrinsic_nomem( build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute); } +static void emit_bfi(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bfi_args[3]; + + // Calculate the bitmask: (((1 << src3) - 1) << src2 + bfi_args[0] = LLVMBuildShl(builder, + LLVMBuildSub(builder, + LLVMBuildShl(builder, + bld_base->int_bld.one, + emit_data->args[3], ""), + bld_base->int_bld.one, ""), + emit_data->args[2], ""); + + bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1], + emit_data->args[2], ""); + + bfi_args[2] = emit_data->args[0]; + + /* Calculate: + * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) + * Use the right-hand side, which the LLVM backend can convert to V_BFI. + */ + emit_data->output[emit_data->chan] = + LLVMBuildXor(builder, bfi_args[2], + LLVMBuildAnd(builder, bfi_args[0], + LLVMBuildXor(builder, bfi_args[1], bfi_args[2], + ""), ""), ""); +} + /* this is ffs in C */ static void emit_lsb(const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1381,6 +1414,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs"; bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl; + bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi; bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev"; |