summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2015-02-28 14:31:45 +0100
committerMarek Olšák <[email protected]>2015-03-16 14:58:19 +0100
commitb5f19db9766ac54d78b8087b0433011f908ebd2c (patch)
tree2ebc5ae2c4f5018c8a829560702f0f6e6c9c9c35
parentd3723c614fb42c22e4e87fe8151bbb36462b425a (diff)
radeonsi: implement TGSI_OPCODE_BFI (v2)
v2: Don't use the intrinsics; the shader backend can recognize these patterns and generate optimal code automatically. Reviewed-by: Tom Stellard <[email protected]>
-rw-r--r--docs/GL3.txt2
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c34
2 files changed, 35 insertions, 1 deletions
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 267740a7bb2..b2951492c23 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -102,7 +102,7 @@ GL 4.0, GLSL 4.00:
- Dynamically uniform UBO array indices DONE (r600)
- Implicit signed -> unsigned conversions DONE
- Fused multiply-add DONE ()
- - Packing/bitfield/conversion functions DONE (r600)
+ - Packing/bitfield/conversion functions DONE (r600, radeonsi)
- Enhanced textureGather DONE (r600, radeonsi)
- Geometry shader instancing DONE (r600)
- Geometry shader multiple streams DONE ()
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 0034b56bc56..d89e2b42eeb 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1234,6 +1234,39 @@ build_tgsi_intrinsic_nomem(
build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
}
+static void emit_bfi(const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef bfi_args[3];
+ // NOTE(review): LLVM shl is poison if shift >= bit width; confirm args[3] range.
+ // Calculate the bitmask: ((1 << args[3]) - 1) << args[2]
+ bfi_args[0] = LLVMBuildShl(builder,
+ LLVMBuildSub(builder,
+ LLVMBuildShl(builder,
+ bld_base->int_bld.one,
+ emit_data->args[3], ""),
+ bld_base->int_bld.one, ""),
+ emit_data->args[2], "");
+
+ bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
+ emit_data->args[2], "");
+
+ bfi_args[2] = emit_data->args[0];
+
+ /* Calculate (argN = bfi_args[N]; arg0 = mask, arg1 = shifted args[1]):
+ * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
+ * Use the right-hand side, which the LLVM backend can convert to V_BFI.
+ */
+ emit_data->output[emit_data->chan] =
+ LLVMBuildXor(builder, bfi_args[2],
+ LLVMBuildAnd(builder, bfi_args[0],
+ LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
+ ""), ""), "");
+}
+
/* this is ffs in C */
static void emit_lsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1381,6 +1414,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
+ bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";