summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorTom Stellard <[email protected]>2015-04-13 13:25:41 +0000
committerTom Stellard <[email protected]>2015-04-14 13:37:12 +0000
commite0994e0f97a2078735f0b5e86cbad9f74c565c05 (patch)
tree90e68990dac508f2b15787975f27eeadefdb06e3 /src/gallium
parentc6d79ed289a75f13c65f011be870f7e43a0fedc7 (diff)
radeon/llvm: Improve codegen for KILL_IF
Rather than emitting one kill instruction per component of KILL_IF's src reg, we now or the components of the src register together and use the result as a condition for just one kill instruction. shader-db stats (bonaire): 979 shaders Totals: SGPRS: 34872 -> 34848 (-0.07 %) VGPRS: 20696 -> 20676 (-0.10 %) Code Size: 749032 -> 748452 (-0.08 %) bytes LDS: 11 -> 11 (0.00 %) blocks Scratch: 12288 -> 12288 (0.00 %) bytes per wave Totals from affected shaders: SGPRS: 1184 -> 1160 (-2.03 %) VGPRS: 600 -> 580 (-3.33 %) Code Size: 13200 -> 12620 (-4.39 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Increases: SGPRS: 2 (0.00 %) VGPRS: 0 (0.00 %) Code Size: 0 (0.00 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) Decreases: SGPRS: 5 (0.01 %) VGPRS: 5 (0.01 %) Code Size: 25 (0.03 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) *** BY PERCENTAGE *** Max Increase: SGPRS: 32 -> 40 (25.00 %) VGPRS: 0 -> 0 (0.00 %) Code Size: 0 -> 0 (0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Max Decrease: SGPRS: 32 -> 24 (-25.00 %) VGPRS: 16 -> 12 (-25.00 %) Code Size: 116 -> 96 (-17.24 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave *** BY UNIT *** Max Increase: SGPRS: 64 -> 72 (12.50 %) VGPRS: 0 -> 0 (0.00 %) Code Size: 0 -> 0 (0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Max Decrease: SGPRS: 32 -> 24 (-25.00 %) VGPRS: 16 -> 12 (-25.00 %) Code Size: 424 -> 356 (-16.04 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c29
1 files changed, 29 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 91c56a3e631..18afbcb0a2e 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -637,6 +637,34 @@ static void uif_emit(
if_cond_emit(action, bld_base, emit_data, cond);
}
+static void kill_if_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ unsigned i;
+ LLVMValueRef conds[TGSI_NUM_CHANNELS];
+
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
+ conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
+ bld_base->base.zero, "");
+ }
+
+ /* Or the conditions together */
+ for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
+ conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
+ }
+
+ emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+ emit_data->arg_count = 1;
+ emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
+ lp_build_const_float(gallivm, -1.0f),
+ bld_base->base.zero, "");
+}
+
static void kil_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1467,6 +1495,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
+ bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;