summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorConnor Abbott <[email protected]>2019-06-04 14:42:54 +0200
committerConnor Abbott <[email protected]>2019-06-19 14:08:28 +0200
commit53a7649e5de22d6d30dc43a129d60efdc5f3c7be (patch)
tree870cb5151e8f2c0f99650d7c7fde389901d2ebc0
parent77be5b2f88d44a16c8f272a05f23b6c0a5971222 (diff)
ac/nir: Set speculatable for buffer loads where allowed
This brings the nir path in line with the TGSI path. Totals from affected shaders: SGPRS: 2984 -> 2984 (0.00 %) VGPRS: 2792 -> 2652 (-5.01 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 247380 -> 248072 (0.28 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 121 -> 132 (9.09 %) Wait states: 0 -> 0 (0.00 %) Most of the change came from DiRT: Showdown, and came from sinking SSBO loads. Reviewed-by: Timothy Arceri <[email protected]>
-rw-r--r--src/amd/common/ac_nir_to_llvm.c7
1 files changed, 4 insertions, 3 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b73a0f599f1..dffaeedfbb4 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1779,11 +1779,12 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
cache_policy & ac_glc);
} else {
int num_channels = util_next_power_of_two(load_bytes) / 4;
+ bool can_speculate = access & ACCESS_CAN_REORDER;
ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
vindex, offset, immoffset, 0,
cache_policy & ac_glc, 0,
- false, false);
+ can_speculate, false);
}
LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
@@ -2489,11 +2490,11 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, "");
- /* TODO: set "can_speculate" when OpenGL needs it. */
+ bool can_speculate = access & ACCESS_CAN_REORDER;
res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
ctx->ac.i32_0, num_channels,
!!(args.cache_policy & ac_glc),
- false);
+ can_speculate);
res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);