diff options
| field | value | date |
|---|---|---|
| author | Connor Abbott <[email protected]> | 2019-06-04 14:42:54 +0200 |
| committer | Connor Abbott <[email protected]> | 2019-06-19 14:08:28 +0200 |
| commit | 53a7649e5de22d6d30dc43a129d60efdc5f3c7be (patch) | |
| tree | 870cb5151e8f2c0f99650d7c7fde389901d2ebc0 | |
| parent | 77be5b2f88d44a16c8f272a05f23b6c0a5971222 (diff) | |
ac/nir: Set speculatable for buffer loads where allowed
This brings the NIR path in line with the TGSI path.
Totals from affected shaders:
SGPRS: 2984 -> 2984 (0.00 %)
VGPRS: 2792 -> 2652 (-5.01 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 247380 -> 248072 (0.28 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 121 -> 132 (9.09 %)
Wait states: 0 -> 0 (0.00 %)
Most of the change came from DiRT: Showdown, as a result of sinking SSBO
loads.
Reviewed-by: Timothy Arceri <[email protected]>
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 7 |
1 files changed, 4 insertions, 3 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b73a0f599f1..dffaeedfbb4 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1779,11 +1779,12 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, cache_policy & ac_glc); } else { int num_channels = util_next_power_of_two(load_bytes) / 4; + bool can_speculate = access & ACCESS_CAN_REORDER; ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0, cache_policy & ac_glc, 0, - false, false); + can_speculate, false); } LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret))); @@ -2489,11 +2490,11 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); - /* TODO: set "can_speculate" when OpenGL needs it. */ + bool can_speculate = access & ACCESS_CAN_REORDER; res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, !!(args.cache_policy & ac_glc), - false); + can_speculate); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); |