diff options
author | Samuel Pitoiset <[email protected]> | 2019-02-05 20:22:01 +0100 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2019-02-12 17:25:54 +0100 |
commit | bd1186572f6924a15ea10cd72a95c6d451016bae (patch) | |
tree | b2eb501b2d40295a63024c3c23fbd1eac9f938ae /src/amd/common | |
parent | 8364ffe82349aee3aab79d0a62f1788752d1325c (diff) |
radv: add support for push constants inlining when possible
This removes some scalar loads from shaders, but it increases
the number of SET_SH_REG packets. The implementation is currently
basic, but it could be improved if needed. Inlining dynamic offsets
might also help.
Original idea from Dave Airlie.
29077 shaders in 15096 tests
Totals:
SGPRS: 1321325 -> 1357101 (2.71 %)
VGPRS: 936000 -> 932576 (-0.37 %)
Spilled SGPRs: 24804 -> 24791 (-0.05 %)
Code Size: 49827960 -> 49642232 (-0.37 %) bytes
Max Waves: 242007 -> 242700 (0.29 %)
Totals from affected shaders:
SGPRS: 290989 -> 326765 (12.29 %)
VGPRS: 244680 -> 241256 (-1.40 %)
Spilled SGPRs: 1442 -> 1429 (-0.90 %)
Code Size: 8126688 -> 7940960 (-2.29 %) bytes
Max Waves: 80952 -> 81645 (0.86 %)
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 27 | ||||
-rw-r--r-- | src/amd/common/ac_shader_abi.h | 5 |
2 files changed, 29 insertions, 3 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 54559b19f02..4f44e32d9f9 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1392,10 +1392,31 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef ptr, addr; + LLVMValueRef src0 = get_src(ctx, instr->src[0]); + unsigned index = nir_intrinsic_base(instr); - addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); - addr = LLVMBuildAdd(ctx->ac.builder, addr, - get_src(ctx, instr->src[0]), ""); + addr = LLVMConstInt(ctx->ac.i32, index, 0); + addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, ""); + + /* Load constant values from user SGPRS when possible, otherwise + * fallback to the default path that loads directly from memory. + */ + if (LLVMIsConstant(src0) && + instr->dest.ssa.bit_size == 32) { + unsigned count = instr->dest.ssa.num_components; + unsigned offset = index; + + offset += LLVMConstIntGetZExtValue(src0); + offset /= 4; + + offset -= ctx->abi->base_inline_push_consts; + + if (offset + count <= ctx->abi->num_inline_push_consts) { + return ac_build_gather_values(&ctx->ac, + ctx->abi->inline_push_consts + offset, + count); + } + } ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr); diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index ee18e6c1923..c9b2c2eb4b8 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -32,6 +32,8 @@ struct nir_variable; #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) +#define AC_MAX_INLINE_PUSH_CONSTS 8 + enum ac_descriptor_type { AC_DESC_IMAGE, AC_DESC_FMASK, @@ -66,6 +68,9 @@ struct ac_shader_abi { /* Vulkan only */ LLVMValueRef push_constants; + LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS]; + unsigned num_inline_push_consts; + unsigned base_inline_push_consts; LLVMValueRef view_index; LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4]; |