From bd1186572f6924a15ea10cd72a95c6d451016bae Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 5 Feb 2019 20:22:01 +0100 Subject: radv: add support for push constants inlining when possible This removes some scalar loads from shaders, but it increases the number of SET_SH_REG packets. This is currently basic but it could be improved if needed. Inlining dynamic offsets might also help. Original idea from Dave Airlie. 29077 shaders in 15096 tests Totals: SGPRS: 1321325 -> 1357101 (2.71 %) VGPRS: 936000 -> 932576 (-0.37 %) Spilled SGPRs: 24804 -> 24791 (-0.05 %) Code Size: 49827960 -> 49642232 (-0.37 %) bytes Max Waves: 242007 -> 242700 (0.29 %) Totals from affected shaders: SGPRS: 290989 -> 326765 (12.29 %) VGPRS: 244680 -> 241256 (-1.40 %) Spilled SGPRs: 1442 -> 1429 (-0.90 %) Code Size: 8126688 -> 7940960 (-2.29 %) bytes Max Waves: 80952 -> 81645 (0.86 %) Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/common/ac_nir_to_llvm.c | 27 ++++++++++++++++++++++++--- src/amd/common/ac_shader_abi.h | 5 +++++ 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'src/amd/common') diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 54559b19f02..4f44e32d9f9 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1392,10 +1392,31 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef ptr, addr; + LLVMValueRef src0 = get_src(ctx, instr->src[0]); + unsigned index = nir_intrinsic_base(instr); - addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); - addr = LLVMBuildAdd(ctx->ac.builder, addr, - get_src(ctx, instr->src[0]), ""); + addr = LLVMConstInt(ctx->ac.i32, index, 0); + addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, ""); + + /* Load constant values from user SGPRS when possible, otherwise + * fallback to the default path that loads directly from memory. + */ + if (LLVMIsConstant(src0) && + instr->dest.ssa.bit_size == 32) { + unsigned count = instr->dest.ssa.num_components; + unsigned offset = index; + + offset += LLVMConstIntGetZExtValue(src0); + offset /= 4; + + offset -= ctx->abi->base_inline_push_consts; + + if (offset + count <= ctx->abi->num_inline_push_consts) { + return ac_build_gather_values(&ctx->ac, + ctx->abi->inline_push_consts + offset, + count); + } + } ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr); diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index ee18e6c1923..c9b2c2eb4b8 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -32,6 +32,8 @@ struct nir_variable; #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) +#define AC_MAX_INLINE_PUSH_CONSTS 8 + enum ac_descriptor_type { AC_DESC_IMAGE, AC_DESC_FMASK, @@ -66,6 +68,9 @@ struct ac_shader_abi { /* Vulkan only */ LLVMValueRef push_constants; + LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS]; + unsigned num_inline_push_consts; + unsigned base_inline_push_consts; LLVMValueRef view_index; LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4]; -- cgit v1.2.3