summary | refs | log | tree | commit | diff | stats
path: root/src/amd/common
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2019-02-05 20:22:01 +0100
committer Samuel Pitoiset <[email protected]> 2019-02-12 17:25:54 +0100
commit bd1186572f6924a15ea10cd72a95c6d451016bae (patch)
tree b2eb501b2d40295a63024c3c23fbd1eac9f938ae /src/amd/common
parent 8364ffe82349aee3aab79d0a62f1788752d1325c (diff)
radv: add support for push constants inlining when possible
This removes some scalar loads from shaders, but it increases the
number of SET_SH_REG packets. This is currently basic but it could
be improved if needed. Inlining dynamic offsets might also help.

Original idea from Dave Airlie.

29077 shaders in 15096 tests
Totals:
SGPRS: 1321325 -> 1357101 (2.71 %)
VGPRS: 936000 -> 932576 (-0.37 %)
Spilled SGPRs: 24804 -> 24791 (-0.05 %)
Code Size: 49827960 -> 49642232 (-0.37 %) bytes
Max Waves: 242007 -> 242700 (0.29 %)

Totals from affected shaders:
SGPRS: 290989 -> 326765 (12.29 %)
VGPRS: 244680 -> 241256 (-1.40 %)
Spilled SGPRs: 1442 -> 1429 (-0.90 %)
Code Size: 8126688 -> 7940960 (-2.29 %) bytes
Max Waves: 80952 -> 81645 (0.86 %)

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 27
-rw-r--r-- src/amd/common/ac_shader_abi.h 5
2 files changed, 29 insertions, 3 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 54559b19f02..4f44e32d9f9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1392,10 +1392,31 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef ptr, addr;
+ LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+ unsigned index = nir_intrinsic_base(instr);
- addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
- addr = LLVMBuildAdd(ctx->ac.builder, addr,
- get_src(ctx, instr->src[0]), "");
+ addr = LLVMConstInt(ctx->ac.i32, index, 0);
+ addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
+
+ /* Load constant values from user SGPRS when possible, otherwise
+ * fallback to the default path that loads directly from memory.
+ */
+ if (LLVMIsConstant(src0) &&
+ instr->dest.ssa.bit_size == 32) {
+ unsigned count = instr->dest.ssa.num_components;
+ unsigned offset = index;
+
+ offset += LLVMConstIntGetZExtValue(src0);
+ offset /= 4;
+
+ offset -= ctx->abi->base_inline_push_consts;
+
+ if (offset + count <= ctx->abi->num_inline_push_consts) {
+ return ac_build_gather_values(&ctx->ac,
+ ctx->abi->inline_push_consts + offset,
+ count);
+ }
+ }
ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index ee18e6c1923..c9b2c2eb4b8 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -32,6 +32,8 @@ struct nir_variable;
#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
+#define AC_MAX_INLINE_PUSH_CONSTS 8
+
enum ac_descriptor_type {
AC_DESC_IMAGE,
AC_DESC_FMASK,
@@ -66,6 +68,9 @@ struct ac_shader_abi {
/* Vulkan only */
LLVMValueRef push_constants;
+ LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+ unsigned num_inline_push_consts;
+ unsigned base_inline_push_consts;
LLVMValueRef view_index;
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];