diff options
author | Connor Abbott <[email protected]> | 2019-06-12 18:58:13 +0200 |
---|---|---|
committer | Connor Abbott <[email protected]> | 2019-08-05 11:45:18 +0200 |
commit | 74470baebbdacc8fd31c9912eb8c00c0cd102903 (patch) | |
tree | e461603621ae1a21203dcbf2d31df1281bb3d691 /src/amd | |
parent | 3c9144f9e5888e9a31886b64168f63e991da01d0 (diff) |
ac/nir: Lower large indirect variables to scratch
Results from radeonsi NIR:
Totals from affected shaders:
SGPRS: 704 -> 464 (-34.09 %)
VGPRS: 2056 -> 672 (-67.32 %)
Spilled SGPRs: 24 -> 0 (-100.00 %)
Spilled VGPRs: 28406 -> 0 (-100.00 %)
Private memory VGPRs: 0 -> 3182 (0.00 %)
Scratch size: 1064 -> 3228 (203.38 %) dwords per thread
Code Size: 935260 -> 40180 (-95.70 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 28 -> 70 (150.00 %)
Wait states: 0 -> 0 (0.00 %)
Results from radv:
Totals from affected shaders:
SGPRS: 80 -> 48 (-40.00 %)
VGPRS: 204 -> 108 (-47.06 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 256 (0.00 %) dwords per thread
Code Size: 15792 -> 9504 (-39.82 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 1 -> 2 (100.00 %)
Wait states: 0 -> 0 (0.00 %)
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 54 |
1 file changed, 54 insertions, 0 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 53b93f7e1d3..5e25e838f8f 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -42,6 +42,8 @@ struct ac_nir_context { LLVMValueRef *ssa_defs; + LLVMValueRef scratch; + struct hash_table *defs; struct hash_table *phis; struct hash_table *vars; @@ -3573,6 +3575,36 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_mbcnt_amd: result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0])); break; + case nir_intrinsic_load_scratch: { + LLVMValueRef offset = get_src(ctx, instr->src[0]); + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, + offset); + LLVMTypeRef comp_type = + LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + LLVMTypeRef vec_type = + instr->dest.ssa.num_components == 1 ? comp_type : + LLVMVectorType(comp_type, instr->dest.ssa.num_components); + unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, + LLVMPointerType(vec_type, addr_space), ""); + result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + break; + } + case nir_intrinsic_store_scratch: { + LLVMValueRef offset = get_src(ctx, instr->src[1]); + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, + offset); + LLVMTypeRef comp_type = + LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size); + LLVMTypeRef vec_type = + instr->src[0].ssa->num_components == 1 ? 
comp_type : + LLVMVectorType(comp_type, instr->src[0].ssa->num_components); + unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, + LLVMPointerType(vec_type, addr_space), ""); + LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr); + break; + } default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); @@ -4474,6 +4506,18 @@ setup_locals(struct ac_nir_context *ctx, } static void +setup_scratch(struct ac_nir_context *ctx, + struct nir_shader *shader) +{ + if (shader->scratch_size == 0) + return; + + ctx->scratch = ac_build_alloca_undef(&ctx->ac, + LLVMArrayType(ctx->ac.i8, shader->scratch_size), + "scratch"); +} + +static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir) { @@ -4518,6 +4562,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef)); setup_locals(&ctx, func); + setup_scratch(&ctx, nir); if (gl_shader_stage_is_compute(nir->info.stage)) setup_shared(&ctx, nir); @@ -4539,6 +4584,15 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) { + /* Lower large variables to scratch first so that we won't bloat the + * shader by generating large if ladders for them. We later lower + * scratch to alloca's, assuming LLVM won't generate VGPR indexing. + */ + NIR_PASS_V(nir, nir_lower_vars_to_scratch, + nir_var_function_temp, + 256, + glsl_get_natural_size_align_bytes); + /* While it would be nice not to have this flag, we are constrained * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9. */ |