diff options
| author | Marek Olšák <[email protected]> | 2016-09-10 00:29:31 +0200 |
|---|---|---|
| committer | Marek Olšák <[email protected]> | 2016-09-12 21:06:57 +0200 |
| commit | 546bc07349d0e53b71766780cf726fe21f7d6983 (patch) | |
| tree | ef256b4ae1740e37f2bd9a0703eeeef8db358edb /src/gallium/drivers/radeonsi | |
| parent | e2fb0441150e81f7ba30db1c8e8583858c3d8d5e (diff) | |
radeonsi: don't preload constants at the beginning of shaders
LLVM can CSE the loads, thus we can always re-load constants before each
use. The decrease in SGPR spilling is huge.
The best improvements are the dumbest ones.
26011 shaders in 14651 tests
Totals:
SGPRS: 1453346 -> 1251920 (-13.86 %)
VGPRS: 742576 -> 728421 (-1.91 %)
Spilled SGPRs: 52298 -> 16644 (-68.17 %)
Spilled VGPRs: 397 -> 369 (-7.05 %)
Scratch VGPRs: 1372 -> 1344 (-2.04 %) dwords per thread
Code Size: 36136488 -> 36001064 (-0.37 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 219315 -> 222221 (1.33 %)
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Edward O'Callaghan <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 31 |
1 files changed, 11 insertions, 20 deletions
```diff
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0b7de182cdb..68017221391 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -107,7 +107,6 @@ struct si_shader_context
 
 	LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
 	LLVMValueRef lds;
-	LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
 	LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
 	LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
 	LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
@@ -1881,12 +1880,19 @@ static LLVMValueRef fetch_constant(
 	idx = reg->Register.Index * 4 + swizzle;
 
 	if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
+		LLVMValueRef c0, c1;
+
+		c0 = buffer_load_const(ctx, ctx->const_buffers[buf],
+				       LLVMConstInt(ctx->i32, idx * 4, 0));
+
 		if (!tgsi_type_is_64bit(type))
-			return bitcast(bld_base, type, ctx->constants[buf][idx]);
+			return bitcast(bld_base, type, c0);
 		else {
+			c1 = buffer_load_const(ctx, ctx->const_buffers[buf],
+					       LLVMConstInt(ctx->i32,
+							    (idx + 1) * 4, 0));
 			return radeon_llvm_emit_fetch_64bit(bld_base, type,
-							    ctx->constants[buf][idx],
-							    ctx->constants[buf][idx + 1]);
+							    c0, c1);
 		}
 	}
 
@@ -5796,25 +5802,12 @@ static void preload_constants(struct si_shader_context *ctx)
 	LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
 
 	for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
-		unsigned i, num_const = info->const_file_max[buf] + 1;
-
-		if (num_const == 0)
+		if (info->const_file_max[buf] == -1)
 			continue;
 
-		/* Allocate space for the constant values */
-		ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
-
 		/* Load the resource descriptor */
 		ctx->const_buffers[buf] =
 			build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
-
-		/* Load the constants, we rely on the code sinking to do the rest */
-		for (i = 0; i < num_const * 4; ++i) {
-			ctx->constants[buf][i] =
-				buffer_load_const(ctx,
-						  ctx->const_buffers[buf],
-						  lp_build_const_int32(gallivm, i * 4));
-		}
 	}
 }
 
@@ -6905,8 +6898,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	}
 
 out:
-	for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
-		FREE(ctx.constants[i]);
 	return r;
 }
 
```