diff options
author | Marek Olšák <[email protected]> | 2016-06-08 00:14:58 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-06-08 19:23:41 +0200 |
commit | a343ab55f725be997d9e74a0b3c715cc58fd5b86 (patch) | |
tree | e98c0be8cc789c29b1bf471e1afc080a70fd6695 /src/gallium/drivers/radeonsi | |
parent | 55b097d004a8d036c0176845122b044b6ddbca10 (diff) |
radeonsi: don't enable scratch just for SGPR spills
Diff from shader-db:
Scratch: 3221504 -> 17408 (-99.46 %) bytes per wave
v2: add "break;"
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 19 |
1 files changed, 17 insertions, 2 deletions
```diff
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6e8eefb2d86..4eb151f5872 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5825,6 +5825,20 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
 	unsigned i;
 	const unsigned char *config =
 		radeon_shader_binary_config_start(binary, symbol_offset);
+	bool really_needs_scratch = false;
+
+	/* LLVM adds SGPR spills to the scratch size.
+	 * Find out if we really need the scratch buffer.
+	 */
+	for (i = 0; i < binary->reloc_count; i++) {
+		const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+
+		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
+		    !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+			really_needs_scratch = true;
+			break;
+		}
+	}
 
 	/* XXX: We may be able to emit some of these values directly rather than
 	 * extracting fields to be emitted later.
@@ -5859,8 +5873,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
 		case R_0286E8_SPI_TMPRING_SIZE:
 		case R_00B860_COMPUTE_TMPRING_SIZE:
 			/* WAVESIZE is in units of 256 dwords. */
-			conf->scratch_bytes_per_wave =
-				G_00B860_WAVESIZE(value) * 256 * 4 * 1;
+			if (really_needs_scratch)
+				conf->scratch_bytes_per_wave =
+					G_00B860_WAVESIZE(value) * 256 * 4;
 			break;
 		default:
 			{
```