diff options
author | Marek Olšák <[email protected]> | 2016-06-08 13:21:25 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-06-13 18:13:51 +0200 |
commit | 6e1b12c7881fe663cb500cb2f7374f4862bae179 (patch) | |
tree | c2ccb937970fb27c70f364b528736d7bebfd1330 /src/gallium/drivers/radeonsi/si_shader.c | |
parent | 0c0f841e5de27d01312f8857641668ca439b1ab1 (diff) |
radeonsi: enable scratch coalescing
This makes one particular compute shader 8x faster.
Latest LLVM git is required.
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 12 |
1 file changed, 10 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 754b4aff335..f2bd3370c8a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
 	unsigned i;
 	uint32_t scratch_rsrc_dword0 = scratch_va;
 	uint32_t scratch_rsrc_dword1 =
-		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-		| S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
+
+	/* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
+	 * correctly.
+	 */
+	if (HAVE_LLVM >= 0x0309)
+		scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
+	else
+		scratch_rsrc_dword1 |=
+			S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
 
 	for (i = 0 ; i < shader->binary.reloc_count; i++) {
 		const struct radeon_shader_reloc *reloc =