From 6e1b12c7881fe663cb500cb2f7374f4862bae179 Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Wed, 8 Jun 2016 13:21:25 +0200
Subject: radeonsi: enable scratch coalescing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes one particular compute shader 8x faster.
Latest LLVM git is required.

Reviewed-by: Nicolai Hähnle
---
 src/gallium/drivers/radeonsi/si_shader.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 754b4aff335..f2bd3370c8a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
 	unsigned i;
 	uint32_t scratch_rsrc_dword0 = scratch_va;
 	uint32_t scratch_rsrc_dword1 =
-		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-		| S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
+
+	/* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
+	 * correctly.
+	 */
+	if (HAVE_LLVM >= 0x0309)
+		scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
+	else
+		scratch_rsrc_dword1 |=
+			S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
 
 	for (i = 0 ; i < shader->binary.reloc_count; i++) {
 		const struct radeon_shader_reloc *reloc =
-- 
cgit v1.2.3