diff options
author | Tom Stellard <[email protected]> | 2014-12-09 20:05:44 -0500 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2015-01-20 09:55:44 -0500 |
commit | 73bc0fdb6f65ddb481cf525bf354e1a16bea4d75 (patch) | |
tree | e2fcba5e34fba1389fd181e834d212839ad49828 /src/gallium | |
parent | dfdaf3eb7edac939f5d1a32db2968944852b28fa (diff) |
radeonsi/compute: Use relocs for scratch pointer rather than user sgprs v2
Instead of passing a pointer to the scratch buffer via user sgprs, we
now patch the shader with the buffer address using reloc information
from the LLVM generated ELF.
v2:
- Make sure not to break older LLVM.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 20fec84ed77..4427d3bdbd5 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -36,9 +36,18 @@
 #if HAVE_LLVM < 0x0305
 #define NUM_USER_SGPRS 2
 #else
+/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
+ * LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
+ * with older mesa. */
 #define NUM_USER_SGPRS 4
 #endif
 
+static const char *scratch_rsrc_dword0_symbol =
+	"SCRATCH_RSRC_DWORD0";
+
+static const char *scratch_rsrc_dword1_symbol =
+	"SCRATCH_RSRC_DWORD1";
+
 struct si_compute {
 	struct si_context *ctx;
 
@@ -174,6 +183,35 @@ static unsigned compute_num_waves_for_scratch(
 	return scratch_waves;
 }
 
+static void apply_scratch_relocs(const struct si_screen *sscreen,
+			const struct radeon_shader_binary *binary,
+			struct si_shader *shader, uint64_t scratch_va) {
+	unsigned i;
+	char *ptr;
+	uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
+	uint32_t scratch_rsrc_dword1 =
+		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
+		| S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+
+	if (!binary->reloc_count) {
+		return;
+	}
+
+	ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+					PIPE_TRANSFER_READ_WRITE);
+	for (i = 0 ; i < binary->reloc_count; i++) {
+		const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
+			util_memcpy_cpu_to_le32(ptr + reloc->offset,
+				&scratch_rsrc_dword0, 4);
+		} else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+			util_memcpy_cpu_to_le32(ptr + reloc->offset,
+				&scratch_rsrc_dword1, 4);
+		}
+	}
+	sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+}
+
 static void si_launch_grid(
 		struct pipe_context *ctx,
 		const uint *block_layout, const uint *grid_layout,
@@ -256,6 +294,10 @@ static void si_launch_grid(
 				RADEON_USAGE_READWRITE,
 				RADEON_PRIO_SHADER_RESOURCE_RW);
 
+		/* Patch the shader with the scratch buffer address. */
+		apply_scratch_relocs(sctx->screen,
+			&program->binary, shader, scratch_buffer_va);
+
 	}
 
 	for (i = 0; i < (kernel_args_size / 4); i++) {