summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorTom Stellard <[email protected]>2014-12-09 20:05:44 -0500
committerTom Stellard <[email protected]>2015-01-20 09:55:44 -0500
commit73bc0fdb6f65ddb481cf525bf354e1a16bea4d75 (patch)
treee2fcba5e34fba1389fd181e834d212839ad49828 /src/gallium
parentdfdaf3eb7edac939f5d1a32db2968944852b28fa (diff)
radeonsi/compute: Use relocs for scratch pointer rather than user sgprs v2
Instead of passing a pointer to the scratch buffer via user SGPRs, we now patch the shader with the buffer address using relocation information from the LLVM-generated ELF.
v2:
- Make sure not to break older LLVM.
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 20fec84ed77..4427d3bdbd5 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -36,9 +36,18 @@
#if HAVE_LLVM < 0x0305
#define NUM_USER_SGPRS 2
#else
+/* XXX: Even though we no longer pass the scratch buffer via user SGPRs,
+ * LLVM still expects us to specify 4 USER_SGPRS so that it remains compatible
+ * with older Mesa. */
#define NUM_USER_SGPRS 4
#endif
+/* Relocation names matched against reloc->name when patching the shader with
+ * the two scratch resource dwords. */
+static const char *scratch_rsrc_dword0_symbol = "SCRATCH_RSRC_DWORD0";
+
+static const char *scratch_rsrc_dword1_symbol = "SCRATCH_RSRC_DWORD1";
+
struct si_compute {
struct si_context *ctx;
@@ -174,6 +183,35 @@ static unsigned compute_num_waves_for_scratch(
return scratch_waves;
}
+/**
+ * Patch the scratch buffer resource dwords into the shader's buffer object.
+ *
+ * Walks the relocation table of \p binary. For every entry whose name is
+ * SCRATCH_RSRC_DWORD0 or SCRATCH_RSRC_DWORD1, the matching 32-bit value is
+ * written (little-endian) at reloc->offset inside the mapped shader buffer.
+ * Relocations with any other name are ignored. Nothing is mapped or written
+ * when the binary has no relocations.
+ *
+ * \param sscreen     screen whose winsys maps and unmaps the shader buffer
+ * \param binary      shader binary providing the relocation table
+ * \param shader      shader whose buffer object (shader->bo) is patched
+ * \param scratch_va  64-bit address of the scratch buffer
+ */
+static void apply_scratch_relocs(const struct si_screen *sscreen,
+			const struct radeon_shader_binary *binary,
+			struct si_shader *shader, uint64_t scratch_va) {
+	unsigned i;
+	char *ptr;
+	/* DWORD0 carries the low 32 bits of the scratch address. */
+	uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
+	/* DWORD1 combines the upper address bits with the stride field, which
+	 * is derived from the per-wave scratch size divided by 64. */
+	uint32_t scratch_rsrc_dword1 =
+		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
+		| S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+
+	if (!binary->reloc_count) {
+		return;
+	}
+
+	ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+					PIPE_TRANSFER_READ_WRITE);
+	if (!ptr) {
+		/* Mapping failed: there is nothing to patch, and writing
+		 * through a NULL base pointer would crash. */
+		return;
+	}
+
+	for (i = 0 ; i < binary->reloc_count; i++) {
+		const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
+			util_memcpy_cpu_to_le32(ptr + reloc->offset,
+				&scratch_rsrc_dword0, 4);
+		} else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+			util_memcpy_cpu_to_le32(ptr + reloc->offset,
+				&scratch_rsrc_dword1, 4);
+		}
+	}
+	sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+}
+
static void si_launch_grid(
struct pipe_context *ctx,
const uint *block_layout, const uint *grid_layout,
@@ -256,6 +294,10 @@ static void si_launch_grid(
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW);
+ /* Patch the shader with the scratch buffer address. */
+ apply_scratch_relocs(sctx->screen,
+ &program->binary, shader, scratch_buffer_va);
+
}
for (i = 0; i < (kernel_args_size / 4); i++) {