summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_shader.c
diff options
context:
space:
mode:
authorTom Stellard <[email protected]>2014-12-10 09:13:59 -0500
committerTom Stellard <[email protected]>2015-01-28 21:03:47 +0000
commit2397a721291457c146c7f4bcd48adcb3b2d979bd (patch)
tree016628c1fb29de41f0bbca307fa26eac43ae60dd /src/gallium/drivers/radeonsi/si_shader.c
parent5dcd97f25c217450c9e3e0441246187219a01eca (diff)
radeonsi: Enable VGPR spilling for all shader types v5
v2: - Only emit write SPI_TMPRING_SIZE once per packet. - Use context global scratch buffer. v3: - Patch shaders using WRITE_DATA packet instead of map/unmap. - Emit ICACHE_FLUSH, CS_PARTIAL_FLUSH, PS_PARTIAL_FLUSH, and VS_PARTIAL_FLUSH when patching shaders. v4: - Code cleanups. - Remove unnecessary multiplies. v5: - Patch shaders in system memory and re-upload to vram. Reviewed-by: Michel Dänzer <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c59
1 files changed, 50 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 571ce67c7c3..fb1419ddb4d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -32,6 +32,7 @@
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_flow.h"
+#include "radeon/r600_cs.h"
#include "radeon/radeon_llvm.h"
#include "radeon/radeon_elf_util.h"
#include "radeon/radeon_llvm_emit.h"
@@ -46,6 +47,12 @@
#include <errno.h>
+static const char *scratch_rsrc_dword0_symbol =
+ "SCRATCH_RSRC_DWORD0";
+
+static const char *scratch_rsrc_dword1_symbol =
+ "SCRATCH_RSRC_DWORD1";
+
struct si_shader_output_values
{
LLVMValueRef values[4];
@@ -2517,19 +2524,20 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
}
}
-void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+void si_shader_binary_read_config(const struct si_screen *sscreen,
struct si_shader *shader,
unsigned symbol_offset)
{
unsigned i;
const unsigned char *config =
- radeon_shader_binary_config_start(binary, symbol_offset);
+ radeon_shader_binary_config_start(&shader->binary,
+ symbol_offset);
/* XXX: We may be able to emit some of these values directly rather than
* extracting fields to be emitted later.
*/
- for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
+ for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
@@ -2549,6 +2557,7 @@ void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
case R_0286CC_SPI_PS_INPUT_ENA:
shader->spi_ps_input_ena = value;
break;
+ case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
shader->scratch_bytes_per_wave =
@@ -2562,6 +2571,29 @@ void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
}
}
+void si_shader_apply_scratch_relocs(struct si_context *sctx,
+ struct si_shader *shader,
+ uint64_t scratch_va)
+{
+ unsigned i;
+ uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
+ uint32_t scratch_rsrc_dword1 =
+ S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
+ | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+
+ for (i = 0 ; i < shader->binary.reloc_count; i++) {
+ const struct radeon_shader_reloc *reloc =
+ &shader->binary.relocs[i];
+ if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
+ util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
+ &scratch_rsrc_dword0, 4);
+ } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+ util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
+ &scratch_rsrc_dword1, 4);
+ }
+ }
+}
+
int si_shader_binary_read(struct si_screen *sscreen,
struct si_shader *shader,
const struct radeon_shader_binary *binary)
@@ -2582,7 +2614,7 @@ int si_shader_binary_read(struct si_screen *sscreen,
}
}
- si_shader_binary_read_config(binary, shader, 0);
+ si_shader_binary_read_config(sscreen, shader, 0);
/* copy new shader */
code_size = binary->code_size + binary->rodata_size;
@@ -2610,18 +2642,24 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMModuleRef mod)
{
int r = 0;
- struct radeon_shader_binary binary;
bool dump = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
- memset(&binary, 0, sizeof(binary));
- r = radeon_llvm_compile(mod, &binary,
+ r = radeon_llvm_compile(mod, &shader->binary,
r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
if (r) {
return r;
}
- r = si_shader_binary_read(sscreen, shader, &binary);
- radeon_shader_binary_free_members(&binary, true);
+ r = si_shader_binary_read(sscreen, shader, &shader->binary);
+
+ FREE(shader->binary.config);
+ FREE(shader->binary.rodata);
+ FREE(shader->binary.global_symbol_offsets);
+ if (shader->scratch_bytes_per_wave == 0) {
+ FREE(shader->binary.code);
+ FREE(shader->binary.relocs);
+ memset(&shader->binary, 0, sizeof(shader->binary));
+ }
return r;
}
@@ -2861,4 +2899,7 @@ void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
r600_resource_reference(&shader->scratch_bo, NULL);
r600_resource_reference(&shader->bo, NULL);
+
+ FREE(shader->binary.code);
+ FREE(shader->binary.relocs);
}