diff options
author | Tom Stellard <[email protected]> | 2014-12-10 09:13:59 -0500 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2015-01-28 21:03:47 +0000 |
commit | 2397a721291457c146c7f4bcd48adcb3b2d979bd (patch) | |
tree | 016628c1fb29de41f0bbca307fa26eac43ae60dd /src/gallium/drivers/radeonsi/si_shader.c | |
parent | 5dcd97f25c217450c9e3e0441246187219a01eca (diff) |
radeonsi: Enable VGPR spilling for all shader types v5
v2:
- Only emit write SPI_TMPRING_SIZE once per packet.
- Use context global scratch buffer.
v3:
- Patch shaders using WRITE_DATA packet instead of map/unmap.
- Emit ICACHE_FLUSH, CS_PARTIAL_FLUSH, PS_PARTIAL_FLUSH, and
VS_PARTIAL_FLUSH when patching shaders.
v4:
- Code cleanups.
- Remove unnecessary multiplies.
v5:
- Patch shaders in system memory and re-upload to vram.
Reviewed-by: Michel Dänzer <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 59 |
1 files changed, 50 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 571ce67c7c3..fb1419ddb4d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -32,6 +32,7 @@ #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_flow.h" +#include "radeon/r600_cs.h" #include "radeon/radeon_llvm.h" #include "radeon/radeon_elf_util.h" #include "radeon/radeon_llvm_emit.h" @@ -46,6 +47,12 @@ #include <errno.h> +static const char *scratch_rsrc_dword0_symbol = + "SCRATCH_RSRC_DWORD0"; + +static const char *scratch_rsrc_dword1_symbol = + "SCRATCH_RSRC_DWORD1"; + struct si_shader_output_values { LLVMValueRef values[4]; @@ -2517,19 +2524,20 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx) } } -void si_shader_binary_read_config(const struct radeon_shader_binary *binary, +void si_shader_binary_read_config(const struct si_screen *sscreen, struct si_shader *shader, unsigned symbol_offset) { unsigned i; const unsigned char *config = - radeon_shader_binary_config_start(binary, symbol_offset); + radeon_shader_binary_config_start(&shader->binary, + symbol_offset); /* XXX: We may be able to emit some of these values directly rather than * extracting fields to be emitted later. */ - for (i = 0; i < binary->config_size_per_symbol; i+= 8) { + for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) { unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); switch (reg) { @@ -2549,6 +2557,7 @@ void si_shader_binary_read_config(const struct radeon_shader_binary *binary, case R_0286CC_SPI_PS_INPUT_ENA: shader->spi_ps_input_ena = value; break; + case R_0286E8_SPI_TMPRING_SIZE: case R_00B860_COMPUTE_TMPRING_SIZE: /* WAVESIZE is in units of 256 dwords. */ shader->scratch_bytes_per_wave = @@ -2562,6 +2571,29 @@ void si_shader_binary_read_config(const struct radeon_shader_binary *binary, } } +void si_shader_apply_scratch_relocs(struct si_context *sctx, + struct si_shader *shader, + uint64_t scratch_va) +{ + unsigned i; + uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff; + uint32_t scratch_rsrc_dword1 = + S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) + | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64); + + for (i = 0 ; i < shader->binary.reloc_count; i++) { + const struct radeon_shader_reloc *reloc = + &shader->binary.relocs[i]; + if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) { + util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset, + &scratch_rsrc_dword0, 4); + } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { + util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset, + &scratch_rsrc_dword1, 4); + } + } +} + int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader, const struct radeon_shader_binary *binary) @@ -2582,7 +2614,7 @@ int si_shader_binary_read(struct si_screen *sscreen, } } - si_shader_binary_read_config(binary, shader, 0); + si_shader_binary_read_config(sscreen, shader, 0); /* copy new shader */ code_size = binary->code_size + binary->rodata_size; @@ -2610,18 +2642,24 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, LLVMModuleRef mod) { int r = 0; - struct radeon_shader_binary binary; bool dump = r600_can_dump_shader(&sscreen->b, shader->selector ? shader->selector->tokens : NULL); - memset(&binary, 0, sizeof(binary)); - r = radeon_llvm_compile(mod, &binary, + r = radeon_llvm_compile(mod, &shader->binary, r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm); if (r) { return r; } - r = si_shader_binary_read(sscreen, shader, &binary); - radeon_shader_binary_free_members(&binary, true); + r = si_shader_binary_read(sscreen, shader, &shader->binary); + + FREE(shader->binary.config); + FREE(shader->binary.rodata); + FREE(shader->binary.global_symbol_offsets); + if (shader->scratch_bytes_per_wave == 0) { + FREE(shader->binary.code); + FREE(shader->binary.relocs); + memset(&shader->binary, 0, sizeof(shader->binary)); + } return r; } @@ -2861,4 +2899,7 @@ void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader) r600_resource_reference(&shader->scratch_bo, NULL); r600_resource_reference(&shader->bo, NULL); + + FREE(shader->binary.code); + FREE(shader->binary.relocs); } |