summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2017-04-07 21:41:10 +0200
committer: Marek Olšák <[email protected]> 2017-04-28 21:47:35 +0200
commit: a98c9ba5809bdd5a31e30caab41984d127966d51 (patch)
tree: a9b1c947a248590f6a647cb4358aeef5004766ef
parent: cfb0798bb3e5f38cf3874083f0f25a2b694b7c54 (diff)
radeonsi/gfx9: add si_shader::previous_stage for merged shaders
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_debug.c | 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 35
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h | 1
3 files changed, 39 insertions, 1 deletion
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 9d0c0c554cf..038c8b47931 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -639,6 +639,10 @@ static void si_print_annotated_shader(struct si_shader *shader,
si_add_split_disasm(shader->prolog->binary.disasm_string,
start_addr, &num_inst, instructions);
}
+ if (shader->previous_stage) {
+ si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+ start_addr, &num_inst, instructions);
+ }
si_add_split_disasm(shader->binary.disasm_string,
start_addr, &num_inst, instructions);
if (shader->epilog) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 446c8119677..9640d8a187e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6149,6 +6149,8 @@ static unsigned si_get_shader_binary_size(struct si_shader *shader)
if (shader->prolog)
size += shader->prolog->binary.code_size;
+ if (shader->previous_stage)
+ size += shader->previous_stage->binary.code_size;
if (shader->epilog)
size += shader->epilog->binary.code_size;
return size;
@@ -6158,6 +6160,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
const struct ac_shader_binary *prolog =
shader->prolog ? &shader->prolog->binary : NULL;
+ const struct ac_shader_binary *previous_stage =
+ shader->previous_stage ? &shader->previous_stage->binary : NULL;
const struct ac_shader_binary *epilog =
shader->epilog ? &shader->epilog->binary : NULL;
const struct ac_shader_binary *mainb = &shader->binary;
@@ -6166,7 +6170,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
unsigned char *ptr;
assert(!prolog || !prolog->rodata_size);
- assert((!prolog && !epilog) || !mainb->rodata_size);
+ assert(!previous_stage || !previous_stage->rodata_size);
+ assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
/* GFX9 can fetch at most 128 bytes past the end of the shader.
@@ -6192,6 +6197,11 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
ptr += prolog->code_size;
}
+ if (previous_stage) {
+ util_memcpy_cpu_to_le32(ptr, previous_stage->code,
+ previous_stage->code_size);
+ ptr += previous_stage->code_size;
+ }
util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
ptr += mainb->code_size;
@@ -6399,6 +6409,9 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
if (shader->prolog)
si_shader_dump_disassembly(&shader->prolog->binary,
debug, "prolog", file);
+ if (shader->previous_stage)
+ si_shader_dump_disassembly(&shader->previous_stage->binary,
+ debug, "previous stage", file);
si_shader_dump_disassembly(&shader->binary, debug, "main", file);
@@ -8573,6 +8586,26 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->prolog->config.num_vgprs);
}
+ if (shader->previous_stage) {
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+ shader->previous_stage->config.num_sgprs);
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->previous_stage->config.num_vgprs);
+ shader->config.spilled_sgprs =
+ MAX2(shader->config.spilled_sgprs,
+ shader->previous_stage->config.spilled_sgprs);
+ shader->config.spilled_vgprs =
+ MAX2(shader->config.spilled_vgprs,
+ shader->previous_stage->config.spilled_vgprs);
+ shader->config.private_mem_vgprs =
+ MAX2(shader->config.private_mem_vgprs,
+ shader->previous_stage->config.private_mem_vgprs);
+ shader->config.scratch_bytes_per_wave =
+ MAX2(shader->config.scratch_bytes_per_wave,
+ shader->previous_stage->config.scratch_bytes_per_wave);
+ shader->info.uses_instanceid |=
+ shader->previous_stage->info.uses_instanceid;
+ }
if (shader->epilog) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->epilog->config.num_sgprs);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 0673c6cfcf1..65da65469b9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -505,6 +505,7 @@ struct si_shader {
struct si_shader *next_variant;
struct si_shader_part *prolog;
+ struct si_shader *previous_stage; /* for GFX9 */
struct si_shader_part *epilog;
struct si_pm4_state *pm4;