aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Timur Kristóf <[email protected]> 2020-03-26 19:36:05 +0100
committer Marge Bot <[email protected]> 2020-03-30 13:09:08 +0000
commit 0f847b18bc91dced5725169e8c96bef6c077db90 (patch)
tree e6c85c37e25b57162912d5e1cca0c8f22cad7a5a /src
parent 798dd98d6e530afc5dab2f973785fbbd4e598dee (diff)
aco: Don't store LS VS outputs to LDS when TCS doesn't need them.
Totals:
Code Size: 254764624 -> 254745104 (-0.01 %) bytes

Totals from affected shaders:
VGPRS: 12132 -> 12112 (-0.16 %)
Code Size: 573364 -> 553844 (-3.40 %) bytes

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4165>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4165>
Diffstat (limited to 'src')
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 9
-rw-r--r-- src/amd/compiler/aco_instruction_selection_setup.cpp 7
2 files changed, 14 insertions, 2 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 716853d23ce..fa3d38e1be7 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3366,8 +3366,13 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
unsigned write_mask = nir_intrinsic_write_mask(instr);
unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8u;
- if (ctx->tcs_in_out_eq)
- store_output_to_temps(ctx, instr);
+ if (ctx->tcs_in_out_eq && store_output_to_temps(ctx, instr)) {
+ /* When the TCS only reads this output directly and for the same vertices as its invocation id, it is unnecessary to store the VS output to LDS. */
+ bool indirect_write;
+ bool temp_only_input = tcs_driver_location_matches_api_mask(ctx, instr, true, ctx->tcs_temp_only_inputs, &indirect_write);
+ if (temp_only_input && !indirect_write)
+ return;
+ }
if (ctx->stage == vertex_es || ctx->stage == tess_eval_es) {
/* GFX6-8: ES stage is not merged into GS, data is passed from ES to GS in VMEM. */
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index bd90dcae83d..644bc151fcb 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -103,6 +103,7 @@ struct isel_context {
/* tessellation information */
unsigned tcs_tess_lvl_out_loc;
unsigned tcs_tess_lvl_in_loc;
+ uint64_t tcs_temp_only_inputs;
uint32_t tcs_num_inputs;
uint32_t tcs_num_patches;
bool tcs_in_out_eq = false;
@@ -908,6 +909,12 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
ctx->tcs_num_inputs = ctx->args->options->key.tcs.num_inputs;
} else if (ctx->stage == vertex_tess_control_hs) {
ctx->tcs_num_inputs = util_last_bit64(ctx->args->shader_info->vs.ls_outputs_written);
+
+ if (ctx->tcs_in_out_eq) {
+ ctx->tcs_temp_only_inputs = ~nir->info.tess.tcs_cross_invocation_inputs_read &
+ ~nir->info.inputs_read_indirectly &
+ nir->info.inputs_read;
+ }
} else {
unreachable("Unsupported TCS shader stage");
}