diff options
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 51 |
1 files changed, 30 insertions, 21 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b9d203a6391..a0455236e4e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3272,6 +3272,28 @@ std::pair<Temp, unsigned> get_tcs_per_patch_output_vmem_offset(isel_context *ctx
    return offs;
 }
 
+bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex, uint64_t mask, bool *indirect) /* Returns true when the constant offset of instr equals the unique index (times 16) of a slot set in mask; a non-constant offset yields false with *indirect set to true. */
+{
+   unsigned off = nir_intrinsic_base(instr) * 4u; /* starting offset taken from the intrinsic's base */
+   nir_src *off_src = nir_get_io_offset_src(instr);
+
+   if (!nir_src_is_const(*off_src)) {
+      *indirect = true; /* offset is not constant, so it cannot be compared against the mask */
+      return false;
+   }
+
+   *indirect = false;
+   off += nir_src_as_uint(*off_src) * 16u; /* add the constant offset source, scaled by 16 */
+
+   while (mask) {
+      unsigned slot = u_bit_scan64(&mask) + (per_vertex ? 0 : VARYING_SLOT_PATCH0); /* per-patch mask bits are relative to VARYING_SLOT_PATCH0 */
+      if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u)
+         return true;
+   }
+
+   return false; /* no slot in the mask matched this offset */
+}
+
 void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
 {
    Builder bld(ctx->program, ctx->block);
@@ -3334,31 +3356,18 @@ bool should_write_tcs_patch_output_to_vmem(isel_context *ctx, nir_intrinsic_inst
           (off != (tess_index_outer * 16u));
 }
 
-bool should_write_tcs_patch_output_to_lds(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex)
+bool should_write_tcs_output_to_lds(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex) /* Decides whether this TCS output store must also go to LDS, based on the outputs_read / patch_outputs_read masks. */
 {
-   unsigned off = nir_intrinsic_base(instr) * 4u;
-   nir_src *off_src = nir_get_io_offset_src(instr);
-
    /* When none of the appropriate outputs are read, we are OK to never write to LDS */
    if (per_vertex ? ctx->shader->info.outputs_read == 0U : ctx->shader->info.patch_outputs_read == 0u)
       return false;
 
-   /* Indirect offset, we can't be sure if this is read or not, always write to LDS */
-   if (!nir_src_is_const(*off_src))
-      return true;
-
-   off += nir_src_as_uint(*off_src) * 16u;
-
-   uint64_t out_rd = per_vertex
-                     ? ctx->shader->info.outputs_read
-                     : ctx->shader->info.patch_outputs_read;
-   while (out_rd) {
-      unsigned slot = u_bit_scan64(&out_rd) + (per_vertex ? 0 : VARYING_SLOT_PATCH0);
-      if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u)
-         return true;
-   }
-
-   return false;
+   uint64_t mask = per_vertex
+                   ? ctx->shader->info.outputs_read
+                   : ctx->shader->info.patch_outputs_read;
+   bool indirect_write; /* assigned by tcs_driver_location_matches_api_mask on every path before it is read */
+   bool output_read = tcs_driver_location_matches_api_mask(ctx, instr, per_vertex, mask, &indirect_write);
+   return indirect_write || output_read; /* Indirect offset, we can't be sure if this is read or not, always write to LDS */
 }
 
 void visit_store_tcs_output(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex)
@@ -3375,7 +3384,7 @@ void visit_store_tcs_output(isel_context *ctx, nir_intrinsic_instr *instr, bool
    /* Only write to VMEM if the output is per-vertex or it's per-patch non tess factor */
    bool write_to_vmem = per_vertex || should_write_tcs_patch_output_to_vmem(ctx, instr);
    /* Only write to LDS if the output is read by the shader, or it's per-patch tess factor */
-   bool write_to_lds = !write_to_vmem || should_write_tcs_patch_output_to_lds(ctx, instr, per_vertex);
+   bool write_to_lds = !write_to_vmem || should_write_tcs_output_to_lds(ctx, instr, per_vertex);
 
    if (write_to_vmem) {
       std::pair<Temp, unsigned> vmem_offs = per_vertex
|