1 files changed, 30 insertions, 21 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b9d203a6391..a0455236e4e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3272,6 +3272,28 @@ std::pair<Temp, unsigned> get_tcs_per_patch_output_vmem_offset(isel_context *ctx
    return offs;
 }
 
+bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex, uint64_t mask, bool *indirect) /* true iff instr's constant I/O offset equals the offset of some slot whose bit is set in mask */
+{ /* ctx is not referenced in this body; *indirect is written on every return path */
+   unsigned off = nir_intrinsic_base(instr) * 4u; /* start from the intrinsic's base, scaled by 4 */
+   nir_src *off_src = nir_get_io_offset_src(instr);
+
+   if (!nir_src_is_const(*off_src)) { /* non-constant offset: nothing to compare against the mask */
+      *indirect = true;
+      return false; /* false here means "unknown", which is signalled through *indirect */
+   }
+
+   *indirect = false;
+   off += nir_src_as_uint(*off_src) * 16u; /* constant offset uses the same x16 scale as the slot indices compared below */
+
+   while (mask) { /* visit each bit that is set in mask */
+      unsigned slot = u_bit_scan64(&mask) + (per_vertex ? 0 : VARYING_SLOT_PATCH0); /* per-patch bit numbers are rebased onto VARYING_SLOT_PATCH0 */
+      if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u)
+         return true;
+   }
+
+   return false; /* constant offset, but no slot in mask maps to it */
+}
+
 void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
 {
    Builder bld(ctx->program, ctx->block);
@@ -3334,31 +3356,18 @@ bool should_write_tcs_patch_output_to_vmem(isel_context *ctx, nir_intrinsic_inst
           (off != (tess_index_outer * 16u));
 }
 
-bool should_write_tcs_patch_output_to_lds(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex)
+bool should_write_tcs_output_to_lds(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex) /* covers both per-vertex and per-patch outputs, selected by per_vertex */
 {
-   unsigned off = nir_intrinsic_base(instr) * 4u;
-   nir_src *off_src = nir_get_io_offset_src(instr);
-
    /* When none of the appropriate outputs are read, we are OK to never write to LDS */
    if (per_vertex ? ctx->shader->info.outputs_read == 0U : ctx->shader->info.patch_outputs_read == 0u)
       return false;
 
-   /* Indirect offset, we can't be sure if this is read or not, always write to LDS */
-   if (!nir_src_is_const(*off_src))
-      return true;
-
-   off += nir_src_as_uint(*off_src) * 16u;
-
-   uint64_t out_rd = per_vertex
-                     ? ctx->shader->info.outputs_read
-                     : ctx->shader->info.patch_outputs_read;
-   while (out_rd) {
-      unsigned slot = u_bit_scan64(&out_rd) + (per_vertex ? 0 : VARYING_SLOT_PATCH0);
-      if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u)
-         return true;
-   }
-
-   return false;
+   uint64_t mask = per_vertex /* pick the "outputs read" bitmask that matches the kind of output being stored */
+                   ? ctx->shader->info.outputs_read
+                   : ctx->shader->info.patch_outputs_read;
+   bool indirect_write; /* left uninitialized here; the helper assigns it on every return path */
+   bool output_read = tcs_driver_location_matches_api_mask(ctx, instr, per_vertex, mask, &indirect_write);
+   return indirect_write || output_read; /* Indirect offset: we can't be sure if this is read or not, so always write to LDS */
 }
 
 void visit_store_tcs_output(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex)
@@ -3375,7 +3384,7 @@ void visit_store_tcs_output(isel_context *ctx, nir_intrinsic_instr *instr, bool
    /* Only write to VMEM if the output is per-vertex or it's per-patch non tess factor */
    bool write_to_vmem = per_vertex || should_write_tcs_patch_output_to_vmem(ctx, instr);
    /* Only write to LDS if the output is read by the shader, or it's per-patch tess factor */
-   bool write_to_lds = !write_to_vmem || should_write_tcs_patch_output_to_lds(ctx, instr, per_vertex);
+   bool write_to_lds = !write_to_vmem || should_write_tcs_output_to_lds(ctx, instr, per_vertex);
 
    if (write_to_vmem) {
       std::pair<Temp, unsigned> vmem_offs = per_vertex