1 files changed, 24 insertions, 3 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 0be69aa47e8..069f3d24f62 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6315,10 +6315,31 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       visit_get_buffer_size(ctx, instr);
       break;
    case nir_intrinsic_control_barrier: {
-      unsigned* bsize = ctx->program->info->cs.block_size;
-      unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];
-      if (workgroup_size > ctx->program->wave_size)
+      if (ctx->program->chip_class == GFX6 && ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+         /* GFX6 only (thanks to a hw bug workaround):
+          * The real barrier instruction isn’t needed, because an entire patch
+          * always fits into a single wave.
+          */
+         break;
+      }
+
+      if (ctx->shader->info.stage == MESA_SHADER_COMPUTE) {
+         unsigned* bsize = ctx->program->info->cs.block_size;
+         unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];
+         if (workgroup_size > ctx->program->wave_size)
+            bld.sopp(aco_opcode::s_barrier);
+      } else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+         /* For each patch provided during rendering, n TCS shader invocations will be processed,
+          * where n is the number of vertices in the output patch.
+          */
+         unsigned workgroup_size = ctx->tcs_num_patches * ctx->shader->info.tess.tcs_vertices_out;
+         if (workgroup_size > ctx->program->wave_size)
+            bld.sopp(aco_opcode::s_barrier);
+      } else {
+         /* We don't know the workgroup size, so always emit the s_barrier. */
          bld.sopp(aco_opcode::s_barrier);
+      }
+
       break;
    }
    case nir_intrinsic_group_memory_barrier: