diff options
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0be69aa47e8..069f3d24f62 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6315,10 +6315,31 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) visit_get_buffer_size(ctx, instr); break; case nir_intrinsic_control_barrier: { - unsigned* bsize = ctx->program->info->cs.block_size; - unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2]; - if (workgroup_size > ctx->program->wave_size) + if (ctx->program->chip_class == GFX6 && ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) { + /* GFX6 only (thanks to a hw bug workaround): + * The real barrier instruction isn’t needed, because an entire patch + * always fits into a single wave. + */ + break; + } + + if (ctx->shader->info.stage == MESA_SHADER_COMPUTE) { + unsigned* bsize = ctx->program->info->cs.block_size; + unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2]; + if (workgroup_size > ctx->program->wave_size) + bld.sopp(aco_opcode::s_barrier); + } else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) { + /* For each patch provided during rendering, n TCS shader invocations will be processed, + * where n is the number of vertices in the output patch. + */ + unsigned workgroup_size = ctx->tcs_num_patches * ctx->shader->info.tess.tcs_vertices_out; + if (workgroup_size > ctx->program->wave_size) + bld.sopp(aco_opcode::s_barrier); + } else { + /* We don't know the workgroup size, so always emit the s_barrier. */ bld.sopp(aco_opcode::s_barrier); + } + break; } case nir_intrinsic_group_memory_barrier: |