diff options
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 31 |
1 files changed, 30 insertions, 1 deletions
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 3e18303879e..7e623414adc 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -4448,8 +4448,37 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, declare_esgs_ring(&ctx); } - if (i) + bool nested_barrier = false; + + if (i) { + if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY && + ctx.options->key.vs_common_out.as_ngg) { + nested_barrier = false; + } else { + nested_barrier = true; + } + } + + if (nested_barrier) { + /* Execute a barrier before the second shader in + * a merged shader. + * + * Execute the barrier inside the conditional block, + * so that empty waves can jump directly to s_endpgm, + * which will also signal the barrier. + * + * This is possible in gfx9, because an empty wave + * for the second shader does not participate in + * the epilogue. With NGG, empty waves may still + * be required to export data (e.g. GS output vertices), + * so we cannot let them exit early. + * + * If the shader is TCS and the TCS epilog is present + * and contains a barrier, it will wait there and then + * reach s_endpgm. + */ ac_emit_barrier(&ctx.ac, ctx.stage); + } nir_foreach_variable(variable, &shaders[i]->outputs) scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); |