aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2019-07-17 15:43:53 +0200
committerSamuel Pitoiset <[email protected]>2019-07-18 10:06:34 +0200
commit8315dbe419c822bc6624ffce7f608f70c1c60fa5 (patch)
treec8d4c6bcc6910021dd663caf93c257e96d14cfa2
parent63d670e350e5249ed91b4bebc59bd7920629eb6c (diff)
radv/gfx10: do not always execute a barrier before the second shader
With NGG, empty waves may still be required to export data. This fixes dEQP-VK.ycbcr.format.*_unorm.geometry_*. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]>
-rw-r--r--src/amd/vulkan/radv_nir_to_llvm.c31
1 files changed, 30 insertions, 1 deletions
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 3e18303879e..7e623414adc 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -4448,8 +4448,37 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
declare_esgs_ring(&ctx);
}
- if (i)
+ bool nested_barrier = false;
+
+ if (i) {
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY &&
+ ctx.options->key.vs_common_out.as_ngg) {
+ nested_barrier = false;
+ } else {
+ nested_barrier = true;
+ }
+ }
+
+ if (nested_barrier) {
+ /* Execute a barrier before the second shader in
+ * a merged shader.
+ *
+ * Execute the barrier inside the conditional block,
+ * so that empty waves can jump directly to s_endpgm,
+ * which will also signal the barrier.
+ *
+ * This is possible in gfx9, because an empty wave
+ * for the second shader does not participate in
+ * the epilogue. With NGG, empty waves may still
+ * be required to export data (e.g. GS output vertices),
+ * so we cannot let them exit early.
+ *
+ * If the shader is TCS and the TCS epilog is present
+ * and contains a barrier, it will wait there and then
+ * reach s_endpgm.
+ */
ac_emit_barrier(&ctx.ac, ctx.stage);
+ }
nir_foreach_variable(variable, &shaders[i]->outputs)
scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);