ir3: Fix bug with shaders that only exit via discard

discard is supposed to be a terminator, killing the thread, so that it's possible to exit main solely by a discard e.g. inside of an infinite loop. However, it currently isn't treated as a terminator in NIR due to workarounds turning it into demote (d3d-style kill) and even if that were fixed, we probably wouldn't want to treat discard_if as a jump since otherwise the scheduler wouldn't be able to schedule things around it. So, add this workaround which inserts jump instructions as necessary to guarantee that the program always terminates. This fixes a hang in dEQP-VK.graphicsfuzz.while-inside-switch, which conditionally does a discard inside an infinite loop. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4658>
author: Connor Abbott <[email protected]> 2020-04-21 11:36:40 +0200
committer: Marge Bot <[email protected]> 2020-04-22 09:49:40 +0000
commit: 4daa3917a38a6d18ba7cc66071342b9f7fa92f53 (patch)
tree: a86e6828fbe3eba97709701ee5f526449f880ad7 /src/freedreno
parent: 8cfa60eab85bfe08098ddd014da861b9e3f6aca3 (diff)
2 files changed, 71 insertions, 0 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 6cb882002f0..44175888200 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1168,6 +1168,8 @@ static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
 	list_for_each_entry(struct ir3_block, __block, __list, node)
 #define foreach_block_safe(__block, __list) \
 	list_for_each_entry_safe(struct ir3_block, __block, __list, node)
+#define foreach_block_rev(__block, __list) \
+	list_for_each_entry_rev(struct ir3_block, __block, __list, node)
 
 /* iterators for arrays: */
 #define foreach_array(__array, __list) \
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index bed8cd1c03a..d0b0dd83b46 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -592,6 +592,73 @@ block_sched(struct ir3 *ir)
 	}
 }
 
+/* Here we workaround the fact that kill doesn't actually kill the thread as
+ * GL expects. The last instruction always needs to be an end instruction,
+ * which means that if we're stuck in a loop where kill is the only way out,
+ * then we may have to jump out to the end. kill may also have the d3d
+ * semantics of converting the thread to a helper thread, rather than setting
+ * the exec mask to 0, in which case the helper thread could get stuck in an
+ * infinite loop.
+ *
+ * We do this late, both to give the scheduler the opportunity to reschedule
+ * kill instructions earlier and to avoid having to create a separate basic
+ * block.
+ *
+ * TODO: Assuming that the wavefront doesn't stop as soon as all threads are
+ * killed, we might benefit by doing this more aggressively when the remaining
+ * part of the program after the kill is large, since that would let us
+ * skip over the instructions when there are no non-killed threads left.
+ */
+static void
+kill_sched(struct ir3 *ir, struct ir3_shader_variant *so)
+{
+	/* True if we know that this block will always eventually lead to the end
+	 * block:
+	 */
+	bool always_ends = true;
+	bool added = false;
+	struct ir3_block *last_block =
+		list_last_entry(&ir->block_list, struct ir3_block, node);
+
+	foreach_block_rev (block, &ir->block_list) {
+		for (unsigned i = 0; i < 2 && block->successors[i]; i++) {
+			if (block->successors[i]->start_ip <= block->end_ip)
+				always_ends = false;
+		}
+
+		if (always_ends)
+			continue;
+
+		foreach_instr_safe (instr, &block->instr_list) {
+			if (instr->opc != OPC_KILL)
+				continue;
+
+			struct ir3_instruction *br = ir3_instr_create(block, OPC_BR);
+			br->regs[1] = instr->regs[1];
+			br->cat0.target =
+				list_last_entry(&ir->block_list, struct ir3_block, node);
+
+			list_del(&br->node);
+			list_add(&br->node, &instr->node);
+
+			added = true;
+		}
+	}
+
+	if (added) {
+		/* I'm not entirely sure how the branchstack works, but we probably
+		 * need to add at least one entry for the divergence which is resolved
+		 * at the end:
+		 */
+		so->branchstack++;
+
+		/* We don't update predecessors/successors, so we have to do this
+		 * manually:
+		 */
+		mark_jp(last_block);
+	}
+}
+
 /* Insert nop's required to make this a legal/valid shader program: */
 static void
 nop_sched(struct ir3 *ir)
@@ -669,6 +736,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 	*max_bary = ctx->max_bary;
 
 	block_sched(ir);
+	if (so->type == MESA_SHADER_FRAGMENT)
+		kill_sched(ir, so);
 	nop_sched(ir);
 
 	do {
author	Connor Abbott <[email protected]>	2020-04-21 11:36:40 +0200
committer	Marge Bot <[email protected]>	2020-04-22 09:49:40 +0000
commit	4daa3917a38a6d18ba7cc66071342b9f7fa92f53 (patch)
tree	a86e6828fbe3eba97709701ee5f526449f880ad7 /src/freedreno
parent	8cfa60eab85bfe08098ddd014da861b9e3f6aca3 (diff)