author      Connor Abbott <[email protected]>        2019-09-25 14:02:48 +0200
committer   Connor Abbott <[email protected]>        2019-10-09 17:55:25 +0000
commit      af9296b8c075d109426ecd1686211088e618103e (patch)
tree        4411287f689f0a50103bf45071f137f8bd5ebeae
parent      b049ebcf90f498f6005701b4fceb9c43e115e4b1 (diff)
nir/sink: Rewrite loop handling logic
Previously, for code like:

    loop {
        loop {
            a = load_ubo()
        }
        use(a)
    }

adjust_block_for_loops() would return the block before the first loop. Now we compute the range of allowed blocks and then walk the dominance tree directly, guaranteeing that we always choose a block that dominates all the uses and is dominated by the definition.

Reviewed-by: Daniel Schürmann <[email protected]>
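In more concrete terms, the new walk starts at the dominance-tree LCA of all uses and climbs the imm_dom chain toward the defining block; whenever the current block sits immediately before a loop that still contains the chosen target, the target is hoisted to that block, i.e. out of the loop. The following standalone sketch (a toy model with a hypothetical toy_block type and toy_* helpers, not the real NIR API) replays that walk on the nested-loop example above, assuming only that block indices follow control-flow order and that imm_dom points at a block's immediate dominator:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for nir_block, reduced to the fields the walk needs. */
struct toy_block {
   unsigned index;              /* position in control-flow order               */
   struct toy_block *imm_dom;   /* immediate dominator, NULL for the entry      */
   bool followed_by_loop;       /* is the next CF node after this block a loop? */
   unsigned loop_before;        /* index of the block just before that loop     */
   unsigned loop_after;         /* index of the block just after that loop      */
};

/* Mirrors loop_contains_block(): a block is inside a loop iff its index lies
 * strictly between the indices of the blocks bracketing the loop. */
static bool
toy_loop_contains_block(unsigned before, unsigned after, const struct toy_block *block)
{
   return block->index > before && block->index < after;
}

/* Mirrors the new adjust_block_for_loops(): walk the dominance tree from the
 * LCA of the uses up to the defining block, hoisting the target out of any
 * loop that starts right after the block currently being visited. */
static struct toy_block *
toy_adjust_block_for_loops(struct toy_block *use_block, struct toy_block *def_block)
{
   for (struct toy_block *cur = use_block; cur != def_block->imm_dom;
        cur = cur->imm_dom) {
      if (cur->followed_by_loop &&
          toy_loop_contains_block(cur->loop_before, cur->loop_after, use_block))
         use_block = cur;
   }
   return use_block;
}

/* CFG of the example from the commit message:
 *   b0, outer loop { b1, inner loop { b2: a = load_ubo() }, b3: use(a) }, b4 */
int main(void)
{
   struct toy_block b[5] = {
      { .index = 0, .followed_by_loop = true, .loop_before = 0, .loop_after = 4 },
      { .index = 1, .followed_by_loop = true, .loop_before = 1, .loop_after = 3 },
      { .index = 2 },   /* a = load_ubo() */
      { .index = 3 },   /* use(a)         */
      { .index = 4 },
   };
   b[1].imm_dom = &b[0];
   b[2].imm_dom = &b[1];
   b[3].imm_dom = &b[2];
   b[4].imm_dom = &b[3];

   /* def in b2, LCA of uses is b3: the load cannot leave the outer loop
    * without moving above its own definition, so b3 is returned. */
   printf("chosen block: b%u\n", toy_adjust_block_for_loops(&b[3], &b[2])->index);

   /* If the def lived in b0 instead, a use in b2 would not drag it into
    * either loop: the walk hoists the target back out to b0. */
   printf("chosen block: b%u\n", toy_adjust_block_for_loops(&b[2], &b[0])->index);
   return 0;
}

Compiled and run, this prints "chosen block: b3" and then "chosen block: b0": the load defined inside the inner loop stays next to its use in b3, since hoisting it further would lift it above its own definition, while a load defined before both loops stays out of them, matching what the rewritten adjust_block_for_loops() is meant to guarantee.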
-rw-r--r--   src/compiler/nir/nir_opt_sink.c   | 75
1 files changed, 40 insertions, 35 deletions
diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c
index bdeedde5f48..f6c7093a0d3 100644
--- a/src/compiler/nir/nir_opt_sink.c
+++ b/src/compiler/nir/nir_opt_sink.c
@@ -76,23 +76,36 @@ get_innermost_loop(nir_cf_node *node)
return NULL;
}
-/* return last block not after use_block with def_loop as it's innermost loop */
-static nir_block *
-adjust_block_for_loops(nir_block *use_block, nir_loop *def_loop)
+static bool
+loop_contains_block(nir_loop *loop, nir_block *block)
{
-   nir_loop *use_loop = NULL;
+   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
-   for (nir_cf_node *node = &use_block->cf_node; node != NULL; node = node->parent) {
-      if (def_loop && node == &def_loop->cf_node)
-         break;
-      if (node->type == nir_cf_node_loop)
-         use_loop = nir_cf_node_as_loop(node);
-   }
-   if (use_loop) {
-      return nir_block_cf_tree_prev(nir_loop_first_block(use_loop));
-   } else {
-      return use_block;
+   return block->index > before->index && block->index < after->index;
+}
+
+/* Given the LCA of all uses and the definition, find a block on the path
+ * between them in the dominance tree that is outside of as many loops as
+ * possible.
+ */
+
+static nir_block *
+adjust_block_for_loops(nir_block *use_block, nir_block *def_block)
+{
+   for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
+        cur_block = cur_block->imm_dom) {
+      nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
+      if (next && next->type == nir_cf_node_loop) {
+         nir_loop *following_loop = nir_cf_node_as_loop(next);
+         if (loop_contains_block(following_loop, use_block)) {
+            use_block = cur_block;
+            continue;
+         }
+      }
   }
+
+   return use_block;
}
/* iterate a ssa def's use's and try to find a more optimal block to
@@ -106,10 +119,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
{
nir_block *lca = NULL;
-   nir_loop *def_loop = NULL;
-   if (!sink_into_loops)
-      def_loop = get_innermost_loop(&def->parent_instr->block->cf_node);
-
nir_foreach_use(use, def) {
nir_instr *instr = use->parent_instr;
nir_block *use_block = instr->block;
@@ -131,18 +140,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
use_block = phi_lca;
}
-      /* If we're moving a load_ubo or load_interpolated_input, we don't want to
-       * sink it down into loops, which may result in accessing memory or shared
-       * functions multiple times. Sink it just above the start of the loop
-       * where it's used. For load_consts, undefs, and comparisons, we expect
-       * the driver to be able to emit them as simple ALU ops, so sinking as far
-       * in as we can go is probably worth it for register pressure.
-       */
-      if (!sink_into_loops) {
-         use_block = adjust_block_for_loops(use_block, def_loop);
-         assert(nir_block_dominates(def->parent_instr->block, use_block));
-      }
-
lca = nir_dominance_lca(lca, use_block);
}
@@ -150,14 +147,22 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
nir_block *use_block =
nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
-      if (!sink_into_loops) {
-         use_block = adjust_block_for_loops(use_block, def_loop);
-         assert(nir_block_dominates(def->parent_instr->block, use_block));
-      }
-
lca = nir_dominance_lca(lca, use_block);
}
+   /* If we're moving a load_ubo or load_interpolated_input, we don't want to
+    * sink it down into loops, which may result in accessing memory or shared
+    * functions multiple times. Sink it just above the start of the loop
+    * where it's used. For load_consts, undefs, and comparisons, we expect
+    * the driver to be able to emit them as simple ALU ops, so sinking as far
+    * in as we can go is probably worth it for register pressure.
+    */
+   if (!sink_into_loops) {
+      lca = adjust_block_for_loops(lca, def->parent_instr->block);
+      assert(nir_block_dominates(def->parent_instr->block, lca));
+   }
+
+
return lca;
}