summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2019-05-22 20:23:03 +0100
committerRhys Perry <[email protected]>2019-08-12 22:01:30 +0000
commitda8ed68aca1b077fcbb8da429d8bcea785eeec10 (patch)
tree7dad83058b603b780f1016c0f43ea1b1326fecdf
parentc2fe7a0fb818578d87f12300523d796205edffd5 (diff)
nir: replace nir_move_load_const() with nir_opt_sink()
This is mostly the same as nir_move_load_const() but can also move undef instructions, comparisons and some intrinsics (being careful with loops). v2: actually delete nir_move_load_const.c v3: fix nir_opt_sink() usage in freedreno v3: update Makefile.sources v4: replace get_move_def with nir_can_move_instr and nir_instr_ssa_def v4: handle if uses v4: fix handling of nested loops v5: re-write adjust_block_for_loops v5: re-write setting of use_block for if uses Signed-off-by: Rhys Perry <[email protected]> Co-authored-by: Daniel Schürmann <[email protected]> Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r--src/compiler/Makefile.sources2
-rw-r--r--src/compiler/nir/meson.build2
-rw-r--r--src/compiler/nir/nir.h12
-rw-r--r--src/compiler/nir/nir_move_load_const.c141
-rw-r--r--src/compiler/nir/nir_opt_sink.c223
-rw-r--r--src/freedreno/ir3/ir3_nir.c2
-rw-r--r--src/gallium/drivers/freedreno/a2xx/ir2_nir.c2
7 files changed, 238 insertions, 146 deletions
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index b59ce9b737f..c8dcf5ace10 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -286,7 +286,6 @@ NIR_FILES = \
nir/nir_lower_wpos_center.c \
nir/nir_lower_wpos_ytransform.c \
nir/nir_metadata.c \
- nir/nir_move_load_const.c \
nir/nir_move_vec_src_uses_to_dest.c \
nir/nir_normalize_cubemap_coords.c \
nir/nir_opt_combine_stores.c \
@@ -312,6 +311,7 @@ NIR_FILES = \
nir/nir_opt_rematerialize_compares.c \
nir/nir_opt_remove_phis.c \
nir/nir_opt_shrink_load.c \
+ nir/nir_opt_sink.c \
nir/nir_opt_trivial_continues.c \
nir/nir_opt_undef.c \
nir/nir_opt_vectorize.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index e6b11cb8762..4e3c714de0c 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -165,7 +165,6 @@ files_libnir = files(
'nir_lower_bit_size.c',
'nir_lower_uniforms_to_ubo.c',
'nir_metadata.c',
- 'nir_move_load_const.c',
'nir_move_vec_src_uses_to_dest.c',
'nir_normalize_cubemap_coords.c',
'nir_opt_combine_stores.c',
@@ -191,6 +190,7 @@ files_libnir = files(
'nir_opt_rematerialize_compares.c',
'nir_opt_remove_phis.c',
'nir_opt_shrink_load.c',
+ 'nir_opt_sink.c',
'nir_opt_trivial_continues.c',
'nir_opt_undef.c',
'nir_opt_vectorize.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 8bec9760767..bd6a3617f05 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3584,7 +3584,6 @@ bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes);
bool nir_lower_constant_initializers(nir_shader *shader,
nir_variable_mode modes);
-bool nir_move_load_const(nir_shader *shader);
bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_lower_vec_to_movs(nir_shader *shader);
void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
@@ -3965,6 +3964,17 @@ bool nir_opt_large_constants(nir_shader *shader,
bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+typedef enum {
+ nir_move_const_undef = (1 << 0),
+ nir_move_load_ubo = (1 << 1),
+ nir_move_load_input = (1 << 2),
+ nir_move_comparisons = (1 << 3),
+} nir_move_options;
+
+bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
+
+bool nir_opt_sink(nir_shader *shader, nir_move_options options);
+
bool nir_opt_move_comparisons(nir_shader *shader);
bool nir_opt_move_load_ubo(nir_shader *shader);
diff --git a/src/compiler/nir/nir_move_load_const.c b/src/compiler/nir/nir_move_load_const.c
deleted file mode 100644
index abc53fdce6a..00000000000
--- a/src/compiler/nir/nir_move_load_const.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright © 2018 Red Hat
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Rob Clark ([email protected]>
- *
- */
-
-#include "nir.h"
-
-
-/*
- * A simple pass that moves load_const's into consuming block if
- * they are only consumed in a single block, to try to counter-
- * act nir's tendency to move all load_const to the top of the
- * first block.
- */
-
-/* iterate a ssa def's use's and try to find a more optimal block to
- * move it to, using the dominance tree. In short, if all of the uses
- * are contained in a single block, the load will be moved there,
- * otherwise it will be move to the least common ancestor block of all
- * the uses
- */
-static nir_block *
-get_preferred_block(nir_ssa_def *def)
-{
- nir_block *lca = NULL;
-
- /* hmm, probably ignore if-uses: */
- if (!list_empty(&def->if_uses))
- return NULL;
-
- nir_foreach_use(use, def) {
- nir_instr *instr = use->parent_instr;
- nir_block *use_block = instr->block;
-
- /*
- * Kind of an ugly special-case, but phi instructions
- * need to appear first in the block, so by definition
- * we can't move a load_immed into a block where it is
- * consumed by a phi instruction. We could conceivably
- * move it into a dominator block.
- */
- if (instr->type == nir_instr_type_phi) {
- nir_phi_instr *phi = nir_instr_as_phi(instr);
- nir_block *phi_lca = NULL;
- nir_foreach_phi_src(src, phi)
- phi_lca = nir_dominance_lca(phi_lca, src->pred);
- use_block = phi_lca;
- }
-
- lca = nir_dominance_lca(lca, use_block);
- }
-
- return lca;
-}
-
-/* insert before first non-phi instruction: */
-static void
-insert_after_phi(nir_instr *instr, nir_block *block)
-{
- nir_foreach_instr(instr2, block) {
- if (instr2->type == nir_instr_type_phi)
- continue;
-
- exec_node_insert_node_before(&instr2->node,
- &instr->node);
-
- return;
- }
-
- /* if haven't inserted it, push to tail (ie. empty block or possibly
- * a block only containing phi's?)
- */
- exec_list_push_tail(&block->instr_list, &instr->node);
-}
-
-bool
-nir_move_load_const(nir_shader *shader)
-{
- bool progress = false;
-
- nir_foreach_function(function, shader) {
- if (!function->impl)
- continue;
-
- nir_foreach_block(block, function->impl) {
- nir_metadata_require(function->impl,
- nir_metadata_block_index | nir_metadata_dominance);
-
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_load_const)
- continue;
-
- nir_load_const_instr *load =
- nir_instr_as_load_const(instr);
- nir_block *use_block =
- get_preferred_block(&load->def);
-
- if (!use_block)
- continue;
-
- if (use_block == load->instr.block)
- continue;
-
- exec_node_remove(&load->instr.node);
-
- insert_after_phi(&load->instr, use_block);
-
- load->instr.block = use_block;
-
- progress = true;
- }
- }
-
- nir_metadata_preserve(function->impl,
- nir_metadata_block_index | nir_metadata_dominance);
- }
-
- return progress;
-}
diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c
new file mode 100644
index 00000000000..bdeedde5f48
--- /dev/null
+++ b/src/compiler/nir/nir_opt_sink.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2018 Red Hat
+ * Copyright © 2019 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark ([email protected]>
+ * Daniel Schürmann ([email protected])
+ * Rhys Perry ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+
+/*
+ * A simple pass that moves some instructions into the least common
+ * anscestor of consuming instructions.
+ */
+
+bool
+nir_can_move_instr(nir_instr *instr, nir_move_options options)
+{
+ if ((options & nir_move_const_undef) && instr->type == nir_instr_type_load_const) {
+ return true;
+ }
+
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if ((options & nir_move_load_ubo) && intrin->intrinsic == nir_intrinsic_load_ubo)
+ return true;
+
+ if ((options & nir_move_load_input) &&
+ (intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
+ intrin->intrinsic == nir_intrinsic_load_input))
+ return true;
+ }
+
+ if ((options & nir_move_const_undef) && instr->type == nir_instr_type_ssa_undef) {
+ return true;
+ }
+
+ if ((options & nir_move_comparisons) && instr->type == nir_instr_type_alu &&
+ nir_alu_instr_is_comparison(nir_instr_as_alu(instr))) {
+ return true;
+ }
+
+ return false;
+}
+
+static nir_loop *
+get_innermost_loop(nir_cf_node *node)
+{
+ for (; node != NULL; node = node->parent) {
+ if (node->type == nir_cf_node_loop)
+ return (nir_loop*)node;
+ }
+ return NULL;
+}
+
+/* return last block not after use_block with def_loop as it's innermost loop */
+static nir_block *
+adjust_block_for_loops(nir_block *use_block, nir_loop *def_loop)
+{
+ nir_loop *use_loop = NULL;
+
+ for (nir_cf_node *node = &use_block->cf_node; node != NULL; node = node->parent) {
+ if (def_loop && node == &def_loop->cf_node)
+ break;
+ if (node->type == nir_cf_node_loop)
+ use_loop = nir_cf_node_as_loop(node);
+ }
+ if (use_loop) {
+ return nir_block_cf_tree_prev(nir_loop_first_block(use_loop));
+ } else {
+ return use_block;
+ }
+}
+
+/* iterate a ssa def's use's and try to find a more optimal block to
+ * move it to, using the dominance tree. In short, if all of the uses
+ * are contained in a single block, the load will be moved there,
+ * otherwise it will be move to the least common ancestor block of all
+ * the uses
+ */
+static nir_block *
+get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
+{
+ nir_block *lca = NULL;
+
+ nir_loop *def_loop = NULL;
+ if (!sink_into_loops)
+ def_loop = get_innermost_loop(&def->parent_instr->block->cf_node);
+
+ nir_foreach_use(use, def) {
+ nir_instr *instr = use->parent_instr;
+ nir_block *use_block = instr->block;
+
+ /*
+ * Kind of an ugly special-case, but phi instructions
+ * need to appear first in the block, so by definition
+ * we can't move an instruction into a block where it is
+ * consumed by a phi instruction. We could conceivably
+ * move it into a dominator block.
+ */
+ if (instr->type == nir_instr_type_phi) {
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_block *phi_lca = NULL;
+ nir_foreach_phi_src(src, phi) {
+ if (&src->src == use)
+ phi_lca = nir_dominance_lca(phi_lca, src->pred);
+ }
+ use_block = phi_lca;
+ }
+
+ /* If we're moving a load_ubo or load_interpolated_input, we don't want to
+ * sink it down into loops, which may result in accessing memory or shared
+ * functions multiple times. Sink it just above the start of the loop
+ * where it's used. For load_consts, undefs, and comparisons, we expect
+ * the driver to be able to emit them as simple ALU ops, so sinking as far
+ * in as we can go is probably worth it for register pressure.
+ */
+ if (!sink_into_loops) {
+ use_block = adjust_block_for_loops(use_block, def_loop);
+ assert(nir_block_dominates(def->parent_instr->block, use_block));
+ }
+
+ lca = nir_dominance_lca(lca, use_block);
+ }
+
+ nir_foreach_if_use(use, def) {
+ nir_block *use_block =
+ nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
+
+ if (!sink_into_loops) {
+ use_block = adjust_block_for_loops(use_block, def_loop);
+ assert(nir_block_dominates(def->parent_instr->block, use_block));
+ }
+
+ lca = nir_dominance_lca(lca, use_block);
+ }
+
+ return lca;
+}
+
+/* insert before first non-phi instruction: */
+static void
+insert_after_phi(nir_instr *instr, nir_block *block)
+{
+ nir_foreach_instr(instr2, block) {
+ if (instr2->type == nir_instr_type_phi)
+ continue;
+
+ exec_node_insert_node_before(&instr2->node,
+ &instr->node);
+
+ return;
+ }
+
+ /* if haven't inserted it, push to tail (ie. empty block or possibly
+ * a block only containing phi's?)
+ */
+ exec_list_push_tail(&block->instr_list, &instr->node);
+}
+
+bool
+nir_opt_sink(nir_shader *shader, nir_move_options options)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (!function->impl)
+ continue;
+
+ nir_metadata_require(function->impl,
+ nir_metadata_block_index | nir_metadata_dominance);
+
+ nir_foreach_block_reverse(block, function->impl) {
+ nir_foreach_instr_reverse_safe(instr, block) {
+ if (!nir_can_move_instr(instr, options))
+ continue;
+
+ nir_ssa_def *def = nir_instr_ssa_def(instr);
+ nir_block *use_block =
+ get_preferred_block(def, instr->type != nir_instr_type_intrinsic);
+
+ if (!use_block || use_block == instr->block)
+ continue;
+
+ exec_node_remove(&instr->node);
+
+ insert_after_phi(instr, use_block);
+
+ instr->block = use_block;
+
+ progress = true;
+ }
+ }
+
+ nir_metadata_preserve(function->impl,
+ nir_metadata_block_index | nir_metadata_dominance);
+ }
+
+ return progress;
+}
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 6a4c60b0c1f..260702ba3c7 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -264,7 +264,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
OPT_V(s, nir_remove_dead_variables, nir_var_function_temp);
- OPT_V(s, nir_move_load_const);
+ OPT_V(s, nir_opt_sink, nir_move_const_undef);
if (ir3_shader_debug & IR3_DBG_DISASM) {
debug_printf("----------------------\n");
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 21ecf3a7003..402b79691fb 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -122,7 +122,7 @@ ir2_optimize_nir(nir_shader *s, bool lower)
ir2_optimize_loop(s);
OPT_V(s, nir_remove_dead_variables, nir_var_function_temp);
- OPT_V(s, nir_move_load_const);
+ OPT_V(s, nir_opt_sink, nir_move_const_undef);
/* TODO we dont want to get shaders writing to depth for depth textures */
if (s->info.stage == MESA_SHADER_FRAGMENT) {