summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/amd/vulkan/radv_shader.c | 2
-rw-r--r-- src/compiler/nir/nir.h | 2
-rw-r--r-- src/compiler/nir/nir_opt_if.c | 87
-rw-r--r-- src/freedreno/ir3/ir3_nir.c | 2
-rw-r--r-- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2
-rw-r--r-- src/gallium/drivers/freedreno/a2xx/ir2_nir.c | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader_nir.c | 2
-rw-r--r-- src/intel/compiler/brw_nir.c | 2
-rw-r--r-- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2
9 files changed, 61 insertions, 42 deletions
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d3d073d1db8..7cde5e728e4 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -158,7 +158,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
NIR_PASS(progress, shader, nir_opt_remove_phis);
NIR_PASS(progress, shader, nir_opt_dce);
}
- NIR_PASS(progress, shader, nir_opt_if);
+ NIR_PASS(progress, shader, nir_opt_if, true);
NIR_PASS(progress, shader, nir_opt_dead_cf);
NIR_PASS(progress, shader, nir_opt_cse);
NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index bc72d8f83f5..c1ecf5ad561 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3434,7 +3434,7 @@ bool nir_opt_gcm(nir_shader *shader, bool value_number);
bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
-bool nir_opt_if(nir_shader *shader);
+bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
bool nir_opt_intrinsics(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 4d3183ed151..713bdf0c38a 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -824,47 +824,60 @@ nir_block_ends_in_continue(nir_block *block)
* The continue should then be removed by nir_opt_trivial_continues() and the
* loop can potentially be unrolled.
*
- * Note: do_work_2() is only ever blocks and nested loops. We could also nest
- * other if-statments in the branch which would allow further continues to
- * be removed. However in practice this can result in increased register
- * pressure.
+ * Note: Unless the function param aggressive_last_continue==true, do_work_2()
+ * is only ever blocks and nested loops. We avoid nesting other if-statements
+ * in the branch as this can result in increased register pressure, and in
+ * the i965 driver it causes a large amount of spilling in shader-db.
+ * For RADV however nesting these if-statements allows further continues to be
+ * removed and provides a significant FPS boost in Doom, which is why we have
+ * opted for this special bool to enable more aggressive optimisations.
+ * TODO: The GCM pass solves most of the spilling regressions in i965; if it
+ * is ever enabled we should consider removing the aggressive_last_continue
+ * param.
*/
static bool
-opt_if_loop_last_continue(nir_loop *loop)
+opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
{
- /* Get the last if-stament in the loop */
+ nir_if *nif;
+ bool then_ends_in_continue;
+ bool else_ends_in_continue;
+
+ /* Scan the control flow of the loop from the last to the first node
+ * looking for an if-statement we can optimise.
+ */
nir_block *last_block = nir_loop_last_block(loop);
nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
while (if_node) {
- if (if_node->type == nir_cf_node_if)
- break;
+ if (if_node->type == nir_cf_node_if) {
+ nif = nir_cf_node_as_if(if_node);
+ nir_block *then_block = nir_if_last_then_block(nif);
+ nir_block *else_block = nir_if_last_else_block(nif);
- if_node = nir_cf_node_prev(if_node);
- }
+ then_ends_in_continue = nir_block_ends_in_continue(then_block);
+ else_ends_in_continue = nir_block_ends_in_continue(else_block);
- if (!if_node || if_node->type != nir_cf_node_if)
- return false;
-
- nir_if *nif = nir_cf_node_as_if(if_node);
- nir_block *then_block = nir_if_last_then_block(nif);
- nir_block *else_block = nir_if_last_else_block(nif);
+ /* If both branches end in a jump do nothing, this should be handled
+ * by nir_opt_dead_cf().
+ */
+ if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
+ (else_ends_in_continue || nir_block_ends_in_break(else_block)))
+ return false;
- bool then_ends_in_continue = nir_block_ends_in_continue(then_block);
- bool else_ends_in_continue = nir_block_ends_in_continue(else_block);
+ /* Stop scanning at this if-statement if it ends in a continue, or
+ * unconditionally when aggressive_last_continue is not set. */
+ if (then_ends_in_continue || else_ends_in_continue ||
+ !aggressive_last_continue)
+ break;
+ }
- /* If both branches end in a continue do nothing, this should be handled
- * by nir_opt_dead_cf().
- */
- if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
- (else_ends_in_continue || nir_block_ends_in_break(else_block)))
- return false;
+ if_node = nir_cf_node_prev(if_node);
+ }
+ /* If we didn't find an if to optimise return */
if (!then_ends_in_continue && !else_ends_in_continue)
return false;
- /* if the block after the if/else is empty we bail, otherwise we might end
- * up looping forever
- */
+ /* If there is nothing after the if-statement we bail */
if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
exec_list_is_empty(&last_block->instr_list))
return false;
@@ -1327,7 +1340,8 @@ opt_if_merge(nir_if *nif)
}
static bool
-opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
+opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
+ bool aggressive_last_continue)
{
bool progress = false;
foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
@@ -1337,8 +1351,10 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
case nir_cf_node_if: {
nir_if *nif = nir_cf_node_as_if(cf_node);
- progress |= opt_if_cf_list(b, &nif->then_list);
- progress |= opt_if_cf_list(b, &nif->else_list);
+ progress |= opt_if_cf_list(b, &nif->then_list,
+ aggressive_last_continue);
+ progress |= opt_if_cf_list(b, &nif->else_list,
+ aggressive_last_continue);
progress |= opt_if_loop_terminator(nif);
progress |= opt_if_merge(nif);
progress |= opt_if_simplification(b, nif);
@@ -1347,10 +1363,12 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(cf_node);
- progress |= opt_if_cf_list(b, &loop->body);
+ progress |= opt_if_cf_list(b, &loop->body,
+ aggressive_last_continue);
progress |= opt_simplify_bcsel_of_phi(b, loop);
progress |= opt_peel_loop_initial_if(loop);
- progress |= opt_if_loop_last_continue(loop);
+ progress |= opt_if_loop_last_continue(loop,
+ aggressive_last_continue);
break;
}
@@ -1399,7 +1417,7 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
}
bool
-nir_opt_if(nir_shader *shader)
+nir_opt_if(nir_shader *shader, bool aggressive_last_continue)
{
bool progress = false;
@@ -1416,7 +1434,8 @@ nir_opt_if(nir_shader *shader)
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
- if (opt_if_cf_list(&b, &function->impl->body)) {
+ if (opt_if_cf_list(&b, &function->impl->body,
+ aggressive_last_continue)) {
nir_metadata_preserve(function->impl, nir_metadata_none);
/* If that made progress, we're no longer really in SSA form. We
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 8b66615a6e0..7a8b9753643 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -147,7 +147,7 @@ ir3_optimize_loop(nir_shader *s)
OPT(s, nir_copy_prop);
OPT(s, nir_opt_dce);
}
- progress |= OPT(s, nir_opt_if);
+ progress |= OPT(s, nir_opt_if, false);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 09e40977fd8..e3cc5560033 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -2066,7 +2066,7 @@ ttn_optimize_nir(nir_shader *nir, bool scalar)
NIR_PASS(progress, nir, nir_opt_dce);
}
- NIR_PASS(progress, nir, nir_opt_if);
+ NIR_PASS(progress, nir, nir_opt_if, false);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 6aaff393167..3d4145fccdc 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -94,7 +94,7 @@ ir2_optimize_loop(nir_shader *s)
OPT(s, nir_opt_dce);
}
progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
- progress |= OPT(s, nir_opt_if);
+ progress |= OPT(s, nir_opt_if, false);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5ac18e2ebc8..938b0efcb76 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -880,7 +880,7 @@ si_lower_nir(struct si_shader_selector* sel)
NIR_PASS(progress, sel->nir, nir_copy_prop);
NIR_PASS(progress, sel->nir, nir_opt_dce);
}
- NIR_PASS(progress, sel->nir, nir_opt_if);
+ NIR_PASS(progress, sel->nir, nir_opt_if, true);
NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
NIR_PASS(progress, sel->nir, nir_opt_cse);
NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 238db902b47..2e63efdc427 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -609,7 +609,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_copy_prop);
OPT(nir_opt_dce);
}
- OPT(nir_opt_if);
+ OPT(nir_opt_if, false);
if (nir->options->max_unroll_iterations != 0) {
OPT(nir_opt_loop_unroll, indirect_mask);
}
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 9a4e030413b..fb10869c9f9 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir, bool scalar)
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_dce);
}
- NIR_PASS(progress, nir, nir_opt_if);
+ NIR_PASS(progress, nir, nir_opt_if, false);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);