diff options
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir.h | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_if.c | 87 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a2xx/ir2_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_nir.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_nir.c | 2 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 |
9 files changed, 61 insertions, 42 deletions
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index d3d073d1db8..7cde5e728e4 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -158,7 +158,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_opt_remove_phis); NIR_PASS(progress, shader, nir_opt_dce); } - NIR_PASS(progress, shader, nir_opt_if); + NIR_PASS(progress, shader, nir_opt_if, true); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index bc72d8f83f5..c1ecf5ad561 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3434,7 +3434,7 @@ bool nir_opt_gcm(nir_shader *shader, bool value_number); bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size); -bool nir_opt_if(nir_shader *shader); +bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue); bool nir_opt_intrinsics(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index 4d3183ed151..713bdf0c38a 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -824,47 +824,60 @@ nir_block_ends_in_continue(nir_block *block) * The continue should then be removed by nir_opt_trivial_continues() and the * loop can potentially be unrolled. * - * Note: do_work_2() is only ever blocks and nested loops. We could also nest - * other if-statments in the branch which would allow further continues to - * be removed. However in practice this can result in increased register - * pressure. + * Note: Unless the function param aggressive_last_continue==true do_work_2() + * is only ever blocks and nested loops. We avoid nesting other if-statments + * in the branch as this can result in increased register pressure, and in + * the i965 driver it causes a large amount of spilling in shader-db. + * For RADV however nesting these if-statements allows further continues to be + * remove and provides a significant FPS boost in Doom, which is why we have + * opted for this special bool to enable more aggresive optimisations. + * TODO: The GCM pass solves most of the spilling regressions in i965, if it + * is ever enabled we should consider removing the aggressive_last_continue + * param. */ static bool -opt_if_loop_last_continue(nir_loop *loop) +opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue) { - /* Get the last if-stament in the loop */ + nir_if *nif; + bool then_ends_in_continue; + bool else_ends_in_continue; + + /* Scan the control flow of the loop from the last to the first node + * looking for an if-statement we can optimise. + */ nir_block *last_block = nir_loop_last_block(loop); nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node); while (if_node) { - if (if_node->type == nir_cf_node_if) - break; + if (if_node->type == nir_cf_node_if) { + nif = nir_cf_node_as_if(if_node); + nir_block *then_block = nir_if_last_then_block(nif); + nir_block *else_block = nir_if_last_else_block(nif); - if_node = nir_cf_node_prev(if_node); - } + then_ends_in_continue = nir_block_ends_in_continue(then_block); + else_ends_in_continue = nir_block_ends_in_continue(else_block); - if (!if_node || if_node->type != nir_cf_node_if) - return false; - - nir_if *nif = nir_cf_node_as_if(if_node); - nir_block *then_block = nir_if_last_then_block(nif); - nir_block *else_block = nir_if_last_else_block(nif); + /* If both branches end in a jump do nothing, this should be handled + * by nir_opt_dead_cf(). + */ + if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) && + (else_ends_in_continue || nir_block_ends_in_break(else_block))) + return false; - bool then_ends_in_continue = nir_block_ends_in_continue(then_block); - bool else_ends_in_continue = nir_block_ends_in_continue(else_block); + /* If continue found stop scanning and attempt optimisation, or + */ + if (then_ends_in_continue || else_ends_in_continue || + !aggressive_last_continue) + break; + } - /* If both branches end in a continue do nothing, this should be handled - * by nir_opt_dead_cf(). - */ - if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) && - (else_ends_in_continue || nir_block_ends_in_break(else_block))) - return false; + if_node = nir_cf_node_prev(if_node); + } + /* If we didn't find an if to optimise return */ if (!then_ends_in_continue && !else_ends_in_continue) return false; - /* if the block after the if/else is empty we bail, otherwise we might end - * up looping forever - */ + /* If there is nothing after the if-statement we bail */ if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) && exec_list_is_empty(&last_block->instr_list)) return false; @@ -1327,7 +1340,8 @@ opt_if_merge(nir_if *nif) } static bool -opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) +opt_if_cf_list(nir_builder *b, struct exec_list *cf_list, + bool aggressive_last_continue) { bool progress = false; foreach_list_typed(nir_cf_node, cf_node, node, cf_list) { @@ -1337,8 +1351,10 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) case nir_cf_node_if: { nir_if *nif = nir_cf_node_as_if(cf_node); - progress |= opt_if_cf_list(b, &nif->then_list); - progress |= opt_if_cf_list(b, &nif->else_list); + progress |= opt_if_cf_list(b, &nif->then_list, + aggressive_last_continue); + progress |= opt_if_cf_list(b, &nif->else_list, + aggressive_last_continue); progress |= opt_if_loop_terminator(nif); progress |= opt_if_merge(nif); progress |= opt_if_simplification(b, nif); @@ -1347,10 +1363,12 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) case nir_cf_node_loop: { nir_loop *loop = nir_cf_node_as_loop(cf_node); - progress |= opt_if_cf_list(b, &loop->body); + progress |= opt_if_cf_list(b, &loop->body, + aggressive_last_continue); progress |= opt_simplify_bcsel_of_phi(b, loop); progress |= opt_peel_loop_initial_if(loop); - progress |= opt_if_loop_last_continue(loop); + progress |= opt_if_loop_last_continue(loop, + aggressive_last_continue); break; } @@ -1399,7 +1417,7 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list) } bool -nir_opt_if(nir_shader *shader) +nir_opt_if(nir_shader *shader, bool aggressive_last_continue) { bool progress = false; @@ -1416,7 +1434,8 @@ nir_opt_if(nir_shader *shader) nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); - if (opt_if_cf_list(&b, &function->impl->body)) { + if (opt_if_cf_list(&b, &function->impl->body, + aggressive_last_continue)) { nir_metadata_preserve(function->impl, nir_metadata_none); /* If that made progress, we're no longer really in SSA form. We diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 8b66615a6e0..7a8b9753643 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -147,7 +147,7 @@ ir3_optimize_loop(nir_shader *s) OPT(s, nir_copy_prop); OPT(s, nir_opt_dce); } - progress |= OPT(s, nir_opt_if); + progress |= OPT(s, nir_opt_if, false); progress |= OPT(s, nir_opt_remove_phis); progress |= OPT(s, nir_opt_undef); diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 09e40977fd8..e3cc5560033 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -2066,7 +2066,7 @@ ttn_optimize_nir(nir_shader *nir, bool scalar) NIR_PASS(progress, nir, nir_opt_dce); } - NIR_PASS(progress, nir, nir_opt_if); + NIR_PASS(progress, nir, nir_opt_if, false); NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 6aaff393167..3d4145fccdc 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -94,7 +94,7 @@ ir2_optimize_loop(nir_shader *s) OPT(s, nir_opt_dce); } progress |= OPT(s, nir_opt_loop_unroll, nir_var_all); - progress |= OPT(s, nir_opt_if); + progress |= OPT(s, nir_opt_if, false); progress |= OPT(s, nir_opt_remove_phis); progress |= OPT(s, nir_opt_undef); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 5ac18e2ebc8..938b0efcb76 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -880,7 +880,7 @@ si_lower_nir(struct si_shader_selector* sel) NIR_PASS(progress, sel->nir, nir_copy_prop); NIR_PASS(progress, sel->nir, nir_opt_dce); } - NIR_PASS(progress, sel->nir, nir_opt_if); + NIR_PASS(progress, sel->nir, nir_opt_if, true); NIR_PASS(progress, sel->nir, nir_opt_dead_cf); NIR_PASS(progress, sel->nir, nir_opt_cse); NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 238db902b47..2e63efdc427 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -609,7 +609,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_copy_prop); OPT(nir_opt_dce); } - OPT(nir_opt_if); + OPT(nir_opt_if, false); if (nir->options->max_unroll_iterations != 0) { OPT(nir_opt_loop_unroll, indirect_mask); } diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 9a4e030413b..fb10869c9f9 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir, bool scalar) NIR_PASS(progress, nir, nir_copy_prop); NIR_PASS(progress, nir, nir_opt_dce); } - NIR_PASS(progress, nir, nir_opt_if); + NIR_PASS(progress, nir, nir_opt_if, false); NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); |