summaryrefslogtreecommitdiffstats
path: root/src/compiler/nir/nir_opt_if.c
diff options
context:
space:
mode:
authorTimothy Arceri <[email protected]>2019-04-08 20:13:49 +1000
committerTimothy Arceri <[email protected]>2019-04-09 11:29:41 +1000
commite30804c6024f9b97e3a73266ee08ae6655eea5e2 (patch)
tree89e325b0553434bf011eb3fd86f27db46f1f854e /src/compiler/nir/nir_opt_if.c
parentc6cf602121bec6e2c7447eb4a6302e06e9d902c9 (diff)
nir/radv: remove restrictions on opt_if_loop_last_continue()
When I implemented opt_if_loop_last_continue() I had restricted this pass from moving other if-statements inside the branch opposite the continue. At the time it was causing a bunch of spilling in shader-db for i965. However Samuel Pitoiset noticed that making this pass more aggressive significantly improved the performance of Doom on RADV. Below are the statistics he gathered. 28717 shaders in 14931 tests Totals: SGPRS: 1267317 -> 1267549 (0.02 %) VGPRS: 896876 -> 895920 (-0.11 %) Spilled SGPRs: 24701 -> 26367 (6.74 %) Code Size: 48379452 -> 48507880 (0.27 %) bytes Max Waves: 241159 -> 241190 (0.01 %) Totals from affected shaders: SGPRS: 23584 -> 23816 (0.98 %) VGPRS: 25908 -> 24952 (-3.69 %) Spilled SGPRs: 503 -> 2169 (331.21 %) Code Size: 2471392 -> 2599820 (5.20 %) bytes Max Waves: 586 -> 617 (5.29 %) The codesize increases is related to Wolfenstein II it seems largely due to an increase in phis rather than the existing jumps. This gives +10% FPS with Doom on my Vega56. Rhys Perry also benchmarked Doom on his VEGA64: Before: 72.53 FPS After: 80.77 FPS v2: disable pass on non-AMD drivers Reviewed-by: Ian Romanick <[email protected]> (v1) Acked-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_opt_if.c')
-rw-r--r--src/compiler/nir/nir_opt_if.c87
1 files changed, 53 insertions, 34 deletions
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 4d3183ed151..713bdf0c38a 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -824,47 +824,60 @@ nir_block_ends_in_continue(nir_block *block)
* The continue should then be removed by nir_opt_trivial_continues() and the
* loop can potentially be unrolled.
*
- * Note: do_work_2() is only ever blocks and nested loops. We could also nest
- * other if-statments in the branch which would allow further continues to
- * be removed. However in practice this can result in increased register
- * pressure.
+ * Note: Unless the function param aggressive_last_continue==true do_work_2()
+ * is only ever blocks and nested loops. We avoid nesting other if-statments
+ * in the branch as this can result in increased register pressure, and in
+ * the i965 driver it causes a large amount of spilling in shader-db.
+ * For RADV however nesting these if-statements allows further continues to be
+ * remove and provides a significant FPS boost in Doom, which is why we have
+ * opted for this special bool to enable more aggresive optimisations.
+ * TODO: The GCM pass solves most of the spilling regressions in i965, if it
+ * is ever enabled we should consider removing the aggressive_last_continue
+ * param.
*/
static bool
-opt_if_loop_last_continue(nir_loop *loop)
+opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
{
- /* Get the last if-stament in the loop */
+ nir_if *nif;
+ bool then_ends_in_continue;
+ bool else_ends_in_continue;
+
+ /* Scan the control flow of the loop from the last to the first node
+ * looking for an if-statement we can optimise.
+ */
nir_block *last_block = nir_loop_last_block(loop);
nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
while (if_node) {
- if (if_node->type == nir_cf_node_if)
- break;
+ if (if_node->type == nir_cf_node_if) {
+ nif = nir_cf_node_as_if(if_node);
+ nir_block *then_block = nir_if_last_then_block(nif);
+ nir_block *else_block = nir_if_last_else_block(nif);
- if_node = nir_cf_node_prev(if_node);
- }
+ then_ends_in_continue = nir_block_ends_in_continue(then_block);
+ else_ends_in_continue = nir_block_ends_in_continue(else_block);
- if (!if_node || if_node->type != nir_cf_node_if)
- return false;
-
- nir_if *nif = nir_cf_node_as_if(if_node);
- nir_block *then_block = nir_if_last_then_block(nif);
- nir_block *else_block = nir_if_last_else_block(nif);
+ /* If both branches end in a jump do nothing, this should be handled
+ * by nir_opt_dead_cf().
+ */
+ if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
+ (else_ends_in_continue || nir_block_ends_in_break(else_block)))
+ return false;
- bool then_ends_in_continue = nir_block_ends_in_continue(then_block);
- bool else_ends_in_continue = nir_block_ends_in_continue(else_block);
+ /* If continue found stop scanning and attempt optimisation, or
+ */
+ if (then_ends_in_continue || else_ends_in_continue ||
+ !aggressive_last_continue)
+ break;
+ }
- /* If both branches end in a continue do nothing, this should be handled
- * by nir_opt_dead_cf().
- */
- if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
- (else_ends_in_continue || nir_block_ends_in_break(else_block)))
- return false;
+ if_node = nir_cf_node_prev(if_node);
+ }
+ /* If we didn't find an if to optimise return */
if (!then_ends_in_continue && !else_ends_in_continue)
return false;
- /* if the block after the if/else is empty we bail, otherwise we might end
- * up looping forever
- */
+ /* If there is nothing after the if-statement we bail */
if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
exec_list_is_empty(&last_block->instr_list))
return false;
@@ -1327,7 +1340,8 @@ opt_if_merge(nir_if *nif)
}
static bool
-opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
+opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
+ bool aggressive_last_continue)
{
bool progress = false;
foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
@@ -1337,8 +1351,10 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
case nir_cf_node_if: {
nir_if *nif = nir_cf_node_as_if(cf_node);
- progress |= opt_if_cf_list(b, &nif->then_list);
- progress |= opt_if_cf_list(b, &nif->else_list);
+ progress |= opt_if_cf_list(b, &nif->then_list,
+ aggressive_last_continue);
+ progress |= opt_if_cf_list(b, &nif->else_list,
+ aggressive_last_continue);
progress |= opt_if_loop_terminator(nif);
progress |= opt_if_merge(nif);
progress |= opt_if_simplification(b, nif);
@@ -1347,10 +1363,12 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(cf_node);
- progress |= opt_if_cf_list(b, &loop->body);
+ progress |= opt_if_cf_list(b, &loop->body,
+ aggressive_last_continue);
progress |= opt_simplify_bcsel_of_phi(b, loop);
progress |= opt_peel_loop_initial_if(loop);
- progress |= opt_if_loop_last_continue(loop);
+ progress |= opt_if_loop_last_continue(loop,
+ aggressive_last_continue);
break;
}
@@ -1399,7 +1417,7 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
}
bool
-nir_opt_if(nir_shader *shader)
+nir_opt_if(nir_shader *shader, bool aggressive_last_continue)
{
bool progress = false;
@@ -1416,7 +1434,8 @@ nir_opt_if(nir_shader *shader)
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
- if (opt_if_cf_list(&b, &function->impl->body)) {
+ if (opt_if_cf_list(&b, &function->impl->body,
+ aggressive_last_continue)) {
nir_metadata_preserve(function->impl, nir_metadata_none);
/* If that made progress, we're no longer really in SSA form. We