summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2016-04-29 22:06:37 -0700
committerKenneth Graunke <[email protected]>2016-06-06 14:14:31 -0700
commit0756e3a25ce5e81773a54942d737ba4643e2f7dc (patch)
tree97e136540ef99ab55219f5b295aadbaa93c8d791 /src
parent08ddfe7b2fa9f577ba00c8c05c5604460942f5a8 (diff)
glsl: Make opt_copy_propagation actually propagate into loops.
We've had a FINISHME here since Eric originally wrote the code in 2010. This patch implements his suggested approach, which makes us actually able to copy propagate into the loops, at the unfortunate cost of making this pass even more expensive. The shader-db statistics are not terribly impressive: total instructions in shared programs: 9008589 -> 9008613 (0.00%) instructions in affected programs: 4293 -> 4317 (0.56%) helped: 0 HURT: 10 total cycles in shared programs: 78550978 -> 78575760 (0.03%) cycles in affected programs: 655426 -> 680208 (3.78%) helped: 75 HURT: 88 GAINED: 2 Most of the "regressions" appear to be us successfully copy propagating uniforms, which i965 is loading as pull constants instead of push, so we occasionally have two pulls instead of one. That doesn't seem like this pass's job - it's propagating correctly, and we should be smarter about pull loads in the backend. This patch is also useful for a couple of reasons: 1. It can clean up copies created by varying packing (previously, we couldn't if the uses were inside a loop). This fixes a bug when interpolateAt*() is used on a packed varying inside a loop: glsl_to_nir struggles to see through the extra copy and mistakenly believed the variable was not an input. 2. It will help propagate uniform array access created by lower_const_array_to_uniforms(). Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/compiler/glsl/opt_copy_propagation.cpp30
1 files changed, 24 insertions, 6 deletions
diff --git a/src/compiler/glsl/opt_copy_propagation.cpp b/src/compiler/glsl/opt_copy_propagation.cpp
index ae62921a0df..807ba8f5738 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -83,6 +83,7 @@ public:
}
virtual ir_visitor_status visit(class ir_dereference_variable *);
+ void handle_loop(class ir_loop *, bool keep_acp);
virtual ir_visitor_status visit_enter(class ir_loop *);
virtual ir_visitor_status visit_enter(class ir_function_signature *);
virtual ir_visitor_status visit_enter(class ir_function *);
@@ -252,21 +253,24 @@ ir_copy_propagation_visitor::visit_enter(ir_if *ir)
return visit_continue_with_parent;
}
-ir_visitor_status
-ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
+void
+ir_copy_propagation_visitor::handle_loop(ir_loop *ir, bool keep_acp)
{
exec_list *orig_acp = this->acp;
exec_list *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
- /* FINISHME: For now, the initial acp for loops is totally empty.
- * We could go through once, then go through again with the acp
- * cloned minus the killed entries after the first run through.
- */
this->acp = new(mem_ctx) exec_list;
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
+ if (keep_acp) {
+ /* Populate the initial acp with a copy of the original */
+ foreach_in_list(acp_entry, a, orig_acp) {
+ this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs));
+ }
+ }
+
visit_list_elements(this, &ir->body_instructions);
if (this->killed_all) {
@@ -284,6 +288,20 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
}
ralloc_free(new_kills);
+}
+
+ir_visitor_status
+ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
+{
+ /* Make a conservative first pass over the loop with an empty ACP set.
+ * This also removes any killed entries from the original ACP set.
+ */
+ handle_loop(ir, false);
+
+ /* Then, run it again with the real ACP set, minus any killed entries.
+ * This takes care of propagating values from before the loop into it.
+ */
+ handle_loop(ir, true);
/* already descended into the children. */
return visit_continue_with_parent;