summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2012-02-23 11:51:04 -0800
committerEric Anholt <[email protected]>2012-05-17 10:05:23 -0700
commit05c200bac049ed4760f0af6786e463e2520b9c9c (patch)
tree3628b7cc2b72e515027218f05c8dec523f9e0b40
parentf220f73b9c5aca16ca21ea8bbbbf8718703b12cf (diff)
glsl: Improve the local dead code optimization to eliminate unused channels.
Total instructions: 261582 -> 261316 135/2147 programs affected (6.3%) 36752 -> 36486 instructions in affected programs (0.7% reduction) This excludes a tropics shader that now gets 16-wide mode and throws off the numbers. 5 shaders are hurt: two extra MOVs in 4 tropics shaders it looks like because we don't split register names according to independent webs, and one gstreamer shader where it looks like try_rewrite_rhs_to_dst() is falling on its face. This should also help avoid a regression in VSes from idr's ARB programs to GLSL work.
-rw-r--r--src/glsl/opt_dead_code_local.cpp149
1 files changed, 126 insertions, 23 deletions
diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp
index a81a38fff0f..4af78a72cc3 100644
--- a/src/glsl/opt_dead_code_local.cpp
+++ b/src/glsl/opt_dead_code_local.cpp
@@ -43,16 +43,20 @@ static bool debug = false;
class assignment_entry : public exec_node
{
public:
- assignment_entry(ir_variable *lhs, ir_instruction *ir)
+ assignment_entry(ir_variable *lhs, ir_assignment *ir)
{
assert(lhs);
assert(ir);
this->lhs = lhs;
this->ir = ir;
+ this->available = ir->write_mask;
}
ir_variable *lhs;
- ir_instruction *ir;
+ ir_assignment *ir;
+
+ /* bitmask of xyzw channels written that haven't been used so far. */
+ int available;
};
class kill_for_derefs_visitor : public ir_hierarchical_visitor {
@@ -62,23 +66,52 @@ public:
this->assignments = assignments;
}
- virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ void kill_channels(ir_variable *const var, int used)
{
- ir_variable *const var = ir->variable_referenced();
-
foreach_iter(exec_list_iterator, iter, *this->assignments) {
assignment_entry *entry = (assignment_entry *)iter.get();
if (entry->lhs == var) {
- if (debug)
- printf("kill %s\n", entry->lhs->name);
- entry->remove();
+ if (var->type->is_scalar() || var->type->is_vector()) {
+ if (debug)
+ printf("kill %s (0x%01x - 0x%01x)\n", entry->lhs->name,
+ entry->available, used);
+ entry->available &= ~used;
+ if (!entry->available)
+ entry->remove();
+ } else {
+ if (debug)
+ printf("kill %s\n", entry->lhs->name);
+ entry->remove();
+ }
}
}
+ }
+
+ virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ {
+ kill_channels(ir->var, ~0);
return visit_continue;
}
+ virtual ir_visitor_status visit(ir_swizzle *ir)
+ {
+ ir_dereference_variable *deref = ir->val->as_dereference_variable();
+ if (!deref)
+ return visit_continue;
+
+ int used = 0;
+ used |= 1 << ir->mask.x;
+ used |= 1 << ir->mask.y;
+ used |= 1 << ir->mask.z;
+ used |= 1 << ir->mask.w;
+
+ kill_channels(deref->var, used);
+
+ return visit_continue_with_parent;
+ }
+
private:
exec_list *assignments;
};
@@ -130,21 +163,91 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
assert(var);
/* Now, check if we did a whole-variable assignment. */
- if (!ir->condition && (ir->whole_variable_written() != NULL)) {
- /* We did a whole-variable assignment. So, any instruction in
- * the assignment list with the same LHS is dead.
- */
- if (debug)
- printf("looking for %s to remove\n", var->name);
- foreach_iter(exec_list_iterator, iter, *assignments) {
- assignment_entry *entry = (assignment_entry *)iter.get();
+ if (!ir->condition) {
+ ir_dereference_variable *deref_var = ir->lhs->as_dereference_variable();
- if (entry->lhs == var) {
- if (debug)
- printf("removing %s\n", var->name);
- entry->ir->remove();
- entry->remove();
- progress = true;
+ /* If it's a vector type, we can do per-channel elimination of
+ * use of the RHS.
+ */
+ if (deref_var && (deref_var->var->type->is_scalar() ||
+ deref_var->var->type->is_vector())) {
+
+ if (debug)
+ printf("looking for %s.0x%01x to remove\n", var->name,
+ ir->write_mask);
+
+ foreach_iter(exec_list_iterator, iter, *assignments) {
+ assignment_entry *entry = (assignment_entry *)iter.get();
+
+ if (entry->lhs != var)
+ continue;
+
+ int remove = entry->available & ir->write_mask;
+ if (debug) {
+ printf("%s 0x%01x - 0x%01x = 0x%01x\n",
+ var->name,
+ entry->ir->write_mask,
+ remove, entry->ir->write_mask & ~remove);
+ }
+ if (remove) {
+ progress = true;
+
+ if (debug) {
+ printf("rewriting:\n ");
+ entry->ir->print();
+ printf("\n");
+ }
+
+ entry->ir->write_mask &= ~remove;
+ entry->available &= ~remove;
+ if (entry->ir->write_mask == 0) {
+ /* Delete the dead assignment. */
+ entry->ir->remove();
+ entry->remove();
+ } else {
+ void *mem_ctx = ralloc_parent(entry->ir);
+ /* Reswizzle the RHS arguments according to the new
+ * write_mask.
+ */
+ unsigned components[4];
+ unsigned channels = 0;
+ unsigned next = 0;
+
+ for (int i = 0; i < 4; i++) {
+ if ((entry->ir->write_mask | remove) & (1 << i)) {
+ if (!(remove & (1 << i)))
+ components[channels++] = next;
+ next++;
+ }
+ }
+
+ entry->ir->rhs = new(mem_ctx) ir_swizzle(entry->ir->rhs,
+ components,
+ channels);
+ if (debug) {
+ printf("to:\n ");
+ entry->ir->print();
+ printf("\n");
+ }
+ }
+ }
+ }
+ } else if (ir->whole_variable_written() != NULL) {
+ /* We did a whole-variable assignment. So, any instruction in
+ * the assignment list with the same LHS is dead.
+ */
+ if (debug)
+ printf("looking for %s to remove\n", var->name);
+ foreach_iter(exec_list_iterator, iter, *assignments) {
+ assignment_entry *entry = (assignment_entry *)iter.get();
+
+ if (entry->lhs == var) {
+ if (debug)
+ printf("removing %s\n", var->name);
+ entry->ir->remove();
+ entry->remove();
+ progress = true;
+ }
}
}
}
@@ -160,7 +263,7 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
foreach_iter(exec_list_iterator, iter, *assignments) {
assignment_entry *entry = (assignment_entry *)iter.get();
- printf(" %s\n", entry->lhs->name);
+ printf(" %s (0x%01x)\n", entry->lhs->name, entry->available);
}
}