summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matt Turner <[email protected]> 2014-03-02 08:59:50 -0800
committer Matt Turner <[email protected]> 2014-06-19 16:11:52 -0700
commit e974781301601f411c376f9a7f5574adf9f0238a (patch)
tree 7320f031bcf7739f72ecce894826137a82f83e5d
parent f043971097f2c707c3104845066ad2cc72f65708 (diff)
glsl: Optimize (v.x + v.y) + (v.z + v.w) into dot(v, 1.0).
Cuts five instructions out of SynMark's Gl32VSInstancing benchmark.
-rw-r--r-- src/glsl/opt_algebraic.cpp 46
1 files changed, 46 insertions, 0 deletions
diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index d57c3e8bde4..448af6b505b 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -119,6 +119,44 @@ update_type(ir_expression *ir)
ir->type = ir->operands[1]->type;
}
+/**
+ * Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0).
+ *
+ * \param expr0    left operand of the outer add, viewed as an expression
+ *                 (may be NULL)
+ * \param expr1    right operand of the outer add, viewed as an expression
+ *                 (may be NULL)
+ * \param mem_ctx  allocation context passed to new() for the 1.0 constant
+ *
+ * \return a dot() expression over the shared swizzle source when both
+ *         operands are float additions of single-component swizzles that
+ *         together select components 0, 1, 2 and 3 of equal values;
+ *         NULL otherwise.
+ */
+static ir_expression *
+try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
+{
+   if (!expr0 || expr0->operation != ir_binop_add ||
+       !expr1 || expr1->operation != ir_binop_add)
+      return NULL;
+
+   /* The replacement multiplies by a float constant, and dot() is only
+    * defined for floating-point operands.  Integer additions such as
+    * (iv.x + iv.y) + (iv.z + iv.w) must be left untouched.
+    */
+   if (!expr0->type->is_float() || !expr1->type->is_float())
+      return NULL;
+
+   ir_swizzle *const swz[4] = {
+      expr0->operands[0]->as_swizzle(),
+      expr0->operands[1]->as_swizzle(),
+      expr1->operands[0]->as_swizzle(),
+      expr1->operands[1]->as_swizzle(),
+   };
+
+   /* Every addend must be a swizzle selecting exactly one component. */
+   for (unsigned i = 0; i < 4; i++) {
+      if (!swz[i] || swz[i]->mask.num_components != 1)
+         return NULL;
+   }
+
+   /* Each of the four components has to appear; with exactly four addends
+    * this also means none of them is repeated.
+    */
+   bool swiz_seen[4] = {false, false, false, false};
+   for (unsigned i = 0; i < 4; i++)
+      swiz_seen[swz[i]->mask.x] = true;
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (!swiz_seen[i])
+         return NULL;
+   }
+
+   /* All four swizzles must read from the same value. */
+   ir_rvalue *const src = swz[0]->val;
+   if (!src->equals(swz[1]->val) ||
+       !src->equals(swz[2]->val) ||
+       !src->equals(swz[3]->val))
+      return NULL;
+
+   return dot(src, new(mem_ctx) ir_constant(1.0f, 4));
+}
+
void
ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
int op1,
@@ -332,6 +370,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
if (op_const[1] && !op_const[0])
reassociate_constant(ir, 1, op_const[1], op_expr[0]);
+ /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
+ if (options->OptimizeForAOS) {
+ ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
+ mem_ctx);
+ if (expr)
+ return expr;
+ }
+
/* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
*
* (-x + y) * a + x