summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2011-09-02 15:18:29 -0700
committer: Eric Anholt <[email protected]> 2011-09-20 11:28:56 -0700
commit: f0c04e6c22babf2aee2ad1ee85dbd6f996be3712 (patch)
tree: 23cc17ad4b994b3e488d290e4aeb6adb4d1fbd8f
parent: 7288d010e0afeade5357502fdc018fc4928330f6 (diff)
i965/vs: Add support for simple algebraic optimizations.
We generate silly code for array access, and it's easier to generally support the cleanup than to specifically avoid the bad code in each place we might generate it.

Removes 4.6% of instructions from 41.6% of shaders in shader-db, particularly savage2/hon and unigine.

v2: Fixes by Ken: Make is_zero/one member functions, and fix a progress flag.

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp 92
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.h 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 1
3 files changed, 96 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2fea8e..1f2cebe157c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,98 @@ vec4_visitor::pack_uniform_registers()
}
}
+bool
+src_reg::is_zero() const
+{
+ if (file != IMM)
+ return false;
+
+ if (type == BRW_REGISTER_TYPE_F) {
+ return imm.f == 0.0;
+ } else {
+ return imm.i == 0;
+ }
+}
+
+bool
+src_reg::is_one() const
+{
+ if (file != IMM)
+ return false;
+
+ if (type == BRW_REGISTER_TYPE_F) {
+ return imm.f == 1.0;
+ } else {
+ return imm.i == 1;
+ }
+}
+
+/**
+ * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
+ *
+ * While GLSL IR also performs this optimization, we end up with it in
+ * our instruction stream for a couple of reasons. One is that we
+ * sometimes generate silly instructions, for example in array access
+ * where we'll generate "ADD offset, index, base" even if base is 0.
+ * The other is that GLSL IR's constant propagation doesn't track the
+ * components of aggregates, so some VS patterns (initialize matrix to
+ * 0, accumulate in vertex blending factors) end up breaking down to
+ * instructions involving 0.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+ bool progress = false;
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_ADD:
+ if (inst->src[1].is_zero()) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[1] = src_reg();
+ progress = true;
+ }
+ break;
+
+ case BRW_OPCODE_MUL:
+ if (inst->src[1].is_zero()) {
+ inst->opcode = BRW_OPCODE_MOV;
+ switch (inst->src[0].type) {
+ case BRW_REGISTER_TYPE_F:
+ inst->src[0] = src_reg(0.0f);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ inst->src[0] = src_reg(0);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ inst->src[0] = src_reg(0u);
+ break;
+ default:
+ assert(!"not reached");
+ inst->src[0] = src_reg(0.0f);
+ break;
+ }
+ inst->src[1] = src_reg();
+ progress = true;
+ } else if (inst->src[1].is_one()) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[1] = src_reg();
+ progress = true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (progress)
+ this->live_intervals_valid = false;
+
+ return progress;
+}
+
/**
* Only a limited number of hardware registers may be used for push
* constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a151e49..058615f089e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,8 @@ public:
}
bool equals(src_reg *r);
+ bool is_zero() const;
+ bool is_one() const;
src_reg(class vec4_visitor *v, const struct glsl_type *type);
@@ -401,6 +403,7 @@ public:
bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
bool opt_copy_propagation();
+ bool opt_algebraic();
vec4_instruction *emit(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f7a0c..7031d2a82da 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
progress = false;
progress = dead_code_eliminate() || progress;
progress = opt_copy_propagation() || progress;
+ progress = opt_algebraic() || progress;
} while (progress);