diff options
author | Eric Anholt <[email protected]> | 2011-09-02 15:18:29 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2011-09-20 11:28:56 -0700 |
commit | f0c04e6c22babf2aee2ad1ee85dbd6f996be3712 (patch) | |
tree | 23cc17ad4b994b3e488d290e4aeb6adb4d1fbd8f /src | |
parent | 7288d010e0afeade5357502fdc018fc4928330f6 (diff) |
i965/vs: Add support for simple algebraic optimizations.

We generate silly code for array access, and it's easier to generally
support the cleanup than to specifically avoid the bad code in each
place we might generate it.

Removes 4.6% of instructions from 41.6% of shaders in shader-db,
particularly savage2/hon and unigine.

v2: Fixes by Ken: Make is_zero/one member functions, and fix a
progress flag.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 92 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 |
3 files changed, 96 insertions, 0 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2fea8e..1f2cebe157c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,98 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+bool
+src_reg::is_zero() const
+{
+   if (file != IMM)
+      return false;
+
+   if (type == BRW_REGISTER_TYPE_F) {
+      return imm.f == 0.0;
+   } else {
+      return imm.i == 0;
+   }
+}
+
+bool
+src_reg::is_one() const
+{
+   if (file != IMM)
+      return false;
+
+   if (type == BRW_REGISTER_TYPE_F) {
+      return imm.f == 1.0;
+   } else {
+      return imm.i == 1;
+   }
+}
+
+/**
+ * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
+ *
+ * While GLSL IR also performs this optimization, we end up with it in
+ * our instruction stream for a couple of reasons. One is that we
+ * sometimes generate silly instructions, for example in array access
+ * where we'll generate "ADD offset, index, base" even if base is 0.
+ * The other is that GLSL IR's constant propagation doesn't track the
+ * components of aggregates, so some VS patterns (initialize matrix to
+ * 0, accumulate in vertex blending factors) end up breaking down to
+ * instructions involving 0.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_ADD:
+         if (inst->src[1].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();
+            progress = true;
+         }
+         break;
+
+      case BRW_OPCODE_MUL:
+         if (inst->src[1].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            switch (inst->src[0].type) {
+            case BRW_REGISTER_TYPE_F:
+               inst->src[0] = src_reg(0.0f);
+               break;
+            case BRW_REGISTER_TYPE_D:
+               inst->src[0] = src_reg(0);
+               break;
+            case BRW_REGISTER_TYPE_UD:
+               inst->src[0] = src_reg(0u);
+               break;
+            default:
+               assert(!"not reached");
+               inst->src[0] = src_reg(0.0f);
+               break;
+            }
+            inst->src[1] = src_reg();
+            progress = true;
+         } else if (inst->src[1].is_one()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();
+            progress = true;
+         }
+         break;
+      default:
+         break;
+      }
+   }
+
+   if (progress)
+      this->live_intervals_valid = false;
+
+   return progress;
+}
+
 /**
  * Only a limited number of hardware registers may be used for push
  * constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a151e49..058615f089e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,8 @@ public:
    }
 
    bool equals(src_reg *r);
+   bool is_zero() const;
+   bool is_one() const;
 
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
@@ -401,6 +403,7 @@ public:
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();
+   bool opt_algebraic();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f7a0c..7031d2a82da 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
       progress = false;
       progress = dead_code_eliminate() || progress;
       progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;
    } while (progress);
 
 
```