diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 92 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 |
3 files changed, 96 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 436de2fea8e..1f2cebe157c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -306,6 +306,98 @@ vec4_visitor::pack_uniform_registers() } } +bool +src_reg::is_zero() const /* true only for an immediate whose stored value compares equal to zero */ +{ + if (file != IMM) /* anything that is not an immediate is never reported as zero */ + return false; + + if (type == BRW_REGISTER_TYPE_F) { /* float immediates are compared through imm.f */ + return imm.f == 0.0; /* == also accepts -0.0f */ + } else { /* every other register type is compared through imm.i */ + return imm.i == 0; + } +} + +bool +src_reg::is_one() const /* true only for an immediate whose stored value compares equal to one */ +{ + if (file != IMM) /* anything that is not an immediate is never reported as one */ + return false; + + if (type == BRW_REGISTER_TYPE_F) { /* float immediates are compared through imm.f */ + return imm.f == 1.0; + } else { /* every other register type is compared through imm.i */ + return imm.i == 1; + } +} + +/** + * Does algebraic optimizations on an immediate second source (a * 0 = 0, a * 1 = a, a + 0 = a). + * + * While GLSL IR also performs this optimization, we end up with it in + * our instruction stream for a couple of reasons. One is that we + * sometimes generate silly instructions, for example in array access + * where we'll generate "ADD offset, index, base" even if base is 0. + * The other is that GLSL IR's constant propagation doesn't track the + * components of aggregates, so some VS patterns (initialize matrix to + * 0, accumulate in vertex blending factors) end up breaking down to + * instructions involving 0. 
+ */ +bool +vec4_visitor::opt_algebraic() +{ + bool progress = false; /* becomes true once any instruction has been rewritten */ + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + switch (inst->opcode) { + case BRW_OPCODE_ADD: /* a + 0 becomes MOV a; only src[1] is tested */ + if (inst->src[1].is_zero()) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = src_reg(); /* overwrite the second source with a default-constructed src_reg */ + progress = true; + } + break; + + case BRW_OPCODE_MUL: + if (inst->src[1].is_zero()) { /* a * 0 becomes MOV 0, the zero immediate being chosen from the type of src[0] */ + inst->opcode = BRW_OPCODE_MOV; + switch (inst->src[0].type) { + case BRW_REGISTER_TYPE_F: + inst->src[0] = src_reg(0.0f); + break; + case BRW_REGISTER_TYPE_D: + inst->src[0] = src_reg(0); + break; + case BRW_REGISTER_TYPE_UD: + inst->src[0] = src_reg(0u); + break; + default: /* any other src[0] type trips the assert; the float zero is what gets stored if execution continues past it */ + assert(!"not reached"); + inst->src[0] = src_reg(0.0f); + break; + } + inst->src[1] = src_reg(); /* overwrite the second source with a default-constructed src_reg */ + progress = true; + } else if (inst->src[1].is_one()) { /* a * 1 becomes MOV a */ + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = src_reg(); + progress = true; + } + break; + default: /* every other opcode is left untouched */ + break; + } + } + + if (progress) /* something was rewritten, so mark the live intervals as not valid */ + this->live_intervals_valid = false; + + return progress; /* callers see whether at least one instruction changed */ +} + /** * Only a limited number of hardware registers may be used for push * constants, so this turns access to the overflowed constants into diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 7739a151e49..058615f089e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -163,6 +163,8 @@ public: } bool equals(src_reg *r); + bool is_zero() const; + bool is_one() const; src_reg(class vec4_visitor *v, const struct glsl_type *type); @@ -401,6 +403,7 @@ public: bool dead_code_eliminate(); bool virtual_grf_interferes(int a, int b); bool opt_copy_propagation(); + bool opt_algebraic(); vec4_instruction *emit(vec4_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index c40c41f7a0c..7031d2a82da 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ 
-615,6 +615,7 @@ vec4_visitor::run() progress = false; progress = dead_code_eliminate() || progress; progress = opt_copy_propagation() || progress; + progress = opt_algebraic() || progress; } while (progress); |