diff options
author | Eric Anholt <[email protected]> | 2013-06-07 18:29:50 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2013-06-10 14:04:24 -0700 |
commit | 9a0bd682f958b642519709b879d4ceb39efbcaff (patch) | |
tree | aef2a574074dc5055c0a8f628badeeefd22d22ff /src | |
parent | d28e285d41bbb482384fb2174c92b9f63aa1175f (diff) |
i965/vs: Avoid the MUL/MACH/MOV sequence for small integer multiplies.
We do a lot of multiplies by 3 or 4 for skinning shaders, and we can avoid
the sequence if we just move those small constants into the MUL operand whose low 16 bits the hardware uses.
On pre-IVB, this means reliably putting a constant in a position where it
can't be constant folded, but that's still better than MUL/MACH/MOV.
Improves GLB 2.7 trex performance by 0.788648% +/- 0.23865% (n=29/30)
v2: Fix test for pre-sandybridge.
Reviewed-by: Kenneth Graunke <[email protected]>
Reviewed-by: Matt Turner <[email protected]> (v1)
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 50 |
1 files changed, 37 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 451f7d5991b..02ba603d018 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1313,6 +1313,20 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
    }
 }
 
+static bool
+is_16bit_constant(ir_rvalue *rvalue)
+{
+   ir_constant *constant = rvalue->as_constant();
+   if (!constant)
+      return false;
+
+   if (constant->type != glsl_type::int_type &&
+       constant->type != glsl_type::uint_type)
+      return false;
+
+   return constant->value.u[0] < (1 << 16);
+}
+
 void
 vec4_visitor::visit(ir_expression *ir)
 {
@@ -1472,19 +1486,29 @@ vec4_visitor::visit(ir_expression *ir)
 
    case ir_binop_mul:
       if (ir->type->is_integer()) {
-         /* For integer multiplication, the MUL uses the low 16 bits
-          * of one of the operands (src0 on gen6, src1 on gen7). The
-          * MACH accumulates in the contribution of the upper 16 bits
-          * of that operand.
-          *
-          * FINISHME: Emit just the MUL if we know an operand is small
-          * enough.
-          */
-         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
-
-         emit(MUL(acc, op[0], op[1]));
-         emit(MACH(dst_null_d(), op[0], op[1]));
-         emit(MOV(result_dst, src_reg(acc)));
+         /* For integer multiplication, the MUL uses the low 16 bits of one of
+          * the operands (src0 through SNB, src1 on IVB and later). The MACH
+          * accumulates in the contribution of the upper 16 bits of that
+          * operand. If we can determine that one of the args is in the low
+          * 16 bits, though, we can just emit a single MUL.
+          */
+         if (is_16bit_constant(ir->operands[0])) {
+            if (intel->gen < 7)
+               emit(MUL(result_dst, op[0], op[1]));
+            else
+               emit(MUL(result_dst, op[1], op[0]));
+         } else if (is_16bit_constant(ir->operands[1])) {
+            if (intel->gen < 7)
+               emit(MUL(result_dst, op[1], op[0]));
+            else
+               emit(MUL(result_dst, op[0], op[1]));
+         } else {
+            struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(dst_null_d(), op[0], op[1]));
+            emit(MOV(result_dst, src_reg(acc)));
+         }
       } else {
          emit(MUL(result_dst, op[0], op[1]));
       }