diff options
author | Juha-Pekka Heikkila <[email protected]> | 2014-03-28 15:28:31 +0200 |
---|---|---|
committer | Matt Turner <[email protected]> | 2014-04-16 22:46:45 -0700 |
commit | 2dfbbeca50b95ccdd714d9baa4411c779f6a20d9 (patch) | |
tree | 3e6808e76e24c799fcef726e127787e4cf4a5a3b /src/mesa | |
parent | 09747066714a341b85907c474f18a0d05bbc7071 (diff) |
i965/vec4: Change vec4_visitor::emit_lrp to use MAC for gen<6
This allows us to emit ADD/MUL/MAC instead of MUL/ADD/MUL/ADD,
saving one instruction and two temporary registers.
Reviewed-by: Kenneth Graunke <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Signed-off-by: Juha-Pekka Heikkila <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 22 |
1 files changed, 6 insertions, 16 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 059dc73adcc..8fa0aee996b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1165,24 +1165,14 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
    } else {
       /* Earlier generations don't support three source operations, so we
        * need to emit x*(1-a) + y*a.
-       *
-       * A better way to do this would be:
-       * ADD one_minus_a, negate(a), 1.0f
-       * MUL null, y, a
-       * MAC dst, x, one_minus_a
-       * but we would need to support MAC and implicit accumulator.
        */
-      dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
-      dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
-      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
-      y_times_a.writemask = dst.writemask;
-      one_minus_a.writemask = dst.writemask;
-      x_times_one_minus_a.writemask = dst.writemask;
-
-      emit(MUL(y_times_a, y, a));
+      dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
+      one_minus_a.writemask = dst.writemask;
+
       emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
-      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
-      emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
+      vec4_instruction *mul = emit(MUL(dst_null_f(), y, a));
+      mul->writes_accumulator = true;
+      emit(MAC(dst, x, src_reg(one_minus_a)));
    }
 }
 