aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Juha-Pekka Heikkila <[email protected]> 2014-03-28 15:28:31 +0200
committer Matt Turner <[email protected]> 2014-04-16 22:46:45 -0700
commit 2dfbbeca50b95ccdd714d9baa4411c779f6a20d9 (patch)
tree 3e6808e76e24c799fcef726e127787e4cf4a5a3b
parent 09747066714a341b85907c474f18a0d05bbc7071 (diff)
i965/vec4: Change vec4_visitor::emit_lrp to use MAC for gen<6
This allows us to emit ADD/MUL/MAC instead of MUL/ADD/MUL/ADD, saving one instruction and two temporary registers.

Reviewed-by: Kenneth Graunke <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Signed-off-by: Juha-Pekka Heikkila <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 22
1 files changed, 6 insertions, 16 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 059dc73adcc..8fa0aee996b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1165,24 +1165,14 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
} else {
/* Earlier generations don't support three source operations, so we
* need to emit x*(1-a) + y*a.
- *
- * A better way to do this would be:
- * ADD one_minus_a, negate(a), 1.0f
- * MUL null, y, a
- * MAC dst, x, one_minus_a
- * but we would need to support MAC and implicit accumulator.
*/
- dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
- y_times_a.writemask = dst.writemask;
- one_minus_a.writemask = dst.writemask;
- x_times_one_minus_a.writemask = dst.writemask;
-
- emit(MUL(y_times_a, y, a));
+ dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
+ one_minus_a.writemask = dst.writemask;
+
emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
- emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
- emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
+ vec4_instruction *mul = emit(MUL(dst_null_f(), y, a));
+ mul->writes_accumulator = true;
+ emit(MAC(dst, x, src_reg(one_minus_a)));
}
}