aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2012-02-07 00:59:11 +0100
committerEric Anholt <[email protected]>2012-02-10 18:18:52 -0800
commit7d55f37b0e87db9b3806088797075161a1c9a8bb (patch)
treecc93d22538105f60cd4b920188219429472235bb /src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
parenta1bfafc5a96012c14db9b0d28223ab54feae131c (diff)
i965/fs: Add support for generating MADs.
Improves nexuiz performance by 0.65% +/- 0.10% (n=5) on my gen6, and by 0.39% +/- 0.11% (n=10) on gen7. No statistically significant performance difference on warsow (n=5, but only one shader has MADs).

v2: Add support for MADs in 16-wide by using compression control.
v3: Don't generate MADs when it would force an immediate to be moved to a temp. (It's not clear whether this is a win or not, but it should result in a less questionable change to codegen compared to v2.)

Reviewed-by: Kenneth Graunke <[email protected]> (v2)
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_visitor.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp41
1 files changed, 41 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index ea8cd371786..e670ba79c18 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -182,6 +182,43 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
return true;
}
+bool
+fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
+{ /* Try to emit ir as a single MAD, with ir->operands[mul_arg] as the multiply; true if emitted, false if nothing was emitted. */
+ /* MAD is emitted below with three sources; 3-src instructions were introduced in gen6. */
+ if (intel->gen < 6)
+ return false;
+
+ /* MAD can only handle floating-point data. */
+ if (ir->type != glsl_type::float_type)
+ return false;
+
+ ir_rvalue *nonmul = ir->operands[1 - mul_arg]; /* the other operand of ir; ir->operation itself is not checked in this function */
+ ir_expression *mul = ir->operands[mul_arg]->as_expression();
+
+ if (!mul || mul->operation != ir_binop_mul) /* the chosen operand must be an expression and a multiply */
+ return false;
+
+ if (nonmul->as_constant() || /* give up if any of the three sources is a constant: MAD would force the immediate into a temp */
+ mul->operands[0]->as_constant() ||
+ mul->operands[1]->as_constant())
+ return false;
+
+ nonmul->accept(this); /* visiting an operand leaves its value in this->result */
+ fs_reg src0 = this->result;
+
+ mul->operands[0]->accept(this);
+ fs_reg src1 = this->result;
+
+ mul->operands[1]->accept(this);
+ fs_reg src2 = this->result;
+
+ this->result = fs_reg(this, ir->type); /* destination register built from ir's type */
+ emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); /* sources: the non-multiply operand first, then the two multiply operands */
+
+ return true;
+}
+
void
fs_visitor::visit(ir_expression *ir)
{
@@ -193,6 +230,10 @@ fs_visitor::visit(ir_expression *ir)
if (try_emit_saturate(ir))
return;
+ if (ir->operation == ir_binop_add) {
+ if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
+ return;
+ }
for (operand = 0; operand < ir->get_num_operands(); operand++) {
ir->operands[operand]->accept(this);