diff options
author | Eric Anholt <[email protected]> | 2012-02-07 00:59:11 +0100 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-02-10 18:18:52 -0800 |
commit | 7d55f37b0e87db9b3806088797075161a1c9a8bb (patch) | |
tree | cc93d22538105f60cd4b920188219429472235bb | |
parent | a1bfafc5a96012c14db9b0d28223ab54feae131c (diff) |
i965/fs: Add support for generating MADs.
Improves nexuiz performance by 0.65% +/- 0.10% (n=5) on my gen6, and by 0.39%
+/- 0.11% (n=10) on gen7. No statistically significant performance
difference on warsow (n=5, but only one shader has MADs).
v2: Add support for MADs in 16-wide by using compression control.
v3: Don't generate MADs when doing so would force an immediate to be moved to a temp.
(It's not clear whether this is a win or not, but it should result in a less
questionable change to codegen compared to v2.)
Reviewed-by: Kenneth Graunke <[email protected]> (v2)
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 41 |
3 files changed, 56 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5fdc055770a..060aa363746 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -529,6 +529,7 @@ public: fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); + bool try_emit_mad(ir_expression *ir, int mul_arg); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index b68d8cb8679..0c32f085114 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -725,6 +725,20 @@ fs_visitor::generate_code() brw_set_acc_write_control(p, 0); break; + case BRW_OPCODE_MAD: + brw_set_access_mode(p, BRW_ALIGN_16); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MAD(p, dst, src[0], src[1], src[2]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else { + brw_MAD(p, dst, src[0], src[1], src[2]); + } + brw_set_access_mode(p, BRW_ALIGN_1); + break; + case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ea8cd371786..e670ba79c18 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -182,6 +182,43 @@ fs_visitor::try_emit_saturate(ir_expression *ir) return true; } +bool +fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) +{ + /* 3-src instructions were introduced in gen6. 
*/ + if (intel->gen < 6) + return false; + + /* MAD can only handle floating-point data. */ + if (ir->type != glsl_type::float_type) + return false; + + ir_rvalue *nonmul = ir->operands[1 - mul_arg]; + ir_expression *mul = ir->operands[mul_arg]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) + return false; + + if (nonmul->as_constant() || + mul->operands[0]->as_constant() || + mul->operands[1]->as_constant()) + return false; + + nonmul->accept(this); + fs_reg src0 = this->result; + + mul->operands[0]->accept(this); + fs_reg src1 = this->result; + + mul->operands[1]->accept(this); + fs_reg src2 = this->result; + + this->result = fs_reg(this, ir->type); + emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); + + return true; +} + void fs_visitor::visit(ir_expression *ir) { @@ -193,6 +230,10 @@ fs_visitor::visit(ir_expression *ir) if (try_emit_saturate(ir)) return; + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) + return; + } for (operand = 0; operand < ir->get_num_operands(); operand++) { ir->operands[operand]->accept(this); |