summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2013-06-06 10:14:41 -0700
committer Eric Anholt <[email protected]> 2013-06-10 14:04:24 -0700
commit 263a7e4cd992738814575b04d2de24ca0a0ad08a (patch)
tree 6c5e756aa0c4af3f830faff693e57aecf3ea4876 /src
parent 1ff10f92e77625bd17f5825b8f5aee4d355587fc (diff)
i965/vs: Use the MAD instruction when possible.
This is different from how we do it in the FS: we use MAD even when some of the args are constants because, with the relatively unrestrained ability to schedule a MOV that prepares a temporary with that data, we can get lower latency for the sequence of instructions. No significant performance difference on GLB2.7 trex (n=33/34), though it doesn't have that many MADs. I noticed MAD opportunities while reading the code for the DOTA2 bug. Reviewed-by: Kenneth Graunke <[email protected]>; Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 37
4 files changed, 43 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index e6e59bc9af9..a72d6941394 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -468,6 +468,7 @@ public:
int base_offset);
bool try_emit_sat(ir_expression *ir);
+ bool try_emit_mad(ir_expression *ir, int mul_arg);
void resolve_ud_negate(src_reg *reg);
src_reg get_timestamp();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 39eef4b0d65..1a667ebf2b2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -216,6 +216,7 @@ vec4_visitor::try_copy_propagation(struct intel_context *intel,
return false;
bool is_3src_inst = (inst->opcode == BRW_OPCODE_LRP ||
+ inst->opcode == BRW_OPCODE_MAD ||
inst->opcode == BRW_OPCODE_BFE ||
inst->opcode == BRW_OPCODE_BFI2);
if (is_3src_inst && value.file == UNIFORM)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 91101f29b0d..fbb93db6cdd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -772,6 +772,10 @@ vec4_generator::generate_code(exec_list *instructions)
brw_set_acc_write_control(p, 0);
break;
+ case BRW_OPCODE_MAD:
+ brw_MAD(p, dst, src[0], src[1], src[2]);
+ break;
+
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 33c1b2483c4..451f7d5991b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1250,6 +1250,38 @@ vec4_visitor::try_emit_sat(ir_expression *ir)
return true;
}
+bool
+vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
+{
+ /* Try to emit ir as one MAD, treating ir->operands[mul_arg] as the multiply and the other operand as the addend. 3-src instructions were introduced in gen6. */
+ if (intel->gen < 6)
+ return false;
+
+ /* MAD can only handle floating-point data. */
+ if (ir->type->base_type != GLSL_TYPE_FLOAT)
+ return false;
+
+ ir_rvalue *nonmul = ir->operands[1 - mul_arg]; /* the addend: whichever of the two operands is not mul_arg */
+ ir_expression *mul = ir->operands[mul_arg]->as_expression();
+ /* Bail if the candidate operand is not a multiply expression; no operand has been visited at this point. */
+ if (!mul || mul->operation != ir_binop_mul)
+ return false;
+ /* Visit the addend, then the two factors; each visit's value is read back from this->result. */
+ nonmul->accept(this);
+ src_reg src0 = fix_3src_operand(this->result); /* NOTE(review): presumably rewrites the value into a form legal as a 3-src source -- confirm in fix_3src_operand */
+
+ mul->operands[0]->accept(this);
+ src_reg src1 = fix_3src_operand(this->result);
+
+ mul->operands[1]->accept(this);
+ src_reg src2 = fix_3src_operand(this->result);
+ /* this->result becomes a src_reg built for ir->type; the MAD writes it with the addend in src0 and the factors in src1/src2. */
+ this->result = src_reg(this, ir->type);
+ emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
+
+ return true; /* a MAD was emitted and this->result refers to its destination */
+}
+
void
vec4_visitor::emit_bool_comparison(unsigned int op,
dst_reg dst, src_reg src0, src_reg src1)
@@ -1293,6 +1325,11 @@ vec4_visitor::visit(ir_expression *ir)
if (try_emit_sat(ir))
return;
+ if (ir->operation == ir_binop_add) {
+ if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
+ return;
+ }
+
for (operand = 0; operand < ir->get_num_operands(); operand++) {
this->result.file = BAD_FILE;
ir->operands[operand]->accept(this);