summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2012-03-10 13:48:42 -0800
committer Eric Anholt <[email protected]> 2012-04-11 18:08:21 -0700
commit 32ae8d3b321185a85b73ff703d8fc26bd5f48fa7 (patch)
tree d26b5636eb9d17c2e5b88b8c9636add8c4745f17 /src/mesa
parent 3bdccbc3e0185fbca16eada2a76f55c6e3f867b5 (diff)
i965/fs: Try to avoid generating extra MOVs to do saturates.
This change (before the previous two) produced a .23% +/- .11% performance improvement in Unigine Tropics at 1024x768 on IVB. Total instructions: 269270 -> 262649. 614/2148 programs affected (28.6%). 179386 -> 172765 instructions in affected programs (3.7% reduction). v2: Move some of the logic of finding the instruction that produced the result of an expression tree to a helper.
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 29
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 34
3 files changed, 54 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5f3d79d1d09..f9c1483fef8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1717,6 +1717,35 @@ fs_visitor::virtual_grf_interferes(int a, int b)
return start < end;
}
+/**
+ * Possibly returns the instruction that set up @param reg.
+ *
+ * Sometimes we want to take the result of some expression/variable
+ * dereference tree and rewrite the instruction generating it.  The
+ * instructions generated for the tree all write temporaries that are
+ * dead outside of it, so we're free to point such a write elsewhere.
+ *
+ * Only @param end is examined.  NULL is returned if end == start (no
+ * new instruction), if end is predicated or has force_uncompressed or
+ * force_sechalf set, or if its dst is not reg.  The returned instruction
+ * may write more than reg, e.g. the size=4 dst of a texture instruction.
+ */
+fs_inst *
+fs_visitor::get_instruction_generating_reg(fs_inst *start,
+ fs_inst *end,
+ fs_reg reg)
+{
+ if (end == start ||
+ end->predicated ||
+ end->force_uncompressed ||
+ end->force_sechalf ||
+ !reg.equals(&end->dst)) {
+ return NULL;
+ } else {
+ return end;
+ }
+}
+
bool
fs_visitor::run()
{
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 7aebffa699d..d3a1045a604 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -487,6 +487,9 @@ public:
}
int type_size(const struct glsl_type *type);
+ fs_inst *get_instruction_generating_reg(fs_inst *start,
+ fs_inst *end,
+ fs_reg reg);
bool run();
void setup_paramvalues_refs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 00524288f6d..0cb75f9d5b3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -172,12 +172,25 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
if (!sat_val)
return false;
+ fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
+
sat_val->accept(this);
fs_reg src = this->result;
- this->result = fs_reg(this, ir->type);
- fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
- inst->saturate = true;
+ fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
+
+ /* If the last instruction from our accept() didn't generate our
+ * src, generate a saturated MOV
+ */
+ fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
+ if (!modify || modify->regs_written() != 1) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
+ inst->saturate = true;
+ } else {
+ modify->saturate = true;
+ this->result = src;
+ }
+
return true;
}
@@ -591,9 +604,6 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
fs_inst *pre_rhs_inst,
fs_inst *last_rhs_inst)
{
- if (pre_rhs_inst == last_rhs_inst)
- return false; /* No instructions generated to work with. */
-
/* Only attempt if we're doing a direct assignment. */
if (ir->condition ||
!(ir->lhs->type->is_scalar() ||
@@ -602,20 +612,20 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
return false;
/* Make sure the last instruction generated our source reg. */
- if (last_rhs_inst->predicated ||
- last_rhs_inst->force_uncompressed ||
- last_rhs_inst->force_sechalf ||
- !src.equals(&last_rhs_inst->dst))
+ fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
+ last_rhs_inst,
+ src);
+ if (!modify)
return false;
/* If last_rhs_inst wrote a different number of components than our LHS,
* we can't safely rewrite it.
*/
- if (ir->lhs->type->vector_elements != last_rhs_inst->regs_written())
+ if (ir->lhs->type->vector_elements != modify->regs_written())
return false;
/* Success! Rewrite the instruction. */
- last_rhs_inst->dst = dst;
+ modify->dst = dst;
return true;
}