diff options
author | Eric Anholt <[email protected]> | 2012-03-10 13:48:42 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-04-11 18:08:21 -0700 |
commit | 32ae8d3b321185a85b73ff703d8fc26bd5f48fa7 (patch) | |
tree | d26b5636eb9d17c2e5b88b8c9636add8c4745f17 /src/mesa | |
parent | 3bdccbc3e0185fbca16eada2a76f55c6e3f867b5 (diff) |
i965/fs: Try to avoid generating extra MOVs to do saturates.
This change (before the previous two) produced a 0.23% +/- 0.11%
performance improvement in Unigine Tropics at 1024x768 on Ivy Bridge (IVB).
Total instructions: 269270 -> 262649
614/2148 programs affected (28.6%)
179386 -> 172765 instructions in affected programs (3.7% reduction)
v2: Move some of the logic of finding the instruction that produced
the result of an expression tree to a helper.
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 29 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 34 |
3 files changed, 54 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5f3d79d1d09..f9c1483fef8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1717,6 +1717,35 @@ fs_visitor::virtual_grf_interferes(int a, int b) return start < end; } +/** + * Possibly returns an instruction that set up @param reg. + * + * Sometimes we want to take the result of some expression/variable + * dereference tree and rewrite the instruction generating the result + * of the tree. When processing the tree, we know that the + * instructions generated are all writing temporaries that are dead + * outside of this tree. So, if we have some instructions that write + * a temporary, we're free to point that temp write somewhere else. + * + * Note that this doesn't guarantee that the instruction generated + * only reg -- it might be the size=4 destination of a texture instruction. + */ +fs_inst * +fs_visitor::get_instruction_generating_reg(fs_inst *start, + fs_inst *end, + fs_reg reg) +{ + if (end == start || + end->predicated || + end->force_uncompressed || + end->force_sechalf || + !reg.equals(&end->dst)) { + return NULL; + } else { + return end; + } +} + bool fs_visitor::run() { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7aebffa699d..d3a1045a604 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -487,6 +487,9 @@ public: } int type_size(const struct glsl_type *type); + fs_inst *get_instruction_generating_reg(fs_inst *start, + fs_inst *end, + fs_reg reg); bool run(); void setup_paramvalues_refs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 00524288f6d..0cb75f9d5b3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -172,12 +172,25 @@ fs_visitor::try_emit_saturate(ir_expression *ir) if (!sat_val) return false; + 
fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); + sat_val->accept(this); fs_reg src = this->result; - this->result = fs_reg(this, ir->type); - fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); - inst->saturate = true; + fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); + + /* If the last instruction from our accept() didn't generate our + * src, generate a saturated MOV + */ + fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); + if (!modify || modify->regs_written() != 1) { + fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); + inst->saturate = true; + } else { + modify->saturate = true; + this->result = src; + } + return true; } @@ -591,9 +604,6 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, fs_inst *pre_rhs_inst, fs_inst *last_rhs_inst) { - if (pre_rhs_inst == last_rhs_inst) - return false; /* No instructions generated to work with. */ - /* Only attempt if we're doing a direct assignment. */ if (ir->condition || !(ir->lhs->type->is_scalar() || @@ -602,20 +612,20 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, return false; /* Make sure the last instruction generated our source reg. */ - if (last_rhs_inst->predicated || - last_rhs_inst->force_uncompressed || - last_rhs_inst->force_sechalf || - !src.equals(&last_rhs_inst->dst)) + fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, + last_rhs_inst, + src); + if (!modify) return false; /* If last_rhs_inst wrote a different number of components than our LHS, * we can't safely rewrite it. */ - if (ir->lhs->type->vector_elements != last_rhs_inst->regs_written()) + if (ir->lhs->type->vector_elements != modify->regs_written()) return false; /* Success! Rewrite the instruction. */ - last_rhs_inst->dst = dst; + modify->dst = dst; return true; } |