summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2012-06-07 08:48:50 -0700
committer Eric Anholt <[email protected]> 2012-07-03 12:57:10 -0700
commit dd4282e38fd92c081875da6bce0b2345bd472532 (patch)
tree 049a9dddc9c9e1e8dfa0ba5dcb653fa66fbaac4c /src
parent 0c4630bae001139dea42b78cd08157de4d90542b (diff)
i965/fs: Allow copy propagation on uniforms.
This is a big win for savage2, hon and yofrankie. 62 new programs for savage2/hon get 16-wide mode, along with one for humus demos and two for tropics. Even a few shaders from tropics see reductions of 15% or more.

total instructions in shared programs: 216536 -> 207353 (-4.24%)
instructions in affected programs: 123941 -> 114758 (-7.41%)

In benchmarking Tropics, only a 0.040% +/- 0.034% performance improvement was observed (n=90). Rather disappointing, but I was primarily motivated to do this patch by a regression in the number of 16-wide shaders compiled after a GRF texturing on IVB patch I'm working on. Hopefully this helps avoid that regression.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 11
1 file changed, 7 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index a019cb5814c..bf26d86bd7c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -48,9 +48,11 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
bool has_source_modifiers = entry->src.abs || entry->src.negate;
- if (intel->gen == 6 && inst->is_math() && has_source_modifiers)
+ if (intel->gen == 6 && inst->is_math() &&
+ (has_source_modifiers || entry->src.file == UNIFORM))
return false;
+ inst->src[arg].file = entry->src.file;
inst->src[arg].reg = entry->src.reg;
inst->src[arg].reg_offset = entry->src.reg_offset;
@@ -121,9 +123,10 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
/* If this instruction is a raw copy, add it to the ACP. */
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == GRF &&
- inst->src[0].file == GRF &&
- (inst->src[0].reg != inst->dst.reg ||
- inst->src[0].reg_offset != inst->dst.reg_offset) &&
+ ((inst->src[0].file == GRF &&
+ (inst->src[0].reg != inst->dst.reg ||
+ inst->src[0].reg_offset != inst->dst.reg_offset)) ||
+ inst->src[0].file == UNIFORM) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
!inst->predicated &&