aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2012-06-06 11:06:51 -0700
committer Eric Anholt <[email protected]> 2012-07-03 12:57:04 -0700
commit 0c4630bae001139dea42b78cd08157de4d90542b (patch)
tree d43f4455c178b0085fdfac277ba74b0de527fdd4 /src
parent 458f7f014139deb48a4cf0a9e6bdca3a57d24208 (diff)
i965/fs: Allow copy propagation with source modifiers.
This shaves a few instructions off of a ton of programs. For 12 shaders from tropics and sanctuary, it's enough reduction in register pressure to get 16-wide mode. 7 shaders from heroes of newerth and savage2 are hurt by about 1.1%, where copy propagation of negates ends up preventing coalescing, but we could regain that by doing dataflow analysis in our copy propagation.

No significant performance difference in tropics (n=11).

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index d510e5b3609..a019cb5814c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -40,9 +40,25 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
return false;
}
+ /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
+ if (inst->conditional_mod &&
+ inst->src[arg].type == BRW_REGISTER_TYPE_UD &&
+ entry->src.negate)
+ return false;
+
+ bool has_source_modifiers = entry->src.abs || entry->src.negate;
+
+ if (intel->gen == 6 && inst->is_math() && has_source_modifiers)
+ return false;
+
inst->src[arg].reg = entry->src.reg;
inst->src[arg].reg_offset = entry->src.reg_offset;
+ if (!inst->src[arg].abs) {
+ inst->src[arg].abs = entry->src.abs;
+ inst->src[arg].negate ^= entry->src.negate;
+ }
+
return true;
}
@@ -113,9 +129,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
!inst->predicated &&
!inst->force_uncompressed &&
!inst->force_sechalf &&
- inst->src[0].smear == -1 &&
- !inst->src[0].abs &&
- !inst->src[0].negate) {
+ inst->src[0].smear == -1) {
acp_entry *entry = ralloc(mem_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];