summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Matt Turner <[email protected]> 2014-04-17 15:13:00 -0700
committer: Matt Turner <[email protected]> 2014-06-17 09:40:30 -0700
commit: 18372a710028fcbe1ff74f2f727e986c223957ba (patch)
tree: 19db83e52cffd93adbb71e3a66396c648fe6b53a /src
parent: 31ae9c25ff07681f59e6ffc53c039e842cb39464 (diff)
i965/fs: Copy propagate from load_payload.
But only into non-load_payload instructions. Otherwise we would prevent register coalescing from combining identical payloads.
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 22
1 file changed, 22 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 158d0bad507..cc6e86f55e4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -42,6 +42,7 @@ namespace { /* avoid conflict with opt_copy_propagation_elements */
struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
+ enum opcode opcode;
};
struct block_data {
@@ -287,6 +288,10 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.file == IMM)
return false;
+ if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
+ inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD)
+ return false;
+
/* Bail if inst is reading more than entry is writing. */
if ((inst->regs_read(this, arg) * inst->src[arg].stride *
type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
@@ -569,7 +574,24 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
+ entry->opcode = inst->opcode;
acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+ } else if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
+ inst->dst.file == GRF) {
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == GRF) {
+ acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
+ entry->dst = inst->dst;
+ entry->dst.reg_offset = i;
+ entry->src = inst->src[i];
+ entry->opcode = inst->opcode;
+ if (!entry->dst.equals(inst->src[i])) {
+ acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+ } else {
+ ralloc_free(entry);
+ }
+ }
+ }
}
}