diff options
author | Iago Toral Quiroga <[email protected]> | 2016-03-11 14:35:07 +0100 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2016-05-16 09:55:32 +0200 |
commit | cf375a3333e54a01462f192202d609436e5fbec8 (patch) | |
tree | a43182aa9bfeada61952bc47b6b5f82402939bbd /src/mesa/drivers/dri | |
parent | ea1ef49a16dc429c50ece388e92bf206ccf282a7 (diff) |
i965/fs: fix copy propagation of partially invalidated entries
We were not invalidating entries with a src that reads more than one register
when we find writes that overwrite any register read by entry->src after
the first. This leads to incorrect copy propagation because we re-use
entries from the ACP that have been partially invalidated. Same thing for
entries with a dst that writes to more than one register.
v2 (Sam):
- Improve code by defining regions_overlap() and using it instead of a
loop (Curro).
Reviewed-by: Francisco Jerez <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 35 |
1 files changed, 27 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 23c545b7a6a..4b73b07a240 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -44,6 +44,7 @@ struct acp_entry : public exec_node { fs_reg dst; fs_reg src; uint8_t regs_written; + uint8_t regs_read; enum opcode opcode; bool saturate; }; @@ -702,6 +703,14 @@ can_propagate_from(fs_inst *inst) !inst->is_partial_write()); } +inline bool +regions_overlap(const fs_reg &r, unsigned n, const fs_reg &s, unsigned m) +{ + return r.file == s.file && r.nr == s.nr && + !(r.reg_offset + n < s.reg_offset || + s.reg_offset + m < r.reg_offset); +} + /* Walks a basic block and does copy propagation on it using the acp * list. */ @@ -728,18 +737,26 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, /* kill the destination from the ACP */ if (inst->dst.file == VGRF) { foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) { - if (inst->overwrites_reg(entry->dst)) { - entry->remove(); - } - } + if (regions_overlap(entry->dst, entry->regs_written, + inst->dst, inst->regs_written)) { + entry->remove(); + break; + } + } /* Oops, we only have the chaining hash based on the destination, not * the source, so walk across the entire table. */ for (int i = 0; i < ACP_HASH_SIZE; i++) { foreach_in_list_safe(acp_entry, entry, &acp[i]) { - if (inst->overwrites_reg(entry->src)) + /* Make sure we kill the entry if this instruction overwrites + * _any_ of the registers that it reads + */ + if (regions_overlap(entry->src, entry->regs_read, + inst->dst, inst->regs_written)) { entry->remove(); + continue; + } } } } @@ -748,10 +765,11 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, * operand of another instruction, add it to the ACP. */ if (can_propagate_from(inst)) { - acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); - entry->dst = inst->dst; - entry->src = inst->src[0]; + acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); + entry->dst = inst->dst; + entry->src = inst->src[0]; entry->regs_written = inst->regs_written; + entry->regs_read = inst->regs_read(0); entry->opcode = inst->opcode; entry->saturate = inst->saturate; acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); @@ -769,6 +787,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, entry->dst.reg_offset = offset; entry->src = inst->src[i]; entry->regs_written = regs_written; + entry->regs_read = inst->regs_read(i); entry->opcode = inst->opcode; if (!entry->dst.equals(inst->src[i])) { acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); |