summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2013-03-06 17:50:50 -0800
committerEric Anholt <[email protected]>2013-03-11 12:11:53 -0700
commit4dc7e6dcbf0d9c360e257c704774c9b083511b47 (patch)
tree9b1542fe6d69e350f53e58550c59c4c10d488d7b /src
parent4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6 (diff)
i965/fs: Also do the gen4 SEND dependency workaround against other SENDs.
We were handling the dependency workaround for the first written reg of a send preceding the one we're fixing up, but didn't consider the other regs. Thus if you had two sampler calls that got allocated to the same set of regs, one might, rarely, overwrite the other. This was occurring in XBMC's GLSL shaders. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44567 NOTE: This is a candidate for the stable branches. Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp24
1 files changed, 15 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5380abfe2f4..8ce39543002 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2300,7 +2300,8 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
void
fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
{
- int write_len = inst->regs_written() * dispatch_width / 8;
+ int reg_size = dispatch_width / 8;
+ int write_len = inst->regs_written() * reg_size;
int first_write_grf = inst->dst.reg;
bool needs_dep[BRW_MAX_MRF];
assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2339,14 +2340,19 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
* instruction but a MOV that might have left us an outstanding
* dependency has more latency than a MOV.
*/
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg >= first_write_grf &&
- scan_inst->dst.reg < first_write_grf + write_len &&
- needs_dep[scan_inst->dst.reg - first_write_grf]) {
- inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
- needs_dep[scan_inst->dst.reg - first_write_grf] = false;
- if (scan_inst_16wide)
- needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false;
+ if (scan_inst->dst.file == GRF) {
+ for (int i = 0; i < scan_inst->regs_written(); i++) {
+ int reg = scan_inst->dst.reg + i * reg_size;
+
+ if (reg >= first_write_grf &&
+ reg < first_write_grf + write_len &&
+ needs_dep[reg - first_write_grf]) {
+ inst->insert_before(DEP_RESOLVE_MOV(reg));
+ needs_dep[reg - first_write_grf] = false;
+ if (scan_inst_16wide)
+ needs_dep[reg - first_write_grf + 1] = false;
+ }
+ }
}
/* Clear the flag for registers that actually got read (as expected). */