summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2013-03-06 16:38:10 -0800
committerEric Anholt <[email protected]>2013-03-11 12:11:53 -0700
commitf10f5e498008636b110d01b2613e552da7793708 (patch)
tree573266a35378edc7e50c4aea48d91a6efc10f8c8 /src
parentf09a8e17e5a2d78c14d588db405e098b9b910a8a (diff)
i965/fs: Fix register allocation for uniform pull constants in 16-wide.
We were allowing a compressed instruction to write a register that contained the last use of a uniform pull constant (either UBO load or push constant spillover), so it would get half its values smashed. Since we need to see the actual instruction to decide this, move the pre-gen6 pixel_x/y logic here, which should improve the performance of register allocation since virtual_grf_interferes() is called more than once per instruction. NOTE: This is a candidate for the stable branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61317 Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp54
1 files changed, 31 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index db8f39732d2..4c7991dc54d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -190,6 +190,37 @@ fs_visitor::calculate_live_intervals()
int reg = inst->src[i].reg;
use[reg] = ip;
+
+ /* In most cases, a register can be written over safely by the
+ * same instruction that is its last use. For a single
+ * instruction, the sources are dereferenced before writing of the
+ * destination starts (naturally). This gets more complicated for
+ * simd16, because the instruction:
+ *
+ * mov(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
+ *
+ * is actually decoded in hardware as:
+ *
+ * mov(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
+ * mov(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
+ *
+ * Which is safe. However, if we have uniform accesses
+ * happening, we get into trouble:
+ *
+ * mov(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
+ * mov(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
+ *
+ * Now our destination for the first instruction overwrote the
+ * second instruction's src0, and we get garbage for those 8
+ * pixels. There's a similar issue for the pre-gen6
+ * pixel_x/pixel_y, which are registers of 16-bit values and thus
+ * would get stomped by the first decode as well.
+ */
+ if (dispatch_width == 16 && (inst->src[i].smear ||
+ (this->pixel_x.reg == reg ||
+ this->pixel_y.reg == reg))) {
+ use[reg]++;
+ }
}
}
@@ -264,28 +295,5 @@ fs_visitor::virtual_grf_interferes(int a, int b)
int start = MAX2(a_def, b_def);
int end = MIN2(a_use, b_use);
- /* If the register is used to store 16 values of less than float
- * size (only the case for pixel_[xy]), then we can't allocate
- * another dword-sized thing to that register that would be used in
- * the same instruction. This is because when the GPU decodes (for
- * example):
- *
- * (declare (in ) vec4 gl_FragCoord@0x97766a0)
- * add(16) g6<1>F g6<8,8,1>UW 0.5F { align1 compr };
- *
- * it's actually processed as:
- * add(8) g6<1>F g6<8,8,1>UW 0.5F { align1 };
- * add(8) g7<1>F g6.8<8,8,1>UW 0.5F { align1 sechalf };
- *
- * so our second half values in g6 got overwritten in the first
- * half.
- */
- if (dispatch_width == 16 && (this->pixel_x.reg == a ||
- this->pixel_x.reg == b ||
- this->pixel_y.reg == a ||
- this->pixel_y.reg == b)) {
- return start <= end;
- }
-
return start < end;
}