summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2012-11-13 15:54:41 -0800
committer: Eric Anholt <[email protected]> 2012-11-25 18:25:26 -0800
commit: 244db0855c3c604211d7a868240ec927610881dd (patch)
tree: eafd907c32d369a65f87866c9bc53dc1cf0bc4d1
parent: cff4c948ed2708a6eb4b090ae87443a707cbd67f (diff)
i965/gen4: Fix LOD bias texturing since my fixed reg classes change.
We have a special case where non-shadow comparison with LOD requires using a SIMD16 vec4 in an 8-wide shader, which appears in the register allocator as a size 8 vgrf.

Fixes assertions in various piglit tests and webgl conformance.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56521
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 28
1 files changed, 18 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index f87cbbcd68a..e83193e24ff 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -90,14 +90,15 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width)
* less some day.
*
* Additionally, on gen5 we need aligned pairs of registers for the PLN
- * instruction.
+ * instruction, and on gen4 we need 8 contiguous regs for workaround simd16
+ * texturing.
*
- * So we have a need for classes for 1, 2, and 4 registers currently, and
- * we add in '3' to make indexing the array easier (since we'll probably
- * want it for texturing later).
+ * So we have a need for classes for 1, 2, 4, and 8 registers currently,
+ * and we add in '3' to make indexing the array easier for the common case
+ * (since we'll probably want it for texturing later).
*/
- const int class_sizes[4] = {1, 2, 3, 4};
- const int class_count = 4;
+ const int class_count = 5;
+ const int class_sizes[class_count] = {1, 2, 3, 4, 8};
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
@@ -410,10 +411,17 @@ fs_visitor::assign_regs()
node_count);
for (int i = 0; i < this->virtual_grf_count; i++) {
- assert(this->virtual_grf_sizes[i] >= 1 &&
- this->virtual_grf_sizes[i] <= 4 &&
- "Register allocation relies on split_virtual_grfs()");
- int c = brw->wm.reg_sets[rsi].classes[this->virtual_grf_sizes[i] - 1];
+ int size = this->virtual_grf_sizes[i];
+ int c;
+
+ if (size == 8) {
+ c = 4;
+ } else {
+ assert(size >= 1 &&
+ size <= 4 &&
+ "Register allocation relies on split_virtual_grfs()");
+ c = brw->wm.reg_sets[rsi].classes[size - 1];
+ }
/* Special case: on pre-GEN6 hardware that supports PLN, the
* second operand of a PLN instruction needs to be an