summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Berry <[email protected]> 2012-04-26 06:35:56 -0700
committer: Paul Berry <[email protected]> 2012-05-07 10:50:33 -0700
commit: 714b4f6184db84a738cf2d063980f0e19ab03b4b (patch)
tree: 6410db6577da33deaa752202a85c50376f769e00
parent: a683012a80a3408b3b71f22b2a97d9eaaac11a46 (diff)
i965/Gen7: Work around GPU hangs due to misaligned depth coordinate offsets.
In i965 Gen7, Mesa has for a long time used the "depth coordinate offset X/Y" settings (in 3DSTATE_DEPTH_BUFFER) to cause the GPU to render to miplevels other than 0. Unfortunately, this doesn't work, because these offsets must be aligned to multiples of 8, and miplevels in the depth buffer are only guaranteed to be aligned to multiples of 4. When the offsets aren't aligned to a multiple of 8, the GPU sometimes hangs.

As a temporary measure, to avoid GPU hangs, this patch smashes the 3 LSBs of "depth coordinate offset X/Y" to 0. This results in incorrect rendering to mipmapped depth textures, but that seems like a reasonable stopgap while we figure out a better solution.

Avoids GPU hangs in piglit test "depthstencil-render-miplevels" at texture sizes that are not powers of 2.

Reviewed-by: Chad Versace <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_hiz.c 18
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_misc_state.c 36
2 files changed, 54 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.c b/src/mesa/drivers/dri/i965/gen7_hiz.c
index 4538ec9f203..18272491be8 100644
--- a/src/mesa/drivers/dri/i965/gen7_hiz.c
+++ b/src/mesa/drivers/dri/i965/gen7_hiz.c
@@ -364,6 +364,24 @@ gen7_hiz_exec(struct intel_context *intel,
draw_x & ~tile_mask_x,
draw_y & ~tile_mask_y);
+ /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+ * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+ * Coordinate Offset X/Y":
+ *
+ * "The 3 LSBs of both offsets must be zero to ensure correct
+ * alignment"
+ *
+ * We have no guarantee that tile_x and tile_y are correctly aligned,
+ * since they are determined by the mipmap layout, which is only aligned
+ * to multiples of 4.
+ *
+ * So, to avoid hanging the GPU, just smash the low order 3 bits of
+ * tile_x and tile_y to 0. This is a temporary workaround until we come
+ * up with a better solution.
+ */
+ tile_x &= ~7;
+ tile_y &= ~7;
+
intel_emit_depth_stall_flushes(intel);
BEGIN_BATCH(7);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index d9beda8f36e..4a5b5a6ae2e 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -118,6 +118,24 @@ static void emit_depthbuffer(struct brw_context *brw)
tile_x = draw_x & tile_mask_x;
tile_y = draw_y & tile_mask_y;
+ /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+ * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+ * Coordinate Offset X/Y":
+ *
+ * "The 3 LSBs of both offsets must be zero to ensure correct
+ * alignment"
+ *
+ * We have no guarantee that tile_x and tile_y are correctly aligned,
+ * since they are determined by the mipmap layout, which is only
+ * aligned to multiples of 4.
+ *
+ * So, to avoid hanging the GPU, just smash the low order 3 bits of
+ * tile_x and tile_y to 0. This is a temporary workaround until we
+ * come up with a better solution.
+ */
+ tile_x &= ~7;
+ tile_y &= ~7;
+
/* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
dw1 |= (BRW_SURFACE_2D << 29);
dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
@@ -142,6 +160,24 @@ static void emit_depthbuffer(struct brw_context *brw)
tile_x = draw_x & tile_mask_x;
tile_y = draw_y & tile_mask_y;
+ /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+ * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+ * Coordinate Offset X/Y":
+ *
+ * "The 3 LSBs of both offsets must be zero to ensure correct
+ * alignment"
+ *
+ * We have no guarantee that tile_x and tile_y are correctly aligned,
+ * since they are determined by the mipmap layout, which is only aligned
+ * to multiples of 4.
+ *
+ * So, to avoid hanging the GPU, just smash the low order 3 bits of
+ * tile_x and tile_y to 0. This is a temporary workaround until we come
+ * up with a better solution.
+ */
+ tile_x &= ~7;
+ tile_y &= ~7;
+
offset = intel_region_get_aligned_offset(region,
draw_x & ~tile_mask_x,
draw_y & ~tile_mask_y);