summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Berry <[email protected]> 2012-07-10 11:23:25 -0700
committer: Paul Berry <[email protected]> 2012-07-24 14:52:57 -0700
commit: bac43b8bb7ace5401a2cc0d92f416340344df1bd (patch)
tree: 5d7d9ab3d8f4d74d453ae8fc3387b4612c923fe0
parent: 0aeb87023e64807734aee323e76f81796d525a36 (diff)
i965/msaa: Work around problems with null render targets on Gen6.
On Gen6, multisampled null render targets don't seem to work properly--they cause the GPU to hang. So, as a workaround, we render into a dummy color buffer.

Fortunately this situation (multisampled rendering without a color buffer) is rare, and we don't have to waste too much memory, because we can give the workaround buffer a very small pitch.

Fixes piglit test "EXT_framebuffer_multisample/no-color {2,4} depth-computed *" on Gen6.

Reviewed-by: Chad Versace <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h | 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 47
2 files changed, 49 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index b4868fe3c26..c179c698b62 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -971,6 +971,12 @@ struct brw_context
drm_intel_bo *scratch_bo;
+ /**
+ * Buffer object used in place of multisampled null render targets on
+ * Gen6. See brw_update_null_renderbuffer_surface().
+ */
+ drm_intel_bo *multisampled_null_render_target_bo;
+
/** Offset in the program cache to the WM program */
uint32_t prog_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index c2e629ce225..9607828fd13 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -982,6 +982,10 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
uint32_t *surf;
+ unsigned surface_type = BRW_SURFACE_NULL;
+ drm_intel_bo *bo = NULL;
+ unsigned pitch_minus_1 = 0;
+ uint32_t multisampling_state = 0;
/* _NEW_BUFFERS */
const struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -989,7 +993,34 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[unit]);
- surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
+ if (fb->Visual.samples > 0) {
+ /* On Gen6, null render targets seem to cause GPU hangs when
+ * multisampling. So work around this problem by rendering into a dummy
+ * color buffer.
+ *
+ * To decrease the amount of memory needed by the workaround buffer, we
+ * set its pitch to 128 bytes (the width of a Y tile). This means that
+ * the amount of memory needed for the workaround buffer is
+ * (width_in_tiles + height_in_tiles - 1) tiles.
+ *
+ * Note that since the workaround buffer will be interpreted by the
+ * hardware as an interleaved multisampled buffer, we need to compute
+ * width_in_tiles and height_in_tiles by dividing the width and height
+ * by 16 rather than the normal Y-tile size of 32.
+ */
+ unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
+ unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
+ unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
+ brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo,
+ size_needed);
+ bo = brw->wm.multisampled_null_render_target_bo;
+ surface_type = BRW_SURFACE_2D;
+ pitch_minus_1 = 127;
+ multisampling_state =
+ brw_get_surface_num_multisamples(fb->Visual.samples);
+ }
+
+ surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
if (intel->gen < 6) {
surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
@@ -997,7 +1028,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
}
- surf[1] = 0;
+ surf[1] = bo ? bo->offset : 0;
surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -1006,9 +1037,17 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
*
* If Surface Type is SURFTYPE_NULL, this field must be TRUE
*/
- surf[3] = BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
- surf[4] = 0;
+ surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
+ pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
+ surf[4] = multisampling_state;
surf[5] = 0;
+
+ if (bo) {
+ drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+ brw->wm.surf_offset[unit] + 4,
+ bo, 0,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+ }
}
/**