diff options
author | Paul Berry <[email protected]> | 2012-07-10 11:23:25 -0700 |
---|---|---|
committer | Paul Berry <[email protected]> | 2012-07-24 14:52:57 -0700 |
commit | bac43b8bb7ace5401a2cc0d92f416340344df1bd (patch) | |
tree | 5d7d9ab3d8f4d74d453ae8fc3387b4612c923fe0 /src/mesa | |
parent | 0aeb87023e64807734aee323e76f81796d525a36 (diff) |
i965/msaa: Work around problems with null render targets on Gen6.
On Gen6, multisampled null render targets don't seem to work
properly: they cause the GPU to hang. So, as a workaround, we render
into a dummy color buffer instead.
Fortunately, this situation (multisampled rendering without a color
buffer) is rare, and we don't have to waste too much memory, because
we can give the workaround buffer a very small pitch (128 bytes, the
width of one Y tile).
Fixes piglit tests "EXT_framebuffer_multisample/no-color {2,4}
depth-computed *" on Gen6.
Reviewed-by: Chad Versace <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 47 |
2 files changed, 49 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index b4868fe3c26..c179c698b62 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -971,6 +971,12 @@ struct brw_context drm_intel_bo *scratch_bo; + /** + * Buffer object used in place of multisampled null render targets on + * Gen6. See brw_update_null_renderbuffer_surface(). + */ + drm_intel_bo *multisampled_null_render_target_bo; + /** Offset in the program cache to the WM program */ uint32_t prog_offset; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c2e629ce225..9607828fd13 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -982,6 +982,10 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t *surf; + unsigned surface_type = BRW_SURFACE_NULL; + drm_intel_bo *bo = NULL; + unsigned pitch_minus_1 = 0; + uint32_t multisampling_state = 0; /* _NEW_BUFFERS */ const struct gl_framebuffer *fb = ctx->DrawBuffer; @@ -989,7 +993,34 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &brw->wm.surf_offset[unit]); - surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | + if (fb->Visual.samples > 0) { + /* On Gen6, null render targets seem to cause GPU hangs when + * multisampling. So work around this problem by rendering into dummy + * color buffer. + * + * To decrease the amount of memory needed by the workaround buffer, we + * set its pitch to 128 bytes (the width of a Y tile). This means that + * the amount of memory needed for the workaround buffer is + * (width_in_tiles + height_in_tiles - 1) tiles. 
+ * + * Note that since the workaround buffer will be interpreted by the + * hardware as an interleaved multisampled buffer, we need to compute + * width_in_tiles and height_in_tiles by dividing the width and height + * by 16 rather than the normal Y-tile size of 32. + */ + unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16; + unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16; + unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; + brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo, + size_needed); + bo = brw->wm.multisampled_null_render_target_bo; + surface_type = BRW_SURFACE_2D; + pitch_minus_1 = 127; + multisampling_state = + brw_get_surface_num_multisamples(fb->Visual.samples); + } + + surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); if (intel->gen < 6) { surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT | @@ -997,7 +1028,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); } - surf[1] = 0; + surf[1] = bo ? bo->offset : 0; surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); @@ -1006,9 +1037,17 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) * * If Surface Type is SURFTYPE_NULL, this field must be TRUE */ - surf[3] = BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; - surf[4] = 0; + surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); + surf[4] = multisampling_state; surf[5] = 0; + + if (bo) { + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + brw->wm.surf_offset[unit] + 4, + bo, 0, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + } } /** |