From a27c7d80afc3160a0face4b8781bf921229bc3cc Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 6 Apr 2012 12:14:28 -0700 Subject: i965: Stop lying about cpp and height of a stencil buffer. When using a separate stencil buffer, i965 requires that the pitch of the buffer (in the 3DSTATE_STENCIL_BUFFER command) be specified as 2x the actual pitch. Previously this was accomplished by doubling the "cpp" and "pitch" values stored in the intel_region data structure, and halving the height. However, this was confusing, and it led to a subtle (but benign) bug: since a stencil buffer is W-tiled, its true height must be aligned to a multiple of 64; we were accidentally aligning its faux height to a multiple of 64, causing memory to be wasted. Note that for window system stencil buffers, the DDX also doubles the cpp and pitch values. To facilitate fixing this DDX server bug in the future, we fix the cpp and pitch values we receive from the X server only if cpp has the "incorrect" value of 2. Acked-by: Kenneth Graunke Reviewed-by: Chad Versace v2: Clarify comments about the DDX. --- src/mesa/drivers/dri/i965/brw_misc_state.c | 11 ++++---- src/mesa/drivers/dri/i965/gen7_misc_state.c | 16 +++++++++++- src/mesa/drivers/dri/intel/intel_context.c | 36 +++++++++++++++++++------- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 24 +++-------------- src/mesa/drivers/dri/intel/intel_screen.c | 24 ++++++++++------- 5 files changed, 66 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 34793336111..62bcc93eed2 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -326,10 +326,6 @@ static void emit_depthbuffer(struct brw_context *brw) * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and * height. * - * Since the stencil buffer has quirky pitch requirements, its region - * was allocated with half height and double cpp. So we need - * a multiplier of 2 to obtain the surface's real height. - * * Enable the hiz bit because it and the separate stencil bit must have * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit * 1.21 "Separate Stencil Enable": @@ -435,7 +431,12 @@ static void emit_depthbuffer(struct brw_context *brw) struct intel_region *region = stencil_mt->region; BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(region->pitch * region->cpp - 1); + /* The stencil buffer has quirky pitch requirements. From Vol 2a, + * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": + * The pitch must be set to 2x the value computed based on width, as + * the stencil buffer is stored with two rows interleaved. + */ + OUT_BATCH(2 * region->pitch * region->cpp - 1); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 5da64349e5a..3a6144f2838 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -143,8 +143,22 @@ static void emit_depthbuffer(struct brw_context *brw) BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); + /* The stencil buffer has quirky pitch requirements. From the Graphics + * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing + * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+], + * field "Surface Pitch": + * + * The pitch must be set to 2x the value computed based on width, as + * the stencil buffer is stored with two rows interleaved. + * + * (Note that it is not 100% clear whether this intended to apply to + * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would + * imply that it doesn't), however the comment appears on a "DevIVB+" + * page (which would imply that it does). Experiments with the hardware + * indicate that it does. + */ OUT_BATCH(enabled | - (stencil_mt->region->pitch * stencil_mt->region->cpp - 1)); + (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1)); OUT_RELOC(stencil_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 16a98878881..b8472b6fd38 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1251,17 +1251,35 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, int buffer_width; int buffer_height; + int buffer_cpp = buffer->cpp; + int buffer_pitch = buffer->pitch; if (buffer->attachment == __DRI_BUFFER_STENCIL) { - /* The stencil buffer has quirky pitch requirements. From Section - * 2.11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": - * The pitch must be set to 2x the value computed based on width, as - * the stencil buffer is stored with two rows interleaved. + /* Stencil buffers use W tiling, a tiling format that the DRM functions + * don't properly account for. Therefore, when we allocate a stencil + * buffer that is private to Mesa (see intel_miptree_create), we round + * the height and width up to the next multiple of the tile size (64x64) + * and then ask DRM to allocate an untiled buffer. Consequently, the + * height and the width stored in the stencil buffer's region structure + * are always multiples of 64, even if the stencil buffer itself is + * smaller. * - * To satisfy the pitch requirement, the X driver allocated the region - * with the following dimensions. + * To avoid inconsistencies between how we represent private buffers and + * buffers shared with the window system, round up the height and width + * for window system buffers too. */ buffer_width = ALIGN(drawable->w, 64); - buffer_height = ALIGN(ALIGN(drawable->h, 2) / 2, 64); + buffer_height = ALIGN(drawable->h, 64); + + /* Versions 2.17.0 and earlier of xf86-video-intel (a.k.a. the DDX) lie + * and send cpp and pitch values that are two times too large for + * stencil buffers. Hopefully this will be fixed in a future version + * of xf86-video-intel, but for now detect the bug by checking if cpp + * is 2, and fixing cpp and pitch if it is. + */ + if (buffer_cpp == 2) { + buffer_cpp = 1; + buffer_pitch /= 2; + } } else { buffer_width = drawable->w; buffer_height = drawable->h; @@ -1279,10 +1297,10 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, struct intel_region *region = intel_region_alloc_for_handle(intel->intelScreen, - buffer->cpp, + buffer_cpp, buffer_width, buffer_height, - buffer->pitch / buffer->cpp, + buffer_pitch / buffer_cpp, buffer->name, buffer_name); if (!region) diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 636ee1797b0..91ebc8d4e4d 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -102,16 +102,6 @@ intel_miptree_create_internal(struct intel_context *intel, mt->depth0 = depth0; } - if (format == MESA_FORMAT_S8) { - /* The stencil buffer has quirky pitch requirements. From Vol 2a, - * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": - * The pitch must be set to 2x the value computed based on width, as - * the stencil buffer is stored with two rows interleaved. - */ - assert(intel->has_separate_stencil); - mt->cpp = 2; - } - if (!for_region && _mesa_is_depthstencil_format(_mesa_get_format_base_format(format)) && (intel->must_use_separate_stencil || @@ -188,20 +178,12 @@ intel_miptree_create(struct intel_context *intel, if (format == MESA_FORMAT_S8) { /* The stencil buffer is W tiled. However, we request from the kernel a - * non-tiled buffer because the GTT is incapable of W fencing. - * - * The stencil buffer has quirky pitch requirements. From Vol 2a, - * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": - * The pitch must be set to 2x the value computed based on width, as - * the stencil buffer is stored with two rows interleaved. - * To accomplish this, we resort to the nasty hack of doubling the drm - * region's cpp and halving its height. - * - * If we neglect to double the pitch, then render corruption occurs. + * non-tiled buffer because the GTT is incapable of W fencing. So round + * up the width and height to match the size of W tiles (64x64). */ tiling = I915_TILING_NONE; width0 = ALIGN(width0, 64); - height0 = ALIGN((height0 + 1) / 2, 64); + height0 = ALIGN(height0, 64); } mt = intel_miptree_create_internal(intel, target, format, diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 49e208c84bd..3fc1d3c630b 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -956,22 +956,28 @@ intelAllocateBuffer(__DRIscreen *screen, return NULL; if (attachment == __DRI_BUFFER_STENCIL) { - /* The stencil buffer has quirky pitch requirements. From Vol 2a, - * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": - * The pitch must be set to 2x the value computed based on width, as - * the stencil buffer is stored with two rows interleaved. - * To accomplish this, we resort to the nasty hack of doubling the - * region's cpp and halving its height. + /* Stencil buffers use W tiling, a tiling format that the DRM functions + * don't properly account for. Therefore, when we allocate a stencil + * buffer that is private to Mesa (see intel_miptree_create), we round + * the height and width up to the next multiple of the tile size (64x64) + * and then ask DRM to allocate an untiled buffer. Consequently, the + * height and the width stored in the stencil buffer's region structure + * are always multiples of 64, even if the stencil buffer itself is + * smaller. + * + * To avoid inconsistencies between how we represent private buffers and + * buffers shared with the window system, round up the height and width + * for window system buffers too. */ region_width = ALIGN(width, 64); - region_height = ALIGN(ALIGN(height, 2) / 2, 64); - region_cpp = format / 4; + region_height = ALIGN(height, 64); } else { region_width = width; region_height = height; - region_cpp = format / 8; } + region_cpp = format / 8; + intelBuffer->region = intel_region_alloc(intelScreen, tiling, region_cpp, -- cgit v1.2.3