/* * Mesa 3-D graphics library * * Copyright (C) 2014 LunarG, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Authors: * Chia-I Wu */ #include "ilo_layout.h" enum { LAYOUT_TILING_NONE = 1 << GEN6_TILING_NONE, LAYOUT_TILING_X = 1 << GEN6_TILING_X, LAYOUT_TILING_Y = 1 << GEN6_TILING_Y, LAYOUT_TILING_W = 1 << GEN8_TILING_W, LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE | LAYOUT_TILING_X | LAYOUT_TILING_Y | LAYOUT_TILING_W) }; struct ilo_layout_params { const struct ilo_dev_info *dev; const struct pipe_resource *templ; bool compressed; unsigned h0, h1; unsigned max_x, max_y; }; static void layout_get_slice_size(const struct ilo_layout *layout, const struct ilo_layout_params *params, unsigned level, unsigned *width, unsigned *height) { const struct pipe_resource *templ = params->templ; unsigned w, h; w = u_minify(layout->width0, level); h = u_minify(layout->height0, level); /* * From the Sandy Bridge PRM, volume 1 part 1, page 114: * * "The dimensions of the mip maps are first determined by applying the * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, * if necessary, they are padded out to compression block boundaries." */ w = align(w, layout->block_width); h = align(h, layout->block_height); /* * From the Sandy Bridge PRM, volume 1 part 1, page 111: * * "If the surface is multisampled (4x), these values must be adjusted * as follows before proceeding: * * W_L = ceiling(W_L / 2) * 4 * H_L = ceiling(H_L / 2) * 4" * * From the Ivy Bridge PRM, volume 1 part 1, page 108: * * "If the surface is multisampled and it is a depth or stencil surface * or Multisampled Surface StorageFormat in SURFACE_STATE is * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before * proceeding: * * #samples W_L = H_L = * 2 ceiling(W_L / 2) * 4 HL [no adjustment] * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" * * For interleaved samples (4x), where pixels * * (x, y ) (x+1, y ) * (x, y+1) (x+1, y+1) * * would be is occupied by * * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) * * Thus the need to * * w = align(w, 2) * 2; * y = align(y, 2) * 2; */ if (layout->interleaved_samples) { switch (templ->nr_samples) { case 0: case 1: break; case 2: w = align(w, 2) * 2; break; case 4: w = align(w, 2) * 2; h = align(h, 2) * 2; break; case 8: w = align(w, 2) * 4; h = align(h, 2) * 2; break; case 16: w = align(w, 2) * 4; h = align(h, 2) * 4; break; default: assert(!"unsupported sample count"); break; } } /* * From the Ivy Bridge PRM, volume 1 part 1, page 108: * * "For separate stencil buffer, the width must be mutiplied by 2 and * height divided by 2..." * * To make things easier (for transfer), we will just double the stencil * stride in 3DSTATE_STENCIL_BUFFER. */ w = align(w, layout->align_i); h = align(h, layout->align_j); *width = w; *height = h; } static unsigned layout_get_num_layers(const struct ilo_layout *layout, const struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; unsigned num_layers = templ->array_size; /* samples of the same index are stored in a layer */ if (templ->nr_samples > 1 && !layout->interleaved_samples) num_layers *= templ->nr_samples; return num_layers; } static void layout_init_layer_height(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; unsigned num_layers; if (layout->walk != ILO_LAYOUT_WALK_LAYER) return; num_layers = layout_get_num_layers(layout, params); if (num_layers <= 1) return; /* * From the Sandy Bridge PRM, volume 1 part 1, page 115: * * "The following equation is used for surface formats other than * compressed textures: * * QPitch = (h0 + h1 + 11j)" * * "The equation for compressed textures (BC* and FXT1 surface formats) * follows: * * QPitch = (h0 + h1 + 11j) / 4" * * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the * value calculated in the equation above, for every other odd Surface * Height starting from 1 i.e. 1,5,9,13" * * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: * * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth * buffer and stencil buffer have an implied value of ARYSPC_FULL): * * QPitch = (h0 + h1 + 12j) * QPitch = (h0 + h1 + 12j) / 4 (compressed) * * (There are many typos or missing words here...)" * * To access the N-th slice, an offset of (Stride * QPitch * N) is added to * the base address. The PRM divides QPitch by 4 for compressed formats * because the block height for those formats are 4, and it wants QPitch to * mean the number of memory rows, as opposed to texel rows, between * slices. Since we use texel rows everywhere, we do not need to divide * QPitch by 4. */ layout->layer_height = params->h0 + params->h1 + ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j; if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 && layout->height0 % 4 == 1) layout->layer_height += 4; params->max_y += layout->layer_height * (num_layers - 1); } static void layout_init_lods(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; unsigned cur_x, cur_y; unsigned lv; cur_x = 0; cur_y = 0; for (lv = 0; lv <= templ->last_level; lv++) { unsigned lod_w, lod_h; layout_get_slice_size(layout, params, lv, &lod_w, &lod_h); layout->lods[lv].x = cur_x; layout->lods[lv].y = cur_y; layout->lods[lv].slice_width = lod_w; layout->lods[lv].slice_height = lod_h; switch (layout->walk) { case ILO_LAYOUT_WALK_LOD: lod_h *= layout_get_num_layers(layout, params); if (lv == 1) cur_x += lod_w; else cur_y += lod_h; /* every LOD begins at tile boundaries */ if (templ->last_level > 0) { assert(layout->format == PIPE_FORMAT_S8_UINT); cur_x = align(cur_x, 64); cur_y = align(cur_y, 64); } break; case ILO_LAYOUT_WALK_LAYER: /* MIPLAYOUT_BELOW */ if (lv == 1) cur_x += lod_w; else cur_y += lod_h; break; case ILO_LAYOUT_WALK_3D: { const unsigned num_slices = u_minify(templ->depth0, lv); const unsigned num_slices_per_row = 1 << lv; const unsigned num_rows = (num_slices + num_slices_per_row - 1) / num_slices_per_row; lod_w *= num_slices_per_row; lod_h *= num_rows; cur_y += lod_h; } break; } if (params->max_x < layout->lods[lv].x + lod_w) params->max_x = layout->lods[lv].x + lod_w; if (params->max_y < layout->lods[lv].y + lod_h) params->max_y = layout->lods[lv].y + lod_h; } if (layout->walk == ILO_LAYOUT_WALK_LAYER) { params->h0 = layout->lods[0].slice_height; if (templ->last_level > 0) params->h1 = layout->lods[1].slice_height; else layout_get_slice_size(layout, params, 1, &cur_x, ¶ms->h1); } } static void layout_init_alignments(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; /* * From the Sandy Bridge PRM, volume 1 part 1, page 113: * * "surface format align_i align_j * YUV 4:2:2 formats 4 *see below * BC1-5 4 4 * FXT1 8 4 * all other formats 4 *see below" * * "- align_j = 4 for any depth buffer * - align_j = 2 for separate stencil buffer * - align_j = 4 for any render target surface is multisampled (4x) * - align_j = 4 for any render target surface with Surface Vertical * Alignment = VALIGN_4 * - align_j = 2 for any render target surface with Surface Vertical * Alignment = VALIGN_2 * - align_j = 2 for all other render target surface * - align_j = 2 for any sampling engine surface with Surface Vertical * Alignment = VALIGN_2 * - align_j = 4 for any sampling engine surface with Surface Vertical * Alignment = VALIGN_4" * * From the Sandy Bridge PRM, volume 4 part 1, page 86: * * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if * the Surface Format is 96 bits per element (BPE)." * * They can be rephrased as * * align_i align_j * compressed formats block width block height * PIPE_FORMAT_S8_UINT 4 2 * other depth/stencil formats 4 4 * 4x multisampled 4 4 * bpp 96 4 2 * others 4 2 or 4 */ /* * From the Ivy Bridge PRM, volume 1 part 1, page 110: * * "surface defined by surface format align_i align_j * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4 * not D16_UNORM 4 4 * 3DSTATE_STENCIL_BUFFER N/A 8 8 * SURFACE_STATE BC*, ETC*, EAC* 4 4 * FXT1 8 4 * all others (set by SURFACE_STATE)" * * From the Ivy Bridge PRM, volume 4 part 1, page 63: * * "- This field (Surface Vertical Aligment) is intended to be set to * VALIGN_4 if the surface was rendered as a depth buffer, for a * multisampled (4x) render target, or for a multisampled (8x) * render target, since these surfaces support only alignment of 4. * - Use of VALIGN_4 for other surfaces is supported, but uses more * memory. * - This field must be set to VALIGN_4 for all tiled Y Render Target * surfaces. * - Value of 1 is not supported for format YCRCB_NORMAL (0x182), * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field * must be set to VALIGN_4." * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT." * * "- This field (Surface Horizontal Aligment) is intended to be set to * HALIGN_8 only if the surface was rendered as a depth buffer with * Z16 format or a stencil buffer, since these surfaces support only * alignment of 8. * - Use of HALIGN_8 for other surfaces is supported, but uses more * memory. * - This field must be set to HALIGN_4 if the Surface Format is BC*. * - This field must be set to HALIGN_8 if the Surface Format is * FXT1." * * They can be rephrased as * * align_i align_j * compressed formats block width block height * PIPE_FORMAT_Z16_UNORM 8 4 * PIPE_FORMAT_S8_UINT 8 8 * other depth/stencil formats 4 4 * 2x or 4x multisampled 4 or 8 4 * tiled Y 4 or 8 4 (if rt) * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2 * others 4 or 8 2 or 4 */ if (params->compressed) { /* this happens to be the case */ layout->align_i = layout->block_width; layout->align_j = layout->block_height; } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) { switch (layout->format) { case PIPE_FORMAT_Z16_UNORM: layout->align_i = 8; layout->align_j = 4; break; case PIPE_FORMAT_S8_UINT: layout->align_i = 8; layout->align_j = 8; break; default: layout->align_i = 4; layout->align_j = 4; break; } } else { switch (layout->format) { case PIPE_FORMAT_S8_UINT: layout->align_i = 4; layout->align_j = 2; break; default: layout->align_i = 4; layout->align_j = 4; break; } } } else { const bool valign_4 = (templ->nr_samples > 1) || (ilo_dev_gen(params->dev) >= ILO_GEN(8)) || (ilo_dev_gen(params->dev) >= ILO_GEN(7) && layout->tiling == GEN6_TILING_Y && (templ->bind & PIPE_BIND_RENDER_TARGET)); if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4) assert(layout->format != PIPE_FORMAT_R32G32B32_FLOAT); layout->align_i = 4; layout->align_j = (valign_4) ? 4 : 2; } /* * the fact that align i and j are multiples of block width and height * respectively is what makes the size of the bo a multiple of the block * size, slices start at block boundaries, and many of the computations * work. */ assert(layout->align_i % layout->block_width == 0); assert(layout->align_j % layout->block_height == 0); /* make sure align() works */ assert(util_is_power_of_two(layout->align_i) && util_is_power_of_two(layout->align_j)); assert(util_is_power_of_two(layout->block_width) && util_is_power_of_two(layout->block_height)); } static unsigned layout_get_valid_tilings(const struct ilo_layout *layout, const struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; const enum pipe_format format = layout->format; unsigned valid_tilings = LAYOUT_TILING_ALL; /* * From the Sandy Bridge PRM, volume 1 part 2, page 32: * * "Display/Overlay Y-Major not supported. * X-Major required for Async Flips" */ if (unlikely(templ->bind & PIPE_BIND_SCANOUT)) valid_tilings &= LAYOUT_TILING_X; /* * From the Sandy Bridge PRM, volume 3 part 2, page 158: * * "The cursor surface address must be 4K byte aligned. The cursor must * be in linear memory, it cannot be tiled." */ if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR))) valid_tilings &= LAYOUT_TILING_NONE; /* * From the Sandy Bridge PRM, volume 2 part 1, page 318: * * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear * Depth Buffer is not supported." * * "The Depth Buffer, if tiled, must use Y-Major tiling." * * From the Sandy Bridge PRM, volume 1 part 2, page 22: * * "W-Major Tile Format is used for separate stencil." */ if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_S8_UINT: valid_tilings &= LAYOUT_TILING_W; break; default: valid_tilings &= LAYOUT_TILING_Y; break; } } if (templ->bind & PIPE_BIND_RENDER_TARGET) { /* * From the Sandy Bridge PRM, volume 1 part 2, page 32: * * "NOTE: 128BPE Format Color buffer ( render target ) MUST be * either TileX or Linear." * * From the Haswell PRM, volume 5, page 32: * * "NOTE: 128 BPP format color buffer (render target) supports * Linear, TiledX and TiledY." */ if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && layout->block_size == 16) valid_tilings &= ~LAYOUT_TILING_Y; /* * From the Ivy Bridge PRM, volume 4 part 1, page 63: * * "This field (Surface Vertical Aligment) must be set to VALIGN_4 * for all tiled Y Render Target surfaces." * * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." */ if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && layout->format == PIPE_FORMAT_R32G32B32_FLOAT) valid_tilings &= ~LAYOUT_TILING_Y; valid_tilings &= ~LAYOUT_TILING_W; } if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { if (ilo_dev_gen(params->dev) < ILO_GEN(8)) valid_tilings &= ~LAYOUT_TILING_W; } /* no conflicting binding flags */ assert(valid_tilings); return valid_tilings; } static void layout_init_tiling(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; unsigned preferred_tilings; layout->valid_tilings = layout_get_valid_tilings(layout, params); preferred_tilings = layout->valid_tilings; /* no fencing nor BLT support */ if (preferred_tilings & ~LAYOUT_TILING_W) preferred_tilings &= ~LAYOUT_TILING_W; if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { /* * heuristically set a minimum width/height for enabling tiling */ if (layout->width0 < 64 && (preferred_tilings & ~LAYOUT_TILING_X)) preferred_tilings &= ~LAYOUT_TILING_X; if ((layout->width0 < 32 || layout->height0 < 16) && (layout->width0 < 16 || layout->height0 < 32) && (preferred_tilings & ~LAYOUT_TILING_Y)) preferred_tilings &= ~LAYOUT_TILING_Y; } else { /* force linear if we are not sure where the texture is bound to */ if (preferred_tilings & LAYOUT_TILING_NONE) preferred_tilings &= LAYOUT_TILING_NONE; } /* prefer tiled over linear */ if (preferred_tilings & LAYOUT_TILING_Y) layout->tiling = GEN6_TILING_Y; else if (preferred_tilings & LAYOUT_TILING_X) layout->tiling = GEN6_TILING_X; else if (preferred_tilings & LAYOUT_TILING_W) layout->tiling = GEN8_TILING_W; else layout->tiling = GEN6_TILING_NONE; } static void layout_init_walk_gen7(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; /* * It is not explicitly states, but render targets are expected to be * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected * to be IMS (samples interleaved). * * See "Multisampled Surface Storage Format" field of SURFACE_STATE. */ if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { /* * From the Ivy Bridge PRM, volume 1 part 1, page 111: * * "note that the depth buffer and stencil buffer have an implied * value of ARYSPC_FULL" */ layout->walk = (templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D : ILO_LAYOUT_WALK_LAYER; layout->interleaved_samples = true; } else { /* * From the Ivy Bridge PRM, volume 4 part 1, page 66: * * "If Multisampled Surface Storage Format is MSFMT_MSS and Number * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface * Array Spacing) must be set to ARYSPC_LOD0." * * As multisampled resources are not mipmapped, we never use * ARYSPC_FULL for them. */ if (templ->nr_samples > 1) assert(templ->last_level == 0); layout->walk = (templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D : (templ->last_level > 0) ? ILO_LAYOUT_WALK_LAYER : ILO_LAYOUT_WALK_LOD; layout->interleaved_samples = false; } } static void layout_init_walk_gen6(struct ilo_layout *layout, struct ilo_layout_params *params) { /* * From the Sandy Bridge PRM, volume 1 part 1, page 115: * * "The separate stencil buffer does not support mip mapping, thus the * storage for LODs other than LOD 0 is not needed. The following * QPitch equation applies only to the separate stencil buffer: * * QPitch = h_0" * * GEN6 does not support compact spacing otherwise. */ layout->walk = (params->templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D : (layout->format == PIPE_FORMAT_S8_UINT) ? ILO_LAYOUT_WALK_LOD : ILO_LAYOUT_WALK_LAYER; /* GEN6 supports only interleaved samples */ layout->interleaved_samples = true; } static void layout_init_walk(struct ilo_layout *layout, struct ilo_layout_params *params) { if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) layout_init_walk_gen7(layout, params); else layout_init_walk_gen6(layout, params); } static void layout_init_size_and_format(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; enum pipe_format format = templ->format; bool require_separate_stencil = false; layout->width0 = templ->width0; layout->height0 = templ->height0; /* * From the Sandy Bridge PRM, volume 2 part 1, page 317: * * "This field (Separate Stencil Buffer Enable) must be set to the same * value (enabled or disabled) as Hierarchical Depth Buffer Enable." * * GEN7+ requires separate stencil buffers. */ if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) require_separate_stencil = true; else require_separate_stencil = (layout->aux == ILO_LAYOUT_AUX_HIZ); } switch (format) { case PIPE_FORMAT_ETC1_RGB8: format = PIPE_FORMAT_R8G8B8X8_UNORM; break; case PIPE_FORMAT_Z24_UNORM_S8_UINT: if (require_separate_stencil) { format = PIPE_FORMAT_Z24X8_UNORM; layout->separate_stencil = true; } break; case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: if (require_separate_stencil) { format = PIPE_FORMAT_Z32_FLOAT; layout->separate_stencil = true; } break; default: break; } layout->format = format; layout->block_width = util_format_get_blockwidth(format); layout->block_height = util_format_get_blockheight(format); layout->block_size = util_format_get_blocksize(format); params->compressed = util_format_is_compressed(format); } static bool layout_want_mcs(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; bool want_mcs = false; /* MCS is for RT on GEN7+ */ if (ilo_dev_gen(params->dev) < ILO_GEN(7)) return false; if (templ->target != PIPE_TEXTURE_2D || !(templ->bind & PIPE_BIND_RENDER_TARGET)) return false; /* * From the Ivy Bridge PRM, volume 4 part 1, page 77: * * "For Render Target and Sampling Engine Surfaces:If the surface is * multisampled (Number of Multisamples any value other than * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." * * "This field must be set to 0 for all SINT MSRTs when all RT channels * are not written" */ if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) { want_mcs = true; } else if (templ->nr_samples <= 1) { /* * From the Ivy Bridge PRM, volume 2 part 1, page 326: * * "When MCS is buffer is used for color clear of non-multisampler * render target, the following restrictions apply. * - Support is limited to tiled render targets. * - Support is for non-mip-mapped and non-array surface types * only. * - Clear is supported only on the full RT; i.e., no partial clear * or overlapping clears. * - MCS buffer for non-MSRT is supported only for RT formats * 32bpp, 64bpp and 128bpp. * ..." */ if (layout->tiling != GEN6_TILING_NONE && templ->last_level == 0 && templ->array_size == 1) { switch (layout->block_size) { case 4: case 8: case 16: want_mcs = true; break; default: break; } } } return want_mcs; } static bool layout_want_hiz(const struct ilo_layout *layout, const struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; const struct util_format_description *desc = util_format_description(templ->format); if (ilo_debug & ILO_DEBUG_NOHIZ) return false; if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) return false; if (!util_format_has_depth(desc)) return false; /* no point in having HiZ */ if (templ->usage == PIPE_USAGE_STAGING) return false; /* * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled * for every level. This is generally fine except on GEN6, where HiZ and * separate stencil are enabled and disabled at the same time. When the * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ * can result in incompatible formats. */ if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && templ->last_level) return false; return true; } static void layout_init_aux(struct ilo_layout *layout, struct ilo_layout_params *params) { if (layout_want_hiz(layout, params)) layout->aux = ILO_LAYOUT_AUX_HIZ; else if (layout_want_mcs(layout, params)) layout->aux = ILO_LAYOUT_AUX_MCS; } static void layout_align(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; int align_w = 1, align_h = 1, pad_h = 0; /* * From the Sandy Bridge PRM, volume 1 part 1, page 118: * * "To determine the necessary padding on the bottom and right side of * the surface, refer to the table in Section 7.18.3.4 for the i and j * parameters for the surface format in use. The surface must then be * extended to the next multiple of the alignment unit size in each * dimension, and all texels contained in this extended surface must * have valid GTT entries." * * "For cube surfaces, an additional two rows of padding are required * at the bottom of the surface. This must be ensured regardless of * whether the surface is stored tiled or linear. This is due to the * potential rotation of cache line orientation from memory to cache." * * "For compressed textures (BC* and FXT1 surface formats), padding at * the bottom of the surface is to an even compressed row, which is * equal to a multiple of 8 uncompressed texel rows. Thus, for padding * purposes, these surfaces behave as if j = 8 only for surface * padding purposes. The value of 4 for j still applies for mip level * alignment and QPitch calculation." */ if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { align_w = MAX2(align_w, layout->align_i); align_h = MAX2(align_h, layout->align_j); if (templ->target == PIPE_TEXTURE_CUBE) pad_h += 2; if (params->compressed) align_h = MAX2(align_h, layout->align_j * 2); } /* * From the Sandy Bridge PRM, volume 1 part 1, page 118: * * "If the surface contains an odd number of rows of data, a final row * below the surface must be allocated." */ if (templ->bind & PIPE_BIND_RENDER_TARGET) align_h = MAX2(align_h, 2); /* * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In * ilo_texture_can_enable_hiz(), we always return true for the first slice. * To avoid out-of-bound access, we have to pad. */ if (layout->aux == ILO_LAYOUT_AUX_HIZ && templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1) { align_w = MAX2(align_w, 8); align_h = MAX2(align_h, 4); } params->max_x = align(params->max_x, align_w); params->max_y = align(params->max_y + pad_h, align_h); } /* note that this may force the texture to be linear */ static void layout_calculate_bo_size(struct ilo_layout *layout, struct ilo_layout_params *params) { assert(params->max_x % layout->block_width == 0); assert(params->max_y % layout->block_height == 0); assert(layout->layer_height % layout->block_height == 0); layout->bo_stride = (params->max_x / layout->block_width) * layout->block_size; layout->bo_height = params->max_y / layout->block_height; while (true) { unsigned w = layout->bo_stride, h = layout->bo_height; unsigned align_w, align_h; /* * From the Haswell PRM, volume 5, page 163: * * "For linear surfaces, additional padding of 64 bytes is required * at the bottom of the surface. This is in addition to the padding * required above." */ if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) && (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) && layout->tiling == GEN6_TILING_NONE) h += (64 + layout->bo_stride - 1) / layout->bo_stride; /* * From the Sandy Bridge PRM, volume 4 part 1, page 81: * * "- For linear render target surfaces, the pitch must be a * multiple of the element size for non-YUV surface formats. * Pitch must be a multiple of 2 * element size for YUV surface * formats. * - For other linear surfaces, the pitch can be any multiple of * bytes. * - For tiled surfaces, the pitch must be a multiple of the tile * width." * * Different requirements may exist when the bo is used in different * places, but our alignments here should be good enough that we do not * need to check layout->templ->bind. */ switch (layout->tiling) { case GEN6_TILING_X: align_w = 512; align_h = 8; break; case GEN6_TILING_Y: align_w = 128; align_h = 32; break; case GEN8_TILING_W: /* * From the Sandy Bridge PRM, volume 1 part 2, page 22: * * "A 4KB tile is subdivided into 8-high by 8-wide array of * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 * bytes." */ align_w = 64; align_h = 64; break; default: assert(layout->tiling == GEN6_TILING_NONE); /* some good enough values */ align_w = 64; align_h = 2; break; } w = align(w, align_w); h = align(h, align_h); /* make sure the bo is mappable */ if (layout->tiling != GEN6_TILING_NONE) { /* * Usually only the first 256MB of the GTT is mappable. * * See also how intel_context::max_gtt_map_object_size is calculated. */ const size_t mappable_gtt_size = 256 * 1024 * 1024; /* * Be conservative. We may be able to switch from VALIGN_4 to * VALIGN_2 if the layout was Y-tiled, but let's keep it simple. */ if (mappable_gtt_size / w / 4 < h) { if (layout->valid_tilings & LAYOUT_TILING_NONE) { layout->tiling = GEN6_TILING_NONE; /* MCS support for non-MSRTs is limited to tiled RTs */ if (layout->aux == ILO_LAYOUT_AUX_MCS && params->templ->nr_samples <= 1) layout->aux = ILO_LAYOUT_AUX_NONE; continue; } else { ilo_warn("cannot force texture to be linear\n"); } } } layout->bo_stride = w; layout->bo_height = h; break; } } static void layout_calculate_hiz_size(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; const unsigned hz_align_j = 8; enum ilo_layout_walk_type hz_walk; unsigned hz_width, hz_height, lv; unsigned hz_clear_w, hz_clear_h; assert(layout->aux == ILO_LAYOUT_AUX_HIZ); assert(layout->walk == ILO_LAYOUT_WALK_LAYER || layout->walk == ILO_LAYOUT_WALK_3D); /* * From the Sandy Bridge PRM, volume 2 part 1, page 312: * * "The hierarchical depth buffer does not support the LOD field, it is * assumed by hardware to be zero. A separate hierarachical depth * buffer is required for each LOD used, and the corresponding * buffer's state delivered to hardware each time a new depth buffer * state with modified LOD is delivered." * * We will put all LODs in a single bo with ILO_LAYOUT_WALK_LOD. */ if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) hz_walk = layout->walk; else hz_walk = ILO_LAYOUT_WALK_LOD; /* * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge * PRM, volume 2 part 1, page 312-313. * * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a * memory row. */ switch (hz_walk) { case ILO_LAYOUT_WALK_LOD: { unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS]; unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS]; unsigned cur_tx, cur_ty; /* figure out the tile offsets of LODs */ hz_width = 0; hz_height = 0; cur_tx = 0; cur_ty = 0; for (lv = 0; lv <= templ->last_level; lv++) { unsigned tw, th; lod_tx[lv] = cur_tx; lod_ty[lv] = cur_ty; tw = align(layout->lods[lv].slice_width, 16); th = align(layout->lods[lv].slice_height, hz_align_j) * templ->array_size / 2; /* convert to Y-tiles */ tw = align(tw, 128) / 128; th = align(th, 32) / 32; if (hz_width < cur_tx + tw) hz_width = cur_tx + tw; if (hz_height < cur_ty + th) hz_height = cur_ty + th; if (lv == 1) cur_tx += tw; else cur_ty += th; } /* convert tile offsets to memory offsets */ for (lv = 0; lv <= templ->last_level; lv++) { layout->aux_offsets[lv] = (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096; } hz_width *= 128; hz_height *= 32; } break; case ILO_LAYOUT_WALK_LAYER: { const unsigned h0 = align(params->h0, hz_align_j); const unsigned h1 = align(params->h1, hz_align_j); const unsigned htail = ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; const unsigned hz_qpitch = h0 + h1 + htail; hz_width = align(layout->lods[0].slice_width, 16); hz_height = hz_qpitch * templ->array_size / 2; if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) hz_height = align(hz_height, 8); layout->aux_layer_height = hz_qpitch; } break; case ILO_LAYOUT_WALK_3D: hz_width = align(layout->lods[0].slice_width, 16); hz_height = 0; for (lv = 0; lv <= templ->last_level; lv++) { const unsigned h = align(layout->lods[lv].slice_height, hz_align_j); /* according to the formula, slices are packed together vertically */ hz_height += h * u_minify(templ->depth0, lv); } hz_height /= 2; break; default: assert(!"unknown HiZ walk"); hz_width = 0; hz_height = 0; break; } /* * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks. * Experiments on Haswell show that aligning the RECTLIST primitive and * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be * aligned. */ hz_clear_w = 8; hz_clear_h = 4; switch (templ->nr_samples) { case 0: case 1: default: break; case 2: hz_clear_w /= 2; break; case 4: hz_clear_w /= 2; hz_clear_h /= 2; break; case 8: hz_clear_w /= 4; hz_clear_h /= 2; break; case 16: hz_clear_w /= 4; hz_clear_h /= 4; break; } for (lv = 0; lv <= templ->last_level; lv++) { if (u_minify(layout->width0, lv) % hz_clear_w || u_minify(layout->height0, lv) % hz_clear_h) break; layout->aux_enables |= 1 << lv; } /* we padded to allow this in layout_align() */ if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1) layout->aux_enables |= 0x1; /* align to Y-tile */ layout->aux_stride = align(hz_width, 128); layout->aux_height = align(hz_height, 32); } static void layout_calculate_mcs_size(struct ilo_layout *layout, struct ilo_layout_params *params) { const struct pipe_resource *templ = params->templ; int mcs_width, mcs_height, mcs_cpp; int downscale_x, downscale_y; assert(layout->aux == ILO_LAYOUT_AUX_MCS); if (templ->nr_samples > 1) { /* * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The * need of scale down could be that the clear rectangle is used to clear * the MCS instead of the RT. * * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The * 2x2 factor could come from that the hardware writes 128 bits (an * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2 * pixel block in the RT. */ switch (templ->nr_samples) { case 2: case 4: downscale_x = 8; downscale_y = 2; mcs_cpp = 1; break; case 8: downscale_x = 2; downscale_y = 2; mcs_cpp = 4; break; case 16: downscale_x = 2; downscale_y = 1; mcs_cpp = 8; break; default: assert(!"unsupported sample count"); return; break; } /* * It also appears that the 2x2 subspans generated by the scaled-down * clear rectangle cannot be masked. The scale-down clear rectangle * thus must be aligned to 2x2, and we need to pad. */ mcs_width = align(layout->width0, downscale_x * 2); mcs_height = align(layout->height0, downscale_y * 2); } else { /* * From the Ivy Bridge PRM, volume 2 part 1, page 327: * * " Pixels Lines * TiledY RT CL * bpp * 32 8 4 * 64 4 4 * 128 2 4 * * TiledX RT CL * bpp * 32 16 2 * 64 8 2 * 128 4 2" * * This table and the two following tables define the RT alignments, the * clear rectangle alignments, and the clear rectangle scale factors. * Viewing the RT alignments as the sizes of 128-byte blocks, we can see * that the clear rectangle alignments are 16x32 blocks, and the clear * rectangle scale factors are 8x16 blocks. * * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the * RT. Similar to the MSAA cases, we can argue that an OWord maps to * 8x16 blocks. * * One problem with this reasoning is that a Y-tile in MCS has 8x32 * OWords and maps to 64x512 128-byte blocks. This differs from i965, * which says that a Y-tile maps to 128x256 blocks (\see * intel_get_non_msrt_mcs_alignment). It does not really change * anything except for the size of the allocated MCS. Let's see if we * hit out-of-bound access. */ switch (layout->tiling) { case GEN6_TILING_X: downscale_x = 64 / layout->block_size; downscale_y = 2; break; case GEN6_TILING_Y: downscale_x = 32 / layout->block_size; downscale_y = 4; break; default: assert(!"unsupported tiling mode"); return; break; } downscale_x *= 8; downscale_y *= 16; /* * From the Haswell PRM, volume 7, page 652: * * "Clear rectangle must be aligned to two times the number of * pixels in the table shown below due to 16X16 hashing across the * slice." * * The scaled-down clear rectangle must be aligned to 4x4 instead of * 2x2, and we need to pad. */ mcs_width = align(layout->width0, downscale_x * 4) / downscale_x; mcs_height = align(layout->height0, downscale_y * 4) / downscale_y; mcs_cpp = 16; /* an OWord */ } layout->aux_enables = (1 << (templ->last_level + 1)) - 1; /* align to Y-tile */ layout->aux_stride = align(mcs_width * mcs_cpp, 128); layout->aux_height = align(mcs_height, 32); } /** * The texutre is for transfer only. We can define our own layout to save * space. */ static void layout_init_for_transfer(struct ilo_layout *layout, const struct ilo_dev_info *dev, const struct pipe_resource *templ) { const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ? templ->depth0 : templ->array_size; unsigned layer_width, layer_height; assert(templ->last_level == 0); assert(templ->nr_samples <= 1); layout->aux = ILO_LAYOUT_AUX_NONE; layout->width0 = templ->width0; layout->height0 = templ->height0; layout->format = templ->format; layout->block_width = util_format_get_blockwidth(templ->format); layout->block_height = util_format_get_blockheight(templ->format); layout->block_size = util_format_get_blocksize(templ->format); layout->walk = ILO_LAYOUT_WALK_LOD; layout->valid_tilings = LAYOUT_TILING_NONE; layout->tiling = GEN6_TILING_NONE; layout->align_i = layout->block_width; layout->align_j = layout->block_height; assert(util_is_power_of_two(layout->block_width) && util_is_power_of_two(layout->block_height)); /* use packed layout */ layer_width = align(templ->width0, layout->align_i); layer_height = align(templ->height0, layout->align_j); layout->lods[0].slice_width = layer_width; layout->lods[0].slice_height = layer_height; layout->bo_stride = (layer_width / layout->block_width) * layout->block_size; layout->bo_stride = align(layout->bo_stride, 64); layout->bo_height = (layer_height / layout->block_height) * num_layers; } /** * Initialize the layout. Callers should zero-initialize \p layout first. */ void ilo_layout_init(struct ilo_layout *layout, const struct ilo_dev_info *dev, const struct pipe_resource *templ) { struct ilo_layout_params params; bool transfer_only; /* use transfer layout when the texture is never bound to GPU */ transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_TRANSFER_READ)); if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) { layout_init_for_transfer(layout, dev, templ); return; } memset(¶ms, 0, sizeof(params)); params.dev = dev; params.templ = templ; /* note that there are dependencies between these functions */ layout_init_aux(layout, ¶ms); layout_init_size_and_format(layout, ¶ms); layout_init_walk(layout, ¶ms); layout_init_tiling(layout, ¶ms); layout_init_alignments(layout, ¶ms); layout_init_lods(layout, ¶ms); layout_init_layer_height(layout, ¶ms); layout_align(layout, ¶ms); layout_calculate_bo_size(layout, ¶ms); switch (layout->aux) { case ILO_LAYOUT_AUX_HIZ: layout_calculate_hiz_size(layout, ¶ms); break; case ILO_LAYOUT_AUX_MCS: layout_calculate_mcs_size(layout, ¶ms); break; default: break; } } /** * Update the tiling mode and bo stride (for imported resources). */ bool ilo_layout_update_for_imported_bo(struct ilo_layout *layout, enum gen_surface_tiling tiling, unsigned bo_stride) { if (!(layout->valid_tilings & (1 << tiling))) return false; if ((tiling == GEN6_TILING_X && bo_stride % 512) || (tiling == GEN6_TILING_Y && bo_stride % 128) || (tiling == GEN8_TILING_W && bo_stride % 64)) return false; layout->tiling = tiling; layout->bo_stride = bo_stride; return true; }