/* * Mesa 3-D graphics library * * Copyright (C) 2012-2013 LunarG, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Authors: * Chia-I Wu */ #include "util/u_transfer.h" #include "ilo_cp.h" #include "ilo_context.h" #include "ilo_screen.h" #include "ilo_resource.h" /* use PIPE_BIND_CUSTOM to indicate MCS */ #define ILO_BIND_MCS PIPE_BIND_CUSTOM static struct intel_bo * alloc_buf_bo(const struct ilo_resource *res) { struct ilo_screen *is = ilo_screen(res->base.screen); struct intel_bo *bo; const char *name; const unsigned size = res->bo_width; switch (res->base.bind) { case PIPE_BIND_VERTEX_BUFFER: name = "vertex buffer"; break; case PIPE_BIND_INDEX_BUFFER: name = "index buffer"; break; case PIPE_BIND_CONSTANT_BUFFER: name = "constant buffer"; break; case PIPE_BIND_STREAM_OUTPUT: name = "stream output"; break; default: name = "unknown buffer"; break; } /* this is what a buffer supposed to be like */ assert(res->bo_width * res->bo_height * res->bo_cpp == size); assert(res->tiling == INTEL_TILING_NONE); assert(res->bo_stride == 0); if (res->handle) { bo = is->winsys->import_handle(is->winsys, name, res->bo_width, res->bo_height, res->bo_cpp, res->handle); /* since the bo is shared to us, make sure it meets the expectations */ if (bo) { assert(bo->get_size(res->bo) == size); assert(bo->get_tiling(res->bo) == res->tiling); assert(bo->get_pitch(res->bo) == res->bo_stride); } } else { bo = is->winsys->alloc_buffer(is->winsys, name, size, 0); } return bo; } static struct intel_bo * alloc_tex_bo(const struct ilo_resource *res) { struct ilo_screen *is = ilo_screen(res->base.screen); struct intel_bo *bo; const char *name; switch (res->base.target) { case PIPE_TEXTURE_1D: name = "1D texture"; break; case PIPE_TEXTURE_2D: name = "2D texture"; break; case PIPE_TEXTURE_3D: name = "3D texture"; break; case PIPE_TEXTURE_CUBE: name = "cube texture"; break; case PIPE_TEXTURE_RECT: name = "rectangle texture"; break; case PIPE_TEXTURE_1D_ARRAY: name = "1D array texture"; break; case PIPE_TEXTURE_2D_ARRAY: name = "2D array texture"; break; case PIPE_TEXTURE_CUBE_ARRAY: name = "cube array texture"; break; default: name ="unknown texture"; break; } if (res->handle) { bo = is->winsys->import_handle(is->winsys, name, res->bo_width, res->bo_height, res->bo_cpp, res->handle); } else { const bool for_render = (res->base.bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET)); const unsigned long flags = (for_render) ? INTEL_ALLOC_FOR_RENDER : 0; bo = is->winsys->alloc(is->winsys, name, res->bo_width, res->bo_height, res->bo_cpp, res->tiling, flags); } return bo; } static bool realloc_bo(struct ilo_resource *res) { struct intel_bo *old_bo = res->bo; /* a shared bo cannot be reallocated */ if (old_bo && res->handle) return false; if (res->base.target == PIPE_BUFFER) res->bo = alloc_buf_bo(res); else res->bo = alloc_tex_bo(res); if (!res->bo) { res->bo = old_bo; return false; } /* winsys may decide to use a different tiling */ res->tiling = res->bo->get_tiling(res->bo); res->bo_stride = res->bo->get_pitch(res->bo); if (old_bo) old_bo->unreference(old_bo); return true; } static void ilo_transfer_inline_write(struct pipe_context *pipe, struct pipe_resource *r, unsigned level, unsigned usage, const struct pipe_box *box, const void *data, unsigned stride, unsigned layer_stride) { struct ilo_context *ilo = ilo_context(pipe); struct ilo_resource *res = ilo_resource(r); int offset, size; bool will_be_busy; /* * Fall back to map(), memcpy(), and unmap(). We use this path for * unsynchronized write, as the buffer is likely to be busy and pwrite() * will stall. */ if (unlikely(res->base.target != PIPE_BUFFER) || (usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { u_default_transfer_inline_write(pipe, r, level, usage, box, data, stride, layer_stride); return; } /* * XXX With hardware context support, the bo may be needed by GPU without * being referenced by ilo->cp->bo. We have to flush unconditionally, and * that is bad. */ if (ilo->cp->hw_ctx) ilo_cp_flush(ilo->cp); will_be_busy = ilo->cp->bo->references(ilo->cp->bo, res->bo); /* see if we can avoid stalling */ if (will_be_busy || intel_bo_is_busy(res->bo)) { bool will_stall = true; if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { /* old data not needed so discard the old bo to avoid stalling */ if (realloc_bo(res)) will_stall = false; } else { /* * We could allocate a temporary bo to hold the data and emit * pipelined copy blit to move them to res->bo. But for now, do * nothing. */ } /* flush to make bo busy (so that pwrite() stalls as it should be) */ if (will_stall && will_be_busy) ilo_cp_flush(ilo->cp); } /* they should specify just an offset and a size */ assert(level == 0); assert(box->y == 0); assert(box->z == 0); assert(box->height == 1); assert(box->depth == 1); offset = box->x; size = box->width; res->bo->pwrite(res->bo, offset, size, data); } static void ilo_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct ilo_resource *res = ilo_resource(transfer->resource); res->bo->unmap(res->bo); pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } static void ilo_transfer_flush_region(struct pipe_context *pipe, struct pipe_transfer *transfer, const struct pipe_box *box) { } static bool map_resource(struct ilo_context *ilo, struct ilo_resource *res, unsigned usage) { struct ilo_screen *is = ilo_screen(res->base.screen); bool will_be_busy; int err; /* simply map unsynchronized */ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { err = res->bo->map_unsynchronized(res->bo); return !err; } /* * XXX With hardware context support, the bo may be needed by GPU without * being referenced by ilo->cp->bo. We have to flush unconditionally, and * that is bad. */ if (ilo->cp->hw_ctx) ilo_cp_flush(ilo->cp); will_be_busy = ilo->cp->bo->references(ilo->cp->bo, res->bo); /* see if we can avoid stalling */ if (will_be_busy || intel_bo_is_busy(res->bo)) { bool will_stall = true; if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { /* discard old bo and allocate a new one for mapping */ if (realloc_bo(res)) will_stall = false; } else if (usage & PIPE_TRANSFER_MAP_DIRECTLY) { /* nothing we can do */ } else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { /* * We could allocate and return a system buffer here. When a region * of the buffer is explicitly flushed, we pwrite() the region to a * temporary bo and emit pipelined copy blit. * * For now, do nothing. */ } else if (usage & PIPE_TRANSFER_DISCARD_RANGE) { /* * We could allocate a temporary bo for mapping, and emit pipelined * copy blit upon unmapping. * * For now, do nothing. */ } if (will_stall) { if (usage & PIPE_TRANSFER_DONTBLOCK) return false; /* flush to make bo busy (so that map() stalls as it should be) */ if (will_be_busy) ilo_cp_flush(ilo->cp); } } /* prefer map() when there is the last-level cache */ if (res->tiling == INTEL_TILING_NONE && (is->dev.has_llc || (usage & PIPE_TRANSFER_READ))) err = res->bo->map(res->bo, (usage & PIPE_TRANSFER_WRITE)); else err = res->bo->map_gtt(res->bo); return !err; } static void * ilo_transfer_map(struct pipe_context *pipe, struct pipe_resource *r, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **transfer) { struct ilo_context *ilo = ilo_context(pipe); struct ilo_resource *res = ilo_resource(r); struct pipe_transfer *xfer; void *ptr; int x, y; xfer = MALLOC_STRUCT(pipe_transfer); if (!xfer) return NULL; if (!map_resource(ilo, res, usage)) { FREE(xfer); return NULL; } /* init transfer */ xfer->resource = NULL; pipe_resource_reference(&xfer->resource, &res->base); xfer->level = level; xfer->usage = usage; xfer->box = *box; /* stride for a block row, not a texel row */ xfer->stride = res->bo_stride; /* * we can walk through layers when the resource is a texture array or * when this is the first level of a 3D texture being mapped */ if (res->base.array_size > 1 || (res->base.target == PIPE_TEXTURE_3D && level == 0)) { const unsigned qpitch = res->slice_offsets[level][1].y - res->slice_offsets[level][0].y; assert(qpitch % res->block_height == 0); xfer->layer_stride = (qpitch / res->block_height) * xfer->stride; } else { xfer->layer_stride = 0; } x = res->slice_offsets[level][box->z].x; y = res->slice_offsets[level][box->z].y; x += box->x; y += box->y; /* in blocks */ assert(x % res->block_width == 0 && y % res->block_height == 0); x /= res->block_width; y /= res->block_height; ptr = res->bo->get_virtual(res->bo); ptr += y * res->bo_stride + x * res->bo_cpp; *transfer = xfer; return ptr; } static bool alloc_slice_offsets(struct ilo_resource *res) { int depth, lv; /* sum the depths of all levels */ depth = 0; for (lv = 0; lv <= res->base.last_level; lv++) depth += u_minify(res->base.depth0, lv); /* * There are (depth * res->base.array_size) slices. Either depth is one * (non-3D) or res->base.array_size is one (non-array), but it does not * matter. */ res->slice_offsets[0] = CALLOC(depth * res->base.array_size, sizeof(res->slice_offsets[0][0])); if (!res->slice_offsets[0]) return false; /* point to the respective positions in the buffer */ for (lv = 1; lv <= res->base.last_level; lv++) { res->slice_offsets[lv] = res->slice_offsets[lv - 1] + u_minify(res->base.depth0, lv - 1) * res->base.array_size; } return true; } static void free_slice_offsets(struct ilo_resource *res) { int lv; FREE(res->slice_offsets[0]); for (lv = 0; lv <= res->base.last_level; lv++) res->slice_offsets[lv] = NULL; } struct layout_tex_info { bool compressed; int block_width, block_height; int align_i, align_j; bool array_spacing_full; bool interleaved; int qpitch; struct { int w, h, d; } sizes[PIPE_MAX_TEXTURE_LEVELS]; }; /** * Prepare for texture layout. */ static void layout_tex_init(const struct ilo_resource *res, struct layout_tex_info *info) { struct ilo_screen *is = ilo_screen(res->base.screen); const enum intel_tiling_mode tiling = res->tiling; const struct pipe_resource *templ = &res->base; int last_level, lv; memset(info, 0, sizeof(*info)); info->compressed = util_format_is_compressed(templ->format); info->block_width = util_format_get_blockwidth(templ->format); info->block_height = util_format_get_blockheight(templ->format); /* * From the Sandy Bridge PRM, volume 1 part 1, page 113: * * "surface format align_i align_j * YUV 4:2:2 formats 4 *see below * BC1-5 4 4 * FXT1 8 4 * all other formats 4 *see below" * * "- align_j = 4 for any depth buffer * - align_j = 2 for separate stencil buffer * - align_j = 4 for any render target surface is multisampled (4x) * - align_j = 4 for any render target surface with Surface Vertical * Alignment = VALIGN_4 * - align_j = 2 for any render target surface with Surface Vertical * Alignment = VALIGN_2 * - align_j = 2 for all other render target surface * - align_j = 2 for any sampling engine surface with Surface Vertical * Alignment = VALIGN_2 * - align_j = 4 for any sampling engine surface with Surface Vertical * Alignment = VALIGN_4" * * From the Sandy Bridge PRM, volume 4 part 1, page 86: * * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if * the Surface Format is 96 bits per element (BPE)." * * They can be rephrased as * * align_i align_j * compressed formats block width block height * PIPE_FORMAT_S8_UINT 4 2 * other depth/stencil formats 4 4 * 4x multisampled 4 4 * bpp 96 4 2 * others 4 2 or 4 */ /* * From the Ivy Bridge PRM, volume 1 part 1, page 110: * * "surface defined by surface format align_i align_j * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4 * not D16_UNORM 4 4 * 3DSTATE_STENCIL_BUFFER N/A 8 8 * SURFACE_STATE BC*, ETC*, EAC* 4 4 * FXT1 8 4 * all others (set by SURFACE_STATE)" * * From the Ivy Bridge PRM, volume 4 part 1, page 63: * * "- This field (Surface Vertical Aligment) is intended to be set to * VALIGN_4 if the surface was rendered as a depth buffer, for a * multisampled (4x) render target, or for a multisampled (8x) * render target, since these surfaces support only alignment of 4. * - Use of VALIGN_4 for other surfaces is supported, but uses more * memory. * - This field must be set to VALIGN_4 for all tiled Y Render Target * surfaces. * - Value of 1 is not supported for format YCRCB_NORMAL (0x182), * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field * must be set to VALIGN_4." * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT." * * "- This field (Surface Horizontal Aligment) is intended to be set to * HALIGN_8 only if the surface was rendered as a depth buffer with * Z16 format or a stencil buffer, since these surfaces support only * alignment of 8. * - Use of HALIGN_8 for other surfaces is supported, but uses more * memory. * - This field must be set to HALIGN_4 if the Surface Format is BC*. * - This field must be set to HALIGN_8 if the Surface Format is * FXT1." * * They can be rephrased as * * align_i align_j * compressed formats block width block height * PIPE_FORMAT_Z16_UNORM 8 4 * PIPE_FORMAT_S8_UINT 8 8 * other depth/stencil formats 4 or 8 4 * 2x or 4x multisampled 4 or 8 4 * tiled Y 4 or 8 4 (if rt) * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2 * others 4 or 8 2 or 4 */ if (info->compressed) { /* this happens to be the case */ info->align_i = info->block_width; info->align_j = info->block_height; } else if (util_format_is_depth_or_stencil(templ->format)) { if (is->dev.gen >= ILO_GEN(7)) { switch (templ->format) { case PIPE_FORMAT_Z16_UNORM: info->align_i = 8; info->align_j = 4; break; case PIPE_FORMAT_S8_UINT: info->align_i = 8; info->align_j = 8; break; default: /* * From the Ivy Bridge PRM, volume 2 part 1, page 319: * * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and * Depth Coordinate Offset X) must be zero to ensure correct * alignment" * * We will make use of them and setting align_i to 8 help us meet * the requirement. */ info->align_i = (templ->last_level > 0) ? 8 : 4; info->align_j = 4; break; } } else { switch (templ->format) { case PIPE_FORMAT_S8_UINT: info->align_i = 4; info->align_j = 2; break; default: info->align_i = 4; info->align_j = 4; break; } } } else { const bool valign_4 = (templ->nr_samples > 1) || (is->dev.gen >= ILO_GEN(7) && (templ->bind & PIPE_BIND_RENDER_TARGET) && tiling == INTEL_TILING_Y); if (valign_4) assert(util_format_get_blocksizebits(templ->format) != 96); info->align_i = 4; info->align_j = (valign_4) ? 4 : 2; } /* * the fact that align i and j are multiples of block width and height * respectively is what makes the size of the bo a multiple of the block * size, slices start at block boundaries, and many of the computations * work. */ assert(info->align_i % info->block_width == 0); assert(info->align_j % info->block_height == 0); /* make sure align() works */ assert(util_is_power_of_two(info->align_i) && util_is_power_of_two(info->align_j)); assert(util_is_power_of_two(info->block_width) && util_is_power_of_two(info->block_height)); if (is->dev.gen >= ILO_GEN(7)) { /* * It is not explicitly states, but render targets are expected to be * UMS/CMS (samples non-interleaved) and depth/stencil buffers are * expected to be IMS (samples interleaved). * * See "Multisampled Surface Storage Format" field of SURFACE_STATE. */ if (util_format_is_depth_or_stencil(templ->format)) { info->interleaved = true; /* * From the Ivy Bridge PRM, volume 1 part 1, page 111: * * "note that the depth buffer and stencil buffer have an implied * value of ARYSPC_FULL" */ info->array_spacing_full = true; } else { info->interleaved = false; /* * From the Ivy Bridge PRM, volume 4 part 1, page 66: * * "If Multisampled Surface Storage Format is MSFMT_MSS and * Number of Multisamples is not MULTISAMPLECOUNT_1, this field * (Surface Array Spacing) must be set to ARYSPC_LOD0." * * As multisampled resources are not mipmapped, we never use * ARYSPC_FULL for them. */ if (templ->nr_samples > 1) assert(templ->last_level == 0); info->array_spacing_full = (templ->last_level > 0); } } else { /* GEN6 supports only interleaved samples */ info->interleaved = true; /* * From the Sandy Bridge PRM, volume 1 part 1, page 115: * * "The separate stencil buffer does not support mip mapping, thus * the storage for LODs other than LOD 0 is not needed. The * following QPitch equation applies only to the separate stencil * buffer: * * QPitch = h_0" * * GEN6 does not support compact spacing otherwise. */ info->array_spacing_full = (templ->format != PIPE_FORMAT_S8_UINT); } last_level = templ->last_level; /* need at least 2 levels to compute full qpitch */ if (last_level == 0 && templ->array_size > 1 && info->array_spacing_full) last_level++; /* compute mip level sizes */ for (lv = 0; lv <= last_level; lv++) { int w, h, d; w = u_minify(templ->width0, lv); h = u_minify(templ->height0, lv); d = u_minify(templ->depth0, lv); /* * From the Sandy Bridge PRM, volume 1 part 1, page 114: * * "The dimensions of the mip maps are first determined by applying * the sizing algorithm presented in Non-Power-of-Two Mipmaps * above. Then, if necessary, they are padded out to compression * block boundaries." */ w = align(w, info->block_width); h = align(h, info->block_height); /* * From the Sandy Bridge PRM, volume 1 part 1, page 111: * * "If the surface is multisampled (4x), these values must be * adjusted as follows before proceeding: * * W_L = ceiling(W_L / 2) * 4 * H_L = ceiling(H_L / 2) * 4" * * From the Ivy Bridge PRM, volume 1 part 1, page 108: * * "If the surface is multisampled and it is a depth or stencil * surface or Multisampled Surface StorageFormat in SURFACE_STATE * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows * before proceeding: * * #samples W_L = H_L = * 2 ceiling(W_L / 2) * 4 HL [no adjustment] * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" * * For interleaved samples (4x), where pixels * * (x, y ) (x+1, y ) * (x, y+1) (x+1, y+1) * * would be is occupied by * * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) * * Thus the need to * * w = align(w, 2) * 2; * y = align(y, 2) * 2; */ if (info->interleaved) { switch (templ->nr_samples) { case 0: case 1: break; case 2: w = align(w, 2) * 2; break; case 4: w = align(w, 2) * 2; h = align(h, 2) * 2; break; case 8: w = align(w, 2) * 4; h = align(h, 2) * 2; break; case 16: w = align(w, 2) * 4; h = align(h, 2) * 4; break; default: assert(!"unsupported sample count"); break; } } info->sizes[lv].w = w; info->sizes[lv].h = h; info->sizes[lv].d = d; } if (templ->array_size > 1) { const int h0 = align(info->sizes[0].h, info->align_j); if (info->array_spacing_full) { const int h1 = align(info->sizes[1].h, info->align_j); /* * From the Sandy Bridge PRM, volume 1 part 1, page 115: * * "The following equation is used for surface formats other than * compressed textures: * * QPitch = (h0 + h1 + 11j)" * * "The equation for compressed textures (BC* and FXT1 surface * formats) follows: * * QPitch = (h0 + h1 + 11j) / 4" * * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than * the value calculated in the equation above, for every other * odd Surface Height starting from 1 i.e. 1,5,9,13" * * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: * * "If Surface Array Spacing is set to ARYSPC_FULL (note that the * depth buffer and stencil buffer have an implied value of * ARYSPC_FULL): * * QPitch = (h0 + h1 + 12j) * QPitch = (h0 + h1 + 12j) / 4 (compressed) * * (There are many typos or missing words here...)" * * To access the N-th slice, an offset of (Stride * QPitch * N) is * added to the base address. The PRM divides QPitch by 4 for * compressed formats because the block height for those formats are * 4, and it wants QPitch to mean the number of memory rows, as * opposed to texel rows, between slices. Since we use texel rows in * res->slice_offsets, we do not need to divide QPitch by 4. */ info->qpitch = h0 + h1 + ((is->dev.gen >= ILO_GEN(7)) ? 12 : 11) * info->align_j; if (is->dev.gen == ILO_GEN(6) && templ->nr_samples > 1 && templ->height0 % 4 == 1) info->qpitch += 4; } else { info->qpitch = h0; } } } /** * Layout a 2D texture. */ static void layout_tex_2d(struct ilo_resource *res, const struct layout_tex_info *info) { const struct pipe_resource *templ = &res->base; unsigned int level_x, level_y, num_slices; int lv; res->bo_width = 0; res->bo_height = 0; level_x = 0; level_y = 0; for (lv = 0; lv <= templ->last_level; lv++) { const unsigned int level_w = info->sizes[lv].w; const unsigned int level_h = info->sizes[lv].h; int slice; for (slice = 0; slice < templ->array_size; slice++) { res->slice_offsets[lv][slice].x = level_x; /* slices are qpitch apart in Y-direction */ res->slice_offsets[lv][slice].y = level_y + info->qpitch * slice; } /* extend the size of the monolithic bo to cover this mip level */ if (res->bo_width < level_x + level_w) res->bo_width = level_x + level_w; if (res->bo_height < level_y + level_h) res->bo_height = level_y + level_h; /* MIPLAYOUT_BELOW */ if (lv == 1) level_x += align(level_w, info->align_i); else level_y += align(level_h, info->align_j); } num_slices = templ->array_size; /* samples of the same index are stored in a slice */ if (templ->nr_samples > 1 && !info->interleaved) num_slices *= templ->nr_samples; /* we did not take slices into consideration in the computation above */ res->bo_height += info->qpitch * (num_slices - 1); } /** * Layout a 3D texture. */ static void layout_tex_3d(struct ilo_resource *res, const struct layout_tex_info *info) { const struct pipe_resource *templ = &res->base; unsigned int level_y; int lv; res->bo_width = 0; res->bo_height = 0; level_y = 0; for (lv = 0; lv <= templ->last_level; lv++) { const unsigned int level_w = info->sizes[lv].w; const unsigned int level_h = info->sizes[lv].h; const unsigned int level_d = info->sizes[lv].d; const unsigned int slice_pitch = align(level_w, info->align_i); const unsigned int slice_qpitch = align(level_h, info->align_j); const unsigned int num_slices_per_row = 1 << lv; int slice; for (slice = 0; slice < level_d; slice += num_slices_per_row) { int i; for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) { res->slice_offsets[lv][slice + i].x = slice_pitch * i; res->slice_offsets[lv][slice + i].y = level_y; } /* move on to the next slice row */ level_y += slice_qpitch; } /* rightmost slice */ slice = MIN2(num_slices_per_row, level_d) - 1; /* extend the size of the monolithic bo to cover this slice */ if (res->bo_width < slice_pitch * slice + level_w) res->bo_width = slice_pitch * slice + level_w; if (lv == templ->last_level) res->bo_height = (level_y - slice_qpitch) + level_h; } } /** * Guess the texture size. For large textures, the errors are relative small. */ static size_t guess_tex_size(const struct pipe_resource *templ, enum intel_tiling_mode tiling) { int bo_width, bo_height, bo_stride; /* HALIGN_8 and VALIGN_4 */ bo_width = align(templ->width0, 8); bo_height = align(templ->height0, 4); if (templ->target == PIPE_TEXTURE_3D) { const int num_rows = util_next_power_of_two(templ->depth0); int lv, sum; sum = bo_height * templ->depth0; for (lv = 1; lv <= templ->last_level; lv++) sum += u_minify(bo_height, lv) * u_minify(num_rows, lv); bo_height = sum; } else if (templ->last_level > 0) { /* MIPLAYOUT_BELOW, ignore qpich */ bo_height = (bo_height + u_minify(bo_height, 1)) * templ->array_size; } bo_stride = util_format_get_stride(templ->format, bo_width); switch (tiling) { case INTEL_TILING_X: bo_stride = align(bo_stride, 512); bo_height = align(bo_height, 8); break; case INTEL_TILING_Y: bo_stride = align(bo_stride, 128); bo_height = align(bo_height, 32); break; default: bo_height = align(bo_height, 2); break; } return util_format_get_2d_size(templ->format, bo_stride, bo_height); } static enum intel_tiling_mode get_tex_tiling(const struct ilo_resource *res) { const struct pipe_resource *templ = &res->base; /* * From the Sandy Bridge PRM, volume 1 part 2, page 32: * * "Display/Overlay Y-Major not supported. * X-Major required for Async Flips" */ if (unlikely(templ->bind & PIPE_BIND_SCANOUT)) return INTEL_TILING_X; /* * From the Sandy Bridge PRM, volume 3 part 2, page 158: * * "The cursor surface address must be 4K byte aligned. The cursor must * be in linear memory, it cannot be tiled." */ if (unlikely(templ->bind & PIPE_BIND_CURSOR)) return INTEL_TILING_NONE; /* * From the Ivy Bridge PRM, volume 4 part 1, page 76: * * "The MCS surface must be stored as Tile Y." */ if (templ->bind & ILO_BIND_MCS) return INTEL_TILING_Y; /* * From the Sandy Bridge PRM, volume 2 part 1, page 318: * * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear * Depth Buffer is not supported." * * "The Depth Buffer, if tiled, must use Y-Major tiling." */ if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { /* separate stencil uses W-tiling but we do not know how to specify that */ return (templ->format == PIPE_FORMAT_S8_UINT) ? INTEL_TILING_NONE : INTEL_TILING_Y; } if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { enum intel_tiling_mode tiling = INTEL_TILING_NONE; /* * From the Sandy Bridge PRM, volume 1 part 2, page 32: * * "NOTE: 128BPE Format Color buffer ( render target ) MUST be * either TileX or Linear." * * Also, heuristically set a minimum width/height for enabling tiling. */ if (util_format_get_blocksizebits(templ->format) == 128 && (templ->bind & PIPE_BIND_RENDER_TARGET) && templ->width0 >= 64) tiling = INTEL_TILING_X; else if ((templ->width0 >= 32 && templ->height0 >= 16) || (templ->width0 >= 16 && templ->height0 >= 32)) tiling = INTEL_TILING_Y; /* make sure the bo can be mapped through GTT if tiled */ if (tiling != INTEL_TILING_NONE) { /* * Usually only the first 256MB of the GTT is mappable. * * See also how intel_context::max_gtt_map_object_size is calculated. */ const size_t mappable_gtt_size = 256 * 1024 * 1024; const size_t size = guess_tex_size(templ, tiling); /* be conservative */ if (size > mappable_gtt_size / 4) tiling = INTEL_TILING_NONE; } return tiling; } return INTEL_TILING_NONE; } static void init_texture(struct ilo_resource *res) { const enum pipe_format format = res->base.format; struct layout_tex_info info; /* determine tiling first as it may affect the layout */ res->tiling = get_tex_tiling(res); layout_tex_init(res, &info); res->compressed = info.compressed; res->block_width = info.block_width; res->block_height = info.block_height; res->halign_8 = (info.align_i == 8); res->valign_4 = (info.align_j == 4); res->array_spacing_full = info.array_spacing_full; res->interleaved = info.interleaved; switch (res->base.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY: layout_tex_2d(res, &info); break; case PIPE_TEXTURE_3D: layout_tex_3d(res, &info); break; default: assert(!"unknown resource target"); break; } /* * From the Sandy Bridge PRM, volume 1 part 2, page 22: * * "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for * W-Major Tiles (W Tiles). Each Block is 8 rows by 8 bytes." * * Since we ask for INTEL_TILING_NONE instead lf INTEL_TILING_W, we need to * manually align the bo width and height to the tile boundaries. */ if (format == PIPE_FORMAT_S8_UINT) { res->bo_width = align(res->bo_width, 64); res->bo_height = align(res->bo_height, 64); } /* in blocks */ assert(res->bo_width % info.block_width == 0); assert(res->bo_height % info.block_height == 0); res->bo_width /= info.block_width; res->bo_height /= info.block_height; res->bo_cpp = util_format_get_blocksize(format); } static void init_buffer(struct ilo_resource *res) { res->bo_width = res->base.width0; res->bo_height = 1; res->bo_cpp = 1; res->bo_stride = 0; res->tiling = INTEL_TILING_NONE; res->compressed = false; res->block_width = 1; res->block_height = 1; res->halign_8 = false; res->valign_4 = false; res->array_spacing_full = false; res->interleaved = false; } static struct pipe_resource * create_resource(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *handle) { struct ilo_resource *res; res = CALLOC_STRUCT(ilo_resource); if (!res) return NULL; res->base = *templ; res->base.screen = screen; pipe_reference_init(&res->base.reference, 1); res->handle = handle; if (!alloc_slice_offsets(res)) { FREE(res); return NULL; } if (templ->target == PIPE_BUFFER) init_buffer(res); else init_texture(res); if (!realloc_bo(res)) { free_slice_offsets(res); FREE(res); return NULL; } return &res->base; } static boolean ilo_can_create_resource(struct pipe_screen *screen, const struct pipe_resource *templ) { /* * We do not know if we will fail until we try to allocate the bo. * So just set a limit on the texture size. */ const size_t max_size = 1 * 1024 * 1024 * 1024; const size_t size = guess_tex_size(templ, INTEL_TILING_Y); return (size <= max_size); } static struct pipe_resource * ilo_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) { return create_resource(screen, templ, NULL); } static struct pipe_resource * ilo_resource_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *handle) { return create_resource(screen, templ, handle); } static boolean ilo_resource_get_handle(struct pipe_screen *screen, struct pipe_resource *r, struct winsys_handle *handle) { struct ilo_resource *res = ilo_resource(r); int err; err = res->bo->export_handle(res->bo, handle); return !err; } static void ilo_resource_destroy(struct pipe_screen *screen, struct pipe_resource *r) { struct ilo_resource *res = ilo_resource(r); free_slice_offsets(res); res->bo->unreference(res->bo); FREE(res); } /** * Initialize resource-related functions. */ void ilo_init_resource_functions(struct ilo_screen *is) { is->base.can_create_resource = ilo_can_create_resource; is->base.resource_create = ilo_resource_create; is->base.resource_from_handle = ilo_resource_from_handle; is->base.resource_get_handle = ilo_resource_get_handle; is->base.resource_destroy = ilo_resource_destroy; } /** * Initialize transfer-related functions. */ void ilo_init_transfer_functions(struct ilo_context *ilo) { ilo->base.transfer_map = ilo_transfer_map; ilo->base.transfer_flush_region = ilo_transfer_flush_region; ilo->base.transfer_unmap = ilo_transfer_unmap; ilo->base.transfer_inline_write = ilo_transfer_inline_write; } /** * Return the offset (in bytes) to a slice within the bo. * * When tile_aligned is true, the offset is to the tile containing the start * address of the slice. x_offset and y_offset are offsets (in pixels) from * the tile start to slice start. x_offset is always a multiple of 4 and * y_offset is always a multiple of 2. */ unsigned ilo_resource_get_slice_offset(const struct ilo_resource *res, int level, int slice, bool tile_aligned, unsigned *x_offset, unsigned *y_offset) { const unsigned x = res->slice_offsets[level][slice].x / res->block_width; const unsigned y = res->slice_offsets[level][slice].y / res->block_height; unsigned tile_w, tile_h, tile_size, row_size; unsigned slice_offset; /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */ switch (res->tiling) { case INTEL_TILING_NONE: tile_w = res->bo_cpp; tile_h = 1; break; case INTEL_TILING_X: tile_w = 512; tile_h = 8; break; case INTEL_TILING_Y: tile_w = 128; tile_h = 32; break; default: assert(!"unknown tiling"); tile_w = res->bo_cpp; tile_h = 1; break; } tile_size = tile_w * tile_h; row_size = res->bo_stride * tile_h; /* * for non-tiled resources, this is equivalent to * * slice_offset = y * res->bo_stride + x * res->bo_cpp; */ slice_offset = row_size * (y / tile_h) + tile_size * (x * res->bo_cpp / tile_w); /* * Since res->bo_stride is a multiple of tile_w, slice_offset should be * aligned at this point. */ assert(slice_offset % tile_size == 0); if (tile_aligned) { /* * because of the possible values of align_i and align_j in * layout_tex_init(), x_offset must be a multiple of 4 and y_offset must * be a multiple of 2. */ if (x_offset) { assert(tile_w % res->bo_cpp == 0); *x_offset = (x % (tile_w / res->bo_cpp)) * res->block_width; assert(*x_offset % 4 == 0); } if (y_offset) { *y_offset = (y % tile_h) * res->block_height; assert(*y_offset % 2 == 0); } } else { const unsigned tx = (x * res->bo_cpp) % tile_w; const unsigned ty = y % tile_h; switch (res->tiling) { case INTEL_TILING_NONE: assert(tx == 0 && ty == 0); break; case INTEL_TILING_X: slice_offset += tile_w * ty + tx; break; case INTEL_TILING_Y: slice_offset += tile_h * 16 * (tx / 16) + ty * 16 + (tx % 16); break; } if (x_offset) *x_offset = 0; if (y_offset) *y_offset = 0; } return slice_offset; }