From 81d7f33e30e7f54c5b721085057a53f9cd958fe2 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 14 Jul 2014 10:10:35 +0800 Subject: ilo: move away from drm_intel_bo_alloc_tiled We want to know the exact sizes of the BOs, and the driver has the knowledge to do so. Refactoring of the resource allocation code is needed though. --- src/gallium/drivers/ilo/ilo_resource.c | 529 ++++++++++++++++++--------------- src/gallium/drivers/ilo/ilo_resource.h | 15 +- src/gallium/drivers/ilo/ilo_transfer.c | 6 +- 3 files changed, 300 insertions(+), 250 deletions(-) (limited to 'src/gallium/drivers/ilo') diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index dbcc5f8a82e..61464a2c070 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -57,8 +57,17 @@ struct tex_layout { int qpitch; int width, height; + + int bo_stride, bo_height; + int hiz_stride, hiz_height; }; +/* + * We do not know if we will fail until we try to allocate the bo. + * So just set a limit on the texture size. + */ +static const size_t max_resource_size = 1u << 30; + static void tex_layout_init_qpitch(struct tex_layout *layout) { @@ -682,19 +691,65 @@ tex_layout_init(struct tex_layout *layout, } } -static bool -tex_layout_force_linear(struct tex_layout *layout) +static void +tex_layout_align(struct tex_layout *layout) { - if (!layout->can_be_linear) - return false; + int align_w = 1, align_h = 1, pad_h = 0; /* - * we may be able to switch from VALIGN_4 to VALIGN_2 when the layout was - * Y-tiled, but let's keep it simple + * From the Sandy Bridge PRM, volume 1 part 1, page 118: + * + * "To determine the necessary padding on the bottom and right side of + * the surface, refer to the table in Section 7.18.3.4 for the i and j + * parameters for the surface format in use. The surface must then be + * extended to the next multiple of the alignment unit size in each + * dimension, and all texels contained in this extended surface must + * have valid GTT entries." + * + * "For cube surfaces, an additional two rows of padding are required + * at the bottom of the surface. This must be ensured regardless of + * whether the surface is stored tiled or linear. This is due to the + * potential rotation of cache line orientation from memory to cache." + * + * "For compressed textures (BC* and FXT1 surface formats), padding at + * the bottom of the surface is to an even compressed row, which is + * equal to a multiple of 8 uncompressed texel rows. Thus, for padding + * purposes, these surfaces behave as if j = 8 only for surface + * padding purposes. The value of 4 for j still applies for mip level + * alignment and QPitch calculation." */ - layout->tiling = INTEL_TILING_NONE; + if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) { + align_w = MAX2(align_w, layout->align_i); + align_h = MAX2(align_h, layout->align_j); - return true; + if (layout->templ->target == PIPE_TEXTURE_CUBE) + pad_h += 2; + + if (layout->compressed) + align_h = MAX2(align_h, layout->align_j * 2); + } + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 118: + * + * "If the surface contains an odd number of rows of data, a final row + * below the surface must be allocated." + */ + if (layout->templ->bind & PIPE_BIND_RENDER_TARGET) + align_h = MAX2(align_h, 2); + + /* + * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In + * ilo_texture_can_enable_hiz(), we always return true for the first slice. + * To avoid out-of-bound access, we have to pad. + */ + if (layout->hiz) { + align_w = MAX2(align_w, 8); + align_h = MAX2(align_h, 4); + } + + layout->width = align(layout->width, align_w); + layout->height = align(layout->height + pad_h, align_h); } /** @@ -744,6 +799,8 @@ tex_layout_2d(struct tex_layout *layout) /* we did not take slices into consideration in the computation above */ layout->height += layout->qpitch * (num_slices - 1); + + tex_layout_align(layout); } /** @@ -790,127 +847,176 @@ tex_layout_3d(struct tex_layout *layout) if (lv == templ->last_level) layout->height = (level_y - slice_qpitch) + level_h; } + + tex_layout_align(layout); } -static void -tex_layout_validate(struct tex_layout *layout) +/* note that this may force the texture to be linear */ +static bool +tex_layout_calculate_bo_size(struct tex_layout *layout) { - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 118: - * - * "To determine the necessary padding on the bottom and right side of - * the surface, refer to the table in Section 7.18.3.4 for the i and j - * parameters for the surface format in use. The surface must then be - * extended to the next multiple of the alignment unit size in each - * dimension, and all texels contained in this extended surface must - * have valid GTT entries." - * - * "For cube surfaces, an additional two rows of padding are required - * at the bottom of the surface. This must be ensured regardless of - * whether the surface is stored tiled or linear. This is due to the - * potential rotation of cache line orientation from memory to cache." - * - * "For compressed textures (BC* and FXT1 surface formats), padding at - * the bottom of the surface is to an even compressed row, which is - * equal to a multiple of 8 uncompressed texel rows. Thus, for padding - * purposes, these surfaces behave as if j = 8 only for surface - * padding purposes. The value of 4 for j still applies for mip level - * alignment and QPitch calculation." - */ - if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) { - layout->width = align(layout->width, layout->align_i); - layout->height = align(layout->height, layout->align_j); + assert(layout->width % layout->block_width == 0); + assert(layout->height % layout->block_height == 0); + assert(layout->qpitch % layout->block_height == 0); - if (layout->templ->target == PIPE_TEXTURE_CUBE) - layout->height += 2; + layout->bo_stride = + (layout->width / layout->block_width) * layout->block_size; + layout->bo_height = layout->height / layout->block_height; - if (layout->compressed) - layout->height = align(layout->height, layout->align_j * 2); + while (true) { + int w = layout->bo_stride, h = layout->bo_height; + int align_w, align_h; + + /* + * From the Haswell PRM, volume 5, page 163: + * + * "For linear surfaces, additional padding of 64 bytes is required + * at the bottom of the surface. This is in addition to the padding + * required above." + */ + if (layout->dev->gen >= ILO_GEN(7.5) && + (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) && + layout->tiling == INTEL_TILING_NONE) { + layout->bo_height += + (64 + layout->bo_stride - 1) / layout->bo_stride; + } + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "- For linear render target surfaces, the pitch must be a + * multiple of the element size for non-YUV surface formats. + * Pitch must be a multiple of 2 * element size for YUV surface + * formats. + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + * - For tiled surfaces, the pitch must be a multiple of the tile + * width." + * + * Different requirements may exist when the bo is used in different + * places, but our alignments here should be good enough that we do not + * need to check layout->templ->bind. + */ + switch (layout->tiling) { + case INTEL_TILING_X: + align_w = 512; + align_h = 8; + break; + case INTEL_TILING_Y: + align_w = 128; + align_h = 32; + break; + default: + if (layout->format == PIPE_FORMAT_S8_UINT) { + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 22: + * + * "A 4KB tile is subdivided into 8-high by 8-wide array of + * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 + * bytes." + * + * Since we asked for INTEL_TILING_NONE instead of the non-existent + * INTEL_TILING_W, we want to align to W tiles here. + */ + align_w = 64; + align_h = 64; + } + else { + /* some good enough values */ + align_w = 64; + align_h = 2; + } + break; + } + + w = align(w, align_w); + h = align(h, align_h); + + /* make sure the bo is mappable */ + if (layout->tiling != INTEL_TILING_NONE) { + /* + * Usually only the first 256MB of the GTT is mappable. + * + * See also how intel_context::max_gtt_map_object_size is calculated. + */ + const size_t mappable_gtt_size = 256 * 1024 * 1024; + + /* + * Be conservative. We may be able to switch from VALIGN_4 to + * VALIGN_2 if the layout was Y-tiled, but let's keep it simple. + */ + if (mappable_gtt_size / w / 4 < h) { + if (layout->can_be_linear) { + layout->tiling = INTEL_TILING_NONE; + continue; + } + else { + ilo_warn("cannot force texture to be linear\n"); + } + } + } + + layout->bo_stride = w; + layout->bo_height = h; + break; } - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 118: - * - * "If the surface contains an odd number of rows of data, a final row - * below the surface must be allocated." - */ - if (layout->templ->bind & PIPE_BIND_RENDER_TARGET) - layout->height = align(layout->height, 2); + return (layout->bo_height <= max_resource_size / layout->bo_stride); +} + +static void +tex_layout_calculate_hiz_size(struct tex_layout *layout) +{ + const struct pipe_resource *templ = layout->templ; + const int hz_align_j = 8; + int hz_width, hz_height; + + if (!layout->hiz) + return; /* - * From the Sandy Bridge PRM, volume 1 part 2, page 22: - * - * "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for - * W-Major Tiles (W Tiles). Each Block is 8 rows by 8 bytes." + * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge + * PRM, volume 2 part 1, page 312-313. * - * Since we ask for INTEL_TILING_NONE instead of the non-existent - * INTEL_TILING_W, we need to manually align the width and height to the - * tile boundaries. + * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a + * memory row. */ - if (layout->templ->format == PIPE_FORMAT_S8_UINT) { - layout->width = align(layout->width, 64); - layout->height = align(layout->height, 64); - } - /* - * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In - * ilo_texture_can_enable_hiz(), we always return true for the first slice. - * To avoid out-of-bound access, we have to pad. - */ - if (layout->hiz) { - layout->width = align(layout->width, 8); - layout->height = align(layout->height, 4); - } + hz_width = align(layout->levels[0].w, 16); - assert(layout->width % layout->block_width == 0); - assert(layout->height % layout->block_height == 0); - assert(layout->qpitch % layout->block_height == 0); -} + if (templ->target == PIPE_TEXTURE_3D) { + unsigned lv; -static size_t -tex_layout_estimate_size(const struct tex_layout *layout) -{ - unsigned stride, height; + hz_height = 0; - stride = (layout->width / layout->block_width) * layout->block_size; - height = layout->height / layout->block_height; + for (lv = 0; lv <= templ->last_level; lv++) { + const unsigned h = align(layout->levels[lv].h, hz_align_j); + hz_height += h * layout->levels[lv].d; + } - switch (layout->tiling) { - case INTEL_TILING_X: - stride = align(stride, 512); - height = align(height, 8); - break; - case INTEL_TILING_Y: - stride = align(stride, 128); - height = align(height, 32); - break; - default: - height = align(height, 2); - break; + hz_height /= 2; } + else { + const unsigned h0 = align(layout->levels[0].h, hz_align_j); + unsigned hz_qpitch = h0; - return stride * height; -} + if (layout->array_spacing_full) { + const unsigned h1 = align(layout->levels[1].h, hz_align_j); + const unsigned htail = + ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; -static void -tex_layout_apply(const struct tex_layout *layout, struct ilo_texture *tex) -{ - tex->bo_format = layout->format; + hz_qpitch += h1 + htail; + } - /* in blocks */ - tex->bo_width = layout->width / layout->block_width; - tex->bo_height = layout->height / layout->block_height; - tex->bo_cpp = layout->block_size; - tex->tiling = layout->tiling; + hz_height = hz_qpitch * templ->array_size / 2; - tex->compressed = layout->compressed; - tex->block_width = layout->block_width; - tex->block_height = layout->block_height; + if (layout->dev->gen >= ILO_GEN(7)) + hz_height = align(hz_height, 8); + } - tex->halign_8 = (layout->align_i == 8); - tex->valign_4 = (layout->align_j == 4); - tex->array_spacing_full = layout->array_spacing_full; - tex->interleaved = layout->interleaved; + /* align to Y-tile */ + layout->hiz_stride = align(hz_width, 128); + layout->hiz_height = align(hz_height, 32); } static void @@ -958,8 +1064,6 @@ tex_create_bo(struct ilo_texture *tex, struct ilo_screen *is = ilo_screen(tex->base.screen); const char *name; struct intel_bo *bo; - enum intel_tiling_mode tiling; - unsigned long pitch; switch (tex->base.target) { case PIPE_TEXTURE_1D: @@ -992,9 +1096,16 @@ tex_create_bo(struct ilo_texture *tex, } if (handle) { + enum intel_tiling_mode tiling; + unsigned long pitch; + bo = intel_winsys_import_handle(is->winsys, name, handle, - tex->bo_width, tex->bo_height, tex->bo_cpp, - &tiling, &pitch); + tex->bo_height, &tiling, &pitch); + + if (bo) { + tex->tiling = tiling; + tex->bo_stride = pitch; + } } else { const uint32_t initial_domain = @@ -1002,11 +1113,8 @@ tex_create_bo(struct ilo_texture *tex, PIPE_BIND_RENDER_TARGET)) ? INTEL_DOMAIN_RENDER : 0; - bo = intel_winsys_alloc_texture(is->winsys, name, - tex->bo_width, tex->bo_height, tex->bo_cpp, - tex->tiling, initial_domain, &pitch); - - tiling = tex->tiling; + bo = intel_winsys_alloc_bo(is->winsys, name, tex->tiling, + tex->bo_stride, tex->bo_height, initial_domain); } if (!bo) @@ -1016,8 +1124,6 @@ tex_create_bo(struct ilo_texture *tex, intel_bo_unreference(tex->bo); tex->bo = bo; - tex->tiling = tiling; - tex->bo_stride = pitch; return true; } @@ -1051,55 +1157,15 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout) { struct ilo_screen *is = ilo_screen(tex->base.screen); const struct pipe_resource *templ = layout->templ; - const int hz_align_j = 8; - unsigned hz_width, hz_height, lv; - unsigned long pitch; + unsigned lv; - /* - * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge - * PRM, volume 2 part 1, page 312-313. - * - * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a - * memory row. - */ - - hz_width = align(layout->levels[0].w, 16); - - if (templ->target == PIPE_TEXTURE_3D) { - hz_height = 0; - - for (lv = 0; lv <= templ->last_level; lv++) { - const unsigned h = align(layout->levels[lv].h, hz_align_j); - hz_height += h * layout->levels[lv].d; - } - - hz_height /= 2; - } - else { - const unsigned h0 = align(layout->levels[0].h, hz_align_j); - unsigned hz_qpitch = h0; - - if (layout->array_spacing_full) { - const unsigned h1 = align(layout->levels[1].h, hz_align_j); - const unsigned htail = - ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; - - hz_qpitch += h1 + htail; - } - - hz_height = hz_qpitch * templ->array_size / 2; - - if (layout->dev->gen >= ILO_GEN(7)) - hz_height = align(hz_height, 8); - } - - tex->hiz.bo = intel_winsys_alloc_texture(is->winsys, - "hiz texture", hz_width, hz_height, 1, - INTEL_TILING_Y, INTEL_DOMAIN_RENDER, &pitch); + tex->hiz.bo = intel_winsys_alloc_bo(is->winsys, "hiz texture", + INTEL_TILING_Y, layout->hiz_stride, layout->hiz_height, + INTEL_DOMAIN_RENDER); if (!tex->hiz.bo) return false; - tex->hiz.bo_stride = pitch; + tex->hiz.bo_stride = layout->hiz_stride; /* * From the Sandy Bridge PRM, volume 2 part 1, page 313-314: @@ -1177,6 +1243,42 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout) return true; } +static bool +tex_apply_layout(struct ilo_texture *tex, + const struct tex_layout *layout, + const struct winsys_handle *handle) +{ + tex->bo_format = layout->format; + + tex->tiling = layout->tiling; + tex->bo_stride = layout->bo_stride; + tex->bo_height = layout->bo_height; + + tex->block_width = layout->block_width; + tex->block_height = layout->block_height; + tex->block_size = layout->block_size; + + tex->halign_8 = (layout->align_i == 8); + tex->valign_4 = (layout->align_j == 4); + tex->array_spacing_full = layout->array_spacing_full; + tex->interleaved = layout->interleaved; + + if (!tex_create_bo(tex, handle)) + return false; + + /* allocate separate stencil resource */ + if (layout->separate_stencil && !tex_create_separate_stencil(tex)) + return false; + + if (layout->hiz && !tex_create_hiz(tex, layout)) { + /* Separate Stencil Buffer requires HiZ to be enabled */ + if (layout->dev->gen == ILO_GEN(6) && layout->separate_stencil) + return false; + } + + return true; +} + static void tex_destroy(struct ilo_texture *tex) { @@ -1186,7 +1288,9 @@ tex_destroy(struct ilo_texture *tex) if (tex->separate_s8) tex_destroy(tex->separate_s8); - intel_bo_unreference(tex->bo); + if (tex->bo) + intel_bo_unreference(tex->bo); + tex_free_slices(tex); FREE(tex); } @@ -1234,45 +1338,18 @@ tex_create(struct pipe_screen *screen, break; } - tex_layout_validate(&layout); - - /* make sure the bo can be mapped through GTT if tiled */ - if (layout.tiling != INTEL_TILING_NONE) { - /* - * Usually only the first 256MB of the GTT is mappable. - * - * See also how intel_context::max_gtt_map_object_size is calculated. - */ - const size_t mappable_gtt_size = 256 * 1024 * 1024; - const size_t size = tex_layout_estimate_size(&layout); - - /* be conservative */ - if (size > mappable_gtt_size / 4) - tex_layout_force_linear(&layout); - } - - tex_layout_apply(&layout, tex); - - if (!tex_create_bo(tex, handle)) { - tex_free_slices(tex); - FREE(tex); + if (!tex_layout_calculate_bo_size(&layout)) { + tex_destroy(tex); return NULL; } - /* allocate separate stencil resource */ - if (layout.separate_stencil && !tex_create_separate_stencil(tex)) { + tex_layout_calculate_hiz_size(&layout); + + if (!tex_apply_layout(tex, &layout, handle)) { tex_destroy(tex); return NULL; } - if (layout.hiz && !tex_create_hiz(tex, &layout)) { - /* Separate Stencil Buffer requires HiZ to be enabled */ - if (layout.dev->gen == ILO_GEN(6) && layout.separate_stencil) { - tex_destroy(tex); - return NULL; - } - } - return &tex->base; } @@ -1283,37 +1360,11 @@ tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle) int err; err = intel_winsys_export_handle(is->winsys, tex->bo, - tex->tiling, tex->bo_stride, handle); + tex->tiling, tex->bo_stride, tex->bo_height, handle); return !err; } -/** - * Estimate the texture size. For large textures, the errors should be pretty - * small. - */ -static size_t -tex_estimate_size(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - struct tex_layout layout; - - tex_layout_init(&layout, screen, templ, NULL); - - switch (templ->target) { - case PIPE_TEXTURE_3D: - tex_layout_3d(&layout); - break; - default: - tex_layout_2d(&layout); - break; - } - - tex_layout_validate(&layout); - - return tex_layout_estimate_size(&layout); -} - static bool buf_create_bo(struct ilo_buffer *buf) { @@ -1405,7 +1456,9 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ) buf->bo_size = align(buf->bo_size, 4096); } - if (!buf_create_bo(buf)) { + if (buf->bo_size < templ->width0 || + buf->bo_size > max_resource_size || + !buf_create_bo(buf)) { FREE(buf); return NULL; } @@ -1417,19 +1470,23 @@ static boolean ilo_can_create_resource(struct pipe_screen *screen, const struct pipe_resource *templ) { - /* - * We do not know if we will fail until we try to allocate the bo. - * So just set a limit on the texture size. - */ - const size_t max_size = 1 * 1024 * 1024 * 1024; - size_t size; + struct tex_layout layout; if (templ->target == PIPE_BUFFER) - size = templ->width0; - else - size = tex_estimate_size(screen, templ); + return (templ->width0 <= max_resource_size); + + tex_layout_init(&layout, screen, templ, NULL); + + switch (templ->target) { + case PIPE_TEXTURE_3D: + tex_layout_3d(&layout); + break; + default: + tex_layout_2d(&layout); + break; + } - return (size <= max_size); + return tex_layout_calculate_bo_size(&layout); } static struct pipe_resource * @@ -1555,7 +1612,7 @@ ilo_texture_get_slice_offset(const struct ilo_texture *tex, row_size = tex->bo_stride * tile_h; /* in bytes */ - x = s->x / tex->block_width * tex->bo_cpp; + x = s->x / tex->block_width * tex->block_size; y = s->y / tex->block_height; slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w); @@ -1572,7 +1629,7 @@ ilo_texture_get_slice_offset(const struct ilo_texture *tex, */ if (x_offset) { /* in pixels */ - x = (x % tile_w) / tex->bo_cpp * tex->block_width; + x = (x % tile_w) / tex->block_size * tex->block_width; assert(x % 4 == 0); *x_offset = x; diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h index f11cd94d64b..81563da7a2b 100644 --- a/src/gallium/drivers/ilo/ilo_resource.h +++ b/src/gallium/drivers/ilo/ilo_resource.h @@ -112,20 +112,13 @@ struct ilo_texture { enum pipe_format bo_format; struct intel_bo *bo; - /* - * These are the values passed to or returned from winsys for bo - * allocation. As such, - * - * - width and height are in blocks, - * - cpp is the block size in bytes, and - * - stride is the distance in bytes between two block rows. - */ - int bo_width, bo_height, bo_cpp, bo_stride; enum intel_tiling_mode tiling; + unsigned long bo_stride; /* distance between two block rows in bytes */ + unsigned long bo_height; - bool compressed; unsigned block_width; unsigned block_height; + unsigned block_size; /* true if the mip level alignments are stricter */ bool halign_8, valign_4; @@ -140,7 +133,7 @@ struct ilo_texture { struct { struct intel_bo *bo; - int bo_stride; + unsigned long bo_stride; } hiz; }; diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c index 456c839c953..4a38e29e076 100644 --- a/src/gallium/drivers/ilo/ilo_transfer.c +++ b/src/gallium/drivers/ilo/ilo_transfer.c @@ -196,7 +196,7 @@ tex_get_box_origin(const struct ilo_texture *tex, assert(x % tex->block_width == 0 && y % tex->block_height == 0); - *mem_x = x / tex->block_width * tex->bo_cpp; + *mem_x = x / tex->block_width * tex->block_size; *mem_y = y / tex->block_height; } @@ -506,7 +506,7 @@ tex_staging_sys_zs_read(struct ilo_context *ilo, d[dst_s8_pos] = s8_src[s8_offset]; d += dst_cpp; - x += tex->bo_cpp; + x += tex->block_size; s8_x++; } @@ -633,7 +633,7 @@ tex_staging_sys_zs_write(struct ilo_context *ilo, s8_dst[s8_offset] = s[src_s8_pos]; s += src_cpp; - x += tex->bo_cpp; + x += tex->block_size; s8_x++; } -- cgit v1.2.3