diff options
Diffstat (limited to 'src/gallium/drivers/ilo')
29 files changed, 1956 insertions, 1590 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.am b/src/gallium/drivers/ilo/Makefile.am index a8785a5e8c4..1f14153748e 100644 --- a/src/gallium/drivers/ilo/Makefile.am +++ b/src/gallium/drivers/ilo/Makefile.am @@ -21,8 +21,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -AUTOMAKE_OPTIONS = subdir-objects - include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index e1bbb9a0781..7a7db938f92 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -1,5 +1,4 @@ C_SOURCES := \ - core/ilo_buffer.h \ core/ilo_builder.c \ core/ilo_builder.h \ core/ilo_builder_3d.h \ @@ -43,6 +42,7 @@ C_SOURCES := \ core/ilo_state_viewport.h \ core/ilo_state_zs.c \ core/ilo_state_zs.h \ + core/ilo_vma.h \ core/intel_winsys.h \ ilo_blit.c \ ilo_blit.h \ @@ -65,8 +65,6 @@ C_SOURCES := \ ilo_public.h \ ilo_query.c \ ilo_query.h \ - ilo_resource.c \ - ilo_resource.h \ ilo_render.c \ ilo_render.h \ ilo_render_gen.h \ @@ -76,6 +74,8 @@ C_SOURCES := \ ilo_render_gen8.c \ ilo_render_media.c \ ilo_render_surface.c \ + ilo_resource.c \ + ilo_resource.h \ ilo_screen.c \ ilo_screen.h \ ilo_shader.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 6d9e3699125..5efe9da2d22 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -39,6 +39,7 @@ #include "ilo_state_shader.h" #include "ilo_state_viewport.h" #include "ilo_state_zs.h" +#include "ilo_vma.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" @@ -674,9 +675,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT; - if (zs->depth_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, zs->depth_bo, - zs->depth[1], (zs->z_readonly) ? 
0 : INTEL_RELOC_WRITE); + if (zs->z_vma) { + ilo_builder_batch_reloc64(builder, pos + 2, zs->z_vma->bo, + zs->z_vma->bo_offset + zs->depth[1], + (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); } } else { dw[1] = zs->depth[0]; @@ -691,9 +693,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, else dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT; - if (zs->depth_bo) { - ilo_builder_batch_reloc(builder, pos + 2, zs->depth_bo, - zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); + if (zs->z_vma) { + ilo_builder_batch_reloc(builder, pos + 2, zs->z_vma->bo, + zs->z_vma->bo_offset + zs->depth[1], + (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); } } } @@ -724,9 +727,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT; - if (zs->stencil_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, zs->stencil_bo, - zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE); + if (zs->s_vma) { + ilo_builder_batch_reloc64(builder, pos + 2, zs->s_vma->bo, + zs->s_vma->bo_offset + zs->stencil[1], + (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE); } } else { dw[1] = zs->stencil[0]; @@ -734,9 +738,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT; - if (zs->stencil_bo) { - ilo_builder_batch_reloc(builder, pos + 2, zs->stencil_bo, - zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE); + if (zs->s_vma) { + ilo_builder_batch_reloc(builder, pos + 2, zs->s_vma->bo, + zs->s_vma->bo_offset + zs->stencil[1], + (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE); } } } @@ -767,9 +772,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT; - if (zs->hiz_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_bo, - zs->hiz[1], (zs->z_readonly) ? 
0 : INTEL_RELOC_WRITE); + if (zs->hiz_vma) { + ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_vma->bo, + zs->hiz_vma->bo_offset + zs->hiz[1], + (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); } } else { dw[1] = zs->hiz[0]; @@ -777,9 +783,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT; - if (zs->hiz_bo) { - ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_bo, - zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); + if (zs->hiz_vma) { + ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_vma->bo, + zs->hiz_vma->bo_offset + zs->hiz[1], + (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE); } } } diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 8d30095e6f6..6e94fb25f1f 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -39,6 +39,7 @@ #include "ilo_state_surface.h" #include "ilo_state_urb.h" #include "ilo_state_vf.h" +#include "ilo_vma.h" #include "ilo_builder.h" static inline void @@ -318,8 +319,10 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, dw[3] = 0; if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { - if (b->need_bo) - ilo_builder_batch_reloc64(builder, pos + 1, b->bo, b->vb[1], 0); + if (b->vma) { + ilo_builder_batch_reloc64(builder, pos + 1, b->vma->bo, + b->vma->bo_offset + b->vb[1], 0); + } dw[3] |= b->vb[2]; } else { @@ -331,9 +334,11 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, dw[3] |= vf->user_instancing[elem][1]; } - if (b->need_bo) { - ilo_builder_batch_reloc(builder, pos + 1, b->bo, b->vb[1], 0); - ilo_builder_batch_reloc(builder, pos + 2, b->bo, b->vb[2], 0); + if (b->vma) { + ilo_builder_batch_reloc(builder, pos + 1, b->vma->bo, + b->vma->bo_offset + b->vb[1], 0); + ilo_builder_batch_reloc(builder, pos + 2, b->vma->bo, + b->vma->bo_offset + b->vb[2], 0); } } @@ -429,9 +434,11 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder 
*builder, pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - if (ib->need_bo) { - ilo_builder_batch_reloc(builder, pos + 1, ib->bo, ib->ib[1], 0); - ilo_builder_batch_reloc(builder, pos + 2, ib->bo, ib->ib[2], 0); + if (ib->vma) { + ilo_builder_batch_reloc(builder, pos + 1, ib->vma->bo, + ib->vma->bo_offset + ib->ib[1], 0); + ilo_builder_batch_reloc(builder, pos + 2, ib->vma->bo, + ib->vma->bo_offset + ib->ib[2], 0); } else { dw[1] = 0; dw[2] = 0; @@ -456,8 +463,9 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, dw[1] = ib->ib[0] | builder->mocs << GEN8_IB_DW1_MOCS__SHIFT; - if (ib->need_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, ib->bo, ib->ib[1], 0); + if (ib->vma) { + ilo_builder_batch_reloc64(builder, pos + 2, ib->vma->bo, + ib->vma->bo_offset + ib->ib[1], 0); } else { dw[2] = 0; dw[3] = 0; @@ -801,11 +809,11 @@ gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT | sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT; - if (sb->need_bo) { - ilo_builder_batch_reloc(builder, pos + 2, sb->bo, - sb->so_buf[0], INTEL_RELOC_WRITE); - ilo_builder_batch_reloc(builder, pos + 3, sb->bo, - sb->so_buf[1], INTEL_RELOC_WRITE); + if (sb->vma) { + ilo_builder_batch_reloc(builder, pos + 2, sb->vma->bo, + sb->vma->bo_offset + sb->so_buf[0], INTEL_RELOC_WRITE); + ilo_builder_batch_reloc(builder, pos + 3, sb->vma->bo, + sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE); } else { dw[2] = 0; dw[3] = 0; @@ -832,9 +840,9 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder, buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT | builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT; - if (sb->need_bo) { - ilo_builder_batch_reloc64(builder, pos + 2, sb->bo, - sb->so_buf[1], INTEL_RELOC_WRITE); + if (sb->vma) { + ilo_builder_batch_reloc64(builder, pos + 2, sb->vma->bo, + sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE); } else { dw[2] = 0; dw[3] = 0; @@ -842,9 +850,10 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder 
*builder, dw[4] = sb->so_buf[2]; - if (sb->need_write_offset_bo) { - ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_bo, - sizeof(uint32_t) * buffer, INTEL_RELOC_WRITE); + if (sb->write_offset_vma) { + ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_vma->bo, + sb->write_offset_vma->bo_offset + sizeof(uint32_t) * buffer, + INTEL_RELOC_WRITE); } else { dw[5] = 0; dw[6] = 0; @@ -1254,14 +1263,15 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw); memcpy(dw, surf->surface, state_len << 2); - if (surf->bo) { + if (surf->vma) { const uint32_t mocs = (surf->scanout) ? (GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs; dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT; - ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo, - surf->surface[8], (surf->readonly) ? 0 : INTEL_RELOC_WRITE); + ilo_builder_surface_reloc64(builder, state_offset, 8, surf->vma->bo, + surf->vma->bo_offset + surf->surface[8], + (surf->readonly) ? 0 : INTEL_RELOC_WRITE); } } else { state_align = 32; @@ -1271,15 +1281,16 @@ gen6_SURFACE_STATE(struct ilo_builder *builder, ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw); memcpy(dw, surf->surface, state_len << 2); - if (surf->bo) { + if (surf->vma) { /* * For scanouts, we should not enable caching in LLC. Since we only * enable that on Gen8+, we are fine here. */ dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT; - ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, - surf->surface[1], (surf->readonly) ? 0 : INTEL_RELOC_WRITE); + ilo_builder_surface_reloc(builder, state_offset, 1, surf->vma->bo, + surf->vma->bo_offset + surf->surface[1], + (surf->readonly) ? 
0 : INTEL_RELOC_WRITE); } } diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h index 0a7f7d9d3fe..da7db90a54b 100644 --- a/src/gallium/drivers/ilo/core/ilo_core.h +++ b/src/gallium/drivers/ilo/core/ilo_core.h @@ -29,15 +29,9 @@ #define ILO_CORE_H #include "pipe/p_compiler.h" -#include "pipe/p_defines.h" -#include "pipe/p_format.h" #include "util/u_debug.h" -#include "util/list.h" -#include "util/u_format.h" -#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_pointer.h" #endif /* ILO_CORE_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c index 0d837d8a9d5..fa547ac5c36 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.c +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -40,269 +40,356 @@ enum { IMAGE_TILING_W) }; -struct ilo_image_params { - const struct ilo_dev *dev; - const struct pipe_resource *templ; - unsigned valid_tilings; +struct ilo_image_layout { + enum ilo_image_walk_type walk; + bool interleaved_samples; - bool compressed; + uint8_t valid_tilings; + enum gen_surface_tiling tiling; - unsigned h0, h1; - unsigned max_x, max_y; + enum ilo_image_aux_type aux; + + int align_i; + int align_j; + + struct ilo_image_lod *lods; + int walk_layer_h0; + int walk_layer_h1; + int walk_layer_height; + int monolithic_width; + int monolithic_height; }; -static void -img_get_slice_size(const struct ilo_image *img, - const struct ilo_image_params *params, - unsigned level, unsigned *width, unsigned *height) +static enum ilo_image_walk_type +image_get_gen6_walk(const struct ilo_dev *dev, + const struct ilo_image_info *info) { - const struct pipe_resource *templ = params->templ; - unsigned w, h; + ILO_DEV_ASSERT(dev, 6, 6); - w = u_minify(img->width0, level); - h = u_minify(img->height0, level); + /* TODO we want LODs to be page-aligned */ + if (info->type == GEN6_SURFTYPE_3D) + return ILO_IMAGE_WALK_3D; /* - * From the Sandy Bridge 
PRM, volume 1 part 1, page 114: + * From the Sandy Bridge PRM, volume 1 part 1, page 115: * - * "The dimensions of the mip maps are first determined by applying the - * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, - * if necessary, they are padded out to compression block boundaries." + * "The separate stencil buffer does not support mip mapping, thus the + * storage for LODs other than LOD 0 is not needed. The following + * QPitch equation applies only to the separate stencil buffer: + * + * QPitch = h_0" + * + * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels + * when bound. */ - w = align(w, img->block_width); - h = align(h, img->block_height); + if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT) + return ILO_IMAGE_WALK_LOD; + + /* compact spacing is not supported otherwise */ + return ILO_IMAGE_WALK_LAYER; +} + +static enum ilo_image_walk_type +image_get_gen7_walk(const struct ilo_dev *dev, + const struct ilo_image_info *info) +{ + ILO_DEV_ASSERT(dev, 7, 8); + + if (info->type == GEN6_SURFTYPE_3D) + return ILO_IMAGE_WALK_3D; /* - * From the Sandy Bridge PRM, volume 1 part 1, page 111: - * - * "If the surface is multisampled (4x), these values must be adjusted - * as follows before proceeding: + * From the Ivy Bridge PRM, volume 1 part 1, page 111: * - * W_L = ceiling(W_L / 2) * 4 - * H_L = ceiling(H_L / 2) * 4" + * "note that the depth buffer and stencil buffer have an implied value + * of ARYSPC_FULL" * - * From the Ivy Bridge PRM, volume 1 part 1, page 108: + * From the Ivy Bridge PRM, volume 4 part 1, page 66: * - * "If the surface is multisampled and it is a depth or stencil surface - * or Multisampled Surface StorageFormat in SURFACE_STATE is - * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before - * proceeding: + * "If Multisampled Surface Storage Format is MSFMT_MSS and Number of + * Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array + * Spacing) must be set to 
ARYSPC_LOD0." + */ + if (info->sample_count > 1) + assert(info->level_count == 1); + return (info->bind_zs || info->level_count > 1) ? + ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD; +} + +static bool +image_get_gen6_interleaved_samples(const struct ilo_dev *dev, + const struct ilo_image_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * Gen6 supports only interleaved samples. It is not explicitly stated, + * but on Gen7+, render targets are expected to be UMS/CMS (samples + * non-interleaved) and depth/stencil buffers are expected to be IMS + * (samples interleaved). * - * #samples W_L = H_L = - * 2 ceiling(W_L / 2) * 4 HL [no adjustment] - * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 - * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 - * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" + * See "Multisampled Surface Storage Format" field of SURFACE_STATE. + */ + return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs); +} + +static uint8_t +image_get_gen6_valid_tilings(const struct ilo_dev *dev, + const struct ilo_image_info *info) +{ + uint8_t valid_tilings = IMAGE_TILING_ALL; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (info->valid_tilings) + valid_tilings &= info->valid_tilings; + + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 32: * - * For interleaved samples (4x), where pixels + * "Display/Overlay Y-Major not supported. + * X-Major required for Async Flips" + */ + if (unlikely(info->bind_scanout)) + valid_tilings &= IMAGE_TILING_X; + + /* + * From the Sandy Bridge PRM, volume 3 part 2, page 158: * - * (x, y ) (x+1, y ) - * (x, y+1) (x+1, y+1) + * "The cursor surface address must be 4K byte aligned. The cursor must + * be in linear memory, it cannot be tiled." + */ + if (unlikely(info->bind_cursor)) + valid_tilings &= IMAGE_TILING_NONE; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 318: * - * would be is occupied by + * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear + * Depth Buffer is not supported." 
* - * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) - * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) - * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) - * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) + * "The Depth Buffer, if tiled, must use Y-Major tiling." * - * Thus the need to + * From the Sandy Bridge PRM, volume 1 part 2, page 22: * - * w = align(w, 2) * 2; - * y = align(y, 2) * 2; + * "W-Major Tile Format is used for separate stencil." */ - if (img->interleaved_samples) { - switch (templ->nr_samples) { - case 0: - case 1: - break; - case 2: - w = align(w, 2) * 2; - break; - case 4: - w = align(w, 2) * 2; - h = align(h, 2) * 2; - break; - case 8: - w = align(w, 2) * 4; - h = align(h, 2) * 2; - break; - case 16: - w = align(w, 2) * 4; - h = align(h, 2) * 4; - break; - default: - assert(!"unsupported sample count"); - break; - } + if (info->bind_zs) { + if (info->format == GEN6_FORMAT_R8_UINT) + valid_tilings &= IMAGE_TILING_W; + else + valid_tilings &= IMAGE_TILING_Y; } - /* - * From the Ivy Bridge PRM, volume 1 part 1, page 108: - * - * "For separate stencil buffer, the width must be mutiplied by 2 and - * height divided by 2..." - * - * To make things easier (for transfer), we will just double the stencil - * stride in 3DSTATE_STENCIL_BUFFER. - */ - w = align(w, img->align_i); - h = align(h, img->align_j); + if (info->bind_surface_sampler || + info->bind_surface_dp_render || + info->bind_surface_dp_typed) { + /* + * From the Haswell PRM, volume 2d, page 233: + * + * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * (Tiled Surface) must be TRUE." 
+ */ + if (info->sample_count > 1) + valid_tilings &= ~IMAGE_TILING_NONE; - *width = w; - *height = h; -} + if (ilo_dev_gen(dev) < ILO_GEN(8)) + valid_tilings &= ~IMAGE_TILING_W; + } -static unsigned -img_get_num_layers(const struct ilo_image *img, - const struct ilo_image_params *params) -{ - const struct pipe_resource *templ = params->templ; - unsigned num_layers = templ->array_size; + if (info->bind_surface_dp_render) { + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 32: + * + * "NOTE: 128BPE Format Color buffer ( render target ) MUST be + * either TileX or Linear." + * + * From the Haswell PRM, volume 5, page 32: + * + * "NOTE: 128 BPP format color buffer (render target) supports + * Linear, TiledX and TiledY." + */ + if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16) + valid_tilings &= ~IMAGE_TILING_Y; - /* samples of the same index are stored in a layer */ - if (templ->nr_samples > 1 && !img->interleaved_samples) - num_layers *= templ->nr_samples; + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 63: + * + * "This field (Surface Vertical Aligment) must be set to VALIGN_4 + * for all tiled Y Render Target surfaces." + * + * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." + * + * R32G32B32_FLOAT is not renderable and we only need an assert() here. 
+ */ + if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) + assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT); + } - return num_layers; + return valid_tilings; } -static void -img_init_layer_height(struct ilo_image *img, - struct ilo_image_params *params) +static uint64_t +image_get_gen6_estimated_size(const struct ilo_dev *dev, + const struct ilo_image_info *info) { - const struct pipe_resource *templ = params->templ; - unsigned num_layers; + /* padding not considered */ + const uint64_t slice_size = info->width * info->height * + info->block_size / (info->block_width * info->block_height); + const uint64_t slice_count = + info->depth * info->array_size * info->sample_count; + const uint64_t estimated_size = slice_size * slice_count; - if (img->walk != ILO_IMAGE_WALK_LAYER) - return; + ILO_DEV_ASSERT(dev, 6, 8); - num_layers = img_get_num_layers(img, params); - if (num_layers <= 1) - return; + if (info->level_count == 1) + return estimated_size; + else + return estimated_size * 4 / 3; +} + +static enum gen_surface_tiling +image_get_gen6_tiling(const struct ilo_dev *dev, + const struct ilo_image_info *info, + uint8_t valid_tilings) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + switch (valid_tilings) { + case IMAGE_TILING_NONE: + return GEN6_TILING_NONE; + case IMAGE_TILING_X: + return GEN6_TILING_X; + case IMAGE_TILING_Y: + return GEN6_TILING_Y; + case IMAGE_TILING_W: + return GEN8_TILING_W; + default: + break; + } /* - * From the Sandy Bridge PRM, volume 1 part 1, page 115: - * - * "The following equation is used for surface formats other than - * compressed textures: - * - * QPitch = (h0 + h1 + 11j)" - * - * "The equation for compressed textures (BC* and FXT1 surface formats) - * follows: - * - * QPitch = (h0 + h1 + 11j) / 4" - * - * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the - * value calculated in the equation above, for every other odd Surface - * Height starting from 1 i.e. 
1,5,9,13" - * - * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: + * X-tiling has the property that vertically adjacent pixels are usually in + * the same page. When the image size is less than a page, the image + * height is 1, or when the image is not accessed in blocks, there is no + * reason to tile. * - * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth - * buffer and stencil buffer have an implied value of ARYSPC_FULL): - * - * QPitch = (h0 + h1 + 12j) - * QPitch = (h0 + h1 + 12j) / 4 (compressed) - * - * (There are many typos or missing words here...)" - * - * To access the N-th slice, an offset of (Stride * QPitch * N) is added to - * the base address. The PRM divides QPitch by 4 for compressed formats - * because the block height for those formats are 4, and it wants QPitch to - * mean the number of memory rows, as opposed to texel rows, between - * slices. Since we use texel rows everywhere, we do not need to divide - * QPitch by 4. + * Y-tiling is similar, where vertically adjacent pixels are usually in the + * same cacheline. */ - img->walk_layer_height = params->h0 + params->h1 + - ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j; + if (valid_tilings & IMAGE_TILING_NONE) { + const uint64_t estimated_size = + image_get_gen6_estimated_size(dev, info); - if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 && - img->height0 % 4 == 1) - img->walk_layer_height += 4; + if (info->height == 1 || !(info->bind_surface_sampler || + info->bind_surface_dp_render || + info->bind_surface_dp_typed)) + return GEN6_TILING_NONE; + + if (estimated_size <= 64 || + estimated_size > info->prefer_linear_threshold) + return GEN6_TILING_NONE; + + if (estimated_size <= 2048) + valid_tilings &= ~IMAGE_TILING_X; + } - params->max_y += img->walk_layer_height * (num_layers - 1); + return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y : + (valid_tilings & IMAGE_TILING_X) ? 
GEN6_TILING_X : + GEN6_TILING_NONE; } -static void -img_init_lods(struct ilo_image *img, - struct ilo_image_params *params) +static bool +image_get_gen6_hiz_enable(const struct ilo_dev *dev, + const struct ilo_image_info *info) { - const struct pipe_resource *templ = params->templ; - unsigned cur_x, cur_y; - unsigned lv; + ILO_DEV_ASSERT(dev, 6, 8); - cur_x = 0; - cur_y = 0; - for (lv = 0; lv <= templ->last_level; lv++) { - unsigned lod_w, lod_h; + /* depth buffer? */ + if (!info->bind_zs || + info->format == GEN6_FORMAT_R8_UINT || + info->interleaved_stencil) + return false; - img_get_slice_size(img, params, lv, &lod_w, &lod_h); + /* we want to be able to force 8x4 alignments */ + if (info->type == GEN6_SURFTYPE_1D) + return false; - img->lods[lv].x = cur_x; - img->lods[lv].y = cur_y; - img->lods[lv].slice_width = lod_w; - img->lods[lv].slice_height = lod_h; + if (info->aux_disable) + return false; - switch (img->walk) { - case ILO_IMAGE_WALK_LAYER: - /* MIPLAYOUT_BELOW */ - if (lv == 1) - cur_x += lod_w; - else - cur_y += lod_h; - break; - case ILO_IMAGE_WALK_LOD: - lod_h *= img_get_num_layers(img, params); - if (lv == 1) - cur_x += lod_w; - else - cur_y += lod_h; + if (ilo_debug & ILO_DEBUG_NOHIZ) + return false; - /* every LOD begins at tile boundaries */ - if (templ->last_level > 0) { - assert(img->format == PIPE_FORMAT_S8_UINT); - cur_x = align(cur_x, 64); - cur_y = align(cur_y, 64); - } - break; - case ILO_IMAGE_WALK_3D: - { - const unsigned num_slices = u_minify(templ->depth0, lv); - const unsigned num_slices_per_row = 1 << lv; - const unsigned num_rows = - (num_slices + num_slices_per_row - 1) / num_slices_per_row; + return true; +} - lod_w *= num_slices_per_row; - lod_h *= num_rows; +static bool +image_get_gen7_mcs_enable(const struct ilo_dev *dev, + const struct ilo_image_info *info, + enum gen_surface_tiling tiling) +{ + ILO_DEV_ASSERT(dev, 7, 8); - cur_y += lod_h; - } - break; - } + if (!info->bind_surface_sampler && !info->bind_surface_dp_render) + 
return false; - if (params->max_x < img->lods[lv].x + lod_w) - params->max_x = img->lods[lv].x + lod_w; - if (params->max_y < img->lods[lv].y + lod_h) - params->max_y = img->lods[lv].y + lod_h; + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 77: + * + * "For Render Target and Sampling Engine Surfaces:If the surface is + * multisampled (Number of Multisamples any value other than + * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." + * + * "This field must be set to 0 for all SINT MSRTs when all RT channels + * are not written" + */ + if (info->sample_count > 1) { + if (ilo_dev_gen(dev) < ILO_GEN(8)) + assert(!info->is_integer); + return true; } - if (img->walk == ILO_IMAGE_WALK_LAYER) { - params->h0 = img->lods[0].slice_height; + if (info->aux_disable) + return false; - if (templ->last_level > 0) - params->h1 = img->lods[1].slice_height; - else - img_get_slice_size(img, params, 1, &cur_x, ¶ms->h1); + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 326: + * + * "When MCS is buffer is used for color clear of non-multisampler + * render target, the following restrictions apply. + * - Support is limited to tiled render targets. + * - Support is for non-mip-mapped and non-array surface types only. + * - Clear is supported only on the full RT; i.e., no partial clear or + * overlapping clears. + * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, + * 64bpp and 128bpp. + * ..." + * + * How about SURFTYPE_3D? 
+ */ + if (!info->bind_surface_dp_render || + tiling == GEN6_TILING_NONE || + info->level_count > 1 || + info->array_size > 1) + return false; + + switch (info->block_size) { + case 4: + case 8: + case 16: + return true; + default: + return false; } } static void -img_init_alignments(struct ilo_image *img, - const struct ilo_image_params *params) +image_get_gen6_alignments(const struct ilo_dev *dev, + const struct ilo_image_info *info, + int *align_i, int *align_j) { - const struct pipe_resource *templ = params->templ; + ILO_DEV_ASSERT(dev, 6, 6); /* * From the Sandy Bridge PRM, volume 1 part 1, page 113: @@ -335,13 +422,33 @@ img_init_alignments(struct ilo_image *img, * * align_i align_j * compressed formats block width block height - * PIPE_FORMAT_S8_UINT 4 2 + * GEN6_FORMAT_R8_UINT 4 2 * other depth/stencil formats 4 4 * 4x multisampled 4 4 * bpp 96 4 2 * others 4 2 or 4 */ + *align_i = (info->compressed) ? info->block_width : 4; + if (info->compressed) { + *align_j = info->block_height; + } else if (info->bind_zs) { + *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4; + } else { + *align_j = (info->sample_count > 1 || info->block_size != 12) ? 
4 : 2; + } +} + +static void +image_get_gen7_alignments(const struct ilo_dev *dev, + const struct ilo_image_info *info, + enum gen_surface_tiling tiling, + int *align_i, int *align_j) +{ + int i, j; + + ILO_DEV_ASSERT(dev, 7, 8); + /* * From the Ivy Bridge PRM, volume 1 part 1, page 110: * @@ -383,465 +490,301 @@ img_init_alignments(struct ilo_image *img, * * align_i align_j * compressed formats block width block height - * PIPE_FORMAT_Z16_UNORM 8 4 - * PIPE_FORMAT_S8_UINT 8 8 + * GEN6_FORMAT_R16_UNORM 8 4 + * GEN6_FORMAT_R8_UINT 8 8 * other depth/stencil formats 4 4 * 2x or 4x multisampled 4 or 8 4 * tiled Y 4 or 8 4 (if rt) - * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2 + * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2 * others 4 or 8 2 or 4 */ - - if (params->compressed) { - /* this happens to be the case */ - img->align_i = img->block_width; - img->align_j = img->block_height; - } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) { - switch (img->format) { - case PIPE_FORMAT_Z16_UNORM: - img->align_i = 8; - img->align_j = 4; - break; - case PIPE_FORMAT_S8_UINT: - img->align_i = 8; - img->align_j = 8; - break; - default: - img->align_i = 4; - img->align_j = 4; - break; - } - } else { - switch (img->format) { - case PIPE_FORMAT_S8_UINT: - img->align_i = 4; - img->align_j = 2; - break; - default: - img->align_i = 4; - img->align_j = 4; - break; - } + if (info->compressed) { + i = info->block_width; + j = info->block_height; + } else if (info->bind_zs) { + switch (info->format) { + case GEN6_FORMAT_R16_UNORM: + i = 8; + j = 4; + break; + case GEN6_FORMAT_R8_UINT: + i = 8; + j = 8; + break; + default: + i = 4; + j = 4; + break; } } else { const bool valign_4 = - (templ->nr_samples > 1) || - (ilo_dev_gen(params->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(params->dev) >= ILO_GEN(7) && - img->tiling == GEN6_TILING_Y && - (templ->bind & PIPE_BIND_RENDER_TARGET)); - - if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && - ilo_dev_gen(params->dev) <= 
ILO_GEN(7.5) && valign_4) - assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT); - - img->align_i = 4; - img->align_j = (valign_4) ? 4 : 2; - } + (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) || + (tiling == GEN6_TILING_Y && info->bind_surface_dp_render)); - /* - * the fact that align i and j are multiples of block width and height - * respectively is what makes the size of the bo a multiple of the block - * size, slices start at block boundaries, and many of the computations - * work. - */ - assert(img->align_i % img->block_width == 0); - assert(img->align_j % img->block_height == 0); + if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4) + assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT); - /* make sure align() works */ - assert(util_is_power_of_two(img->align_i) && - util_is_power_of_two(img->align_j)); - assert(util_is_power_of_two(img->block_width) && - util_is_power_of_two(img->block_height)); + i = 4; + j = (valign_4) ? 4 : 2; + } + + *align_i = i; + *align_j = j; } -static void -img_init_tiling(struct ilo_image *img, - const struct ilo_image_params *params) +static bool +image_init_gen6_hardware_layout(const struct ilo_dev *dev, + const struct ilo_image_info *info, + struct ilo_image_layout *layout) { - const struct pipe_resource *templ = params->templ; - unsigned preferred_tilings = params->valid_tilings; - - /* no fencing nor BLT support */ - if (preferred_tilings & ~IMAGE_TILING_W) - preferred_tilings &= ~IMAGE_TILING_W; - - if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { - /* - * heuristically set a minimum width/height for enabling tiling - */ - if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X)) - preferred_tilings &= ~IMAGE_TILING_X; - - if ((img->width0 < 32 || img->height0 < 16) && - (img->width0 < 16 || img->height0 < 32) && - (preferred_tilings & ~IMAGE_TILING_Y)) - preferred_tilings &= ~IMAGE_TILING_Y; - } else { - /* force linear if we are not sure where the texture is bound to */ - if 
(preferred_tilings & IMAGE_TILING_NONE) - preferred_tilings &= IMAGE_TILING_NONE; - } + ILO_DEV_ASSERT(dev, 6, 8); - /* prefer tiled over linear */ - if (preferred_tilings & IMAGE_TILING_Y) - img->tiling = GEN6_TILING_Y; - else if (preferred_tilings & IMAGE_TILING_X) - img->tiling = GEN6_TILING_X; - else if (preferred_tilings & IMAGE_TILING_W) - img->tiling = GEN8_TILING_W; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + layout->walk = image_get_gen7_walk(dev, info); else - img->tiling = GEN6_TILING_NONE; -} + layout->walk = image_get_gen6_walk(dev, info); -static void -img_init_walk_gen7(struct ilo_image *img, - const struct ilo_image_params *params) -{ - const struct pipe_resource *templ = params->templ; + layout->interleaved_samples = + image_get_gen6_interleaved_samples(dev, info); - /* - * It is not explicitly states, but render targets are expected to be - * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected - * to be IMS (samples interleaved). - * - * See "Multisampled Surface Storage Format" field of SURFACE_STATE. - */ - if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - /* - * From the Ivy Bridge PRM, volume 1 part 1, page 111: - * - * "note that the depth buffer and stencil buffer have an implied - * value of ARYSPC_FULL" - */ - img->walk = (templ->target == PIPE_TEXTURE_3D) ? - ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER; + layout->valid_tilings = image_get_gen6_valid_tilings(dev, info); + if (!layout->valid_tilings) + return false; - img->interleaved_samples = true; - } else { - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 66: - * - * "If Multisampled Surface Storage Format is MSFMT_MSS and Number - * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface - * Array Spacing) must be set to ARYSPC_LOD0." - * - * As multisampled resources are not mipmapped, we never use - * ARYSPC_FULL for them. 
- */ - if (templ->nr_samples > 1) - assert(templ->last_level == 0); + layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings); - img->walk = - (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D : - (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER : - ILO_IMAGE_WALK_LOD; + if (image_get_gen6_hiz_enable(dev, info)) + layout->aux = ILO_IMAGE_AUX_HIZ; + else if (ilo_dev_gen(dev) >= ILO_GEN(7) && + image_get_gen7_mcs_enable(dev, info, layout->tiling)) + layout->aux = ILO_IMAGE_AUX_MCS; + else + layout->aux = ILO_IMAGE_AUX_NONE; - img->interleaved_samples = false; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + image_get_gen7_alignments(dev, info, layout->tiling, + &layout->align_i, &layout->align_j); + } else { + image_get_gen6_alignments(dev, info, + &layout->align_i, &layout->align_j); } + + return true; } -static void -img_init_walk_gen6(struct ilo_image *img, - const struct ilo_image_params *params) +static bool +image_init_gen6_transfer_layout(const struct ilo_dev *dev, + const struct ilo_image_info *info, + struct ilo_image_layout *layout) { - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 115: - * - * "The separate stencil buffer does not support mip mapping, thus the - * storage for LODs other than LOD 0 is not needed. The following - * QPitch equation applies only to the separate stencil buffer: - * - * QPitch = h_0" - * - * GEN6 does not support compact spacing otherwise. - */ - img->walk = - (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D : - (img->format == PIPE_FORMAT_S8_UINT) ? 
ILO_IMAGE_WALK_LOD : - ILO_IMAGE_WALK_LAYER; + ILO_DEV_ASSERT(dev, 6, 8); + + /* we can define our own layout to save space */ + layout->walk = ILO_IMAGE_WALK_LOD; + layout->interleaved_samples = false; + layout->valid_tilings = IMAGE_TILING_NONE; + layout->tiling = GEN6_TILING_NONE; + layout->aux = ILO_IMAGE_AUX_NONE; + layout->align_i = info->block_width; + layout->align_j = info->block_height; - /* GEN6 supports only interleaved samples */ - img->interleaved_samples = true; + return true; } static void -img_init_walk(struct ilo_image *img, - const struct ilo_image_params *params) +image_get_gen6_slice_size(const struct ilo_dev *dev, + const struct ilo_image_info *info, + const struct ilo_image_layout *layout, + uint8_t level, + int *width, int *height) { - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - img_init_walk_gen7(img, params); - else - img_init_walk_gen6(img, params); -} + int w, h; -static unsigned -img_get_valid_tilings(const struct ilo_image *img, - const struct ilo_image_params *params) -{ - const struct pipe_resource *templ = params->templ; - const enum pipe_format format = img->format; - unsigned valid_tilings = params->valid_tilings; + ILO_DEV_ASSERT(dev, 6, 8); - /* - * From the Sandy Bridge PRM, volume 1 part 2, page 32: - * - * "Display/Overlay Y-Major not supported. - * X-Major required for Async Flips" - */ - if (unlikely(templ->bind & PIPE_BIND_SCANOUT)) - valid_tilings &= IMAGE_TILING_X; + w = u_minify(info->width, level); + h = u_minify(info->height, level); /* - * From the Sandy Bridge PRM, volume 3 part 2, page 158: + * From the Sandy Bridge PRM, volume 1 part 1, page 114: * - * "The cursor surface address must be 4K byte aligned. The cursor must - * be in linear memory, it cannot be tiled." + * "The dimensions of the mip maps are first determined by applying the + * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, + * if necessary, they are padded out to compression block boundaries." 
*/ - if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR))) - valid_tilings &= IMAGE_TILING_NONE; + w = align(w, info->block_width); + h = align(h, info->block_height); /* - * From the Sandy Bridge PRM, volume 2 part 1, page 318: + * From the Sandy Bridge PRM, volume 1 part 1, page 111: * - * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear - * Depth Buffer is not supported." + * "If the surface is multisampled (4x), these values must be adjusted + * as follows before proceeding: * - * "The Depth Buffer, if tiled, must use Y-Major tiling." + * W_L = ceiling(W_L / 2) * 4 + * H_L = ceiling(H_L / 2) * 4" * - * From the Sandy Bridge PRM, volume 1 part 2, page 22: + * From the Ivy Bridge PRM, volume 1 part 1, page 108: * - * "W-Major Tile Format is used for separate stencil." + * "If the surface is multisampled and it is a depth or stencil surface + * or Multisampled Surface StorageFormat in SURFACE_STATE is + * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before + * proceeding: + * + * #samples W_L = H_L = + * 2 ceiling(W_L / 2) * 4 HL [no adjustment] + * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 + * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 + * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" + * + * For interleaved samples (4x), where pixels + * + * (x, y ) (x+1, y ) + * (x, y+1) (x+1, y+1) + * + * would be is occupied by + * + * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) + * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) + * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) + * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) + * + * Thus the need to + * + * w = align(w, 2) * 2; + * y = align(y, 2) * 2; */ - if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_S8_UINT: - valid_tilings &= IMAGE_TILING_W; + if (layout->interleaved_samples) { + switch (info->sample_count) { + case 1: + break; + case 2: + w = align(w, 2) * 2; + break; + case 4: + w = 
align(w, 2) * 2; + h = align(h, 2) * 2; + break; + case 8: + w = align(w, 2) * 4; + h = align(h, 2) * 2; + break; + case 16: + w = align(w, 2) * 4; + h = align(h, 2) * 4; break; default: - valid_tilings &= IMAGE_TILING_Y; + assert(!"unsupported sample count"); break; } } - if (templ->bind & PIPE_BIND_RENDER_TARGET) { - /* - * From the Sandy Bridge PRM, volume 1 part 2, page 32: - * - * "NOTE: 128BPE Format Color buffer ( render target ) MUST be - * either TileX or Linear." - * - * From the Haswell PRM, volume 5, page 32: - * - * "NOTE: 128 BPP format color buffer (render target) supports - * Linear, TiledX and TiledY." - */ - if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16) - valid_tilings &= ~IMAGE_TILING_Y; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 63: - * - * "This field (Surface Vertical Aligment) must be set to VALIGN_4 - * for all tiled Y Render Target surfaces." - * - * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." - */ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && - ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && - img->format == PIPE_FORMAT_R32G32B32_FLOAT) - valid_tilings &= ~IMAGE_TILING_Y; - - valid_tilings &= ~IMAGE_TILING_W; - } - - if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { - if (ilo_dev_gen(params->dev) < ILO_GEN(8)) - valid_tilings &= ~IMAGE_TILING_W; - } - - /* no conflicting binding flags */ - assert(valid_tilings); - - return valid_tilings; -} - -static void -img_init_size_and_format(struct ilo_image *img, - struct ilo_image_params *params) -{ - const struct pipe_resource *templ = params->templ; - enum pipe_format format = templ->format; - bool require_separate_stencil = false; - - img->target = templ->target; - img->width0 = templ->width0; - img->height0 = templ->height0; - img->depth0 = templ->depth0; - img->array_size = templ->array_size; - img->level_count = templ->last_level + 1; - img->sample_count = (templ->nr_samples) ? 
templ->nr_samples : 1; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 317: + * From the Ivy Bridge PRM, volume 1 part 1, page 108: * - * "This field (Separate Stencil Buffer Enable) must be set to the same - * value (enabled or disabled) as Hierarchical Depth Buffer Enable." + * "For separate stencil buffer, the width must be mutiplied by 2 and + * height divided by 2..." * - * GEN7+ requires separate stencil buffers. + * To make things easier (for transfer), we will just double the stencil + * stride in 3DSTATE_STENCIL_BUFFER. */ - if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - require_separate_stencil = true; - else - require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ); - } - - switch (format) { - case PIPE_FORMAT_ETC1_RGB8: - format = PIPE_FORMAT_R8G8B8X8_UNORM; - break; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - if (require_separate_stencil) { - format = PIPE_FORMAT_Z24X8_UNORM; - img->separate_stencil = true; - } - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - if (require_separate_stencil) { - format = PIPE_FORMAT_Z32_FLOAT; - img->separate_stencil = true; - } - break; - default: - break; - } + w = align(w, layout->align_i); + h = align(h, layout->align_j); - img->format = format; - img->block_width = util_format_get_blockwidth(format); - img->block_height = util_format_get_blockheight(format); - img->block_size = util_format_get_blocksize(format); - - params->valid_tilings = img_get_valid_tilings(img, params); - params->compressed = util_format_is_compressed(img->format); + *width = w; + *height = h; } -static bool -img_want_mcs(const struct ilo_image *img, - const struct ilo_image_params *params) +static int +image_get_gen6_layer_count(const struct ilo_dev *dev, + const struct ilo_image_info *info, + const struct ilo_image_layout *layout) { - const struct pipe_resource *templ = params->templ; - bool want_mcs = false; + int count = info->array_size; - /* MCS is for RT on GEN7+ */ - if 
(ilo_dev_gen(params->dev) < ILO_GEN(7)) - return false; + ILO_DEV_ASSERT(dev, 6, 8); - if (templ->target != PIPE_TEXTURE_2D || - !(templ->bind & PIPE_BIND_RENDER_TARGET)) - return false; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 77: - * - * "For Render Target and Sampling Engine Surfaces:If the surface is - * multisampled (Number of Multisamples any value other than - * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." - * - * "This field must be set to 0 for all SINT MSRTs when all RT channels - * are not written" - */ - if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) { - want_mcs = true; - } else if (templ->nr_samples <= 1) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 326: - * - * "When MCS is buffer is used for color clear of non-multisampler - * render target, the following restrictions apply. - * - Support is limited to tiled render targets. - * - Support is for non-mip-mapped and non-array surface types - * only. - * - Clear is supported only on the full RT; i.e., no partial clear - * or overlapping clears. - * - MCS buffer for non-MSRT is supported only for RT formats - * 32bpp, 64bpp and 128bpp. - * ..." 
- */ - if (img->tiling != GEN6_TILING_NONE && - templ->last_level == 0 && templ->array_size == 1) { - switch (img->block_size) { - case 4: - case 8: - case 16: - want_mcs = true; - break; - default: - break; - } - } - } + /* samples of the same index are stored in a layer */ + if (!layout->interleaved_samples) + count *= info->sample_count; - return want_mcs; + return count; } -static bool -img_want_hiz(const struct ilo_image *img, - const struct ilo_image_params *params) +static void +image_get_gen6_walk_layer_heights(const struct ilo_dev *dev, + const struct ilo_image_info *info, + struct ilo_image_layout *layout) { - const struct pipe_resource *templ = params->templ; - const struct util_format_description *desc = - util_format_description(templ->format); + ILO_DEV_ASSERT(dev, 6, 8); - if (ilo_debug & ILO_DEBUG_NOHIZ) - return false; + layout->walk_layer_h0 = layout->lods[0].slice_height; - /* we want 8x4 aligned levels */ - if (templ->target == PIPE_TEXTURE_1D) - return false; - - if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) - return false; - - if (!util_format_has_depth(desc)) - return false; + if (info->level_count > 1) { + layout->walk_layer_h1 = layout->lods[1].slice_height; + } else { + int dummy; + image_get_gen6_slice_size(dev, info, layout, 1, + &dummy, &layout->walk_layer_h1); + } - /* no point in having HiZ */ - if (templ->usage == PIPE_USAGE_STAGING) - return false; + if (image_get_gen6_layer_count(dev, info, layout) == 1) { + layout->walk_layer_height = 0; + return; + } /* - * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled - * for every level. This is generally fine except on GEN6, where HiZ and - * separate stencil are enabled and disabled at the same time. When the - * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ - * can result in incompatible formats. 
+ * From the Sandy Bridge PRM, volume 1 part 1, page 115: + * + * "The following equation is used for surface formats other than + * compressed textures: + * + * QPitch = (h0 + h1 + 11j)" + * + * "The equation for compressed textures (BC* and FXT1 surface formats) + * follows: + * + * QPitch = (h0 + h1 + 11j) / 4" + * + * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the + * value calculated in the equation above, for every other odd Surface + * Height starting from 1 i.e. 1,5,9,13" + * + * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: + * + * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth + * buffer and stencil buffer have an implied value of ARYSPC_FULL): + * + * QPitch = (h0 + h1 + 12j) + * QPitch = (h0 + h1 + 12j) / 4 (compressed) + * + * (There are many typos or missing words here...)" + * + * To access the N-th slice, an offset of (Stride * QPitch * N) is added to + * the base address. The PRM divides QPitch by 4 for compressed formats + * because the block height for those formats are 4, and it wants QPitch to + * mean the number of memory rows, as opposed to texel rows, between + * slices. Since we use texel rows everywhere, we do not need to divide + * QPitch by 4. */ - if (ilo_dev_gen(params->dev) == ILO_GEN(6) && - templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && - templ->last_level) - return false; + layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 + + ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
12 : 11) * layout->align_j; - return true; -} - -static void -img_init_aux(struct ilo_image *img, - const struct ilo_image_params *params) -{ - if (img_want_hiz(img, params)) - img->aux.type = ILO_IMAGE_AUX_HIZ; - else if (img_want_mcs(img, params)) - img->aux.type = ILO_IMAGE_AUX_MCS; + if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 && + info->height % 4 == 1) + layout->walk_layer_height += 4; } static void -img_align(struct ilo_image *img, struct ilo_image_params *params) +image_get_gen6_monolithic_size(const struct ilo_dev *dev, + const struct ilo_image_info *info, + struct ilo_image_layout *layout, + int max_x, int max_y) { - const struct pipe_resource *templ = params->templ; int align_w = 1, align_h = 1, pad_h = 0; + ILO_DEV_ASSERT(dev, 6, 8); + /* * From the Sandy Bridge PRM, volume 1 part 1, page 118: * @@ -864,15 +807,15 @@ img_align(struct ilo_image *img, struct ilo_image_params *params) * padding purposes. The value of 4 for j still applies for mip level * alignment and QPitch calculation." */ - if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { - align_w = MAX2(align_w, img->align_i); - align_h = MAX2(align_h, img->align_j); + if (info->bind_surface_sampler) { + align_w = MAX2(align_w, layout->align_i); + align_h = MAX2(align_h, layout->align_j); - if (templ->target == PIPE_TEXTURE_CUBE) + if (info->type == GEN6_SURFTYPE_CUBE) pad_h += 2; - if (params->compressed) - align_h = MAX2(align_h, img->align_j * 2); + if (info->compressed) + align_h = MAX2(align_h, layout->align_j * 2); } /* @@ -881,149 +824,288 @@ img_align(struct ilo_image *img, struct ilo_image_params *params) * "If the surface contains an odd number of rows of data, a final row * below the surface must be allocated." */ - if (templ->bind & PIPE_BIND_RENDER_TARGET) + if (info->bind_surface_dp_render) align_h = MAX2(align_h, 2); /* * Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ * for unaligned non-mipmapped and non-array images. 
*/ - if (img->aux.type == ILO_IMAGE_AUX_HIZ && - templ->last_level == 0 && - templ->array_size == 1 && - templ->depth0 == 1) { + if (layout->aux == ILO_IMAGE_AUX_HIZ && + info->level_count == 1 && info->array_size == 1 && info->depth == 1) { align_w = MAX2(align_w, 8); align_h = MAX2(align_h, 4); } - params->max_x = align(params->max_x, align_w); - params->max_y = align(params->max_y + pad_h, align_h); + layout->monolithic_width = align(max_x, align_w); + layout->monolithic_height = align(max_y + pad_h, align_h); } -/* note that this may force the texture to be linear */ static void -img_calculate_bo_size(struct ilo_image *img, - const struct ilo_image_params *params) +image_get_gen6_lods(const struct ilo_dev *dev, + const struct ilo_image_info *info, + struct ilo_image_layout *layout) { - assert(params->max_x % img->block_width == 0); - assert(params->max_y % img->block_height == 0); - assert(img->walk_layer_height % img->block_height == 0); + const int layer_count = image_get_gen6_layer_count(dev, info, layout); + int cur_x, cur_y, max_x, max_y; + uint8_t lv; - img->bo_stride = - (params->max_x / img->block_width) * img->block_size; - img->bo_height = params->max_y / img->block_height; + ILO_DEV_ASSERT(dev, 6, 8); - while (true) { - unsigned w = img->bo_stride, h = img->bo_height; - unsigned align_w, align_h; + cur_x = 0; + cur_y = 0; + max_x = 0; + max_y = 0; + for (lv = 0; lv < info->level_count; lv++) { + int slice_w, slice_h, lod_w, lod_h; - /* - * From the Haswell PRM, volume 5, page 163: - * - * "For linear surfaces, additional padding of 64 bytes is required - * at the bottom of the surface. This is in addition to the padding - * required above." 
- */ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) && - (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) && - img->tiling == GEN6_TILING_NONE) - h += (64 + img->bo_stride - 1) / img->bo_stride; + image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h); - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "- For linear render target surfaces, the pitch must be a - * multiple of the element size for non-YUV surface formats. - * Pitch must be a multiple of 2 * element size for YUV surface - * formats. - * - For other linear surfaces, the pitch can be any multiple of - * bytes. - * - For tiled surfaces, the pitch must be a multiple of the tile - * width." - * - * Different requirements may exist when the bo is used in different - * places, but our alignments here should be good enough that we do not - * need to check params->templ->bind. - */ - switch (img->tiling) { - case GEN6_TILING_X: - align_w = 512; - align_h = 8; + layout->lods[lv].x = cur_x; + layout->lods[lv].y = cur_y; + layout->lods[lv].slice_width = slice_w; + layout->lods[lv].slice_height = slice_h; + + switch (layout->walk) { + case ILO_IMAGE_WALK_LAYER: + lod_w = slice_w; + lod_h = slice_h; + + /* MIPLAYOUT_BELOW */ + if (lv == 1) + cur_x += lod_w; + else + cur_y += lod_h; break; - case GEN6_TILING_Y: - align_w = 128; - align_h = 32; + case ILO_IMAGE_WALK_LOD: + lod_w = slice_w; + lod_h = slice_h * layer_count; + + if (lv == 1) + cur_x += lod_w; + else + cur_y += lod_h; + + /* every LOD begins at tile boundaries */ + if (info->level_count > 1) { + assert(info->format == GEN6_FORMAT_R8_UINT); + cur_x = align(cur_x, 64); + cur_y = align(cur_y, 64); + } break; - case GEN8_TILING_W: - /* - * From the Sandy Bridge PRM, volume 1 part 2, page 22: - * - * "A 4KB tile is subdivided into 8-high by 8-wide array of - * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 - * bytes." 
- */ - align_w = 64; - align_h = 64; + case ILO_IMAGE_WALK_3D: + { + const int slice_count = u_minify(info->depth, lv); + const int slice_count_per_row = 1 << lv; + const int row_count = + (slice_count + slice_count_per_row - 1) / slice_count_per_row; + + lod_w = slice_w * slice_count_per_row; + lod_h = slice_h * row_count; + } + + cur_y += lod_h; break; default: - assert(img->tiling == GEN6_TILING_NONE); - /* some good enough values */ - align_w = 64; - align_h = 2; + assert(!"unknown walk type"); + lod_w = 0; + lod_h = 0; break; } - w = align(w, align_w); - h = align(h, align_h); - - /* make sure the bo is mappable */ - if (img->tiling != GEN6_TILING_NONE) { - /* - * Usually only the first 256MB of the GTT is mappable. - * - * See also how intel_context::max_gtt_map_object_size is calculated. - */ - const size_t mappable_gtt_size = 256 * 1024 * 1024; - - /* - * Be conservative. We may be able to switch from VALIGN_4 to - * VALIGN_2 if the image was Y-tiled, but let's keep it simple. - */ - if (mappable_gtt_size / w / 4 < h) { - if (params->valid_tilings & IMAGE_TILING_NONE) { - img->tiling = GEN6_TILING_NONE; - /* MCS support for non-MSRTs is limited to tiled RTs */ - if (img->aux.type == ILO_IMAGE_AUX_MCS && - params->templ->nr_samples <= 1) - img->aux.type = ILO_IMAGE_AUX_NONE; - - continue; - } else { - ilo_warn("cannot force texture to be linear\n"); - } - } - } + if (max_x < layout->lods[lv].x + lod_w) + max_x = layout->lods[lv].x + lod_w; + if (max_y < layout->lods[lv].y + lod_h) + max_y = layout->lods[lv].y + lod_h; + } + + if (layout->walk == ILO_IMAGE_WALK_LAYER) { + image_get_gen6_walk_layer_heights(dev, info, layout); + if (layer_count > 1) + max_y += layout->walk_layer_height * (layer_count - 1); + } else { + layout->walk_layer_h0 = 0; + layout->walk_layer_h1 = 0; + layout->walk_layer_height = 0; + } + + image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y); +} + +static bool +image_bind_gpu(const struct ilo_image_info *info) +{ + return 
(info->bind_surface_sampler || + info->bind_surface_dp_render || + info->bind_surface_dp_typed || + info->bind_zs || + info->bind_scanout || + info->bind_cursor); +} + +static bool +image_validate_gen6(const struct ilo_dev *dev, + const struct ilo_image_info *info) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 314: + * + * "The separate stencil buffer is always enabled, thus the field in + * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil + * buffer has been removed Surface formats with interleaved depth and + * stencil are no longer supported" + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs) + assert(!info->interleaved_stencil); + + return true; +} + +static bool +image_get_gen6_layout(const struct ilo_dev *dev, + const struct ilo_image_info *info, + struct ilo_image_layout *layout) +{ + ILO_DEV_ASSERT(dev, 6, 8); + + if (!image_validate_gen6(dev, info)) + return false; + + if (image_bind_gpu(info) || info->level_count > 1) { + if (!image_init_gen6_hardware_layout(dev, info, layout)) + return false; + } else { + if (!image_init_gen6_transfer_layout(dev, info, layout)) + return false; + } + + /* + * the fact that align i and j are multiples of block width and height + * respectively is what makes the size of the bo a multiple of the block + * size, slices start at block boundaries, and many of the computations + * work. 
+ */ + assert(layout->align_i % info->block_width == 0); + assert(layout->align_j % info->block_height == 0); + + /* make sure align() works */ + assert(util_is_power_of_two(layout->align_i) && + util_is_power_of_two(layout->align_j)); + assert(util_is_power_of_two(info->block_width) && + util_is_power_of_two(info->block_height)); + + image_get_gen6_lods(dev, info, layout); + + assert(layout->walk_layer_height % info->block_height == 0); + assert(layout->monolithic_width % info->block_width == 0); + assert(layout->monolithic_height % info->block_height == 0); + + return true; +} + +static bool +image_set_gen6_bo_size(struct ilo_image *img, + const struct ilo_dev *dev, + const struct ilo_image_info *info, + const struct ilo_image_layout *layout) +{ + int stride, height; + int align_w, align_h; + + ILO_DEV_ASSERT(dev, 6, 8); + + stride = (layout->monolithic_width / info->block_width) * info->block_size; + height = layout->monolithic_height / info->block_height; + + /* + * From the Haswell PRM, volume 5, page 163: + * + * "For linear surfaces, additional padding of 64 bytes is required + * at the bottom of the surface. This is in addition to the padding + * required above." + */ + if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler && + layout->tiling == GEN6_TILING_NONE) + height += (64 + stride - 1) / stride; - img->bo_stride = w; - img->bo_height = h; + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "- For linear render target surfaces, the pitch must be a multiple + * of the element size for non-YUV surface formats. Pitch must be a + * multiple of 2 * element size for YUV surface formats. + * + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + * - For tiled surfaces, the pitch must be a multiple of the tile + * width." + * + * Different requirements may exist when the image is used in different + * places, but our alignments here should be good enough that we do not + * need to check info->bind_x. 
+ */ + switch (layout->tiling) { + case GEN6_TILING_X: + align_w = 512; + align_h = 8; + break; + case GEN6_TILING_Y: + align_w = 128; + align_h = 32; + break; + case GEN8_TILING_W: + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 22: + * + * "A 4KB tile is subdivided into 8-high by 8-wide array of + * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 + * bytes." + */ + align_w = 64; + align_h = 64; + break; + default: + assert(layout->tiling == GEN6_TILING_NONE); + /* some good enough values */ + align_w = 64; + align_h = 2; break; } + + if (info->force_bo_stride) { + if (info->force_bo_stride % align_w || info->force_bo_stride < stride) + return false; + + img->bo_stride = info->force_bo_stride; + } else { + img->bo_stride = align(stride, align_w); + } + + img->bo_height = align(height, align_h); + + return true; } -static void -img_calculate_hiz_size(struct ilo_image *img, - const struct ilo_image_params *params) +static bool +image_set_gen6_hiz(struct ilo_image *img, + const struct ilo_dev *dev, + const struct ilo_image_info *info, + const struct ilo_image_layout *layout) { - const struct pipe_resource *templ = params->templ; - const unsigned hz_align_j = 8; + const int hz_align_j = 8; enum ilo_image_walk_type hz_walk; - unsigned hz_width, hz_height, lv; - unsigned hz_clear_w, hz_clear_h; + int hz_width, hz_height; + int hz_clear_w, hz_clear_h; + uint8_t lv; + + ILO_DEV_ASSERT(dev, 6, 8); - assert(img->aux.type == ILO_IMAGE_AUX_HIZ); + assert(layout->aux == ILO_IMAGE_AUX_HIZ); - assert(img->walk == ILO_IMAGE_WALK_LAYER || - img->walk == ILO_IMAGE_WALK_3D); + assert(layout->walk == ILO_IMAGE_WALK_LAYER || + layout->walk == ILO_IMAGE_WALK_3D); /* * From the Sandy Bridge PRM, volume 2 part 1, page 312: @@ -1036,8 +1118,8 @@ img_calculate_hiz_size(struct ilo_image *img, * * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD. 
*/ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - hz_walk = img->walk; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) + hz_walk = layout->walk; else hz_walk = ILO_IMAGE_WALK_LOD; @@ -1051,16 +1133,16 @@ img_calculate_hiz_size(struct ilo_image *img, switch (hz_walk) { case ILO_IMAGE_WALK_LAYER: { - const unsigned h0 = align(params->h0, hz_align_j); - const unsigned h1 = align(params->h1, hz_align_j); - const unsigned htail = - ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; - const unsigned hz_qpitch = h0 + h1 + htail; + const int h0 = align(layout->walk_layer_h0, hz_align_j); + const int h1 = align(layout->walk_layer_h1, hz_align_j); + const int htail = + ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; + const int hz_qpitch = h0 + h1 + htail; - hz_width = align(img->lods[0].slice_width, 16); + hz_width = align(layout->lods[0].slice_width, 16); - hz_height = hz_qpitch * templ->array_size / 2; - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) + hz_height = hz_qpitch * info->array_size / 2; + if (ilo_dev_gen(dev) >= ILO_GEN(7)) hz_height = align(hz_height, 8); img->aux.walk_layer_height = hz_qpitch; @@ -1068,27 +1150,27 @@ img_calculate_hiz_size(struct ilo_image *img, break; case ILO_IMAGE_WALK_LOD: { - unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS]; - unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS]; - unsigned cur_tx, cur_ty; + int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT]; + int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT]; + int cur_tx, cur_ty; /* figure out the tile offsets of LODs */ hz_width = 0; hz_height = 0; cur_tx = 0; cur_ty = 0; - for (lv = 0; lv <= templ->last_level; lv++) { - unsigned tw, th; + for (lv = 0; lv < info->level_count; lv++) { + int tw, th; lod_tx[lv] = cur_tx; lod_ty[lv] = cur_ty; - tw = align(img->lods[lv].slice_width, 16); - th = align(img->lods[lv].slice_height, hz_align_j) * - templ->array_size / 2; + tw = align(layout->lods[lv].slice_width, 16); + th = align(layout->lods[lv].slice_height, hz_align_j) * + info->array_size / 2; /* convert to Y-tiles */ 
- tw = align(tw, 128) / 128; - th = align(th, 32) / 32; + tw = (tw + 127) / 128; + th = (th + 31) / 32; if (hz_width < cur_tx + tw) hz_width = cur_tx + tw; @@ -1102,22 +1184,23 @@ img_calculate_hiz_size(struct ilo_image *img, } /* convert tile offsets to memory offsets */ - for (lv = 0; lv <= templ->last_level; lv++) { + for (lv = 0; lv < info->level_count; lv++) { img->aux.walk_lod_offsets[lv] = (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096; } + hz_width *= 128; hz_height *= 32; } break; case ILO_IMAGE_WALK_3D: - hz_width = align(img->lods[0].slice_width, 16); + hz_width = align(layout->lods[0].slice_width, 16); hz_height = 0; - for (lv = 0; lv <= templ->last_level; lv++) { - const unsigned h = align(img->lods[lv].slice_height, hz_align_j); + for (lv = 0; lv < info->level_count; lv++) { + const int h = align(layout->lods[lv].slice_height, hz_align_j); /* according to the formula, slices are packed together vertically */ - hz_height += h * u_minify(templ->depth0, lv); + hz_height += h * u_minify(info->depth, lv); } hz_height /= 2; break; @@ -1136,8 +1219,7 @@ img_calculate_hiz_size(struct ilo_image *img, */ hz_clear_w = 8; hz_clear_h = 4; - switch (templ->nr_samples) { - case 0: + switch (info->sample_count) { case 1: default: break; @@ -1158,33 +1240,38 @@ img_calculate_hiz_size(struct ilo_image *img, break; } - for (lv = 0; lv <= templ->last_level; lv++) { - if (u_minify(img->width0, lv) % hz_clear_w || - u_minify(img->height0, lv) % hz_clear_h) + for (lv = 0; lv < info->level_count; lv++) { + if (u_minify(info->width, lv) % hz_clear_w || + u_minify(info->height, lv) % hz_clear_h) break; img->aux.enables |= 1 << lv; } - /* we padded to allow this in img_align() */ - if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1) + /* we padded to allow this in image_get_gen6_monolithic_size() */ + if (info->level_count == 1 && info->array_size == 1 && info->depth == 1) img->aux.enables |= 0x1; /* align to Y-tile */ img->aux.bo_stride = align(hz_width, 
128); img->aux.bo_height = align(hz_height, 32); + + return true; } -static void -img_calculate_mcs_size(struct ilo_image *img, - const struct ilo_image_params *params) +static bool +image_set_gen7_mcs(struct ilo_image *img, + const struct ilo_dev *dev, + const struct ilo_image_info *info, + const struct ilo_image_layout *layout) { - const struct pipe_resource *templ = params->templ; int mcs_width, mcs_height, mcs_cpp; int downscale_x, downscale_y; - assert(img->aux.type == ILO_IMAGE_AUX_MCS); + ILO_DEV_ASSERT(dev, 7, 8); + + assert(layout->aux == ILO_IMAGE_AUX_MCS); - if (templ->nr_samples > 1) { + if (info->sample_count > 1) { /* * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The @@ -1198,7 +1285,7 @@ img_calculate_mcs_size(struct ilo_image *img, * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2 * pixel block in the RT. */ - switch (templ->nr_samples) { + switch (info->sample_count) { case 2: case 4: downscale_x = 8; @@ -1217,7 +1304,7 @@ img_calculate_mcs_size(struct ilo_image *img, break; default: assert(!"unsupported sample count"); - return; + return false; break; } @@ -1226,8 +1313,8 @@ img_calculate_mcs_size(struct ilo_image *img, * clear rectangle cannot be masked. The scale-down clear rectangle * thus must be aligned to 2x2, and we need to pad. */ - mcs_width = align(img->width0, downscale_x * 2); - mcs_height = align(img->height0, downscale_y * 2); + mcs_width = align(info->width, downscale_x * 2); + mcs_height = align(info->height, downscale_y * 2); } else { /* * From the Ivy Bridge PRM, volume 2 part 1, page 327: @@ -1262,18 +1349,18 @@ img_calculate_mcs_size(struct ilo_image *img, * anything except for the size of the allocated MCS. Let's see if we * hit out-of-bound access. 
*/ - switch (img->tiling) { + switch (layout->tiling) { case GEN6_TILING_X: - downscale_x = 64 / img->block_size; + downscale_x = 64 / info->block_size; downscale_y = 2; break; case GEN6_TILING_Y: - downscale_x = 32 / img->block_size; + downscale_x = 32 / info->block_size; downscale_y = 4; break; default: assert(!"unsupported tiling mode"); - return; + return false; break; } @@ -1290,181 +1377,75 @@ img_calculate_mcs_size(struct ilo_image *img, * The scaled-down clear rectangle must be aligned to 4x4 instead of * 2x2, and we need to pad. */ - mcs_width = align(img->width0, downscale_x * 4) / downscale_x; - mcs_height = align(img->height0, downscale_y * 4) / downscale_y; + mcs_width = align(info->width, downscale_x * 4) / downscale_x; + mcs_height = align(info->height, downscale_y * 4) / downscale_y; mcs_cpp = 16; /* an OWord */ } - img->aux.enables = (1 << (templ->last_level + 1)) - 1; + img->aux.enables = (1 << info->level_count) - 1; /* align to Y-tile */ img->aux.bo_stride = align(mcs_width * mcs_cpp, 128); img->aux.bo_height = align(mcs_height, 32); -} - -static void -img_init(struct ilo_image *img, - struct ilo_image_params *params) -{ - /* there are hard dependencies between every function here */ - - img_init_aux(img, params); - img_init_size_and_format(img, params); - img_init_walk(img, params); - img_init_tiling(img, params); - img_init_alignments(img, params); - img_init_lods(img, params); - img_init_layer_height(img, params); - - img_align(img, params); - img_calculate_bo_size(img, params); - img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT); - - switch (img->aux.type) { - case ILO_IMAGE_AUX_HIZ: - img_calculate_hiz_size(img, params); - break; - case ILO_IMAGE_AUX_MCS: - img_calculate_mcs_size(img, params); - break; - default: - break; - } -} - -/** - * The texutre is for transfer only. We can define our own layout to save - * space. 
- */ -static void -img_init_for_transfer(struct ilo_image *img, - const struct ilo_dev *dev, - const struct pipe_resource *templ) -{ - const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ? - templ->depth0 : templ->array_size; - unsigned layer_width, layer_height; - - assert(templ->last_level == 0); - assert(templ->nr_samples <= 1); - - img->aux.type = ILO_IMAGE_AUX_NONE; - - img->target = templ->target; - img->width0 = templ->width0; - img->height0 = templ->height0; - img->depth0 = templ->depth0; - img->array_size = templ->array_size; - img->level_count = 1; - img->sample_count = 1; - - img->format = templ->format; - img->block_width = util_format_get_blockwidth(templ->format); - img->block_height = util_format_get_blockheight(templ->format); - img->block_size = util_format_get_blocksize(templ->format); - - img->walk = ILO_IMAGE_WALK_LOD; - - img->tiling = GEN6_TILING_NONE; - - img->align_i = img->block_width; - img->align_j = img->block_height; - - assert(util_is_power_of_two(img->block_width) && - util_is_power_of_two(img->block_height)); - - /* use packed layout */ - layer_width = align(templ->width0, img->align_i); - layer_height = align(templ->height0, img->align_j); - - img->lods[0].slice_width = layer_width; - img->lods[0].slice_height = layer_height; - - img->bo_stride = (layer_width / img->block_width) * img->block_size; - img->bo_stride = align(img->bo_stride, 64); - - img->bo_height = (layer_height / img->block_height) * num_layers; + return true; } -/** - * Initialize the image. Callers should zero-initialize \p img first. 
- */ -void ilo_image_init(struct ilo_image *img, - const struct ilo_dev *dev, - const struct pipe_resource *templ) +bool +ilo_image_init(struct ilo_image *img, + const struct ilo_dev *dev, + const struct ilo_image_info *info) { - struct ilo_image_params params; - bool transfer_only; + struct ilo_image_layout layout; assert(ilo_is_zeroed(img, sizeof(*img))); - /* use transfer layout when the texture is never bound to GPU */ - transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | - PIPE_BIND_TRANSFER_READ)); - if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) { - img_init_for_transfer(img, dev, templ); - return; - } + memset(&layout, 0, sizeof(layout)); + layout.lods = img->lods; - memset(&params, 0, sizeof(params)); - params.dev = dev; - params.templ = templ; - params.valid_tilings = IMAGE_TILING_ALL; + if (!image_get_gen6_layout(dev, info, &layout)) + return false; - img_init(img, &params); -} + img->type = info->type; -bool -ilo_image_init_for_imported(struct ilo_image *img, - const struct ilo_dev *dev, - const struct pipe_resource *templ, - enum gen_surface_tiling tiling, - unsigned bo_stride) -{ - struct ilo_image_params params; + img->format = info->format; + img->block_width = info->block_width; + img->block_height = info->block_height; + img->block_size = info->block_size; - assert(ilo_is_zeroed(img, sizeof(*img))); + img->width0 = info->width; + img->height0 = info->height; + img->depth0 = info->depth; + img->array_size = info->array_size; + img->level_count = info->level_count; + img->sample_count = info->sample_count; - if ((tiling == GEN6_TILING_X && bo_stride % 512) || - (tiling == GEN6_TILING_Y && bo_stride % 128) || - (tiling == GEN8_TILING_W && bo_stride % 64)) - return false; + img->walk = layout.walk; + img->interleaved_samples = layout.interleaved_samples; - memset(&params, 0, sizeof(params)); - params.dev = dev; - params.templ = templ; - params.valid_tilings = 1 << tiling; + img->tiling = layout.tiling; - img_init(img, &params); + 
img->aux.type = layout.aux; - assert(img->tiling == tiling); - if (img->bo_stride > bo_stride) - return false; - - img->bo_stride = bo_stride; - - /* assume imported RTs are also scanouts */ - if (!img->scanout) - img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET); + img->align_i = layout.align_i; + img->align_j = layout.align_j; - return true; -} + img->walk_layer_height = layout.walk_layer_height; -bool -ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev) -{ - /* HiZ is required for separate stencil on Gen6 */ - if (ilo_dev_gen(dev) == ILO_GEN(6) && - img->aux.type == ILO_IMAGE_AUX_HIZ && - img->separate_stencil) + if (!image_set_gen6_bo_size(img, dev, info, &layout)) return false; - /* MCS is required for multisample images */ - if (img->aux.type == ILO_IMAGE_AUX_MCS && - img->sample_count > 1) - return false; + img->scanout = info->bind_scanout; - img->aux.enables = 0x0; + switch (layout.aux) { + case ILO_IMAGE_AUX_HIZ: + image_set_gen6_hiz(img, dev, info, &layout); + break; + case ILO_IMAGE_AUX_MCS: + image_set_gen7_mcs(img, dev, info, &layout); + break; + default: + break; + } return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h index af15e856028..646ed6f5727 100644 --- a/src/gallium/drivers/ilo/core/ilo_image.h +++ b/src/gallium/drivers/ilo/core/ilo_image.h @@ -29,11 +29,17 @@ #define ILO_IMAGE_H #include "genhw/genhw.h" -#include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" +/* + * From the Ivy Bridge PRM, volume 4 part 1, page 75: + * + * "(MIP Count / LOD) representing [1,15] MIP levels" + */ +#define ILO_IMAGE_MAX_LEVEL_COUNT 15 + enum ilo_image_aux_type { ILO_IMAGE_AUX_NONE, ILO_IMAGE_AUX_HIZ, @@ -68,6 +74,49 @@ enum ilo_image_walk_type { ILO_IMAGE_WALK_3D, }; +struct ilo_image_info { + enum gen_surface_type type; + + enum gen_surface_format format; + bool interleaved_stencil; + bool is_integer; + /* width, height and size of pixel blocks */ + bool 
compressed; + unsigned block_width; + unsigned block_height; + unsigned block_size; + + /* image size */ + uint16_t width; + uint16_t height; + uint16_t depth; + uint16_t array_size; + uint8_t level_count; + uint8_t sample_count; + + /* disable optional aux */ + bool aux_disable; + + /* tilings to consider, if any bit is set */ + uint8_t valid_tilings; + + /* + * prefer GEN6_TILING_NONE when the (estimated) image size exceeds the + * threshold + */ + uint32_t prefer_linear_threshold; + + /* force a stride when non-zero */ + uint32_t force_bo_stride; + + bool bind_surface_sampler; + bool bind_surface_dp_render; + bool bind_surface_dp_typed; + bool bind_zs; + bool bind_scanout; + bool bind_cursor; +}; + /* * When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each * LOD and this is used to describe LODs in the first array layer. Otherwise, @@ -88,7 +137,10 @@ struct ilo_image_lod { * Texture layout. */ struct ilo_image { - enum pipe_texture_target target; + enum gen_surface_type type; + + enum gen_surface_format format; + bool interleaved_stencil; /* size, format, etc for programming hardware states */ unsigned width0; @@ -97,8 +149,6 @@ struct ilo_image { unsigned array_size; unsigned level_count; unsigned sample_count; - enum pipe_format format; - bool separate_stencil; /* * width, height, and size of pixel blocks for conversion between pixel @@ -117,7 +167,7 @@ struct ilo_image { unsigned align_i; unsigned align_j; - struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS]; + struct ilo_image_lod lods[ILO_IMAGE_MAX_LEVEL_COUNT]; /* physical layer height for ILO_IMAGE_WALK_LAYER */ unsigned walk_layer_height; @@ -136,36 +186,18 @@ struct ilo_image { unsigned enables; /* LOD offsets for ILO_IMAGE_WALK_LOD */ - unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS]; + unsigned walk_lod_offsets[ILO_IMAGE_MAX_LEVEL_COUNT]; unsigned walk_layer_height; unsigned bo_stride; unsigned bo_height; - - /* managed by users */ - struct intel_bo *bo; } aux; - - /* managed 
by users */ - struct intel_bo *bo; }; -struct pipe_resource; - -void +bool ilo_image_init(struct ilo_image *img, const struct ilo_dev *dev, - const struct pipe_resource *templ); - -bool -ilo_image_init_for_imported(struct ilo_image *img, - const struct ilo_dev *dev, - const struct pipe_resource *templ, - enum gen_surface_tiling tiling, - unsigned bo_stride); - -bool -ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev); + const struct ilo_image_info *info); static inline bool ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level) diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c index 38c0b719ab3..6ef2c91a592 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_sol.c +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c @@ -26,7 +26,7 @@ */ #include "ilo_debug.h" -#include "ilo_buffer.h" +#include "ilo_vma.h" #include "ilo_state_sol.h" static bool @@ -270,9 +270,6 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev, { ILO_DEV_ASSERT(dev, 7, 8); - if (info->buf) - assert(info->offset < info->buf->bo_size && info->size); - /* * From the Ivy Bridge PRM, volume 2 part 1, page 208: * @@ -281,9 +278,17 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev, */ assert(info->offset % 4 == 0); + if (info->vma) { + assert(info->vma->vm_alignment % 4 == 0); + assert(info->size && info->offset + info->size <= info->vma->vm_size); + } + /* Gen8+ only */ - if (info->write_offset_load || info->write_offset_save) - assert(ilo_dev_gen(dev) >= ILO_GEN(8)); + if (info->write_offset_load || info->write_offset_save) { + assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma); + assert(info->write_offset_offset + sizeof(uint32_t) <= + info->write_offset_vma->vm_size); + } /* * From the Broadwell PRM, volume 2b, page 206: @@ -304,25 +309,15 @@ static uint32_t sol_buffer_get_gen6_size(const struct ilo_dev *dev, const struct ilo_state_sol_buffer_info *info) { - uint32_t size; - 
ILO_DEV_ASSERT(dev, 6, 8); - if (!info->buf) - return 0; - - size = (info->offset + info->size <= info->buf->bo_size) ? info->size : - info->buf->bo_size - info->offset; - /* * From the Ivy Bridge PRM, volume 2 part 1, page 208: * * "(Surface End Address) This field specifies the ending DWord * address..." */ - size &= ~3; - - return size; + return (info->vma) ? info->size & ~3 : 0; } static bool @@ -359,7 +354,7 @@ sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb, dw1 = 0; - if (info->buf) + if (info->vma) dw1 |= GEN8_SO_BUF_DW1_ENABLE; if (info->write_offset_load) dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE; @@ -429,6 +424,15 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol, return ilo_state_sol_init(sol, dev, &info); } +uint32_t +ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size, + uint32_t *alignment) +{ + /* DWord aligned without padding */ + *alignment = 4; + return size; +} + bool ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb, const struct ilo_dev *dev, @@ -443,9 +447,8 @@ ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb, else ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info); - sb->need_bo = (info->size > 0); - sb->need_write_offset_bo = (info->write_offset_save || - (info->write_offset_load && !info->write_offset_imm_enable)); + sb->vma = info->vma; + sb->write_offset_vma = info->write_offset_vma; assert(ret); diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h index 2513fcb4979..92c5f94725b 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_sol.h +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h @@ -107,17 +107,17 @@ struct ilo_state_sol { uint8_t decl_count; }; -struct ilo_buffer; +struct ilo_vma; struct ilo_state_sol_buffer_info { - const struct ilo_buffer *buf; + const struct ilo_vma *vma; uint32_t offset; uint32_t size; - /* - * Gen8+ only. 
When enabled, require a write offset bo of at least - * (sizeof(uint32_t) * ILO_STATE_SOL_MAX_BUFFER_COUNT) bytes - */ + /* Gen8+ only; at least sizeof(uint32_t) bytes */ + const struct ilo_vma *write_offset_vma; + uint32_t write_offset_offset; + bool write_offset_load; bool write_offset_save; @@ -126,14 +126,10 @@ struct ilo_state_sol_buffer_info { }; struct ilo_state_sol_buffer { - uint32_t so_buf[4]; - - bool need_bo; - bool need_write_offset_bo; + uint32_t so_buf[5]; - /* managed by users */ - struct intel_bo *bo; - struct intel_bo *write_offset_bo; + const struct ilo_vma *vma; + const struct ilo_vma *write_offset_vma; }; static inline size_t @@ -154,6 +150,10 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol, const struct ilo_dev *dev, bool render_disable); +uint32_t +ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size, + uint32_t *alignment); + bool ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb, const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c index 5be9f8f6270..40fe15f316f 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_surface.c +++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c @@ -26,8 +26,8 @@ */ #include "ilo_debug.h" -#include "ilo_buffer.h" #include "ilo_image.h" +#include "ilo_vma.h" #include "ilo_state_surface.h" static bool @@ -94,31 +94,13 @@ surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf, return true; } -static bool -surface_validate_gen6_buffer(const struct ilo_dev *dev, - const struct ilo_state_surface_buffer_info *info) +static uint32_t +surface_get_gen6_buffer_offset_alignment(const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info) { - ILO_DEV_ASSERT(dev, 6, 8); - - /* SVB writes are Gen6-only */ - if (ilo_dev_gen(dev) >= ILO_GEN(7)) - assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB); - - if (info->offset + info->size > info->buf->bo_size) { - ilo_warn("invalid 
buffer range\n"); - return false; - } + uint32_t alignment; - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B] - * For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]" - */ - if (!info->struct_size || info->struct_size > 2048) { - ilo_warn("invalid buffer struct size\n"); - return false; - } + ILO_DEV_ASSERT(dev, 6, 8); /* * From the Ivy Bridge PRM, volume 4 part 1, page 68: @@ -132,76 +114,153 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev, * "Certain message types used to access surfaces have more stringent * alignment requirements. Please refer to the specific message * documentation for additional restrictions." - * - * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237: - * - * "the surface base address must be OWord aligned" - * - * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord Dual - * Block Read/Write. - * - * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249: - * - * "The surface base address must be DWord aligned" - * - * for DWord Scattered Read/Write and Byte Scattered Read/Write. - * - * We have to rely on users to correctly set info->struct_size here. DWord - * Scattered Read/Write has conflicting pitch and alignment, but we do not - * use them yet so we are fine. - * - * It is unclear if sampling engine surfaces require aligned offsets. 
*/ - if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) { - assert(info->struct_size % info->format_size == 0); + switch (info->access) { + case ILO_STATE_SURFACE_ACCESS_SAMPLER: + /* no alignment requirements */ + alignment = 1; + break; + case ILO_STATE_SURFACE_ACCESS_DP_RENDER: + case ILO_STATE_SURFACE_ACCESS_DP_TYPED: + /* element-size aligned */ + alignment = info->format_size; - if (info->offset % info->struct_size) { - ilo_warn("bad buffer offset\n"); - return false; - } - } + assert(info->struct_size % alignment == 0); + break; + case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED: + /* + * Nothing is said about Untyped* messages, but I think they require the + * base address to be DWord aligned. + */ + alignment = 4; - if (info->format == GEN6_FORMAT_RAW) { /* - * From the Sandy Bridge PRM, volume 4 part 1, page 97: + * From the Ivy Bridge PRM, volume 4 part 1, page 70: + * + * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the + * pitch must be a multiple of 4 bytes." + */ + if (info->struct_size > 1) + assert(info->struct_size % alignment == 0); + break; + case ILO_STATE_SURFACE_ACCESS_DP_DATA: + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237: + * + * "the surface base address must be OWord aligned" + * + * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord + * Dual Block Read/Write. + * + * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249: * - * ""RAW" is supported only with buffers and structured buffers - * accessed via the untyped surface read/write and untyped atomic - * operation messages, which do not have a column in the table." + * "The surface base address must be DWord aligned" * - * We do not have a specific access mode for untyped messages. + * for DWord Scattered Read/Write and Byte Scattered Read/Write. */ - assert(info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED); + alignment = (info->format_size > 4) ? 
16 : 4; /* - * Nothing is said about Untyped* messages, but I guess they require the - * base address to be DWord aligned. + * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, 237, and + * 246: + * + * "the surface pitch is ignored, the surface is treated as a + * 1-dimensional surface. An element size (pitch) of 16 bytes is + * used to determine the size of the buffer for out-of-bounds + * checking if using the surface state model." + * + * for OWord Block Read/Write, Unaligned OWord Block Read, OWord + * Dual Block Read/Write, and DWord Scattered Read/Write. + * + * From the Ivy Bridge PRM, volume 4 part 1, page 248: + * + * "The surface pitch is ignored, the surface is treated as a + * 1-dimensional surface. An element size (pitch) of 4 bytes is + * used to determine the size of the buffer for out-of-bounds + * checking if using the surface state model." + * + * for Byte Scattered Read/Write. + * + * It is programmable on Gen7.5+. */ - if (info->offset % 4) { - ilo_warn("bad RAW buffer offset\n"); - return false; + if (ilo_dev_gen(dev) < ILO_GEN(7.5)) { + const int fixed = (info->format_size > 1) ? 16 : 4; + assert(info->struct_size == fixed); } + break; + case ILO_STATE_SURFACE_ACCESS_DP_SVB: + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 259: + * + * "Both the surface base address and surface pitch must be DWord + * aligned." + */ + alignment = 4; - if (info->struct_size > 1) { - /* no STRBUF on Gen6 */ - if (ilo_dev_gen(dev) == ILO_GEN(6)) { - ilo_warn("no STRBUF support\n"); - return false; - } + assert(info->struct_size % alignment == 0); + break; + default: + assert(!"unknown access"); + alignment = 1; + break; + } - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 70: - * - * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the - * pitch must be a multiple of 4 bytes." 
- */ - if (info->struct_size % 4) { - ilo_warn("bad STRBUF pitch\n"); - return false; - } - } + return alignment; +} + +static bool +surface_validate_gen6_buffer(const struct ilo_dev *dev, + const struct ilo_state_surface_buffer_info *info) +{ + uint32_t alignment; + + ILO_DEV_ASSERT(dev, 6, 8); + + if (info->offset + info->size > info->vma->vm_size) { + ilo_warn("invalid buffer range\n"); + return false; } + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B] + * For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]" + */ + if (!info->struct_size || info->struct_size > 2048) { + ilo_warn("invalid buffer struct size\n"); + return false; + } + + alignment = surface_get_gen6_buffer_offset_alignment(dev, info); + if (info->offset % alignment || info->vma->vm_alignment % alignment) { + ilo_warn("bad buffer offset\n"); + return false; + } + + /* no STRBUF on Gen6 */ + if (info->format == GEN6_FORMAT_RAW && info->struct_size > 1) + assert(ilo_dev_gen(dev) >= ILO_GEN(7)); + + /* SVB writes are Gen6 only */ + if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB) + assert(ilo_dev_gen(dev) == ILO_GEN(6)); + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 83: + * + * "NOTE: "RAW" is supported only with buffers and structured buffers + * accessed via the untyped surface read/write and untyped atomic + * operation messages, which do not have a column in the table." + * + * From the Ivy Bridge PRM, volume 4 part 1, page 252: + * + * "For untyped messages, the Surface Format must be RAW and the + * Surface Type must be SURFTYPE_BUFFER or SURFTYPE_STRBUF." 
+ */ + assert((info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED) == + (info->format == GEN6_FORMAT_RAW)); + return true; } @@ -215,8 +274,7 @@ surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); c = info->size / info->struct_size; - if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB && - info->format_size < info->size - info->struct_size * c) + if (info->format_size < info->size - info->struct_size * c) c++; /* @@ -367,29 +425,6 @@ surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf, return true; } -static enum gen_surface_type -get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - switch (img->target) { - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return GEN6_SURFTYPE_1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_CUBE_ARRAY: - return GEN6_SURFTYPE_2D; - case PIPE_TEXTURE_3D: - return GEN6_SURFTYPE_3D; - default: - assert(!"unknown texture target"); - return GEN6_SURFTYPE_NULL; - } -} - static bool surface_validate_gen6_image(const struct ilo_dev *dev, const struct ilo_state_surface_image_info *info) @@ -408,6 +443,17 @@ surface_validate_gen6_image(const struct ilo_dev *dev, break; } + assert(info->img && info->vma); + + if (info->img->tiling != GEN6_TILING_NONE) + assert(info->vma->vm_alignment % 4096 == 0); + + if (info->aux_vma) { + assert(ilo_image_can_enable_aux(info->img, info->level_base)); + /* always tiled */ + assert(info->aux_vma->vm_alignment % 4096 == 0); + } + /* * From the Sandy Bridge PRM, volume 4 part 1, page 78: * @@ -418,16 +464,18 @@ surface_validate_gen6_image(const struct ilo_dev *dev, assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 && info->img->width0 <= info->img->bo_stride); - if (info->is_cube_map) { - assert(get_gen6_surface_type(dev, info->img) == GEN6_SURFTYPE_2D); + if (info->type != info->img->type) { + 
assert(info->type == GEN6_SURFTYPE_2D && + info->img->type == GEN6_SURFTYPE_CUBE); + } - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 78: - * - * "For cube maps, Width must be set equal to the Height." - */ + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 78: + * + * "For cube maps, Width must be set equal to the Height." + */ + if (info->type == GEN6_SURFTYPE_CUBE) assert(info->img->width0 == info->img->height0); - } /* * From the Sandy Bridge PRM, volume 4 part 1, page 72: @@ -463,20 +511,21 @@ surface_validate_gen6_image(const struct ilo_dev *dev, } static void -get_gen6_max_extent(const struct ilo_dev *dev, - const struct ilo_image *img, - uint16_t *max_w, uint16_t *max_h) +surface_get_gen6_image_max_extent(const struct ilo_dev *dev, + const struct ilo_state_surface_image_info *info, + uint16_t *max_w, uint16_t *max_h) { const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; ILO_DEV_ASSERT(dev, 6, 8); - switch (get_gen6_surface_type(dev, img)) { + switch (info->type) { case GEN6_SURFTYPE_1D: *max_w = max_size; *max_h = 1; break; case GEN6_SURFTYPE_2D: + case GEN6_SURFTYPE_CUBE: *max_w = max_size; *max_h = max_size; break; @@ -504,7 +553,7 @@ surface_get_gen6_image_extent(const struct ilo_dev *dev, w = info->img->width0; h = info->img->height0; - get_gen6_max_extent(dev, info->img, &max_w, &max_h); + surface_get_gen6_image_max_extent(dev, info, &max_w, &max_h); assert(w && h && w <= max_w && h <= max_h); *width = w - 1; @@ -555,16 +604,17 @@ surface_get_gen6_image_slices(const struct ilo_dev *dev, * layers to (86 * 6), about 512. */ - switch (get_gen6_surface_type(dev, info->img)) { + switch (info->type) { case GEN6_SURFTYPE_1D: case GEN6_SURFTYPE_2D: + case GEN6_SURFTYPE_CUBE: max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 
2048 : 512; assert(info->img->array_size <= max_slice); max_slice = info->img->array_size; d = info->slice_count; - if (info->is_cube_map) { + if (info->type == GEN6_SURFTYPE_CUBE) { if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) { if (!d || d % 6) { ilo_warn("invalid cube slice count\n"); @@ -877,7 +927,6 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf, uint8_t min_lod, mip_count; enum gen_sample_count sample_count; uint32_t alignments; - enum gen_surface_type type; uint32_t dw0, dw2, dw3, dw4, dw5; ILO_DEV_ASSERT(dev, 6, 6); @@ -897,10 +946,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf, if (info->img->sample_count > 1) assert(info->img->interleaved_samples); - type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : - get_gen6_surface_type(dev, info->img); - - dw0 = type << GEN6_SURFACE_DW0_TYPE__SHIFT | + dw0 = info->type << GEN6_SURFACE_DW0_TYPE__SHIFT | info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT | GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; @@ -927,7 +973,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf, * "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this * field must be programmed to 111111b (all faces enabled)." 
*/ - if (info->is_cube_map && + if (info->type == GEN6_SURFTYPE_CUBE && info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) { dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE | GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; @@ -956,7 +1002,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf, surf->surface[4] = dw4; surf->surface[5] = dw5; - surf->type = type; + surf->type = info->type; surf->min_lod = min_lod; surf->mip_count = mip_count; @@ -972,7 +1018,6 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf, uint8_t min_lod, mip_count; uint32_t alignments; enum gen_sample_count sample_count; - enum gen_surface_type type; uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7; ILO_DEV_ASSERT(dev, 7, 8); @@ -986,10 +1031,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf, !surface_get_gen6_image_alignments(dev, info, &alignments)) return false; - type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : - get_gen6_surface_type(dev, info->img); - - dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT | + dw0 = info->type << GEN7_SURFACE_DW0_TYPE__SHIFT | info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT | alignments; @@ -1023,7 +1065,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf, * field must be programmed to 111111b (all faces enabled). This field * is ignored unless the Surface Type is SURFTYPE_CUBE." 
*/ - if (info->is_cube_map && + if (info->type == GEN6_SURFTYPE_CUBE && info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; @@ -1087,13 +1129,61 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf, surf->surface[12] = 0; } - surf->type = type; + surf->type = info->type; surf->min_lod = min_lod; surf->mip_count = mip_count; return true; } +uint32_t +ilo_state_surface_buffer_size(const struct ilo_dev *dev, + enum ilo_state_surface_access access, + uint32_t size, uint32_t *alignment) +{ + switch (access) { + case ILO_STATE_SURFACE_ACCESS_SAMPLER: + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 118: + * + * "For buffers, which have no inherent "height," padding + * requirements are different. A buffer must be padded to the next + * multiple of 256 array elements, with an additional 16 bytes + * added beyond that to account for the L1 cache line." + * + * Assuming tightly packed GEN6_FORMAT_R32G32B32A32_FLOAT, the size + * needs to be padded to 4096 (= 16 * 256). + */ + *alignment = 1; + size = align(size, 4096) + 16; + break; + case ILO_STATE_SURFACE_ACCESS_DP_RENDER: + case ILO_STATE_SURFACE_ACCESS_DP_TYPED: + /* element-size aligned for worst cases */ + *alignment = 16; + break; + case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED: + /* DWord aligned? 
*/ + *alignment = 4; + break; + case ILO_STATE_SURFACE_ACCESS_DP_DATA: + /* OWord aligned */ + *alignment = 16; + size = align(size, 16); + break; + case ILO_STATE_SURFACE_ACCESS_DP_SVB: + /* always DWord aligned */ + *alignment = 4; + break; + default: + assert(!"unknown access"); + *alignment = 1; + break; + } + + return size; +} + bool ilo_state_surface_init_for_null(struct ilo_state_surface *surf, const struct ilo_dev *dev) @@ -1107,6 +1197,7 @@ ilo_state_surface_init_for_null(struct ilo_state_surface *surf, else ret &= surface_set_gen6_null_SURFACE_STATE(surf, dev); + surf->vma = NULL; surf->type = GEN6_SURFTYPE_NULL; surf->readonly = true; @@ -1129,6 +1220,7 @@ ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf, else ret &= surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info); + surf->vma = info->vma; surf->readonly = info->readonly; assert(ret); @@ -1150,6 +1242,9 @@ ilo_state_surface_init_for_image(struct ilo_state_surface *surf, else ret &= surface_set_gen6_image_SURFACE_STATE(surf, dev, info); + surf->vma = info->vma; + surf->aux_vma = info->aux_vma; + surf->is_integer = info->is_integer; surf->readonly = info->readonly; surf->scanout = info->img->scanout; diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h index 9c025428d50..e78c7c97db1 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_surface.h +++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h @@ -29,14 +29,10 @@ #define ILO_STATE_SURFACE_H #include "genhw/genhw.h" -#include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" -struct ilo_buffer; -struct ilo_image; - enum ilo_state_surface_access { ILO_STATE_SURFACE_ACCESS_SAMPLER, /* sampling engine surfaces */ ILO_STATE_SURFACE_ACCESS_DP_RENDER, /* render target surfaces */ @@ -46,42 +42,51 @@ enum ilo_state_surface_access { ILO_STATE_SURFACE_ACCESS_DP_SVB, }; +struct ilo_vma; +struct ilo_image; + struct ilo_state_surface_buffer_info { - const struct ilo_buffer 
*buf; + const struct ilo_vma *vma; + uint32_t offset; + uint32_t size; enum ilo_state_surface_access access; + /* format_size may be less than, equal to, or greater than struct_size */ enum gen_surface_format format; uint8_t format_size; bool readonly; uint16_t struct_size; - - uint32_t offset; - uint32_t size; }; struct ilo_state_surface_image_info { const struct ilo_image *img; + uint8_t level_base; + uint8_t level_count; + uint16_t slice_base; + uint16_t slice_count; + + const struct ilo_vma *vma; + const struct ilo_vma *aux_vma; enum ilo_state_surface_access access; + enum gen_surface_type type; + enum gen_surface_format format; bool is_integer; bool readonly; - bool is_cube_map; bool is_array; - - uint8_t level_base; - uint8_t level_count; - uint16_t slice_base; - uint16_t slice_count; }; struct ilo_state_surface { uint32_t surface[13]; + const struct ilo_vma *vma; + const struct ilo_vma *aux_vma; + enum gen_surface_type type; uint8_t min_lod; uint8_t mip_count; @@ -89,9 +94,6 @@ struct ilo_state_surface { bool readonly; bool scanout; - - /* managed by users */ - struct intel_bo *bo; }; bool @@ -99,6 +101,11 @@ ilo_state_surface_valid_format(const struct ilo_dev *dev, enum ilo_state_surface_access access, enum gen_surface_format format); +uint32_t +ilo_state_surface_buffer_size(const struct ilo_dev *dev, + enum ilo_state_surface_access access, + uint32_t size, uint32_t *alignment); + bool ilo_state_surface_init_for_null(struct ilo_state_surface *surf, const struct ilo_dev *dev); diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c index ddc75428ed7..9faf835fef2 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.c +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c @@ -26,7 +26,7 @@ */ #include "ilo_debug.h" -#include "ilo_buffer.h" +#include "ilo_vma.h" #include "ilo_state_vf.h" static bool @@ -479,8 +479,8 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev, { ILO_DEV_ASSERT(dev, 6, 8); - if (info->buf) 
- assert(info->offset < info->buf->bo_size && info->size); + if (info->vma) + assert(info->size && info->offset + info->size <= info->vma->vm_size); /* * From the Sandy Bridge PRM, volume 2 part 1, page 86: @@ -500,6 +500,9 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev, * aligned address, and BufferPitch must be a multiple of 64-bits." */ if (info->cv_has_double) { + if (info->vma) + assert(info->vma->vm_alignment % 8 == 0); + assert(info->stride % 8 == 0); assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0); } @@ -512,12 +515,7 @@ vertex_buffer_get_gen6_size(const struct ilo_dev *dev, const struct ilo_state_vertex_buffer_info *info) { ILO_DEV_ASSERT(dev, 6, 8); - - if (!info->buf) - return 0; - - return (info->offset + info->size <= info->buf->bo_size) ? info->size : - info->buf->bo_size - info->offset; + return (info->vma) ? info->size : 0; } static bool @@ -537,7 +535,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb, if (ilo_dev_gen(dev) >= ILO_GEN(7)) dw0 |= GEN7_VB_DW0_ADDR_MODIFIED; - if (!info->buf) + if (!info->vma) dw0 |= GEN6_VB_DW0_IS_NULL; STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3); @@ -551,7 +549,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb, vb->vb[2] = (size) ? info->offset + size - 1 : 0; } - vb->need_bo = (info->buf != NULL); + vb->vma = info->vma; return true; } @@ -586,8 +584,10 @@ index_buffer_validate_gen6(const struct ilo_dev *dev, */ assert(info->offset % format_size == 0); - if (info->buf) - assert(info->offset < info->buf->bo_size && info->size); + if (info->vma) { + assert(info->vma->vm_alignment % format_size == 0); + assert(info->size && info->offset + info->size <= info->vma->vm_size); + } return true; } @@ -600,12 +600,10 @@ index_buffer_get_gen6_size(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); - if (!info->buf) + if (!info->vma) return 0; - size = (info->offset + info->size <= info->buf->bo_size) ? 
info->size : - info->buf->bo_size - info->offset; - + size = info->size; if (ilo_dev_gen(dev) < ILO_GEN(8)) { const uint32_t format_size = get_index_format_size(info->format); size -= (size % format_size); @@ -638,7 +636,7 @@ index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib, ib->ib[2] = (size) ? info->offset + size - 1 : 0; } - ib->need_bo = (info->buf != NULL); + ib->vma = info->vma; return true; } @@ -949,6 +947,15 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, } } +uint32_t +ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size, + uint32_t *alignment) +{ + /* align for doubles without padding */ + *alignment = 8; + return size; +} + /** * No need to initialize first. */ @@ -966,6 +973,15 @@ ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb, return ret; } +uint32_t +ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size, + uint32_t *alignment) +{ + /* align for the worst case without padding */ + *alignment = get_index_format_size(GEN6_INDEX_DWORD); + return size; +} + /** * No need to initialize first. 
*/ diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h index f15c63a248a..16b128bf63c 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_vf.h +++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h @@ -126,10 +126,10 @@ struct ilo_state_vf_delta { uint32_t dirty; }; -struct ilo_buffer; +struct ilo_vma; struct ilo_state_vertex_buffer_info { - const struct ilo_buffer *buf; + const struct ilo_vma *vma; uint32_t offset; uint32_t size; @@ -143,14 +143,11 @@ struct ilo_state_vertex_buffer_info { struct ilo_state_vertex_buffer { uint32_t vb[3]; - bool need_bo; - - /* managed by users */ - struct intel_bo *bo; + const struct ilo_vma *vma; }; struct ilo_state_index_buffer_info { - const struct ilo_buffer *buf; + const struct ilo_vma *vma; uint32_t offset; uint32_t size; @@ -160,10 +157,7 @@ struct ilo_state_index_buffer_info { struct ilo_state_index_buffer { uint32_t ib[3]; - bool need_bo; - - /* managed by users */ - struct intel_bo *bo; + const struct ilo_vma *vma; }; static inline size_t @@ -215,11 +209,19 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf, const struct ilo_state_vf *old, struct ilo_state_vf_delta *delta); +uint32_t +ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size, + uint32_t *alignment); + bool ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb, const struct ilo_dev *dev, const struct ilo_state_vertex_buffer_info *info); +uint32_t +ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size, + uint32_t *alignment); + bool ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib, const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c index 901fedb5599..827632764b2 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_zs.c +++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c @@ -25,10 +25,9 @@ * Chia-I Wu <[email protected]> */ -#include "intel_winsys.h" - #include "ilo_debug.h" 
#include "ilo_image.h" +#include "ilo_vma.h" #include "ilo_state_zs.h" static bool @@ -56,70 +55,9 @@ zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, zs->depth[3] = 0; zs->depth[4] = 0; - zs->depth_format = format; - return true; } -static enum gen_surface_type -get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - switch (img->target) { - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return GEN6_SURFTYPE_1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_CUBE_ARRAY: - return GEN6_SURFTYPE_2D; - case PIPE_TEXTURE_3D: - return GEN6_SURFTYPE_3D; - default: - assert(!"unknown texture target"); - return GEN6_SURFTYPE_NULL; - } -} - -static enum gen_depth_format -get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img) -{ - ILO_DEV_ASSERT(dev, 6, 8); - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - switch (img->format) { - case PIPE_FORMAT_Z32_FLOAT: - return GEN6_ZFORMAT_D32_FLOAT; - case PIPE_FORMAT_Z24X8_UNORM: - return GEN6_ZFORMAT_D24_UNORM_X8_UINT; - case PIPE_FORMAT_Z16_UNORM: - return GEN6_ZFORMAT_D16_UNORM; - default: - assert(!"unknown depth format"); - return GEN6_ZFORMAT_D32_FLOAT; - } - } else { - switch (img->format) { - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; - case PIPE_FORMAT_Z32_FLOAT: - return GEN6_ZFORMAT_D32_FLOAT; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return GEN6_ZFORMAT_D24_UNORM_S8_UINT; - case PIPE_FORMAT_Z24X8_UNORM: - return GEN6_ZFORMAT_D24_UNORM_X8_UINT; - case PIPE_FORMAT_Z16_UNORM: - return GEN6_ZFORMAT_D16_UNORM; - default: - assert(!"unknown depth format"); - return GEN6_ZFORMAT_D32_FLOAT; - } - } -} - static bool zs_validate_gen6(const struct ilo_dev *dev, const struct ilo_state_zs_info *info) @@ -128,63 +66,102 @@ zs_validate_gen6(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); + assert(!info->z_img == 
!info->z_vma); + assert(!info->s_img == !info->s_vma); + + /* all tiled */ + if (info->z_img) { + assert(info->z_img->tiling == GEN6_TILING_Y); + assert(info->z_vma->vm_alignment % 4096 == 0); + } + if (info->s_img) { + assert(info->s_img->tiling == GEN8_TILING_W); + assert(info->s_vma->vm_alignment % 4096 == 0); + } + if (info->hiz_vma) { + assert(info->z_img && + ilo_image_can_enable_aux(info->z_img, info->level)); + assert(info->z_vma->vm_alignment % 4096 == 0); + } + /* * From the Ivy Bridge PRM, volume 2 part 1, page 315: * - * The stencil buffer has a format of S8_UINT, and shares Surface + * "The stencil buffer has a format of S8_UINT, and shares Surface * Type, Height, Width, and Depth, Minimum Array Element, Render * Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth - * Buffer Object Control State fields of the depth buffer. + * Buffer Object Control State fields of the depth buffer." */ - if (info->z_img == info->s_img) { - assert(info->z_img->target == info->s_img->target && - info->z_img->width0 == info->s_img->width0 && + if (info->z_img && info->s_img && info->z_img != info->s_img) { + assert(info->z_img->type == info->s_img->type && info->z_img->height0 == info->s_img->height0 && info->z_img->depth0 == info->s_img->depth0); } - assert(info->level < img->level_count); - assert(img->bo_stride); - - if (info->hiz_enable) { - assert(info->z_img && - ilo_image_can_enable_aux(info->z_img, info->level)); + if (info->type != img->type) { + assert(info->type == GEN6_SURFTYPE_2D && + img->type == GEN6_SURFTYPE_CUBE); } - if (info->is_cube_map) { - assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D); - + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + switch (info->format) { + case GEN6_ZFORMAT_D32_FLOAT: + case GEN6_ZFORMAT_D24_UNORM_X8_UINT: + case GEN6_ZFORMAT_D16_UNORM: + break; + default: + assert(!"unknown depth format"); + break; + } + } else { /* - * From the Sandy Bridge PRM, volume 2 part 1, page 323: + * From the Ironlake PRM, volume 2 
part 1, page 330: + * + * "If this field (Separate Stencil Buffer Enable) is disabled, the + * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." + * + * From the Sandy Bridge PRM, volume 2 part 1, page 321: * - * "For cube maps, Width must be set equal to Height." + * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be + * set to the same value (enabled or disabled) as Hierarchical + * Depth Buffer Enable." */ - assert(img->width0 == img->height0); + if (info->hiz_vma) + assert(info->format != GEN6_ZFORMAT_D24_UNORM_S8_UINT); + else + assert(info->format != GEN6_ZFORMAT_D24_UNORM_X8_UINT); } - if (info->z_img) - assert(info->z_img->tiling == GEN6_TILING_Y); - if (info->s_img) - assert(info->s_img->tiling == GEN8_TILING_W); + assert(info->level < img->level_count); + assert(img->bo_stride); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 323: + * + * "For cube maps, Width must be set equal to Height." + */ + if (info->type == GEN6_SURFTYPE_CUBE) + assert(img->width0 == img->height0); return true; } static void -get_gen6_max_extent(const struct ilo_dev *dev, - const struct ilo_image *img, - uint16_t *max_w, uint16_t *max_h) +zs_get_gen6_max_extent(const struct ilo_dev *dev, + const struct ilo_state_zs_info *info, + uint16_t *max_w, uint16_t *max_h) { const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 
16384 : 8192; ILO_DEV_ASSERT(dev, 6, 8); - switch (get_gen6_surface_type(dev, img)) { + switch (info->type) { case GEN6_SURFTYPE_1D: *max_w = max_size; *max_h = 1; break; case GEN6_SURFTYPE_2D: + case GEN6_SURFTYPE_CUBE: *max_w = max_size; *max_h = max_size; break; @@ -274,7 +251,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev, w = img->width0; h = img->height0; - if (info->hiz_enable) { + if (info->hiz_vma) { uint16_t align_w, align_h; get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h); @@ -290,7 +267,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev, h = align(h, align_h); } - get_gen6_max_extent(dev, img, &max_w, &max_h); + zs_get_gen6_max_extent(dev, info, &max_w, &max_h); assert(w && h && w <= max_w && h <= max_h); *width = w - 1; @@ -319,16 +296,17 @@ zs_get_gen6_depth_slices(const struct ilo_dev *dev, * surfaces. If the volume texture is MIP-mapped, this field specifies * the depth of the base MIP level." */ - switch (get_gen6_surface_type(dev, img)) { + switch (info->type) { case GEN6_SURFTYPE_1D: case GEN6_SURFTYPE_2D: + case GEN6_SURFTYPE_CUBE: max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512; assert(img->array_size <= max_slice); max_slice = img->array_size; d = info->slice_count; - if (info->is_cube_map) { + if (info->type == GEN6_SURFTYPE_CUBE) { /* * Minumum Array Element and Depth must be 0; Render Target View * Extent is ignored. @@ -408,8 +386,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, const struct ilo_state_zs_info *info) { uint16_t width, height, depth, array_base, view_extent; - enum gen_surface_type type; - enum gen_depth_format format; uint32_t dw1, dw2, dw3, dw4; ILO_DEV_ASSERT(dev, 6, 6); @@ -420,37 +396,15 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, &view_extent)) return false; - type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : - (info->z_img) ? get_gen6_surface_type(dev, info->z_img) : - get_gen6_surface_type(dev, info->s_img); - - format = (info->z_img) ? 
get_gen6_depth_format(dev, info->z_img) : - GEN6_ZFORMAT_D32_FLOAT; - - /* - * From the Ironlake PRM, volume 2 part 1, page 330: - * - * "If this field (Separate Stencil Buffer Enable) is disabled, the - * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." - * - * From the Sandy Bridge PRM, volume 2 part 1, page 321: - * - * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be set - * to the same value (enabled or disabled) as Hierarchical Depth - * Buffer Enable." - */ - if (!info->hiz_enable && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT) - format = GEN6_ZFORMAT_D24_UNORM_S8_UINT; - /* info->z_readonly and info->s_readonly are ignored on Gen6 */ - dw1 = type << GEN6_DEPTH_DW1_TYPE__SHIFT | + dw1 = info->type << GEN6_DEPTH_DW1_TYPE__SHIFT | GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT | - format << GEN6_DEPTH_DW1_FORMAT__SHIFT; + info->format << GEN6_DEPTH_DW1_FORMAT__SHIFT; if (info->z_img) dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT; - if (info->hiz_enable || !info->z_img) { + if (info->hiz_vma || !info->z_img) { dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE | GEN6_DEPTH_DW1_SEPARATE_STENCIL; } @@ -471,8 +425,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, zs->depth[3] = dw4; zs->depth[4] = 0; - zs->depth_format = format; - return true; } @@ -481,8 +433,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, const struct ilo_dev *dev, const struct ilo_state_zs_info *info) { - enum gen_surface_type type; - enum gen_depth_format format; uint16_t width, height, depth; uint16_t array_base, view_extent; uint32_t dw1, dw2, dw3, dw4, dw6; @@ -495,20 +445,13 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, &view_extent)) return false; - type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE : - (info->z_img) ? get_gen6_surface_type(dev, info->z_img) : - get_gen6_surface_type(dev, info->s_img); - - format = (info->z_img) ? 
get_gen6_depth_format(dev, info->z_img) : - GEN6_ZFORMAT_D32_FLOAT; - - dw1 = type << GEN7_DEPTH_DW1_TYPE__SHIFT | - format << GEN7_DEPTH_DW1_FORMAT__SHIFT; + dw1 = info->type << GEN7_DEPTH_DW1_TYPE__SHIFT | + info->format << GEN7_DEPTH_DW1_FORMAT__SHIFT; if (info->z_img) { if (!info->z_readonly) dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE; - if (info->hiz_enable) + if (info->hiz_vma) dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE; dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT; @@ -539,8 +482,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs, zs->depth[3] = dw4; zs->depth[4] = dw6; - zs->depth_format = format; - return true; } @@ -683,11 +624,15 @@ ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev, else ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev); - if (info->z_img && info->hiz_enable) + if (info->z_img && info->hiz_vma) ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info); else ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev); + zs->z_vma = info->z_vma; + zs->s_vma = info->s_vma; + zs->hiz_vma = info->hiz_vma; + zs->z_readonly = info->z_readonly; zs->s_readonly = info->s_readonly; @@ -703,6 +648,8 @@ ilo_state_zs_init_for_null(struct ilo_state_zs *zs, struct ilo_state_zs_info info; memset(&info, 0, sizeof(info)); + info.type = GEN6_SURFTYPE_NULL; + info.format = GEN6_ZFORMAT_D32_FLOAT; return ilo_state_zs_init(zs, dev, &info); } @@ -720,8 +667,11 @@ ilo_state_zs_disable_hiz(struct ilo_state_zs *zs, */ assert(ilo_dev_gen(dev) >= ILO_GEN(7)); - zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE; - zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev); + if (zs->hiz_vma) { + zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE; + zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev); + zs->hiz_vma = NULL; + } return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h index 98212daf74f..6a25a873897 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_zs.h +++ 
b/src/gallium/drivers/ilo/core/ilo_state_zs.h @@ -29,28 +29,31 @@ #define ILO_STATE_ZS_H #include "genhw/genhw.h" -#include "intel_winsys.h" #include "ilo_core.h" #include "ilo_dev.h" +struct ilo_vma; struct ilo_image; struct ilo_state_zs_info { - /* both are optional */ + /* both optional */ const struct ilo_image *z_img; const struct ilo_image *s_img; + uint8_t level; + uint16_t slice_base; + uint16_t slice_count; + + const struct ilo_vma *z_vma; + const struct ilo_vma *s_vma; + const struct ilo_vma *hiz_vma; + + enum gen_surface_type type; + enum gen_depth_format format; /* ignored prior to Gen7 */ bool z_readonly; bool s_readonly; - - bool hiz_enable; - bool is_cube_map; - - uint8_t level; - uint16_t slice_base; - uint16_t slice_count; }; struct ilo_state_zs { @@ -58,16 +61,12 @@ struct ilo_state_zs { uint32_t stencil[3]; uint32_t hiz[3]; - /* TODO move this to ilo_image */ - enum gen_depth_format depth_format; + const struct ilo_vma *z_vma; + const struct ilo_vma *s_vma; + const struct ilo_vma *hiz_vma; bool z_readonly; bool s_readonly; - - /* managed by users */ - struct intel_bo *depth_bo; - struct intel_bo *stencil_bo; - struct intel_bo *hiz_bo; }; bool @@ -83,11 +82,4 @@ bool ilo_state_zs_disable_hiz(struct ilo_state_zs *zs, const struct ilo_dev *dev); -static inline enum gen_depth_format -ilo_state_zs_get_depth_format(const struct ilo_state_zs *zs, - const struct ilo_dev *dev) -{ - return zs->depth_format; -} - #endif /* ILO_STATE_ZS_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_vma.h index ca3c61ff890..ad2a1d4b33e 100644 --- a/src/gallium/drivers/ilo/core/ilo_buffer.h +++ b/src/gallium/drivers/ilo/core/ilo_vma.h @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library * - * Copyright (C) 2012-2013 LunarG, Inc. + * Copyright (C) 2015 LunarG, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,40 +25,49 @@ * Chia-I Wu <[email protected]> */ -#ifndef ILO_BUFFER_H -#define ILO_BUFFER_H - -#include "intel_winsys.h" +#ifndef ILO_VMA_H +#define ILO_VMA_H #include "ilo_core.h" #include "ilo_debug.h" #include "ilo_dev.h" -struct ilo_buffer { - unsigned bo_size; +struct intel_bo; + +/** + * A virtual memory area. + */ +struct ilo_vma { + /* address space */ + uint32_t vm_size; + uint32_t vm_alignment; - /* managed by users */ + /* backing storage */ struct intel_bo *bo; + uint32_t bo_offset; }; -static inline void -ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev, - unsigned size, uint32_t bind, uint32_t flags) +static inline bool +ilo_vma_init(struct ilo_vma *vma, const struct ilo_dev *dev, + uint32_t size, uint32_t alignment) { - assert(ilo_is_zeroed(buf, sizeof(*buf))); + assert(ilo_is_zeroed(vma, sizeof(*vma))); + assert(size && alignment); + + vma->vm_alignment = alignment; + vma->vm_size = size; - buf->bo_size = size; + return true; +} + +static inline void +ilo_vma_set_bo(struct ilo_vma *vma, const struct ilo_dev *dev, + struct intel_bo *bo, uint32_t offset) +{ + assert(offset % vma->vm_alignment == 0); - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 118: - * - * "For buffers, which have no inherent "height," padding requirements - * are different. A buffer must be padded to the next multiple of 256 - * array elements, with an additional 16 bytes added beyond that to - * account for the L1 cache line." 
- */ - if (bind & PIPE_BIND_SAMPLER_VIEW) - buf->bo_size = align(buf->bo_size, 256) + 16; + vma->bo = bo; + vma->bo_offset = offset; } -#endif /* ILO_BUFFER_H */ +#endif /* ILO_VMA_H */ diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c index d55dc35e360..66203e86137 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_blt.c +++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c @@ -127,7 +127,7 @@ ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl) static bool buf_clear_region(struct ilo_blitter *blitter, - struct ilo_buffer *buf, unsigned offset, + struct ilo_buffer_resource *buf, unsigned offset, uint32_t val, unsigned size, enum gen6_blt_mask value_mask, enum gen6_blt_mask write_mask) @@ -140,8 +140,8 @@ buf_clear_region(struct ilo_blitter *blitter, if (offset % cpp || size % cpp) return false; - dst.bo = buf->bo; - dst.offset = offset; + dst.bo = buf->vma.bo; + dst.offset = buf->vma.bo_offset + offset; ilo_blitter_blt_begin(blitter, GEN6_COLOR_BLT__SIZE * (1 + size / 32764 / gen6_blt_max_scanlines), @@ -179,25 +179,26 @@ buf_clear_region(struct ilo_blitter *blitter, static bool buf_copy_region(struct ilo_blitter *blitter, - struct ilo_buffer *dst_buf, unsigned dst_offset, - struct ilo_buffer *src_buf, unsigned src_offset, + struct ilo_buffer_resource *dst_buf, unsigned dst_offset, + struct ilo_buffer_resource *src_buf, unsigned src_offset, unsigned size) { const uint8_t rop = 0xcc; /* SRCCOPY */ struct ilo_builder *builder = &blitter->ilo->cp->builder; struct gen6_blt_bo dst, src; - dst.bo = dst_buf->bo; - dst.offset = dst_offset; + dst.bo = dst_buf->vma.bo; + dst.offset = dst_buf->vma.bo_offset + dst_offset; dst.pitch = 0; - src.bo = src_buf->bo; - src.offset = src_offset; + src.bo = src_buf->vma.bo; + src.offset = src_buf->vma.bo_offset + src_offset; src.pitch = 0; ilo_blitter_blt_begin(blitter, GEN6_SRC_COPY_BLT__SIZE * (1 + size / 32764 / gen6_blt_max_scanlines), - dst_buf->bo, GEN6_TILING_NONE, 
src_buf->bo, GEN6_TILING_NONE); + dst_buf->vma.bo, GEN6_TILING_NONE, + src_buf->vma.bo, GEN6_TILING_NONE); while (size) { unsigned width, height; @@ -258,14 +259,14 @@ tex_clear_region(struct ilo_blitter *blitter, if (dst_box->width * cpp > gen6_blt_max_bytes_per_scanline) return false; - dst.bo = dst_tex->image.bo; - dst.offset = 0; + dst.bo = dst_tex->vma.bo; + dst.offset = dst_tex->vma.bo_offset; dst.pitch = dst_tex->image.bo_stride; dst.tiling = dst_tex->image.tiling; swctrl = ilo_blitter_blt_begin(blitter, GEN6_XY_COLOR_BLT__SIZE * dst_box->depth, - dst_tex->image.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE); + dst_tex->vma.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE); for (slice = 0; slice < dst_box->depth; slice++) { unsigned x, y; @@ -299,7 +300,7 @@ tex_copy_region(struct ilo_blitter *blitter, const struct pipe_box *src_box) { const struct util_format_description *desc = - util_format_description(dst_tex->image.format); + util_format_description(dst_tex->image_format); const unsigned max_extent = 32767; /* INT16_MAX */ const uint8_t rop = 0xcc; /* SRCCOPY */ struct ilo_builder *builder = &blitter->ilo->cp->builder; @@ -347,13 +348,13 @@ tex_copy_region(struct ilo_blitter *blitter, break; } - dst.bo = dst_tex->image.bo; - dst.offset = 0; + dst.bo = dst_tex->vma.bo; + dst.offset = dst_tex->vma.bo_offset; dst.pitch = dst_tex->image.bo_stride; dst.tiling = dst_tex->image.tiling; - src.bo = src_tex->image.bo; - src.offset = 0; + src.bo = src_tex->vma.bo; + src.offset = src_tex->vma.bo_offset; src.pitch = src_tex->image.bo_stride; src.tiling = src_tex->image.tiling; @@ -423,8 +424,8 @@ ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter, src_box->height == 1 && src_box->depth == 1); - success = buf_copy_region(blitter, - ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size); + success = buf_copy_region(blitter, ilo_buffer_resource(dst), dst_offset, + ilo_buffer_resource(src), src_offset, size); } else if (dst->target != PIPE_BUFFER && 
src->target != PIPE_BUFFER) { success = tex_copy_region(blitter, @@ -488,7 +489,7 @@ ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter, if (offset + size > end) size = end - offset; - success = buf_clear_region(blitter, ilo_buffer(rt->texture), + success = buf_clear_region(blitter, ilo_buffer_resource(rt->texture), offset, packed.ui[0], size, mask, mask); } else { diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 13c8f500680..86e67084d6e 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -318,7 +318,7 @@ hiz_can_clear_zs(const struct ilo_blitter *blitter, * The truth is when HiZ is enabled, separate stencil is also enabled on * all GENs. The depth buffer format cannot be combined depth/stencil. */ - switch (tex->image.format) { + switch (tex->image_format) { case PIPE_FORMAT_Z16_UNORM: if (ilo_dev_gen(blitter->ilo->dev) == ILO_GEN(6) && tex->base.width0 % 16) @@ -355,7 +355,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, if (ilo_dev_gen(blitter->ilo->dev) >= ILO_GEN(8)) clear_value = fui(depth); else - clear_value = util_pack_z(tex->image.format, depth); + clear_value = util_pack_z(tex->image_format, depth); ilo_blit_resolve_surface(blitter->ilo, zs, ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR); diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h index 9ebbf76e81e..3dbe79fb872 100644 --- a/src/gallium/drivers/ilo/ilo_common.h +++ b/src/gallium/drivers/ilo/ilo_common.h @@ -28,6 +28,14 @@ #ifndef ILO_COMMON_H #define ILO_COMMON_H +#include "pipe/p_format.h" +#include "pipe/p_defines.h" + +#include "util/list.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_pointer.h" + #include "core/ilo_core.h" #include "core/ilo_debug.h" #include "core/ilo_dev.h" diff --git a/src/gallium/drivers/ilo/ilo_context.c b/src/gallium/drivers/ilo/ilo_context.c index 
3d5c7b636a8..b9a16aab81d 100644 --- a/src/gallium/drivers/ilo/ilo_context.c +++ b/src/gallium/drivers/ilo/ilo_context.c @@ -62,6 +62,8 @@ ilo_flush(struct pipe_context *pipe, (flags & PIPE_FLUSH_END_OF_FRAME) ? "frame end" : "user request"); if (f) { + struct pipe_screen *screen = pipe->screen; + screen->fence_reference(screen, f, NULL); *f = ilo_screen_fence_create(pipe->screen, ilo->cp->last_submitted_bo); } } diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c index e8e1a4cd14c..433348d9326 100644 --- a/src/gallium/drivers/ilo/ilo_draw.c +++ b/src/gallium/drivers/ilo/ilo_draw.c @@ -444,6 +444,7 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo, const struct pipe_draw_info *info) { const struct ilo_ib_state *ib = &ilo->state_vector.ib; + const struct ilo_vma *vma; union { const void *ptr; const uint8_t *u8; @@ -453,10 +454,12 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo, /* we will draw with IB mapped */ if (ib->state.buffer) { - u.ptr = intel_bo_map(ilo_buffer(ib->state.buffer)->bo, false); + vma = ilo_resource_get_vma(ib->state.buffer); + u.ptr = intel_bo_map(vma->bo, false); if (u.ptr) - u.u8 += ib->state.offset; + u.u8 += vma->bo_offset + ib->state.offset; } else { + vma = NULL; u.ptr = ib->state.user_buffer; } @@ -500,8 +503,8 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo, #undef DRAW_VBO_WITH_SW_RESTART - if (ib->state.buffer) - intel_bo_unmap(ilo_buffer(ib->state.buffer)->bo); + if (vma) + intel_bo_unmap(vma->bo); } static bool diff --git a/src/gallium/drivers/ilo/ilo_format.h b/src/gallium/drivers/ilo/ilo_format.h index 4e955c09c14..0a19c02659e 100644 --- a/src/gallium/drivers/ilo/ilo_format.h +++ b/src/gallium/drivers/ilo/ilo_format.h @@ -165,4 +165,39 @@ ilo_format_translate_vertex(const struct ilo_dev *dev, return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER); } +static inline enum gen_depth_format +ilo_format_translate_depth(const struct ilo_dev *dev, + enum pipe_format format) +{ + if 
(ilo_dev_gen(dev) >= ILO_GEN(7)) { + switch (format) { + case PIPE_FORMAT_Z32_FLOAT: + return GEN6_ZFORMAT_D32_FLOAT; + case PIPE_FORMAT_Z24X8_UNORM: + return GEN6_ZFORMAT_D24_UNORM_X8_UINT; + case PIPE_FORMAT_Z16_UNORM: + return GEN6_ZFORMAT_D16_UNORM; + default: + assert(!"unknown depth format"); + return GEN6_ZFORMAT_D32_FLOAT; + } + } else { + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; + case PIPE_FORMAT_Z32_FLOAT: + return GEN6_ZFORMAT_D32_FLOAT; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return GEN6_ZFORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z24X8_UNORM: + return GEN6_ZFORMAT_D24_UNORM_X8_UINT; + case PIPE_FORMAT_Z16_UNORM: + return GEN6_ZFORMAT_D16_UNORM; + default: + assert(!"unknown depth format"); + return GEN6_ZFORMAT_D32_FLOAT; + } + } +} + #endif /* ILO_FORMAT_H */ diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c index ad053564294..3bf8646b344 100644 --- a/src/gallium/drivers/ilo/ilo_render_surface.c +++ b/src/gallium/drivers/ilo/ilo_render_surface.c @@ -42,14 +42,17 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder, const struct pipe_stream_output_info *so_info, int so_index) { - struct ilo_buffer *buf = ilo_buffer(so->buffer); struct ilo_state_surface_buffer_info info; struct ilo_state_surface surf; ILO_DEV_ASSERT(builder->dev, 6, 6); memset(&info, 0, sizeof(info)); - info.buf = buf; + + info.vma = ilo_resource_get_vma(so->buffer); + info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4; + info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB; switch (so_info->output[so_index].num_components) { @@ -78,12 +81,9 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder, info.struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - info.size = so->buffer_size 
- so_info->output[so_index].dst_offset * 4; memset(&surf, 0, sizeof(surf)); ilo_state_surface_init_for_buffer(&surf, builder->dev, &info); - surf.bo = info.buf->bo; return gen6_SURFACE_STATE(builder, &surf); } @@ -482,18 +482,19 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r, return; memset(&info, 0, sizeof(info)); - info.buf = ilo_buffer(session->input->buffer); + + info.vma = ilo_resource_get_vma(session->input->buffer); + info.offset = session->input->buffer_offset; + info.size = session->input->buffer_size; + info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED; info.format = GEN6_FORMAT_RAW; info.format_size = 1; info.struct_size = 1; info.readonly = true; - info.offset = session->input->buffer_offset; - info.size = session->input->buffer_size; memset(&surf, 0, sizeof(surf)); ilo_state_surface_init_for_buffer(&surf, r->dev, &info); - surf.bo = info.buf->bo; assert(count == 1 && session->input->buffer); surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf); @@ -538,23 +539,23 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r, surface_state += base; for (i = 0; i < count; i++) { if (i < vec->global_binding.count && bindings[i].resource) { - const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource); struct ilo_state_surface_buffer_info info; struct ilo_state_surface surf; assert(bindings[i].resource->target == PIPE_BUFFER); memset(&info, 0, sizeof(info)); - info.buf = buf; + + info.vma = ilo_resource_get_vma(bindings[i].resource); + info.size = info.vma->vm_size; + info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED; info.format = GEN6_FORMAT_RAW; info.format_size = 1; info.struct_size = 1; - info.size = buf->bo_size; memset(&surf, 0, sizeof(surf)); ilo_state_surface_init_for_buffer(&surf, r->dev, &info); - surf.bo = info.buf->bo; surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf); } else { diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index be9fd10a84c..9026ba9a983 100644 --- 
a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -25,7 +25,12 @@ * Chia-I Wu <[email protected]> */ +#include "core/ilo_state_vf.h" +#include "core/ilo_state_sol.h" +#include "core/ilo_state_surface.h" + #include "ilo_screen.h" +#include "ilo_format.h" #include "ilo_resource.h" /* @@ -83,6 +88,134 @@ resource_get_cpu_init(const struct pipe_resource *templ) PIPE_BIND_STREAM_OUTPUT)) ? false : true; } +static enum gen_surface_type +get_surface_type(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return GEN6_SURFTYPE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + return GEN6_SURFTYPE_2D; + case PIPE_TEXTURE_3D: + return GEN6_SURFTYPE_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return GEN6_SURFTYPE_CUBE; + default: + assert(!"unknown texture target"); + return GEN6_SURFTYPE_NULL; + } +} + +static enum pipe_format +resource_get_image_format(const struct pipe_resource *templ, + const struct ilo_dev *dev, + bool *separate_stencil_ret) +{ + enum pipe_format format = templ->format; + bool separate_stencil; + + /* silently promote ETC1 */ + if (templ->format == PIPE_FORMAT_ETC1_RGB8) + format = PIPE_FORMAT_R8G8B8X8_UNORM; + + /* separate stencil buffers */ + separate_stencil = false; + if ((templ->bind & PIPE_BIND_DEPTH_STENCIL) && + util_format_is_depth_and_stencil(templ->format)) { + switch (templ->format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + /* Gen6 requires HiZ to be available for all levels */ + if (ilo_dev_gen(dev) >= ILO_GEN(7) || templ->last_level == 0) { + format = PIPE_FORMAT_Z32_FLOAT; + separate_stencil = true; + } + break; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + format = PIPE_FORMAT_Z24X8_UNORM; + separate_stencil = true; + break; + default: + break; + } + } + + if (separate_stencil_ret) + *separate_stencil_ret = separate_stencil; + + return format; +} + +static inline enum gen_surface_format 
+pipe_to_surface_format(const struct ilo_dev *dev, enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS; + case PIPE_FORMAT_Z32_FLOAT: + return GEN6_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + return GEN6_FORMAT_R24_UNORM_X8_TYPELESS; + case PIPE_FORMAT_Z16_UNORM: + return GEN6_FORMAT_R16_UNORM; + case PIPE_FORMAT_S8_UINT: + return GEN6_FORMAT_R8_UINT; + default: + return ilo_format_translate_color(dev, format); + } +} + +static void +resource_get_image_info(const struct pipe_resource *templ, + const struct ilo_dev *dev, + enum pipe_format image_format, + struct ilo_image_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->type = get_surface_type(templ->target); + + info->format = pipe_to_surface_format(dev, image_format); + info->interleaved_stencil = util_format_is_depth_and_stencil(image_format); + info->is_integer = util_format_is_pure_integer(image_format); + info->compressed = util_format_is_compressed(image_format); + info->block_width = util_format_get_blockwidth(image_format); + info->block_height = util_format_get_blockheight(image_format); + info->block_size = util_format_get_blocksize(image_format); + + info->width = templ->width0; + info->height = templ->height0; + info->depth = templ->depth0; + info->array_size = templ->array_size; + info->level_count = templ->last_level + 1; + info->sample_count = (templ->nr_samples) ? templ->nr_samples : 1; + + info->aux_disable = (templ->usage == PIPE_USAGE_STAGING); + + if (templ->bind & PIPE_BIND_LINEAR) + info->valid_tilings = 1 << GEN6_TILING_NONE; + + /* + * Tiled images must be mapped via GTT to get a linear view. Prefer linear + * images when the image size is greater than one-fourth of the mappable + * aperture. 
+ */ + if (templ->bind & (PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_TRANSFER_READ)) + info->prefer_linear_threshold = dev->aperture_mappable / 4; + + info->bind_surface_sampler = (templ->bind & PIPE_BIND_SAMPLER_VIEW); + info->bind_surface_dp_render = (templ->bind & PIPE_BIND_RENDER_TARGET); + info->bind_surface_dp_typed = (templ->bind & + (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE)); + info->bind_zs = (templ->bind & PIPE_BIND_DEPTH_STENCIL); + info->bind_scanout = (templ->bind & PIPE_BIND_SCANOUT); + info->bind_cursor = (templ->bind & PIPE_BIND_CURSOR); +} + static enum gen_surface_tiling winsys_to_surface_tiling(enum intel_tiling_mode tiling) { @@ -178,8 +311,8 @@ tex_create_bo(struct ilo_texture *tex) if (!bo) return false; - intel_bo_unref(tex->image.bo); - tex->image.bo = bo; + intel_bo_unref(tex->vma.bo); + ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0); return true; } @@ -206,7 +339,7 @@ tex_create_separate_stencil(struct ilo_texture *tex) tex->separate_s8 = ilo_texture(s8); - assert(tex->separate_s8->image.format == PIPE_FORMAT_S8_UINT); + assert(tex->separate_s8->image_format == PIPE_FORMAT_S8_UINT); return true; } @@ -215,15 +348,16 @@ static bool tex_create_hiz(struct ilo_texture *tex) { const struct pipe_resource *templ = &tex->base; + const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height; struct ilo_screen *is = ilo_screen(tex->base.screen); struct intel_bo *bo; - bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", - tex->image.aux.bo_stride * tex->image.aux.bo_height, false); + bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", size, false); if (!bo) return false; - tex->image.aux.bo = bo; + ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096); + ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0); if (tex->imported) { unsigned lv; @@ -246,17 +380,18 @@ tex_create_hiz(struct ilo_texture *tex) static bool tex_create_mcs(struct ilo_texture *tex) { + const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height; 
struct ilo_screen *is = ilo_screen(tex->base.screen); struct intel_bo *bo; assert(tex->image.aux.enables == (1 << (tex->base.last_level + 1)) - 1); - bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", - tex->image.aux.bo_stride * tex->image.aux.bo_height, false); + bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", size, false); if (!bo) return false; - tex->image.aux.bo = bo; + ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096); + ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0); return true; } @@ -267,8 +402,8 @@ tex_destroy(struct ilo_texture *tex) if (tex->separate_s8) tex_destroy(tex->separate_s8); - intel_bo_unref(tex->image.bo); - intel_bo_unref(tex->image.aux.bo); + intel_bo_unref(tex->vma.bo); + intel_bo_unref(tex->aux_vma.bo); tex_free_slices(tex); FREE(tex); @@ -277,24 +412,16 @@ tex_destroy(struct ilo_texture *tex) static bool tex_alloc_bos(struct ilo_texture *tex) { - struct ilo_screen *is = ilo_screen(tex->base.screen); - if (!tex->imported && !tex_create_bo(tex)) return false; - /* allocate separate stencil resource */ - if (tex->image.separate_stencil && !tex_create_separate_stencil(tex)) - return false; - switch (tex->image.aux.type) { case ILO_IMAGE_AUX_HIZ: - if (!tex_create_hiz(tex) && - !ilo_image_disable_aux(&tex->image, &is->dev)) + if (!tex_create_hiz(tex)) return false; break; case ILO_IMAGE_AUX_MCS: - if (!tex_create_mcs(tex) && - !ilo_image_disable_aux(&tex->image, &is->dev)) + if (!tex_create_mcs(tex)) return false; break; default: @@ -304,9 +431,10 @@ tex_alloc_bos(struct ilo_texture *tex) return true; } -static bool +static struct intel_bo * tex_import_handle(struct ilo_texture *tex, - const struct winsys_handle *handle) + const struct winsys_handle *handle, + struct ilo_image_info *info) { struct ilo_screen *is = ilo_screen(tex->base.screen); const struct pipe_resource *templ = &tex->base; @@ -317,45 +445,94 @@ tex_import_handle(struct ilo_texture *tex, bo = intel_winsys_import_handle(is->dev.winsys, name, handle, 
tex->image.bo_height, &tiling, &pitch); - if (!bo) - return false; + /* modify image info */ + if (bo) { + const uint8_t valid_tilings = 1 << winsys_to_surface_tiling(tiling); - if (!ilo_image_init_for_imported(&tex->image, &is->dev, templ, - winsys_to_surface_tiling(tiling), pitch)) { - ilo_err("failed to import handle for texture\n"); - intel_bo_unref(bo); - return false; - } + if (info->valid_tilings && !(info->valid_tilings & valid_tilings)) { + intel_bo_unref(bo); + return NULL; + } - tex->image.bo = bo; + info->valid_tilings = valid_tilings; + info->force_bo_stride = pitch; - tex->imported = true; + /* assume imported RTs are also scanouts */ + if (!info->bind_scanout) + info->bind_scanout = (templ->usage & PIPE_BIND_RENDER_TARGET); + } - return true; + return bo; } static bool tex_init_image(struct ilo_texture *tex, - const struct winsys_handle *handle) + const struct winsys_handle *handle, + bool *separate_stencil) { struct ilo_screen *is = ilo_screen(tex->base.screen); const struct pipe_resource *templ = &tex->base; struct ilo_image *img = &tex->image; + struct intel_bo *imported_bo = NULL;; + struct ilo_image_info info; + + tex->image_format = resource_get_image_format(templ, + &is->dev, separate_stencil); + resource_get_image_info(templ, &is->dev, tex->image_format, &info); if (handle) { - if (!tex_import_handle(tex, handle)) + imported_bo = tex_import_handle(tex, handle, &info); + if (!imported_bo) return false; - } else { - ilo_image_init(img, &is->dev, templ); } - if (img->bo_height > ilo_max_resource_size / img->bo_stride) + if (!ilo_image_init(img, &is->dev, &info)) { + intel_bo_unref(imported_bo); return false; + } + + /* + * HiZ requires 8x4 alignment and some levels might need HiZ disabled. It + * is generally fine except on Gen6, where HiZ and separate stencil must be + * enabled together. For PIPE_FORMAT_Z24X8_UNORM with separate stencil, we + * can live with stencil values being interleaved for levels where HiZ is + * disabled. 
But it is not the case for PIPE_FORMAT_Z32_FLOAT with + * separate stencil. If HiZ was disabled for a level, we had to change the + * format to PIPE_FORMAT_Z32_FLOAT_S8X24_UINT for the level and that format + * had a different bpp. In other words, HiZ has to be available for all + * levels. + */ + if (ilo_dev_gen(&is->dev) == ILO_GEN(6) && + templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + tex->image_format == PIPE_FORMAT_Z32_FLOAT && + img->aux.enables != (1 << templ->last_level)) { + tex->image_format = templ->format; + info.format = pipe_to_surface_format(&is->dev, tex->image_format); + info.interleaved_stencil = true; + + memset(img, 0, sizeof(*img)); + if (!ilo_image_init(img, &is->dev, &info)) { + intel_bo_unref(imported_bo); + return false; + } + } + + if (img->bo_height > ilo_max_resource_size / img->bo_stride || + !ilo_vma_init(&tex->vma, &is->dev, img->bo_stride * img->bo_height, + 4096)) { + intel_bo_unref(imported_bo); + return false; + } + + if (imported_bo) { + ilo_vma_set_bo(&tex->vma, &is->dev, imported_bo, 0); + tex->imported = true; + } if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) { /* require on-the-fly tiling/untiling or format conversion */ - if (img->tiling == GEN8_TILING_W || img->separate_stencil || - img->format != templ->format) + if (img->tiling == GEN8_TILING_W || *separate_stencil || + tex->image_format != templ->format) return false; } @@ -371,6 +548,7 @@ tex_create(struct pipe_screen *screen, const struct winsys_handle *handle) { struct ilo_texture *tex; + bool separate_stencil; tex = CALLOC_STRUCT(ilo_texture); if (!tex) @@ -380,12 +558,13 @@ tex_create(struct pipe_screen *screen, tex->base.screen = screen; pipe_reference_init(&tex->base.reference, 1); - if (!tex_init_image(tex, handle)) { + if (!tex_init_image(tex, handle, &separate_stencil)) { FREE(tex); return NULL; } - if (!tex_alloc_bos(tex)) { + if (!tex_alloc_bos(tex) || + (separate_stencil && !tex_create_separate_stencil(tex))) { tex_destroy(tex); return NULL; 
} @@ -406,7 +585,7 @@ tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle) else tiling = surface_to_winsys_tiling(tex->image.tiling); - err = intel_winsys_export_handle(is->dev.winsys, tex->image.bo, tiling, + err = intel_winsys_export_handle(is->dev.winsys, tex->vma.bo, tiling, tex->image.bo_stride, tex->image.bo_height, handle); return !err; @@ -420,13 +599,12 @@ buf_create_bo(struct ilo_buffer_resource *buf) const bool cpu_init = resource_get_cpu_init(&buf->base); struct intel_bo *bo; - bo = intel_winsys_alloc_bo(is->dev.winsys, name, - buf->buffer.bo_size, cpu_init); + bo = intel_winsys_alloc_bo(is->dev.winsys, name, buf->bo_size, cpu_init); if (!bo) return false; - intel_bo_unref(buf->buffer.bo); - buf->buffer.bo = bo; + intel_bo_unref(buf->vma.bo); + ilo_vma_set_bo(&buf->vma, &is->dev, bo, 0); return true; } @@ -434,7 +612,7 @@ buf_create_bo(struct ilo_buffer_resource *buf) static void buf_destroy(struct ilo_buffer_resource *buf) { - intel_bo_unref(buf->buffer.bo); + intel_bo_unref(buf->vma.bo); FREE(buf); } @@ -443,6 +621,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ) { const struct ilo_screen *is = ilo_screen(screen); struct ilo_buffer_resource *buf; + uint32_t alignment; unsigned size; buf = CALLOC_STRUCT(ilo_buffer_resource); @@ -471,10 +650,17 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ) ilo_dev_gen(&is->dev) < ILO_GEN(7.5)) size = align(size, 4096); - ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags); + if (templ->bind & PIPE_BIND_VERTEX_BUFFER) + size = ilo_state_vertex_buffer_size(&is->dev, size, &alignment); + if (templ->bind & PIPE_BIND_INDEX_BUFFER) + size = ilo_state_index_buffer_size(&is->dev, size, &alignment); + if (templ->bind & PIPE_BIND_STREAM_OUTPUT) + size = ilo_state_sol_buffer_size(&is->dev, size, &alignment); + + buf->bo_size = size; + ilo_vma_init(&buf->vma, &is->dev, buf->bo_size, 4096); - if (buf->buffer.bo_size < templ->width0 || 
- buf->buffer.bo_size > ilo_max_resource_size || + if (buf->bo_size < templ->width0 || buf->bo_size > ilo_max_resource_size || !buf_create_bo(buf)) { FREE(buf); return NULL; @@ -487,13 +673,30 @@ static boolean ilo_can_create_resource(struct pipe_screen *screen, const struct pipe_resource *templ) { + struct ilo_screen *is = ilo_screen(screen); + enum pipe_format image_format; + struct ilo_image_info info; struct ilo_image img; if (templ->target == PIPE_BUFFER) return (templ->width0 <= ilo_max_resource_size); + image_format = resource_get_image_format(templ, &is->dev, NULL); + resource_get_image_info(templ, &is->dev, image_format, &info); + memset(&img, 0, sizeof(img)); - ilo_image_init(&img, &ilo_screen(screen)->dev, templ); + ilo_image_init(&img, &ilo_screen(screen)->dev, &info); + + /* as in tex_init_image() */ + if (ilo_dev_gen(&is->dev) == ILO_GEN(6) && + templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + image_format == PIPE_FORMAT_Z32_FLOAT && + img.aux.enables != (1 << templ->last_level)) { + info.format = pipe_to_surface_format(&is->dev, templ->format); + info.interleaved_stencil = true; + memset(&img, 0, sizeof(img)); + ilo_image_init(&img, &ilo_screen(screen)->dev, &info); + } return (img.bo_height <= ilo_max_resource_size / img.bo_stride); } diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h index d602e0cbf70..8378af54741 100644 --- a/src/gallium/drivers/ilo/ilo_resource.h +++ b/src/gallium/drivers/ilo/ilo_resource.h @@ -29,8 +29,8 @@ #define ILO_RESOURCE_H #include "core/intel_winsys.h" -#include "core/ilo_buffer.h" #include "core/ilo_image.h" +#include "core/ilo_vma.h" #include "ilo_common.h" #include "ilo_screen.h" @@ -92,7 +92,10 @@ struct ilo_texture { bool imported; + enum pipe_format image_format; struct ilo_image image; + struct ilo_vma vma; + struct ilo_vma aux_vma; /* XXX thread-safety */ struct ilo_texture_slice *slices[PIPE_MAX_TEXTURE_LEVELS]; @@ -103,14 +106,15 @@ struct ilo_texture { struct 
ilo_buffer_resource { struct pipe_resource base; - struct ilo_buffer buffer; + uint32_t bo_size; + struct ilo_vma vma; }; -static inline struct ilo_buffer * -ilo_buffer(struct pipe_resource *res) +static inline struct ilo_buffer_resource * +ilo_buffer_resource(struct pipe_resource *res) { - return (res && res->target == PIPE_BUFFER) ? - &((struct ilo_buffer_resource *) res)->buffer : NULL; + return (struct ilo_buffer_resource *) + ((res && res->target == PIPE_BUFFER) ? res : NULL); } static inline struct ilo_texture * @@ -127,13 +131,14 @@ bool ilo_resource_rename_bo(struct pipe_resource *res); /** - * Return the bo of the resource. + * Return the VMA of the resource. */ -static inline struct intel_bo * -ilo_resource_get_bo(struct pipe_resource *res) +static inline const struct ilo_vma * +ilo_resource_get_vma(struct pipe_resource *res) { return (res->target == PIPE_BUFFER) ? - ilo_buffer(res)->bo : ilo_texture(res)->image.bo; + &((struct ilo_buffer_resource *) res)->vma : + &((struct ilo_texture *) res)->vma; } static inline struct ilo_texture_slice * diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 94105559b80..ab4d1377c9f 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -193,6 +193,7 @@ ilo_get_compute_param(struct pipe_screen *screen, uint32_t max_clock_frequency; uint32_t max_compute_units; uint32_t images_supported; + uint32_t subgroup_size; } val; const void *ptr; int size; @@ -284,6 +285,13 @@ ilo_get_compute_param(struct pipe_screen *screen, ptr = &val.images_supported; size = sizeof(val.images_supported); break; + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + /* best case is actually SIMD32 */ + val.subgroup_size = 16; + + ptr = &val.subgroup_size; + size = sizeof(val.subgroup_size); + break; default: ptr = NULL; size = 0; @@ -443,6 +451,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: return 0; case 
PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: return true; case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_QUERY_LOD: @@ -457,6 +467,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_DEPTH_BOUNDS_TEST: return 0; case PIPE_CAP_VENDOR_ID: @@ -665,13 +677,6 @@ ilo_screen_fence_finish(struct pipe_screen *screen, return signaled; } -static boolean -ilo_screen_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence) -{ - return ilo_screen_fence_finish(screen, fence, 0); -} - /** * Create a fence for \p bo. When \p bo is not NULL, it must be submitted * before waited on or checked. @@ -738,7 +743,6 @@ ilo_screen_create(struct intel_winsys *ws) is->base.flush_frontbuffer = NULL; is->base.fence_reference = ilo_screen_fence_reference; - is->base.fence_signalled = ilo_screen_fence_signalled; is->base.fence_finish = ilo_screen_fence_finish; is->base.get_driver_query_info = NULL; diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 63534f33fa7..d89765a9d23 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -379,13 +379,12 @@ finalize_cbuf_state(struct ilo_context *ilo, u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size, cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource); - cbuf->cso[i].info.buf = ilo_buffer(cbuf->cso[i].resource); + cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource); cbuf->cso[i].info.offset = offset; memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface)); ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface, ilo->dev, &cbuf->cso[i].info); - cbuf->cso[i].surface.bo = cbuf->cso[i].info.buf->bo; ilo->state_vector.dirty |= ILO_DIRTY_CBUF; } @@ -466,11 
+465,9 @@ finalize_index_buffer(struct ilo_context *ilo) memset(&info, 0, sizeof(info)); if (vec->ib.hw_resource) { - info.buf = ilo_buffer(vec->ib.hw_resource); - info.size = info.buf->bo_size; + info.vma = ilo_resource_get_vma(vec->ib.hw_resource); + info.size = info.vma->vm_size; info.format = ilo_translate_index_size(vec->ib.hw_index_size); - - vec->ib.ib.bo = info.buf->bo; } ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info); @@ -532,13 +529,11 @@ finalize_vertex_buffers(struct ilo_context *ilo) const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx]; if (cso->buffer) { - info.buf = ilo_buffer(cso->buffer); + info.vma = ilo_resource_get_vma(cso->buffer); info.offset = cso->buffer_offset; - info.size = info.buf->bo_size; + info.size = info.vma->vm_size - cso->buffer_offset; info.stride = cso->stride; - - vec->vb.vb[i].bo = info.buf->bo; } else { memset(&info, 0, sizeof(info)); } @@ -1566,24 +1561,23 @@ ilo_set_constant_buffer(struct pipe_context *pipe, cso->info.size = buf[i].buffer_size; if (buf[i].buffer) { - cso->info.buf = ilo_buffer(buf[i].buffer); + cso->info.vma = ilo_resource_get_vma(buf[i].buffer); cso->info.offset = buf[i].buffer_offset; memset(&cso->surface, 0, sizeof(cso->surface)); ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info); - cso->surface.bo = cso->info.buf->bo; cso->user_buffer = NULL; cbuf->enabled_mask |= 1 << (index + i); } else if (buf[i].user_buffer) { - cso->info.buf = NULL; + cso->info.vma = NULL; /* buffer_offset does not apply for user buffer */ cso->user_buffer = buf[i].user_buffer; cbuf->enabled_mask |= 1 << (index + i); } else { - cso->info.buf = NULL; + cso->info.vma = NULL; cso->info.size = 0; cso->user_buffer = NULL; @@ -1596,7 +1590,7 @@ ilo_set_constant_buffer(struct pipe_context *pipe, pipe_resource_reference(&cso->resource, NULL); - cso->info.buf = NULL; + cso->info.vma = NULL; cso->info.size = 0; cso->user_buffer = NULL; @@ -1705,10 +1699,11 @@ ilo_set_framebuffer_state(struct pipe_context 
*pipe, if (state->zsbuf) { const struct ilo_surface_cso *cso = (const struct ilo_surface_cso *) state->zsbuf; + const struct ilo_texture *tex = ilo_texture(cso->base.texture); - fb->has_hiz = cso->u.zs.hiz_bo; + fb->has_hiz = cso->u.zs.hiz_vma; fb->depth_offset_format = - ilo_state_zs_get_depth_format(&cso->u.zs, dev); + ilo_format_translate_depth(dev, tex->image_format); } else { fb->has_hiz = false; fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT; @@ -1854,10 +1849,11 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader, } static void -ilo_set_shader_resources(struct pipe_context *pipe, - unsigned start, unsigned count, - struct pipe_surface **surfaces) +ilo_set_shader_images(struct pipe_context *pipe, unsigned shader, + unsigned start, unsigned count, + struct pipe_image_view **views) { +#if 0 struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; struct ilo_resource_state *dst = &vec->resource; unsigned i; @@ -1886,6 +1882,7 @@ ilo_set_shader_resources(struct pipe_context *pipe, } vec->dirty |= ILO_DIRTY_RESOURCE; +#endif } static void @@ -1945,12 +1942,11 @@ ilo_create_stream_output_target(struct pipe_context *pipe, target->base.buffer_size = buffer_size; memset(&info, 0, sizeof(info)); - info.buf = ilo_buffer(res); + info.vma = ilo_resource_get_vma(res); info.offset = buffer_offset; info.size = buffer_size; ilo_state_sol_buffer_init(&target->sb, dev, &info); - target->sb.bo = info.buf->bo; return &target->base; } @@ -2018,18 +2014,17 @@ ilo_create_sampler_view(struct pipe_context *pipe, struct ilo_state_surface_buffer_info info; memset(&info, 0, sizeof(info)); - info.buf = ilo_buffer(res); + info.vma = ilo_resource_get_vma(res); + info.offset = templ->u.buf.first_element * info.struct_size; + info.size = (templ->u.buf.last_element - + templ->u.buf.first_element + 1) * info.struct_size; info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER; info.format = ilo_format_translate_color(dev, templ->format); info.format_size = 
util_format_get_blocksize(templ->format); info.struct_size = info.format_size; info.readonly = true; - info.offset = templ->u.buf.first_element * info.struct_size; - info.size = (templ->u.buf.last_element - - templ->u.buf.first_element + 1) * info.struct_size; ilo_state_surface_init_for_buffer(&view->surface, dev, &info); - view->surface.bo = info.buf->bo; } else { struct ilo_texture *tex = ilo_texture(res); struct ilo_state_surface_image_info info; @@ -2042,32 +2037,31 @@ ilo_create_sampler_view(struct pipe_context *pipe, } memset(&info, 0, sizeof(info)); + info.img = &tex->image; + info.level_base = templ->u.tex.first_level; + info.level_count = templ->u.tex.last_level - + templ->u.tex.first_level + 1; + info.slice_base = templ->u.tex.first_layer; + info.slice_count = templ->u.tex.last_layer - + templ->u.tex.first_layer + 1; + info.vma = &tex->vma; info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER; + info.type = tex->image.type; if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && - tex->image.separate_stencil) { + tex->separate_s8) { info.format = ilo_format_translate_texture(dev, PIPE_FORMAT_Z32_FLOAT); } else { info.format = ilo_format_translate_texture(dev, templ->format); } - info.is_cube_map = (tex->image.target == PIPE_TEXTURE_CUBE || - tex->image.target == PIPE_TEXTURE_CUBE_ARRAY); info.is_array = util_resource_is_array_texture(&tex->base); info.readonly = true; - info.level_base = templ->u.tex.first_level; - info.level_count = templ->u.tex.last_level - - templ->u.tex.first_level + 1; - info.slice_base = templ->u.tex.first_layer; - info.slice_count = templ->u.tex.last_layer - - templ->u.tex.first_layer + 1; - ilo_state_surface_init_for_image(&view->surface, dev, &info); - view->surface.bo = info.img->bo; } return &view->base; @@ -2111,18 +2105,27 @@ ilo_create_surface(struct pipe_context *pipe, assert(tex->base.target != PIPE_BUFFER); memset(&info, 0, sizeof(info)); + info.img = &tex->image; - info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER; - 
info.format = ilo_format_translate_render(dev, templ->format); - info.is_array = util_resource_is_array_texture(&tex->base); info.level_base = templ->u.tex.level; info.level_count = 1; info.slice_base = templ->u.tex.first_layer; info.slice_count = templ->u.tex.last_layer - templ->u.tex.first_layer + 1; + info.vma = &tex->vma; + if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level)) + info.aux_vma = &tex->aux_vma; + + info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER; + + info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ? + GEN6_SURFTYPE_2D : tex->image.type; + + info.format = ilo_format_translate_render(dev, templ->format); + info.is_array = util_resource_is_array_texture(&tex->base); + ilo_state_surface_init_for_image(&surf->u.rt, dev, &info); - surf->u.rt.bo = info.img->bo; } else { struct ilo_state_zs_info info; @@ -2131,13 +2134,19 @@ ilo_create_surface(struct pipe_context *pipe, memset(&info, 0, sizeof(info)); if (templ->format == PIPE_FORMAT_S8_UINT) { + info.s_vma = &tex->vma; info.s_img = &tex->image; } else { + info.z_vma = &tex->vma; info.z_img = &tex->image; - info.s_img = (tex->separate_s8) ? &tex->separate_s8->image : NULL; - info.hiz_enable = - ilo_image_can_enable_aux(&tex->image, templ->u.tex.level); + if (tex->separate_s8) { + info.s_vma = &tex->separate_s8->vma; + info.s_img = &tex->separate_s8->image; + } + + if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level)) + info.hiz_vma = &tex->aux_vma; } info.level = templ->u.tex.level; @@ -2145,16 +2154,15 @@ ilo_create_surface(struct pipe_context *pipe, info.slice_count = templ->u.tex.last_layer - templ->u.tex.first_layer + 1; - ilo_state_zs_init(&surf->u.zs, dev, &info); + info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ? 
+ GEN6_SURFTYPE_2D : tex->image.type; - if (info.z_img) { - surf->u.zs.depth_bo = info.z_img->bo; - if (info.hiz_enable) - surf->u.zs.hiz_bo = info.z_img->aux.bo; - } + info.format = ilo_format_translate_depth(dev, tex->image_format); + if (ilo_dev_gen(dev) == ILO_GEN(6) && !info.hiz_vma && + tex->image_format == PIPE_FORMAT_Z24X8_UNORM) + info.format = GEN6_ZFORMAT_D24_UNORM_S8_UINT; - if (info.s_img) - surf->u.zs.stencil_bo = info.s_img->bo; + ilo_state_zs_init(&surf->u.zs, dev, &info); } return &surf->base; @@ -2339,7 +2347,7 @@ ilo_init_state_functions(struct ilo_context *ilo) ilo->base.set_scissor_states = ilo_set_scissor_states; ilo->base.set_viewport_states = ilo_set_viewport_states; ilo->base.set_sampler_views = ilo_set_sampler_views; - ilo->base.set_shader_resources = ilo_set_shader_resources; + ilo->base.set_shader_images = ilo_set_shader_images; ilo->base.set_vertex_buffers = ilo_set_vertex_buffers; ilo->base.set_index_buffer = ilo_set_index_buffer; @@ -2451,7 +2459,6 @@ void ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, struct pipe_resource *res) { - struct intel_bo *bo = ilo_resource_get_bo(res); uint32_t states = 0; unsigned sh, i; @@ -2482,10 +2489,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, for (i = 0; i < vec->so.count; i++) { if (vec->so.states[i]->buffer == res) { - struct ilo_stream_output_target *target = - (struct ilo_stream_output_target *) vec->so.states[i]; - - target->sb.bo = ilo_buffer(res)->bo; states |= ILO_DIRTY_SO; break; } @@ -2503,7 +2506,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, [PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS, [PIPE_SHADER_COMPUTE] = ILO_DIRTY_VIEW_CS, }; - cso->surface.bo = bo; states |= view_dirty_bits[sh]; break; @@ -2515,7 +2517,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i]; if (cbuf->resource == res) { - cbuf->surface.bo = bo; states |= ILO_DIRTY_CBUF; break; } @@ -2528,7 
+2529,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, (struct ilo_surface_cso *) vec->resource.states[i]; if (cso->base.texture == res) { - cso->u.rt.bo = bo; states |= ILO_DIRTY_RESOURCE; break; } @@ -2540,27 +2540,19 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec, struct ilo_surface_cso *cso = (struct ilo_surface_cso *) vec->fb.state.cbufs[i]; if (cso && cso->base.texture == res) { - cso->u.rt.bo = bo; states |= ILO_DIRTY_FB; break; } } - if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) { - struct ilo_surface_cso *cso = - (struct ilo_surface_cso *) vec->fb.state.zsbuf; - - cso->u.zs.depth_bo = bo; - + if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) states |= ILO_DIRTY_FB; - } } for (i = 0; i < vec->cs_resource.count; i++) { struct ilo_surface_cso *cso = (struct ilo_surface_cso *) vec->cs_resource.states[i]; if (cso->base.texture == res) { - cso->u.rt.bo = bo; states |= ILO_DIRTY_CS_RESOURCE; break; } diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index 3e6fd8a2554..66c93007eb1 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -202,7 +202,7 @@ struct ilo_cbuf_state { }; struct ilo_resource_state { - struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES]; + struct pipe_surface *states[PIPE_MAX_SHADER_IMAGES]; unsigned count; }; diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c index ec41473f94a..5abd3bebf68 100644 --- a/src/gallium/drivers/ilo/ilo_transfer.c +++ b/src/gallium/drivers/ilo/ilo_transfer.c @@ -100,7 +100,7 @@ resource_get_transfer_method(struct pipe_resource *res, m = ILO_TRANSFER_MAP_SW_ZS; need_convert = true; } - } else if (tex->image.format != tex->base.format) { + } else if (tex->image_format != tex->base.format) { m = ILO_TRANSFER_MAP_SW_CONVERT; need_convert = true; } @@ -268,23 +268,27 @@ xfer_alloc_staging_sys(struct ilo_transfer *xfer) static void 
* xfer_map(struct ilo_transfer *xfer) { + const struct ilo_vma *vma; void *ptr; switch (xfer->method) { case ILO_TRANSFER_MAP_CPU: - ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource), - xfer->base.usage & PIPE_TRANSFER_WRITE); + vma = ilo_resource_get_vma(xfer->base.resource); + ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE); break; case ILO_TRANSFER_MAP_GTT: - ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource)); + vma = ilo_resource_get_vma(xfer->base.resource); + ptr = intel_bo_map_gtt(vma->bo); break; case ILO_TRANSFER_MAP_GTT_ASYNC: - ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource)); + vma = ilo_resource_get_vma(xfer->base.resource); + ptr = intel_bo_map_gtt_async(vma->bo); break; case ILO_TRANSFER_MAP_STAGING: { const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen); - struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res); + + vma = ilo_resource_get_vma(xfer->staging.res); /* * We want a writable, optionally persistent and coherent, mapping @@ -292,25 +296,29 @@ xfer_map(struct ilo_transfer *xfer) * this turns out to be fairly simple. 
*/ if (is->dev.has_llc) - ptr = intel_bo_map(bo, true); + ptr = intel_bo_map(vma->bo, true); else - ptr = intel_bo_map_gtt(bo); + ptr = intel_bo_map_gtt(vma->bo); if (ptr && xfer->staging.res->target == PIPE_BUFFER) ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT); - } break; case ILO_TRANSFER_MAP_SW_CONVERT: case ILO_TRANSFER_MAP_SW_ZS: + vma = NULL; ptr = xfer->staging.sys; break; default: assert(!"unknown mapping method"); + vma = NULL; ptr = NULL; break; } + if (ptr && vma) + ptr = (void *) ((char *) ptr + vma->bo_offset); + return ptr; } @@ -324,10 +332,10 @@ xfer_unmap(struct ilo_transfer *xfer) case ILO_TRANSFER_MAP_CPU: case ILO_TRANSFER_MAP_GTT: case ILO_TRANSFER_MAP_GTT_ASYNC: - intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource)); + intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo); break; case ILO_TRANSFER_MAP_STAGING: - intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res)); + intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo); break; default: break; @@ -541,9 +549,12 @@ tex_staging_sys_map_bo(struct ilo_texture *tex, if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE || !linear_view)) - ptr = intel_bo_map(tex->image.bo, !for_read_back); + ptr = intel_bo_map(tex->vma.bo, !for_read_back); else - ptr = intel_bo_map_gtt(tex->image.bo); + ptr = intel_bo_map_gtt(tex->vma.bo); + + if (ptr) + ptr = (void *) ((char *) ptr + tex->vma.bo_offset); return ptr; } @@ -551,7 +562,7 @@ tex_staging_sys_map_bo(struct ilo_texture *tex, static void tex_staging_sys_unmap_bo(struct ilo_texture *tex) { - intel_bo_unmap(tex->image.bo); + intel_bo_unmap(tex->vma.bo); } static bool @@ -590,7 +601,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row); if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { - assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM); + assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM); dst_cpp = 4; dst_s8_pos = 3; @@ -598,7 +609,7 @@ 
tex_staging_sys_zs_read(struct ilo_texture *tex, } else { assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); - assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT); + assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT); dst_cpp = 8; dst_s8_pos = 4; @@ -644,7 +655,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, tex_staging_sys_unmap_bo(s8_tex); } else { - assert(tex->image.format == PIPE_FORMAT_S8_UINT); + assert(tex->image_format == PIPE_FORMAT_S8_UINT); for (slice = 0; slice < box->depth; slice++) { unsigned mem_x, mem_y; @@ -717,7 +728,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row); if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { - assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM); + assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM); src_cpp = 4; src_s8_pos = 3; @@ -725,7 +736,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, } else { assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); - assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT); + assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT); src_cpp = 8; src_s8_pos = 4; @@ -771,7 +782,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, tex_staging_sys_unmap_bo(s8_tex); } else { - assert(tex->image.format == PIPE_FORMAT_S8_UINT); + assert(tex->image_format == PIPE_FORMAT_S8_UINT); for (slice = 0; slice < box->depth; slice++) { unsigned mem_x, mem_y; @@ -829,8 +840,8 @@ tex_staging_sys_convert_write(struct ilo_texture *tex, else dst_slice_stride = 0; - if (unlikely(tex->image.format == tex->base.format)) { - util_copy_box(dst, tex->image.format, tex->image.bo_stride, + if (unlikely(tex->image_format == tex->base.format)) { + util_copy_box(dst, tex->image_format, tex->image.bo_stride, dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth, xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride, 0, 0, 0); @@ -842,7 +853,7 @@ tex_staging_sys_convert_write(struct ilo_texture 
*tex, switch (tex->base.format) { case PIPE_FORMAT_ETC1_RGB8: - assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM); + assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM); for (slice = 0; slice < box->depth; slice++) { const void *src = @@ -1055,7 +1066,7 @@ choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer) return false; /* see if we can avoid blocking */ - if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) { + if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) { bool resource_renamed; if (!xfer_unblock(xfer, &resource_renamed)) { @@ -1078,11 +1089,11 @@ static void buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res, unsigned usage, int offset, int size, const void *data) { - struct ilo_buffer *buf = ilo_buffer(res); + struct ilo_buffer_resource *buf = ilo_buffer_resource(res); bool need_submit; /* see if we can avoid blocking */ - if (is_bo_busy(ilo, buf->bo, &need_submit)) { + if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) { bool unblocked = false; if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) && @@ -1103,9 +1114,12 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res, templ.bind = PIPE_BIND_TRANSFER_WRITE; staging = ilo->base.screen->resource_create(ilo->base.screen, &templ); if (staging) { + const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging); struct pipe_box staging_box; - intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data); + /* offset by staging_vma->bo_offset for pwrite */ + intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset, + size, data); u_box_1d(0, size, &staging_box); ilo_blitter_blt_copy_resource(ilo->blitter, @@ -1123,7 +1137,8 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res, ilo_cp_submit(ilo->cp, "syncing for pwrites"); } - intel_bo_pwrite(buf->bo, offset, size, data); + /* offset by buf->vma.bo_offset for pwrite */ + intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data); } static void |