diff options
-rw-r--r-- | src/intel/blorp/blorp.h | 5 | ||||
-rw-r--r-- | src/intel/blorp/blorp_clear.c | 153 |
2 files changed, 158 insertions, 0 deletions
diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index a1dd57118bb..478a9af5ed1 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -204,6 +204,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
                   enum blorp_fast_clear_op resolve_op);
 
 void
+blorp_ccs_ambiguate(struct blorp_batch *batch,
+                    struct blorp_surf *surf,
+                    uint32_t level, uint32_t layer);
+
+void
 blorp_mcs_partial_resolve(struct blorp_batch *batch,
                           struct blorp_surf *surf,
                           enum isl_format format,
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 8e7bc9f76a5..165dbca8c79 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -881,3 +881,156 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
 
    batch->blorp->exec(batch, &params);
 }
+
+/** Clear a CCS to the "uncompressed" state
+ *
+ * This pass is the CCS equivalent of a "HiZ resolve". It sets the CCS values
+ * for a given layer/level of a surface to 0x0 which is the "uncompressed"
+ * state which tells the sampler to go look at the main surface.
+ */
+void
+blorp_ccs_ambiguate(struct blorp_batch *batch,
+                    struct blorp_surf *surf,
+                    uint32_t level, uint32_t layer)
+{
+   struct blorp_params params;
+   blorp_params_init(&params);
+
+   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
+
+   const struct isl_format_layout *aux_fmtl =
+      isl_format_get_layout(surf->aux_surf->format);
+   assert(aux_fmtl->txc == ISL_TXC_CCS);
+
+   params.dst = (struct brw_blorp_surface_info) {
+      .enabled = true,
+      .addr = surf->aux_addr,
+      .view = {
+         .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+         .format = ISL_FORMAT_R32G32B32A32_UINT,
+         .base_level = 0,
+         .base_array_layer = 0,
+         .levels = 1,
+         .array_len = 1,
+         .swizzle = ISL_SWIZZLE_IDENTITY,
+      },
+   };
+
+   uint32_t z = 0;
+   if (surf->surf->dim == ISL_SURF_DIM_3D) {
+      z = layer;
+      layer = 0;
+   }
+
+   uint32_t offset_B, x_offset_el, y_offset_el;
+   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
+                                &x_offset_el, &y_offset_el);
+   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
+                                      surf->aux_surf->row_pitch,
+                                      x_offset_el, y_offset_el,
+                                      &offset_B, &x_offset_el, &y_offset_el);
+   params.dst.addr.offset += offset_B;
+
+   const uint32_t width_px =
+      minify(surf->aux_surf->logical_level0_px.width, level);
+   const uint32_t height_px =
+      minify(surf->aux_surf->logical_level0_px.height, level);
+   const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
+   const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
+
+   struct isl_tile_info ccs_tile_info;
+   isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
+
+   /* We're going to map it as a regular RGBA32_UINT surface. We need to
+    * downscale a good deal. We start by computing the area on the CCS to
+    * clear in units of Y-tiled cache lines.
+    */
+   uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl;
+   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
+      /* From the Sky Lake PRM Vol. 12 in the section on planes:
+       *
+       *    "The Color Control Surface (CCS) contains the compression status
+       *    of the cache-line pairs. The compression state of the cache-line
+       *    pair is specified by 2 bits in the CCS. Each CCS cache-line
+       *    represents an area on the main surface of 16x16 sets of 128 byte
+       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
+       *
+       * Each 2-bit surface element in the CCS corresponds to a single
+       * cache-line pair in the main surface. This means that 16x16 el block
+       * in the CCS maps to a Y-tiled cache line. Fortunately, CCS layouts
+       * are calculated with a very large alignment so we can round up to a
+       * whole cache line without worrying about overdraw.
+       */
+
+      /* On Broadwell and above, a CCS tile is the same as a Y tile when
+       * viewed at the cache-line granularity. Fortunately, the horizontal
+       * and vertical alignment requirements of the CCS are such that we can
+       * align to an entire cache line without worrying about crossing over
+       * from one LOD to another.
+       */
+      const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8;
+      const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8;
+      assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0);
+      assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0);
+
+      assert(x_offset_el % x_el_per_cl == 0);
+      assert(y_offset_el % y_el_per_cl == 0);
+      x_offset_cl = x_offset_el / x_el_per_cl;
+      y_offset_cl = y_offset_el / y_el_per_cl;
+      width_cl = DIV_ROUND_UP(width_el, x_el_per_cl);
+      height_cl = DIV_ROUND_UP(height_el, y_el_per_cl);
+   } else {
+      /* On gen7, the CCS tiling is not so nice. However, there we are
+       * guaranteed that we only have a single level and slice so we don't
+       * have to worry about it and can just align to a whole tile.
+       */
+      assert(surf->aux_surf->logical_level0_px.depth == 1);
+      assert(surf->aux_surf->logical_level0_px.array_len == 1);
+      assert(x_offset_el == 0 && y_offset_el == 0);
+      const uint32_t width_tl =
+         DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w);
+      const uint32_t height_tl =
+         DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h);
+      x_offset_cl = 0;
+      y_offset_cl = 0;
+      width_cl = width_tl * 8;
+      height_cl = height_tl * 8;
+   }
+
+   /* We're going to use a RGBA32 format so as to write data as quickly as
+    * possible. A y-tiled cache line will then be 1x4 px.
+    */
+   const uint32_t x_offset_rgba_px = x_offset_cl;
+   const uint32_t y_offset_rgba_px = y_offset_cl * 4;
+   const uint32_t width_rgba_px = width_cl;
+   const uint32_t height_rgba_px = height_cl * 4;
+
+   MAYBE_UNUSED bool ok =
+      isl_surf_init(batch->blorp->isl_dev, &params.dst.surf,
+                    .dim = ISL_SURF_DIM_2D,
+                    .format = ISL_FORMAT_R32G32B32A32_UINT,
+                    .width = width_rgba_px + x_offset_rgba_px,
+                    .height = height_rgba_px + y_offset_rgba_px,
+                    .depth = 1,
+                    .levels = 1,
+                    .array_len = 1,
+                    .samples = 1,
+                    .row_pitch = surf->aux_surf->row_pitch,
+                    .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+                    .tiling_flags = ISL_TILING_Y0_BIT);
+   assert(ok);
+
+   params.x0 = x_offset_rgba_px;
+   params.y0 = y_offset_rgba_px;
+   params.x1 = x_offset_rgba_px + width_rgba_px;
+   params.y1 = y_offset_rgba_px + height_rgba_px;
+
+   /* A CCS value of 0 means "uncompressed." */
+   memset(&params.wm_inputs.clear_color, 0,
+          sizeof(params.wm_inputs.clear_color));
+
+   if (!blorp_params_get_clear_kernel(batch->blorp, &params, true))
+      return;
+
+   batch->blorp->exec(batch, &params);
+}