diff options
author | Marek Olšák <[email protected]> | 2016-10-11 23:19:46 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-10-13 19:00:51 +0200 |
commit | d4d9ec55c589156df4edc227a86b4a8c41048d58 (patch) | |
tree | 646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers | |
parent | a077185ea9d685967844b68aa09da6bd8aa430da (diff) |
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less
memory bandwidth.
Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16.
The format promotion is not visible to state trackers.
This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.
I don't see a measurable increase in performance though.
(I tested Talos Principle and DiRT: Showdown, the latter is improved by
0.5%, which is almost noise, and it originally used layered Z16,
so at least we know that Z16 promoted to Z32F isn't slower now)
Tested-by: Edmondo Tommasina <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_texture.c | 67 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 39 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 3 |
8 files changed, 132 insertions, 20 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 290b228b73f..5cfcad688c4 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -245,6 +245,7 @@ struct r600_htile_info { unsigned height; unsigned xalign; unsigned yalign; + unsigned alignment; }; struct r600_texture { @@ -252,6 +253,7 @@ struct r600_texture { uint64_t size; unsigned num_level0_transfers; + enum pipe_format db_render_format; bool is_depth; bool db_compatible; bool can_sample_z; @@ -273,6 +275,7 @@ struct r600_texture { /* Depth buffer compression and fast clear. */ struct r600_htile_info htile; struct r600_resource *htile_buffer; + bool tc_compatible_htile; bool depth_cleared; /* if it was cleared at least once */ float depth_clear_value; bool stencil_cleared; /* if it was cleared at least once */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 57cdbcf615d..625d091fe40 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen, struct radeon_surf *surface, const struct pipe_resource *ptex, unsigned array_mode, - bool is_flushed_depth) + bool is_flushed_depth, + bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(ptex->format); @@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen, if (!is_flushed_depth && is_depth) { surface->flags |= RADEON_SURF_ZBUFFER; + if (tc_compatible_htile && + array_mode == RADEON_SURF_MODE_2D) { + /* TC-compatible HTILE only supports Z32_FLOAT. + * Promote Z16 to Z32. DB->CB copies will convert + * the format for transfers. + */ + surface->bpe = 4; + surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; + } + if (is_stencil) { surface->flags |= RADEON_SURF_SBUFFER | RADEON_SURF_HAS_SBUFFER_MIPTREE; } } + if (rscreen->chip_class >= SI) { surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; } @@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, rtex->htile.height = height; rtex->htile.xalign = cl_width * 8; rtex->htile.yalign = cl_height * 8; + rtex->htile.alignment = base_align; return (util_max_layer(&rtex->resource.b.b, 0) + 1) * align(slice_bytes, base_align); @@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex); + uint64_t htile_size, alignment; + uint32_t clear_value; + + if (rtex->tc_compatible_htile) { + htile_size = rtex->surface.htile_size; + alignment = rtex->surface.htile_alignment; + clear_value = 0x0000030F; + } else { + htile_size = r600_texture_get_htile_size(rscreen, rtex); + alignment = rtex->htile.alignment; + clear_value = 0; + } if (!htile_size) return; rtex->htile_buffer = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, htile_size); + r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, + htile_size, alignment); if (rtex->htile_buffer == NULL) { /* this is not a fatal error as we can still keep rendering * without htile buffer */ R600_ERR("Failed to create buffer object for htile buffer.\n"); } else { - r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, - htile_size, 0, R600_COHERENCY_NONE); + r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, + 0, htile_size, clear_value, + R600_COHERENCY_NONE); } } @@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f) if (rtex->htile_buffer) fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, " - "xalign=%u, yalign=%u\n", + "xalign=%u, yalign=%u, TC_compatible = %u\n", rtex->htile_buffer->b.b.width0, rtex->htile_buffer->buf->alignment, rtex->htile.pitch, - rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign); + rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign, + rtex->tc_compatible_htile); if (rtex->dcc_offset) { fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n", @@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } + rtex->tc_compatible_htile = rtex->surface.htile_size != 0; + assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) == + rtex->tc_compatible_htile); + + /* TC-compatible HTILE only supports Z32_FLOAT. */ + if (rtex->tc_compatible_htile) + rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; + else + rtex->db_render_format = base->format; + /* Tiled depth textures utilize the non-displayable tile order. * This must be done after r600_setup_surface. * Applies to R600-Cayman. */ @@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_surf surface = {0}; + bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; + bool tc_compatible_htile = + rscreen->chip_class >= VI && + (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && + !(rscreen->debug_flags & DBG_NO_HYPERZ) && + !is_flushed_depth && + templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ + util_format_is_depth_or_stencil(templ->format); + int r; r = r600_init_surface(rscreen, &surface, templ, r600_choose_tiling(rscreen, templ), - templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); + is_flushed_depth, tc_compatible_htile); if (r) { return NULL; } @@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen else array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - r = r600_init_surface(rscreen, &surface, templ, array_mode, false); + r = r600_init_surface(rscreen, &surface, templ, array_mode, + false, false); if (r) { return NULL; } diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 7146737c826..8946209d337 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -278,6 +278,7 @@ enum radeon_feature_id { #define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) #define RADEON_SURF_FMASK (1 << 21) #define RADEON_SURF_DISABLE_DCC (1 << 22) +#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23) #define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) #define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) @@ -344,6 +345,9 @@ struct radeon_surf { uint64_t dcc_size; uint64_t dcc_alignment; + /* TC-compatible HTILE only. */ + uint64_t htile_size; + uint64_t htile_alignment; }; struct radeon_bo_list_item { diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index c143601d55c..db41f565a94 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx, } } + assert(!tex->tc_compatible_htile || levels_z == 0); + /* We may have to allocate the flushed texture here when called from * si_decompress_subresource. */ @@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, zsbuf->u.tex.level == 0 && zsbuf->u.tex.first_layer == 0 && zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) { - if (buffers & PIPE_CLEAR_DEPTH) { + /* TC-compatible HTILE only supports depth clears to 0 or 1. */ + if (buffers & PIPE_CLEAR_DEPTH && + (!zstex->tc_compatible_htile || + depth == 0 || depth == 1)) { /* Need to disable EXPCLEAR temporarily if clearing * to a new value. */ if (!zstex->depth_cleared || zstex->depth_clear_value != depth) { @@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, si_mark_atom_dirty(sctx, &sctx->db_render_state); } - if (buffers & PIPE_CLEAR_STENCIL) { + /* TC-compatible HTILE only supports stencil clears to 0. */ + if (buffers & PIPE_CLEAR_STENCIL && + (!zstex->tc_compatible_htile || stencil == 0)) { stencil &= 0xff; /* Need to disable EXPCLEAR temporarily if clearing diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 350242aeed5..19cae65e75e 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex, state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + tex->dcc_offset + base_level_info->dcc_offset) >> 8; + } else if (tex->tc_compatible_htile) { + state[6] |= S_008F28_COMPRESSION_EN(1); + state[7] = tex->htile_buffer->gpu_address >> 8; } } @@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx, if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { struct r600_texture *rtex = (struct r600_texture*)views[i]->texture; + struct si_sampler_view *rview = (struct si_sampler_view *)views[i]; - if (rtex->db_compatible) { + if (rtex->db_compatible && + (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) { samplers->depth_texture_mask |= 1u << slot; } else { samplers->depth_texture_mask &= ~(1u << slot); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fc50205633d..b2d76994996 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4607,12 +4607,26 @@ static void tex_fetch_args( /* Pack depth comparison value */ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { + LLVMValueRef z; + if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); } else { assert(ref_pos >= 0); - address[count++] = coords[ref_pos]; + z = coords[ref_pos]; } + + /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here. + * + * It's unnecessary if the original texture format was + * Z32_FLOAT, but we don't know that here. + */ + if (ctx->screen->b.chip_class == VI) + z = radeon_llvm_saturate(bld_base, z); + + address[count++] = z; } /* Pack user derivatives */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ad65fc22f60..b23749c6d89 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx) if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) return; + /* Use the user format, not db_render_format, so that the polygon + * offset behaves as expected by applications. + */ switch (sctx->framebuffer.state.zsbuf->texture->format) { case PIPE_FORMAT_Z16_UNORM: si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); @@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx, uint64_t z_offs, s_offs; uint32_t db_htile_data_base, db_htile_surface; - format = si_translate_dbformat(rtex->resource.b.b.format); + format = si_translate_dbformat(rtex->db_render_format); if (format == V_028040_Z_INVALID) { R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); @@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx, z_offs += rtex->surface.level[level].offset; s_offs += rtex->surface.stencil_level[level].offset; - db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); + db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); z_info = S_028040_FORMAT(format); if (rtex->resource.b.b.nr_samples > 1) { @@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx, */ if (rtex->resource.b.b.nr_samples <= 1) s_info |= S_028044_ALLOW_EXPCLEAR(1); - } else - /* Use all of the htile_buffer for depth if there's no stencil. */ + } else if (!rtex->tc_compatible_htile) { + /* Use all of the htile_buffer for depth if there's no stencil. + * This must not be set when TC-compatible HTILE is enabled + * due to a hw bug. + */ s_info |= S_028044_TILE_STENCIL_DISABLE(1); + } uint64_t va = rtex->htile_buffer->gpu_address; db_htile_data_base = va >> 8; db_htile_surface = S_028ABC_FULL_CACHE(1); + + if (rtex->tc_compatible_htile) { + db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); + + switch (rtex->resource.b.b.nr_samples) { + case 0: + case 1: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); + break; + case 2: + case 4: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); + break; + case 8: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); + break; + default: + assert(0); + } + } } else { db_htile_data_base = 0; db_htile_surface = 0; @@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf) { surf = (struct r600_surface*)state->zsbuf; + rtex = (struct r600_texture*)surf->base.texture; if (!surf->depth_initialized) { si_init_depth_surface(sctx, surf); @@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx, surflevel = tmp->surface.level; if (tmp->db_compatible) { + if (!view->is_stencil_sampler) + pipe_format = tmp->db_render_format; + switch (pipe_format) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: pipe_format = PIPE_FORMAT_Z32_FLOAT; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c14e852bec2..d18137b6691 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1118,7 +1118,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; struct r600_texture *rtex = (struct r600_texture *)surf->texture; - rtex->dirty_level_mask |= 1 << surf->u.tex.level; + if (!rtex->tc_compatible_htile) + rtex->dirty_level_mask |= 1 << surf->u.tex.level; if (rtex->surface.flags & RADEON_SURF_SBUFFER) rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; |