diff options
author | Marek Olšák <[email protected]> | 2016-10-11 23:19:46 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-10-13 19:00:51 +0200 |
commit | d4d9ec55c589156df4edc227a86b4a8c41048d58 (patch) | |
tree | 646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers/radeon | |
parent | a077185ea9d685967844b68aa09da6bd8aa430da (diff) |
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less
memory bandwidth.
Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16.
The format promotion is not visible to state trackers.
This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.
I don't see a measurable increase in performance though.
(I tested Talos Principle and DiRT: Showdown, the latter is improved by
0.5%, which is almost noise, and it originally used layered Z16,
so at least we know that Z16 promoted to Z32F isn't slower now)
Tested-by: Edmondo Tommasina <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_texture.c | 67 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 4 |
3 files changed, 64 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 290b228b73f..5cfcad688c4 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -245,6 +245,7 @@ struct r600_htile_info { unsigned height; unsigned xalign; unsigned yalign; + unsigned alignment; }; struct r600_texture { @@ -252,6 +253,7 @@ struct r600_texture { uint64_t size; unsigned num_level0_transfers; + enum pipe_format db_render_format; bool is_depth; bool db_compatible; bool can_sample_z; @@ -273,6 +275,7 @@ struct r600_texture { /* Depth buffer compression and fast clear. */ struct r600_htile_info htile; struct r600_resource *htile_buffer; + bool tc_compatible_htile; bool depth_cleared; /* if it was cleared at least once */ float depth_clear_value; bool stencil_cleared; /* if it was cleared at least once */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 57cdbcf615d..625d091fe40 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen, struct radeon_surf *surface, const struct pipe_resource *ptex, unsigned array_mode, - bool is_flushed_depth) + bool is_flushed_depth, + bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(ptex->format); @@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen, if (!is_flushed_depth && is_depth) { surface->flags |= RADEON_SURF_ZBUFFER; + if (tc_compatible_htile && + array_mode == RADEON_SURF_MODE_2D) { + /* TC-compatible HTILE only supports Z32_FLOAT. + * Promote Z16 to Z32. DB->CB copies will convert + * the format for transfers. + */ + surface->bpe = 4; + surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; + } + if (is_stencil) { surface->flags |= RADEON_SURF_SBUFFER | RADEON_SURF_HAS_SBUFFER_MIPTREE; } } + if (rscreen->chip_class >= SI) { surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; } @@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, rtex->htile.height = height; rtex->htile.xalign = cl_width * 8; rtex->htile.yalign = cl_height * 8; + rtex->htile.alignment = base_align; return (util_max_layer(&rtex->resource.b.b, 0) + 1) * align(slice_bytes, base_align); @@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex); + uint64_t htile_size, alignment; + uint32_t clear_value; + + if (rtex->tc_compatible_htile) { + htile_size = rtex->surface.htile_size; + alignment = rtex->surface.htile_alignment; + clear_value = 0x0000030F; + } else { + htile_size = r600_texture_get_htile_size(rscreen, rtex); + alignment = rtex->htile.alignment; + clear_value = 0; + } if (!htile_size) return; rtex->htile_buffer = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, htile_size); + r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, + htile_size, alignment); if (rtex->htile_buffer == NULL) { /* this is not a fatal error as we can still keep rendering * without htile buffer */ R600_ERR("Failed to create buffer object for htile buffer.\n"); } else { - r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, - htile_size, 0, R600_COHERENCY_NONE); + r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, + 0, htile_size, clear_value, + R600_COHERENCY_NONE); } } @@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f) if (rtex->htile_buffer) fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, " - "xalign=%u, yalign=%u\n", + "xalign=%u, yalign=%u, TC_compatible = %u\n", rtex->htile_buffer->b.b.width0, rtex->htile_buffer->buf->alignment, rtex->htile.pitch, - rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign); + rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign, + rtex->tc_compatible_htile); if (rtex->dcc_offset) { fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n", @@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } + rtex->tc_compatible_htile = rtex->surface.htile_size != 0; + assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) == + rtex->tc_compatible_htile); + + /* TC-compatible HTILE only supports Z32_FLOAT. */ + if (rtex->tc_compatible_htile) + rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; + else + rtex->db_render_format = base->format; + /* Tiled depth textures utilize the non-displayable tile order. * This must be done after r600_setup_surface. * Applies to R600-Cayman. */ @@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_surf surface = {0}; + bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; + bool tc_compatible_htile = + rscreen->chip_class >= VI && + (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && + !(rscreen->debug_flags & DBG_NO_HYPERZ) && + !is_flushed_depth && + templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ + util_format_is_depth_or_stencil(templ->format); + int r; r = r600_init_surface(rscreen, &surface, templ, r600_choose_tiling(rscreen, templ), - templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); + is_flushed_depth, tc_compatible_htile); if (r) { return NULL; } @@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen else array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - r = r600_init_surface(rscreen, &surface, templ, array_mode, false); + r = r600_init_surface(rscreen, &surface, templ, array_mode, + false, false); if (r) { return NULL; } diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 7146737c826..8946209d337 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -278,6 +278,7 @@ enum radeon_feature_id { #define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) #define RADEON_SURF_FMASK (1 << 21) #define RADEON_SURF_DISABLE_DCC (1 << 22) +#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23) #define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) #define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) @@ -344,6 +345,9 @@ struct radeon_surf { uint64_t dcc_size; uint64_t dcc_alignment; + /* TC-compatible HTILE only. */ + uint64_t htile_size; + uint64_t htile_alignment; }; struct radeon_bo_list_item { |