diff options
author | Marek Olšák <[email protected]> | 2016-10-11 23:19:46 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-10-13 19:00:51 +0200 |
commit | d4d9ec55c589156df4edc227a86b4a8c41048d58 (patch) | |
tree | 646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers/radeonsi/si_state.c | |
parent | a077185ea9d685967844b68aa09da6bd8aa430da (diff) |
radeonsi: implement TC-compatible HTILE
This makes decompress blits unnecessary, and depth texturing needs less
memory bandwidth.
Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles the memory footprint for Z16.
The format promotion is not visible to state trackers.
This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.
I don't see a measurable increase in performance, though.
(I tested Talos Principle and DiRT: Showdown. The latter is improved by
0.5%, which is almost noise. It originally used layered Z16, so at least
we know that Z16 promoted to Z32F isn't slower now.)
Tested-by: Edmondo Tommasina <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 39 |
1 file changed, 35 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ad65fc22f60..b23749c6d89 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx) if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) return; + /* Use the user format, not db_render_format, so that the polygon + * offset behaves as expected by applications. + */ switch (sctx->framebuffer.state.zsbuf->texture->format) { case PIPE_FORMAT_Z16_UNORM: si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); @@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx, uint64_t z_offs, s_offs; uint32_t db_htile_data_base, db_htile_surface; - format = si_translate_dbformat(rtex->resource.b.b.format); + format = si_translate_dbformat(rtex->db_render_format); if (format == V_028040_Z_INVALID) { R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); @@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx, z_offs += rtex->surface.level[level].offset; s_offs += rtex->surface.stencil_level[level].offset; - db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); + db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); z_info = S_028040_FORMAT(format); if (rtex->resource.b.b.nr_samples > 1) { @@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx, */ if (rtex->resource.b.b.nr_samples <= 1) s_info |= S_028044_ALLOW_EXPCLEAR(1); - } else - /* Use all of the htile_buffer for depth if there's no stencil. */ + } else if (!rtex->tc_compatible_htile) { + /* Use all of the htile_buffer for depth if there's no stencil. + * This must not be set when TC-compatible HTILE is enabled + * due to a hw bug. 
+ */ s_info |= S_028044_TILE_STENCIL_DISABLE(1); + } uint64_t va = rtex->htile_buffer->gpu_address; db_htile_data_base = va >> 8; db_htile_surface = S_028ABC_FULL_CACHE(1); + + if (rtex->tc_compatible_htile) { + db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); + + switch (rtex->resource.b.b.nr_samples) { + case 0: + case 1: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); + break; + case 2: + case 4: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); + break; + case 8: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); + break; + default: + assert(0); + } + } } else { db_htile_data_base = 0; db_htile_surface = 0; @@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf) { surf = (struct r600_surface*)state->zsbuf; + rtex = (struct r600_texture*)surf->base.texture; if (!surf->depth_initialized) { si_init_depth_surface(sctx, surf); @@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx, surflevel = tmp->surface.level; if (tmp->db_compatible) { + if (!view->is_stencil_sampler) + pipe_format = tmp->db_render_format; + switch (pipe_format) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: pipe_format = PIPE_FORMAT_Z32_FLOAT; |