summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2017-06-15 00:34:08 +0200
committer: Marek Olšák <[email protected]> 2017-06-22 01:51:02 +0200
commit: 226361082705be990804569ee731cd3d21749deb (patch)
tree: 6eee370851e190e596f4c99b801560f44e008f17
parent: fdca690e91f932b494611a9e8808778405138808 (diff)
radeonsi: flush DB caches only when transitioning from DB to texturing
Use the mechanism of si_decompress_textures, but instead of doing the actual decompression, just flag the DB cache flush there.

This removes a lot of unnecessary DB cache flushes.

Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c | 36
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c | 17
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h | 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c | 21
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c | 6
5 files changed, 56 insertions, 25 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c62efbfa7d3..74f46705b3c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -344,10 +344,6 @@ si_decompress_depth(struct si_context *sctx,
}
}
- assert(!tex->tc_compatible_htile || levels_z == 0);
- assert(!tex->tc_compatible_htile || levels_s == 0 ||
- !r600_can_sample_zs(tex, true));
-
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
@@ -384,10 +380,30 @@ si_decompress_depth(struct si_context *sctx,
}
if (inplace_planes) {
- si_blit_decompress_zs_in_place(
- sctx, tex,
- levels_z, levels_s,
- first_layer, last_layer);
+ if (!tex->tc_compatible_htile) {
+ si_blit_decompress_zs_in_place(
+ sctx, tex,
+ levels_z, levels_s,
+ first_layer, last_layer);
+ }
+
+ /* Only in-place decompression needs to flush DB caches, or
+ * when we don't decompress but TC-compatible planes are dirty.
+ */
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ /* If we flush DB caches for TC-compatible depth, the dirty
+ * state becomes 0 for the whole mipmap tree and all planes.
+ * (there is nothing else to flush)
+ */
+ if (tex->tc_compatible_htile) {
+ if (r600_can_sample_zs(tex, false))
+ tex->dirty_level_mask = 0;
+ if (r600_can_sample_zs(tex, true))
+ tex->stencil_dirty_level_mask = 0;
+ }
}
}
@@ -1352,11 +1368,15 @@ static boolean si_generate_mipmap(struct pipe_context *ctx,
rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
last_level - base_level);
+ sctx->generate_mipmap_for_depth = rtex->is_depth;
+
si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
util_blitter_generate_mipmap(sctx->blitter, tex, format,
base_level, last_level,
first_layer, last_layer);
si_blitter_end(ctx);
+
+ sctx->generate_mipmap_for_depth = false;
return true;
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a7031ec3632..acf6fb47fae 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -584,12 +584,14 @@ static bool color_needs_decompression(struct r600_texture *rtex)
(rtex->cmask.size || rtex->dcc_offset));
}
-static bool depth_needs_decompression(struct r600_texture *rtex,
- struct si_sampler_view *sview)
+static bool depth_needs_decompression(struct r600_texture *rtex)
{
- return rtex->db_compatible &&
- (!rtex->tc_compatible_htile ||
- !r600_can_sample_zs(rtex, sview->is_stencil_sampler));
+ /* If the depth/stencil texture is TC-compatible, no decompression
+ * will be done. The decompression function will only flush DB caches
+ * to make it coherent with shaders. That's necessary because the driver
+ * doesn't flush DB caches in any other case.
+ */
+ return rtex->db_compatible;
}
static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
@@ -633,9 +635,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
- struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
- if (depth_needs_decompression(rtex, rview)) {
+ if (depth_needs_decompression(rtex)) {
samplers->needs_depth_decompress_mask |= 1u << slot;
} else {
samplers->needs_depth_decompress_mask &= ~(1u << slot);
@@ -2470,7 +2471,7 @@ static void si_make_texture_handle_resident(struct pipe_context *ctx,
struct r600_texture *rtex =
(struct r600_texture *)sview->base.texture;
- if (depth_needs_decompression(rtex, sview)) {
+ if (depth_needs_decompression(rtex)) {
util_dynarray_append(
&sctx->resident_tex_needs_depth_decompress,
struct si_texture_handle *,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 0c77f933127..1c174083819 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -362,6 +362,7 @@ struct si_context {
bool db_stencil_clear:1;
bool db_stencil_disable_expclear:1;
bool occlusion_queries_disabled:1;
+ bool generate_mipmap_for_depth:1;
/* Emitted draw state. */
bool gs_tri_strip_adj_fix:1;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 831c6b8ea32..193816d2bf7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2525,15 +2525,26 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
* the only client not using TC that can change textures is
* the framebuffer.
*
- * Flush all CB and DB caches here because all buffers can be used
- * for write by both TC (with shader image stores) and CB/DB.
+ * Wait for compute shaders because of possible transitions:
+ * - FB write -> shader read
+ * - shader write -> FB read
+ *
+ * DB caches are flushed on demand (using si_decompress_textures).
*/
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_CS_PARTIAL_FLUSH;
+ /* u_blitter doesn't invoke depth decompression when it does multiple
+ * blits in a row, but the only case when it matters for DB is when
+ * doing generate_mipmap. So here we flush DB manually between
+ * individual generate_mipmap blits.
+ * Note that lower mipmap levels aren't compressed.
+ */
+ if (sctx->generate_mipmap_for_depth)
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
@@ -3990,9 +4001,9 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
+ /* Depth and stencil are flushed in si_decompress_textures when needed. */
if (flags & PIPE_BARRIER_FRAMEBUFFER)
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB;
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
if (flags & (PIPE_BARRIER_FRAMEBUFFER |
PIPE_BARRIER_INDIRECT_BUFFER))
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d039e015cc6..d13c8b7086f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1402,11 +1402,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
- if (!rtex->tc_compatible_htile)
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER &&
- (!rtex->tc_compatible_htile || !rtex->can_sample_s))
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {