summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-08-19 15:06:22 +0200
committerMarek Olšák <[email protected]>2017-08-22 13:29:47 +0200
commitaa64e24cb16cc63613ba99909059c1a0f7610ae2 (patch)
tree607b2b4b98178500f7db876f08e31b06158c4795
parent5b62eb237c5253f5315c6fd948307e7f2247de2c (diff)
radeonsi/gfx9: don't flush L2 metadata for CB if not needed
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h23
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c13
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c11
4 files changed, 38 insertions, 17 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 1b001cc04c8..5545f5b2a5b 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -413,7 +413,8 @@ si_decompress_depth(struct si_context *sctx,
* The DB->CB copy uses CB for the final writes.
*/
if (copy_planes && tex->resource.b.b.nr_samples > 1)
- si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
+ si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+ false);
}
static void
@@ -524,7 +525,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
}
sctx->decompression_enabled = false;
- si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
+ si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
+ vi_dcc_enabled(rtex, first_level));
}
static void
@@ -1213,7 +1215,7 @@ static void si_do_CB_resolve(struct si_context *sctx,
si_blitter_end(&sctx->b.b);
/* Flush caches for possible texturing. */
- si_make_CB_shader_coherent(sctx, 1);
+ si_make_CB_shader_coherent(sctx, 1, false);
}
static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f2a20ba4668..e5aeb9a7e88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -58,7 +58,10 @@
/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
* invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
#define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
-/* gaps */
+/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
+ * a CB or DB flush. */
+#define SI_CONTEXT_INV_L2_METADATA (R600_CONTEXT_PRIVATE_FLAG << 5)
+/* gap */
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 7)
#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 8)
@@ -198,6 +201,7 @@ struct si_framebuffer {
ubyte dirty_cbufs;
bool dirty_zsbuf;
bool any_dst_linear;
+ bool CB_has_shader_readable_metadata;
};
struct si_clip_state {
@@ -612,14 +616,25 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
}
static inline void
-si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples)
+si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
+ bool shaders_read_metadata)
{
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_INV_VMEM_L1;
- /* Single-sample color is coherent with shaders on GFX9. */
- if (sctx->b.chip_class <= VI || num_samples >= 2)
+ if (sctx->b.chip_class >= GFX9) {
+ /* Single-sample color is coherent with shaders on GFX9, but
+ * L2 metadata must be flushed if shaders read metadata.
+ * (DCC, CMASK).
+ */
+ if (num_samples >= 2)
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ else if (shaders_read_metadata)
+ sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+ } else {
+ /* SI-CI-VI */
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
}
static inline void
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index d116c07ee6e..fb97052aacf 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2573,7 +2573,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
*/
if (sctx->framebuffer.nr_samples <= 1 &&
sctx->framebuffer.state.nr_cbufs)
- si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
+ si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
+ sctx->framebuffer.CB_has_shader_readable_metadata);
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
@@ -2608,6 +2609,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
sctx->framebuffer.any_dst_linear = false;
+ sctx->framebuffer.CB_has_shader_readable_metadata = false;
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
@@ -2642,6 +2644,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (rtex->surface.is_linear)
sctx->framebuffer.any_dst_linear = true;
+ if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
+ sctx->framebuffer.CB_has_shader_readable_metadata = true;
+
r600_context_add_resource_size(ctx, surf->base.texture);
p_atomic_inc(&rtex->framebuffers_bound);
@@ -4022,7 +4027,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
/* Multisample surfaces are flushed in si_decompress_textures. */
if (sctx->framebuffer.nr_samples <= 1 &&
sctx->framebuffer.state.nr_cbufs)
- si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
+ si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
+ sctx->framebuffer.CB_has_shader_readable_metadata);
}
/* This only ensures coherency for shader image/buffer stores. */
@@ -4067,8 +4073,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
sctx->framebuffer.state.nr_cbufs) {
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
- /* Single-sample color is coherent with TC on GFX9. */
- if (sctx->screen->b.chip_class <= VI)
+ if (sctx->b.chip_class <= VI)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 7bc52f29e25..1d8be49a480 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -988,13 +988,12 @@ void si_emit_cache_flush(struct si_context *sctx)
* TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
* TCL1 = invalidate L1
*/
+ tc_flags = 0;
- /* When flushing CB or DB, L2 metadata should always be invali-
- * dated before texturing. Invalidating L2 data is not needed
- * in some cases.
- */
- tc_flags = EVENT_TC_ACTION_ENA |
- EVENT_TC_MD_ACTION_ENA;
+ if (rctx->flags & SI_CONTEXT_INV_L2_METADATA) {
+ tc_flags = EVENT_TC_ACTION_ENA |
+ EVENT_TC_MD_ACTION_ENA;
+ }
/* Ideally flush TC together with CB/DB. */
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {