From 2638250fecab821c27c95d7bc48a212cd269c708 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 15 Jun 2017 01:42:46 +0200 Subject: radeonsi: flush CB after MSAA only when transitioning from CB to textures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main flush before texturing is done after the FMASK decompress pass. CB after MSAA rendering is not flushed in set_framebuffer_state and also not in memory_barrier if the current color buffer is MSAA. We fully rely on the FMASK decompress pass for the flushing. Some CB decompress and resolve passes need an explicit flush before and after. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_blit.c | 29 +++++++++++++++++++++ src/gallium/drivers/radeonsi/si_state.c | 45 +++++++++++++++++++++++---------- 2 files changed, 60 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index d137a1fbec3..631676bcd79 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -405,6 +405,14 @@ si_decompress_depth(struct si_context *sctx, tex->stencil_dirty_level_mask = 0; } } + /* set_framebuffer_state takes care of coherency for single-sample. + * The DB->CB copy uses CB for the final writes. + */ + if (copy_planes && tex->resource.b.b.nr_samples > 1) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; + } } static void @@ -487,10 +495,19 @@ static void si_blit_decompress_color(struct pipe_context *ctx, surf_tmpl.u.tex.last_layer = layer; cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); + /* Required before and after FMASK and DCC_DECOMPRESS. 
*/ + if (custom_blend == sctx->custom_blend_fmask_decompress || + custom_blend == sctx->custom_blend_dcc_decompress) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + si_blitter_begin(ctx, SI_DECOMPRESS); util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); si_blitter_end(ctx); + if (custom_blend == sctx->custom_blend_fmask_decompress || + custom_blend == sctx->custom_blend_dcc_decompress) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + pipe_surface_reference(&cbsurf, NULL); } @@ -503,6 +520,10 @@ static void si_blit_decompress_color(struct pipe_context *ctx, sctx->decompression_enabled = false; sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness; + + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_INV_VMEM_L1; } static void @@ -1157,6 +1178,9 @@ static void si_do_CB_resolve(struct si_context *sctx, unsigned dst_level, unsigned dst_z, enum pipe_format format) { + /* Required before and after CB_RESOLVE. */ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, @@ -1164,6 +1188,11 @@ static void si_do_CB_resolve(struct si_context *sctx, ~0, sctx->custom_blend_resolve, format); si_blitter_end(&sctx->b.b); + + /* Flush caches for possible texturing. */ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_INV_VMEM_L1; } static bool do_hardware_msaa_resolve(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 193816d2bf7..a0d790ac2a9 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2530,11 +2530,18 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * - shader write -> FB read * * DB caches are flushed on demand (using si_decompress_textures). 
+ * + * When MSAA is enabled, CB and TC caches are flushed on demand + * (after FMASK decompression). Shader write -> FB read transitions + * cannot happen for MSAA textures, because MSAA shader images are + * not supported. */ - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_CS_PARTIAL_FLUSH; + if (sctx->framebuffer.nr_samples <= 1) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; + } + sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; /* u_blitter doesn't invoke depth decompression when it does multiple * blits in a row, but the only case when it matters for DB is when @@ -2542,8 +2549,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * individual generate_mipmap blits. * Note that lower mipmap levels aren't compressed. */ - if (sctx->generate_mipmap_for_depth) - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB; + if (sctx->generate_mipmap_for_depth) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_DB; + } /* Take the maximum of the old and new count. If the new count is lower, * dirtying is needed to disable the unbound colorbuffers. @@ -3961,9 +3971,12 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) { struct si_context *sctx = (struct si_context *)ctx; - sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_FLUSH_AND_INV_CB; + /* Multisample surfaces are flushed in si_decompress_textures. */ + if (sctx->framebuffer.nr_samples <= 1) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_FLUSH_AND_INV_CB; + } sctx->framebuffer.do_update_surf_dirtiness = true; } @@ -4001,12 +4014,16 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } - /* Depth and stencil are flushed in si_decompress_textures when needed. 
*/ - if (flags & PIPE_BARRIER_FRAMEBUFFER) - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + /* MSAA color, any depth and any stencil are flushed in + * si_decompress_textures when needed. + */ + if (flags & PIPE_BARRIER_FRAMEBUFFER && + sctx->framebuffer.nr_samples <= 1) { + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_WRITEBACK_GLOBAL_L2; + } - if (flags & (PIPE_BARRIER_FRAMEBUFFER | - PIPE_BARRIER_INDIRECT_BUFFER)) + if (flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } -- cgit v1.2.3