diff options
| field | value | date |
|---|---|---|
| author | Marek Olšák <[email protected]> | 2019-06-04 22:02:25 -0400 |
| committer | Marek Olšák <[email protected]> | 2019-07-03 15:51:13 -0400 |
| commit | 395185912d7766749d8a2fc66eb10b0bf3a2da16 (patch) | |
| tree | b1fb80bacb74ddd79d130c040e99faf0a18f820a /src | |
| parent | 5ff3aff0d683882bd3746aa04c0296d3b728bc96 (diff) | |
radeonsi/gfx10: make sure GDS is idle between IBs
Acked-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_gfx_cs.c | 20 |
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_streamout.c | 17 |
2 files changed, 28 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 4a4a7eecd63..13ef470af3c 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -75,22 +75,21 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, { struct radeon_cmdbuf *cs = ctx->gfx_cs; struct radeon_winsys *ws = ctx->ws; + const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH | + SI_CONTEXT_CS_PARTIAL_FLUSH; unsigned wait_flags = 0; if (ctx->gfx_flush_in_progress) return; if (!ctx->screen->info.kernel_flushes_tc_l2_after_ib) { - wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | - SI_CONTEXT_CS_PARTIAL_FLUSH | + wait_flags |= wait_ps_cs | SI_CONTEXT_INV_L2; } else if (ctx->chip_class == GFX6) { /* The kernel flushes L2 before shaders are finished. */ - wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | - SI_CONTEXT_CS_PARTIAL_FLUSH; + wait_flags |= wait_ps_cs; } else if (!(flags & RADEON_FLUSH_START_NEXT_GFX_IB_NOW)) { - wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | - SI_CONTEXT_CS_PARTIAL_FLUSH; + wait_flags |= wait_ps_cs; } /* Drop this flush if it's a no-op. */ @@ -162,6 +161,13 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, if (ctx->streamout.begin_emitted) { si_emit_streamout_end(ctx); ctx->streamout.suspended = true; + + /* Since streamout uses GDS on gfx10, we need to make + * GDS idle when we leave the IB, otherwise another + * process might overwrite it while our shaders are busy. 
+ */ + if (ctx->chip_class >= GFX10) + wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; } } @@ -175,7 +181,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, ctx->flags |= wait_flags; ctx->emit_cache_flush(ctx); } - ctx->gfx_last_ib_is_busy = wait_flags == 0; + ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs; if (ctx->current_saved_cs) { si_trace_emit(ctx); diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 6b727498ea1..1eb06b7430b 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -96,6 +96,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, struct si_context *sctx = (struct si_context *)ctx; unsigned old_num_targets = sctx->streamout.num_targets; unsigned i; + bool wait_now = false; /* We are going to unbind the buffers. Mark which caches need to be flushed. */ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) { @@ -126,10 +127,19 @@ static void si_set_streamout_targets(struct pipe_context *ctx, SI_CONTEXT_INV_VCACHE; /* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */ - if (sctx->chip_class >= GFX10) + if (sctx->chip_class >= GFX10) { sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; - else + + /* Wait now. This is needed to make sure that GDS is not + * busy at the end of IBs. + * + * Also, the next streamout operation will overwrite GDS, + * so we need to make sure that it's idle. + */ + wait_now = true; + } else { sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH; + } } /* All readers of the streamout targets need to be finished before we can @@ -200,6 +210,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx, } for (; i < old_num_targets; i++) si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL); + + if (wait_now) + sctx->emit_cache_flush(sctx); } static void gfx10_emit_streamout_begin(struct si_context *sctx) |