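radeonsi/gfx10: make sure GDS is idle between IBs

Streamout uses GDS on gfx10, so GDS must be idle when we leave an IB;
otherwise another process might overwrite it while our shaders are
still busy. Add a PS partial flush when streamout is suspended at the
end of an IB on gfx10, and emit the cache flush immediately when
streamout targets are unbound on gfx10. Because wait_flags can now
contain only the PS flush, gfx_last_ib_is_busy is cleared only when
both the PS and CS partial flushes were requested.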

Acked-by: Bas Nieuwenhuizen <[email protected]>
author: Marek Olšák <[email protected]> 2019-06-04 22:02:25 -0400
committer: Marek Olšák <[email protected]> 2019-07-03 15:51:13 -0400
commit: 395185912d7766749d8a2fc66eb10b0bf3a2da16 (patch)
tree: b1fb80bacb74ddd79d130c040e99faf0a18f820a
parent: 5ff3aff0d683882bd3746aa04c0296d3b728bc96 (diff)
2 files changed, 28 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 4a4a7eecd63..13ef470af3c 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -75,22 +75,21 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 {
 	struct radeon_cmdbuf *cs = ctx->gfx_cs;
 	struct radeon_winsys *ws = ctx->ws;
+	const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH |
+				    SI_CONTEXT_CS_PARTIAL_FLUSH;
 	unsigned wait_flags = 0;
 
 	if (ctx->gfx_flush_in_progress)
 		return;
 
 	if (!ctx->screen->info.kernel_flushes_tc_l2_after_ib) {
-		wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			      SI_CONTEXT_CS_PARTIAL_FLUSH |
+		wait_flags |= wait_ps_cs |
 			      SI_CONTEXT_INV_L2;
 	} else if (ctx->chip_class == GFX6) {
 		/* The kernel flushes L2 before shaders are finished. */
-		wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			      SI_CONTEXT_CS_PARTIAL_FLUSH;
+		wait_flags |= wait_ps_cs;
 	} else if (!(flags & RADEON_FLUSH_START_NEXT_GFX_IB_NOW)) {
-		wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			      SI_CONTEXT_CS_PARTIAL_FLUSH;
+		wait_flags |= wait_ps_cs;
 	}
 
 	/* Drop this flush if it's a no-op. */
@@ -162,6 +161,13 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 		if (ctx->streamout.begin_emitted) {
 			si_emit_streamout_end(ctx);
 			ctx->streamout.suspended = true;
+
+			/* Since streamout uses GDS on gfx10, we need to make
+			 * GDS idle when we leave the IB, otherwise another
+			 * process might overwrite it while our shaders are busy.
+			 */
+			if (ctx->chip_class >= GFX10)
+				wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
 		}
 	}
 
@@ -175,7 +181,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 		ctx->flags |= wait_flags;
 		ctx->emit_cache_flush(ctx);
 	}
-	ctx->gfx_last_ib_is_busy = wait_flags == 0;
+	ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs;
 
 	if (ctx->current_saved_cs) {
 		si_trace_emit(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 6b727498ea1..1eb06b7430b 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -96,6 +96,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	struct si_context *sctx = (struct si_context *)ctx;
 	unsigned old_num_targets = sctx->streamout.num_targets;
 	unsigned i;
+	bool wait_now = false;
 
 	/* We are going to unbind the buffers. Mark which caches need to be flushed. */
 	if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
@@ -126,10 +127,19 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 			       SI_CONTEXT_INV_VCACHE;
 
 		/* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
-		if (sctx->chip_class >= GFX10)
+		if (sctx->chip_class >= GFX10) {
 			sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
-		else
+
+			/* Wait now. This is needed to make sure that GDS is not
+			 * busy at the end of IBs.
+			 *
+			 * Also, the next streamout operation will overwrite GDS,
+			 * so we need to make sure that it's idle.
+			 */
+			wait_now = true;
+		} else {
 			sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
+		}
 	}
 
 	/* All readers of the streamout targets need to be finished before we can
@@ -200,6 +210,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	}
 	for (; i < old_num_targets; i++)
 		si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
+
+	if (wait_now)
+		sctx->emit_cache_flush(sctx);
 }
 
 static void gfx10_emit_streamout_begin(struct si_context *sctx)
author	Marek Olšák <[email protected]>	2019-06-04 22:02:25 -0400
committer	Marek Olšák <[email protected]>	2019-07-03 15:51:13 -0400
commit	395185912d7766749d8a2fc66eb10b0bf3a2da16 (patch)
tree	b1fb80bacb74ddd79d130c040e99faf0a18f820a
parent	5ff3aff0d683882bd3746aa04c0296d3b728bc96 (diff)