freedreno: avoid stalling at ringbuffer wraparound

Because of how the tiling works, we can't easily flush at arbitrary points. So wraparound is handled by resetting to the top of the ringbuffer. Previously this would stall until the current rendering was complete. Instead, cycle through multiple ringbuffers to avoid a stall. Signed-off-by: Rob Clark <[email protected]>
author: Rob Clark <[email protected]> 2013-09-06 13:20:46 -0400
committer: Rob Clark <[email protected]> 2013-09-14 13:31:58 -0400
commit: 6e9c386d16b3b38be6d1496758ef983b64744844 (patch)
tree: dd9df0e10d4548821ff27d2904d1994434212073
parent: ca505303a72970f40792f16d79eedab35b27b6ed (diff)
2 files changed, 41 insertions, 22 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 1d03351f041..96e1ef6e579 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -34,27 +34,32 @@
 #include "freedreno_gmem.h"
 #include "freedreno_util.h"
 
-/* there are two cases where we currently need to wait for render complete:
- * 1) pctx->flush() .. since at the moment we have no way for DDX to sync
- *    the presentation blit with the 3d core
- * 2) wrap-around for ringbuffer.. possibly we can do something more
- *    Intelligent here.  Right now we need to ensure there is enough room
- *    at the end of the drawcmds in the cmdstream buffer for all the per-
- *    tile cmds.  We do this the lamest way possible, by making the ringbuffer
- *    big, and flushing and resetting back to the beginning if we get too
- *    close to the end.
- */
 static void
-fd_context_wait(struct pipe_context *pctx)
+fd_context_next_rb(struct pipe_context *pctx)
 {
 	struct fd_context *ctx = fd_context(pctx);
-	uint32_t ts = fd_ringbuffer_timestamp(ctx->ring);
+	struct fd_ringbuffer *ring;
+	uint32_t ts;
+
+	fd_ringmarker_del(ctx->draw_start);
+	fd_ringmarker_del(ctx->draw_end);
+
+	/* grab next ringbuffer: */
+	ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
 
-	DBG("wait: %u", ts);
+	/* wait for new rb to be idle: */
+	ts = fd_ringbuffer_timestamp(ring);
+	if (ts) {
+		DBG("wait: %u", ts);
+		fd_pipe_wait(ctx->screen->pipe, ts);
+	}
+
+	fd_ringbuffer_reset(ring);
+
+	ctx->draw_start = fd_ringmarker_new(ring);
+	ctx->draw_end = fd_ringmarker_new(ring);
 
-	fd_pipe_wait(ctx->screen->pipe, ts);
-	fd_ringbuffer_reset(ctx->ring);
-	fd_ringmarker_mark(ctx->draw_start);
+	ctx->ring = ring;
 }
 
 /* emit accumulated render cmds, needed for example if render target has
@@ -79,7 +84,7 @@ fd_context_render(struct pipe_context *pctx)
 	 * wrap around:
 	 */
 	if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
-		fd_context_wait(pctx);
+		fd_context_next_rb(pctx);
 
 	ctx->needs_flush = false;
 	ctx->cleared = ctx->restore = ctx->resolve = 0;
@@ -131,6 +136,7 @@ fd_context_init(struct fd_context *ctx,
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct pipe_context *pctx;
+	int i;
 
 	ctx->screen = screen;
 
@@ -144,12 +150,13 @@ fd_context_init(struct fd_context *ctx,
 	pctx->priv = priv;
 	pctx->flush = fd_context_flush;
 
-	ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000);
-	if (!ctx->ring)
-		goto fail;
+	for (i = 0; i < ARRAY_SIZE(ctx->rings); i++) {
+		ctx->rings[i] = fd_ringbuffer_new(screen->pipe, 0x400000);
+		if (!ctx->rings[i])
+			goto fail;
+	}
 
-	ctx->draw_start = fd_ringmarker_new(ctx->ring);
-	ctx->draw_end = fd_ringmarker_new(ctx->ring);
+	fd_context_next_rb(pctx);
 
 	util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
 			16, UTIL_SLAB_SINGLETHREADED);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 3d18260445f..808e3a72f02 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -134,6 +134,18 @@ struct fd_context {
 	} gmem_reason;
 	unsigned num_draws;
 
+	/* we can't really sanely deal with wraparound point in ringbuffer
+	 * and because of the way tiling works we can't really flush at
+	 * arbitrary points (without a big performance hit).  When we get
+	 * too close to the end of the current ringbuffer, cycle to the next
+	 * one (and wait for pending rendering from next rb to complete).
+	 * We want the # of ringbuffers to be high enough that we don't
+	 * normally have to wait before resetting to the start of the next
+	 * rb.
+	 */
+	struct fd_ringbuffer *rings[4];
+	unsigned rings_idx;
+
 	struct fd_ringbuffer *ring;
 	struct fd_ringmarker *draw_start, *draw_end;
author	Rob Clark <[email protected]>	2013-09-06 13:20:46 -0400
committer	Rob Clark <[email protected]>	2013-09-14 13:31:58 -0400
commit	6e9c386d16b3b38be6d1496758ef983b64744844 (patch)
tree	dd9df0e10d4548821ff27d2904d1994434212073
parent	ca505303a72970f40792f16d79eedab35b27b6ed (diff)