summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2016-07-13 09:49:53 -0400
committer Rob Clark <[email protected]> 2016-07-30 09:23:42 -0400
commit 00bed8a794de3d80a46b65b9ab23c6df83e416a8 (patch)
tree 7098b39af0e3a10665314e97487e6773c2c9312f
parent c44163876a2858aea219a08bd2e048b76953cff9 (diff)
freedreno: threaded batch flush
With the state accessed from GMEM+submit factored out of fd_context and into fd_batch, it is now possible to punt this off to a helper thread.

More importantly, since there are cases where one context might force the batch-cache to flush another context's batches (i.e. when there are too many in-flight batches), using a per-context helper thread keeps the various flushes for a given context serialized.

TODO: as with the batch-cache, there are a few places where we'll need a mutex to protect critical sections, which is completely missing at the moment.

Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_batch.c | 64
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_batch.h | 6
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_batch_cache.c | 15
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_context.c | 10
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_context.h | 4
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_gmem.c | 2
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_query_hw.c | 4
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_resource.c | 16
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_state.c | 4
9 files changed, 99 insertions, 26 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5008f5dbe56..219e0a80988 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -40,6 +40,9 @@ batch_init(struct fd_batch *batch)
struct fd_context *ctx = batch->ctx;
unsigned size = 0;
+ if (ctx->screen->reorder)
+ util_queue_fence_init(&batch->flush_fence);
+
/* if kernel is too old to support unlimited # of cmd buffers, we
* have no option but to allocate large worst-case sizes so that
* we don't need to grow the ringbuffer. Performance is likely to
@@ -119,6 +122,9 @@ batch_fini(struct fd_batch *batch)
fd_hw_sample_reference(batch->ctx, &samp, NULL);
}
util_dynarray_fini(&batch->samples);
+
+ if (batch->ctx->screen->reorder)
+ util_queue_fence_destroy(&batch->flush_fence);
}
static void
@@ -129,7 +135,7 @@ batch_flush_reset_dependencies(struct fd_batch *batch, bool flush)
foreach_batch(dep, cache, batch->dependents_mask) {
if (flush)
- fd_batch_flush(dep);
+ fd_batch_flush(dep, false);
fd_batch_reference(&dep, NULL);
}
@@ -156,6 +162,8 @@ batch_reset(struct fd_batch *batch)
{
DBG("%p", batch);
+ fd_batch_sync(batch);
+
batch_flush_reset_dependencies(batch, false);
batch_reset_resources(batch);
@@ -197,6 +205,31 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch)
util_sprintf(buf, "fd_batch<%u>", batch->seqno);
}
+void
+fd_batch_sync(struct fd_batch *batch)
+{
+ if (!batch->ctx->screen->reorder)
+ return;
+ util_queue_job_wait(&batch->flush_fence);
+}
+
+static void
+batch_flush_func(void *job, int id)
+{
+ struct fd_batch *batch = job;
+
+ fd_gmem_render_tiles(batch);
+ batch_reset_resources(batch);
+ batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
+}
+
+static void
+batch_cleanup_func(void *job, int id)
+{
+ struct fd_batch *batch = job;
+ fd_batch_reference(&batch, NULL);
+}
+
static void
batch_flush(struct fd_batch *batch)
{
@@ -207,11 +240,25 @@ batch_flush(struct fd_batch *batch)
batch->needs_flush = false;
- batch_flush_reset_dependencies(batch, true);
+ /* close out the draw cmds by making sure any active queries are
+ * paused:
+ */
+ fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_NULL);
- fd_gmem_render_tiles(batch);
+ batch->ctx->dirty = ~0;
+ batch_flush_reset_dependencies(batch, true);
- batch_reset_resources(batch);
+ if (batch->ctx->screen->reorder) {
+ struct fd_batch *tmp = NULL;
+ fd_batch_reference(&tmp, batch);
+ util_queue_add_job(&batch->ctx->flush_queue,
+ batch, &batch->flush_fence,
+ batch_flush_func, batch_cleanup_func);
+ } else {
+ fd_gmem_render_tiles(batch);
+ batch_reset_resources(batch);
+ batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
+ }
debug_assert(batch->reference.count > 0);
@@ -222,8 +269,9 @@ batch_flush(struct fd_batch *batch)
}
}
+/* NOTE: could drop the last ref to batch */
void
-fd_batch_flush(struct fd_batch *batch)
+fd_batch_flush(struct fd_batch *batch, bool sync)
{
/* NOTE: we need to hold an extra ref across the body of flush,
* since the last ref to this batch could be dropped when cleaning
@@ -232,6 +280,8 @@ fd_batch_flush(struct fd_batch *batch)
struct fd_batch *tmp = NULL;
fd_batch_reference(&tmp, batch);
batch_flush(tmp);
+ if (sync)
+ fd_batch_sync(tmp);
fd_batch_reference(&tmp, NULL);
}
@@ -263,7 +313,7 @@ batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
*/
if (batch_depends_on(dep, batch)) {
DBG("%p: flush forced on %p!", batch, dep);
- fd_batch_flush(dep);
+ fd_batch_flush(dep, false);
} else {
struct fd_batch *other = NULL;
fd_batch_reference(&other, dep);
@@ -327,5 +377,5 @@ fd_batch_check_size(struct fd_batch *batch)
struct fd_ringbuffer *ring = batch->draw;
if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
(fd_mesa_debug & FD_DBG_FLUSH))
- fd_batch_flush(batch);
+ fd_batch_flush(batch, true);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 6be196534ab..047044a9538 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -28,6 +28,7 @@
#define FREEDRENO_BATCH_H_
#include "util/u_inlines.h"
+#include "util/u_queue.h"
#include "util/list.h"
#include "freedreno_util.h"
@@ -76,6 +77,8 @@ struct fd_batch {
struct fd_context *ctx;
+ struct util_queue_fence flush_fence;
+
/* do we need to mem2gmem before rendering. We don't, if for example,
* there was a glClear() that invalidated the entire previous buffer
* contents. Keep track of which buffer(s) are cleared, or needs
@@ -197,7 +200,8 @@ struct fd_batch {
struct fd_batch * fd_batch_create(struct fd_context *ctx);
void fd_batch_reset(struct fd_batch *batch);
-void fd_batch_flush(struct fd_batch *batch);
+void fd_batch_sync(struct fd_batch *batch);
+void fd_batch_flush(struct fd_batch *batch, bool sync);
void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write);
void fd_batch_check_size(struct fd_batch *batch);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
index c947a559df9..635f2a7c994 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -128,19 +128,24 @@ uint32_t
fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
{
struct hash_entry *entry;
- uint32_t timestamp = 0;
+ struct fd_batch *last_batch = NULL;
hash_table_foreach(cache->ht, entry) {
struct fd_batch *batch = NULL;
fd_batch_reference(&batch, (struct fd_batch *)entry->data);
if (batch->ctx == ctx) {
- fd_batch_flush(batch);
- timestamp = MAX2(timestamp, fd_ringbuffer_timestamp(batch->gmem));
+ fd_batch_reference(&last_batch, batch);
+ fd_batch_flush(batch, false);
}
fd_batch_reference(&batch, NULL);
}
- return timestamp;
+ if (last_batch) {
+ fd_batch_sync(last_batch);
+ fd_batch_reference(&last_batch, NULL);
+ }
+
+ return ctx->last_fence;
}
void
@@ -238,7 +243,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
fd_batch_reference(&flush_batch, cache->batches[i]);
}
DBG("%p: too many batches! flush forced!", flush_batch);
- fd_batch_flush(flush_batch);
+ fd_batch_flush(flush_batch, true);
/* While the resources get cleaned up automatically, the flush_batch
* doesn't get removed from the dependencies of other batches, so
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 1c32cd9ae92..599f94ffec1 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -48,7 +48,7 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
if (!ctx->screen->reorder) {
struct fd_batch *batch = NULL;
fd_batch_reference(&batch, ctx->batch);
- fd_batch_flush(batch);
+ fd_batch_flush(batch, true);
timestamp = fd_ringbuffer_timestamp(batch->gmem);
fd_batch_reference(&batch, NULL);
} else {
@@ -103,6 +103,9 @@ fd_context_destroy(struct pipe_context *pctx)
DBG("");
+ if (ctx->screen->reorder)
+ util_queue_destroy(&ctx->flush_queue);
+
fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
fd_bc_invalidate_context(ctx);
@@ -179,8 +182,11 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
* batches per compute job (since it isn't using tiling, so no point
* in getting involved with the re-ordering madness)..
*/
- if (!screen->reorder)
+ if (!screen->reorder) {
ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx);
+ } else {
+ util_queue_init(&ctx->flush_queue, "flush_queue", 16, 1);
+ }
fd_reset_wfi(ctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 7e25e57d43b..2d88cdcbd8c 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -114,6 +114,8 @@ struct fd_context {
struct fd_device *dev;
struct fd_screen *screen;
+ struct util_queue flush_queue;
+
struct blitter_context *blitter;
struct primconvert_context *primconvert;
@@ -161,6 +163,8 @@ struct fd_context {
*/
struct fd_batch *batch;
+ uint32_t last_fence;
+
/* Are we in process of shadowing a resource? Used to detect recursion
* in transfer_map, and skip unneeded synchronization.
*/
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index d57b6a36d8b..ed013d9d037 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -405,8 +405,6 @@ fd_gmem_render_tiles(struct fd_batch *batch)
fd_ringbuffer_flush(batch->gmem);
fd_reset_wfi(ctx);
-
- ctx->dirty = ~0;
}
/* tile needs restore if it isn't completely contained within the
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index 12d40d04cda..b61ea0d5e08 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -238,7 +238,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
* spin forever:
*/
if (hq->no_wait_cnt++ > 5)
- fd_batch_flush(rsc->write_batch);
+ fd_batch_flush(rsc->write_batch, false);
return false;
}
@@ -266,7 +266,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
struct fd_resource *rsc = fd_resource(start->prsc);
if (rsc->write_batch)
- fd_batch_flush(rsc->write_batch);
+ fd_batch_flush(rsc->write_batch, true);
/* some piglit tests at least do query with no draws, I guess: */
if (!rsc->bo)
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 0e0305885a7..a091f5f1774 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -516,12 +516,18 @@ fd_resource_transfer_map(struct pipe_context *pctx,
if (needs_flush) {
if (usage & PIPE_TRANSFER_WRITE) {
- struct fd_batch *batch;
- foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask)
- fd_batch_flush(batch);
+ struct fd_batch *batch, *last_batch = NULL;
+ foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
+ fd_batch_reference(&last_batch, batch);
+ fd_batch_flush(batch, false);
+ }
+ if (last_batch) {
+ fd_batch_sync(last_batch);
+ fd_batch_reference(&last_batch, NULL);
+ }
assert(rsc->batch_mask == 0);
} else {
- fd_batch_flush(rsc->write_batch);
+ fd_batch_flush(rsc->write_batch, true);
}
assert(!rsc->write_batch);
}
@@ -1080,7 +1086,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
struct fd_resource *rsc = fd_resource(prsc);
if (rsc->write_batch)
- fd_batch_flush(rsc->write_batch);
+ fd_batch_flush(rsc->write_batch, true);
assert(!rsc->write_batch);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index f83fd219f0a..c7d83692741 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -137,14 +137,14 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
* multiple times to the same surface), so we might as
* well go ahead and flush this one:
*/
- fd_batch_flush(old_batch);
+ fd_batch_flush(old_batch, false);
}
fd_batch_reference(&old_batch, NULL);
} else {
DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
framebuffer->cbufs[0], framebuffer->zsbuf);
- fd_batch_flush(ctx->batch);
+ fd_batch_flush(ctx->batch, false);
}
cso = &ctx->batch->framebuffer;