author     Kenneth Graunke <[email protected]>   2019-08-05 13:18:39 -0700
committer  Marge Bot <[email protected]>     2020-05-01 19:00:02 +0000
commit     1800e4b58caaa89acfe45c95d0d22e533b50ee03 (patch)
tree       ba5c00720849eb9f298be79aa5c78121dbf17fd5
parent     df09efe8df40f39dc791f39fde07b37a48157eea (diff)
iris: Implement PIPE_FLUSH_DEFERRED support.
(Co-authored with Chris Wilson.)
Frequently, games create fences and later check them with a timeout of
0 to see if that work has completed yet. They do not want the work to
be flushed immediately upon fence creation.
This is what PIPE_FLUSH_DEFERRED does - it inhibits the flush at fence
creation time, but still guarantees that a flush will occur later on
once fence_finish() is called.
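From the state tracker's side, the pattern looks roughly like this
(a minimal sketch against the Gallium interfaces, not code from this
patch):

   /* Create a fence, but let queued commands keep accumulating. */
   struct pipe_fence_handle *fence = NULL;
   pipe->flush(pipe, &fence, PIPE_FLUSH_DEFERRED);

   /* Later: poll with a timeout of 0.  fence_finish() performs the
    * deferred flush first if the fence's batch was never submitted.
    */
   bool done = screen->fence_finish(screen, pipe, fence, 0);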
Since syncpts can only occur at batch boundaries, when deferring a
flush, we have to wait for the syncpt at the end of the batch being
constructed. This is later than desired, but safe if blocking. To
avoid extra delays, we additionally insert a PIPE_CONTROL to write an
availability bit at the exact point of the fence. We can poll this
on the CPU, allowing us to check whether the fence has gone by, even
if the batch hasn't completed. It can also let us skip kernel calls.
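Conceptually, that CPU-side check is just a comparison against a counter
the PIPE_CONTROL wrote to a coherent, CPU-mapped buffer.  A sketch with
illustrative names (the real helpers are iris_seqno_new() and
iris_seqno_signaled(), visible in the diff below):

   /* Each fence point records the value a PIPE_CONTROL will write. */
   struct seqno_point {
      uint64_t number;          /* value written at the fence point */
      volatile uint64_t *map;   /* CPU mapping of the seqno buffer  */
   };

   /* True once the GPU has passed the fence point - even if later
    * commands in the same batch are still running - with no kernel
    * call.
    */
   static bool seqno_point_passed(const struct seqno_point *p)
   {
      return *p->map >= p->number;
   }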
Improves performance in Bioshock Infinite by 10% on Icelake GT2 on
-ForceCompatLevel=5 settings. Thanks to Felix Degrood and Mark Janes
for helping notice the extraneous stalls and batches, Marek Olšák for
adding deferred flush support to Gallium to solve this issue, and
Chris Wilson for reworking a lot of the internals of this work.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3802>
 src/gallium/drivers/iris/iris_fence.c | 96
 1 file changed, 90 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/iris/iris_fence.c b/src/gallium/drivers/iris/iris_fence.c
index ff280599060..d4f11886f02 100644
--- a/src/gallium/drivers/iris/iris_fence.c
+++ b/src/gallium/drivers/iris/iris_fence.c
@@ -28,6 +28,7 @@
 
 #include <linux/sync_file.h>
 
+#include "util/u_debug.h"
 #include "util/u_inlines.h"
 
 #include "intel/common/gen_gem.h"
@@ -114,6 +115,9 @@ iris_batch_add_syncobj(struct iris_batch *batch,
 
 struct pipe_fence_handle {
    struct pipe_reference ref;
+
+   struct pipe_context *unflushed_ctx;
+
    struct iris_seqno *seqno[IRIS_BATCH_COUNT];
 };
 
@@ -170,6 +174,14 @@ iris_fence_flush(struct pipe_context *ctx,
    struct iris_screen *screen = (void *) ctx->screen;
    struct iris_context *ice = (struct iris_context *)ctx;
 
+   /* We require DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (kernel 5.2+) for
+    * deferred flushes.  Just ignore the request to defer on older kernels.
+    */
+   if (!(screen->kernel_features & KERNEL_HAS_WAIT_FOR_SUBMIT))
+      flags &= ~PIPE_FLUSH_DEFERRED;
+
+   const bool deferred = flags & PIPE_FLUSH_DEFERRED;
+
    if (flags & PIPE_FLUSH_END_OF_FRAME) {
       ice->frame++;
 
@@ -181,9 +193,10 @@
       }
    }
 
-   /* XXX PIPE_FLUSH_DEFERRED */
-   for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++)
-      iris_batch_flush(&ice->batches[i]);
+   if (!deferred) {
+      for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++)
+         iris_batch_flush(&ice->batches[i]);
+   }
 
    if (!out_fence)
       return;
@@ -194,13 +207,27 @@ iris_fence_flush(struct pipe_context *ctx,
 
    pipe_reference_init(&fence->ref, 1);
 
+   if (deferred)
+      fence->unflushed_ctx = ctx;
+
    for (unsigned b = 0; b < IRIS_BATCH_COUNT; b++) {
       struct iris_batch *batch = &ice->batches[b];
 
-      if (iris_seqno_signaled(batch->last_seqno))
-         continue;
+      if (deferred && iris_batch_bytes_used(batch) > 0) {
+         struct iris_seqno *seqno =
+            iris_seqno_new(batch, IRIS_SEQNO_BOTTOM_OF_PIPE);
+         iris_seqno_reference(screen, &fence->seqno[b], seqno);
+         iris_seqno_reference(screen, &seqno, NULL);
+      } else {
+         /* This batch has no commands queued up (perhaps we just flushed,
+          * or all the commands are on the other batch).  Wait for the last
+          * syncobj on this engine - unless it's already finished by now.
+          */
+         if (iris_seqno_signaled(batch->last_seqno))
+            continue;
 
-      iris_seqno_reference(screen, &fence->seqno[b], batch->last_seqno);
+         iris_seqno_reference(screen, &fence->seqno[b], batch->last_seqno);
+      }
    }
 
    iris_fence_reference(ctx->screen, out_fence, NULL);
@@ -213,6 +240,23 @@ iris_fence_await(struct pipe_context *ctx,
 {
    struct iris_context *ice = (struct iris_context *)ctx;
 
+   /* Unflushed fences from the same context are no-ops. */
+   if (ctx && ctx == fence->unflushed_ctx)
+      return;
+
+   /* XXX: We can't safely flush the other context, because it might be
+    *      bound to another thread, and poking at its internals wouldn't
+    *      be safe.  In the future we should use MI_SEMAPHORE_WAIT and
+    *      block until the other job has been submitted, relying on
+    *      kernel timeslicing to preempt us until the other job is
+    *      actually flushed and the seqno finally passes.
+    */
+   if (fence->unflushed_ctx) {
+      pipe_debug_message(&ice->dbg, CONFORMANCE, "%s",
+                         "glWaitSync on unflushed fence from another context "
+                         "is unlikely to work without kernel 5.8+\n");
+   }
+
    /* Flush any current work in our context as it doesn't need to wait
    * for this fence.  Any future work in our context must wait.
    */
@@ -263,8 +307,32 @@ iris_fence_finish(struct pipe_screen *p_screen,
                  struct pipe_fence_handle *fence,
                  uint64_t timeout)
 {
+   struct iris_context *ice = (struct iris_context *)ctx;
    struct iris_screen *screen = (struct iris_screen *)p_screen;
 
+   /* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have
+    * flushed yet.  Check if our syncobj is the current batch's signalling
+    * syncobj - if so, we haven't flushed and need to now.
+    *
+    * The Gallium docs mention that a flush will occur if \p ctx matches
+    * the context the fence was created with.  It may be NULL, so we check
+    * that it matches first.
+    */
+   if (ctx && ctx == fence->unflushed_ctx) {
+      for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++) {
+         struct iris_seqno *seqno = fence->seqno[i];
+
+         if (iris_seqno_signaled(seqno))
+            continue;
+
+         if (seqno->syncobj == iris_batch_get_signal_syncobj(&ice->batches[i]))
+            iris_batch_flush(&ice->batches[i]);
+      }
+
+      /* The fence is no longer deferred. */
+      fence->unflushed_ctx = NULL;
+   }
+
    unsigned int handle_count = 0;
    uint32_t handles[ARRAY_SIZE(fence->seqno)];
    for (unsigned i = 0; i < ARRAY_SIZE(fence->seqno); i++) {
@@ -285,6 +353,18 @@ iris_fence_finish(struct pipe_screen *p_screen,
       .timeout_nsec = rel2abs(timeout),
       .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
    };
+
+   if (fence->unflushed_ctx) {
+      /* This fence had a deferred flush from another context.  We can't
+       * safely flush it here, because the context might be bound to a
+       * different thread, and poking at its internals wouldn't be safe.
+       *
+       * Instead, use the WAIT_FOR_SUBMIT flag to block and hope that
+       * another thread submits the work.
+       */
+      args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+   }
+
    return gen_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
 }
 
@@ -317,6 +397,10 @@ iris_fence_get_fd(struct pipe_screen *p_screen,
    struct iris_screen *screen = (struct iris_screen *)p_screen;
    int fd = -1;
 
+   /* Deferred fences aren't supported. */
+   if (fence->unflushed_ctx)
+      return -1;
+
    for (unsigned i = 0; i < ARRAY_SIZE(fence->seqno); i++) {
       struct iris_seqno *seqno = fence->seqno[i];
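For reference, the kernel interface the deferred path leans on is
DRM_IOCTL_SYNCOBJ_WAIT with DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT.
A standalone sketch of that usage (a hypothetical wrapper compiled
against the kernel's DRM uAPI headers, not driver code):

   #include <stdbool.h>
   #include <stdint.h>
   #include <sys/ioctl.h>
   #include <drm/drm.h>

   /* Wait on a syncobj that may not have been submitted yet.  Without
    * WAIT_FOR_SUBMIT (kernel 5.2+), waiting on an unsubmitted syncobj
    * fails immediately instead of blocking until submission.
    */
   static bool wait_for_syncobj(int drm_fd, uint32_t handle,
                                int64_t abs_timeout_nsec)
   {
      struct drm_syncobj_wait args = {
         .handles = (uintptr_t)&handle,
         .timeout_nsec = abs_timeout_nsec,
         .count_handles = 1,
         .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
                  DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
      };
      return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
   }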