summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2014-11-19 17:39:04 -0800
committerEric Anholt <[email protected]>2014-11-20 13:07:07 -0800
commit21577571b37e68edc0422fbf80932588a4614abc (patch)
tree031a8f9d5adbb2c2bab7d0fd65137e1d7396cd36
parent390799c496d363e7476afb0dbb8f28cbc6e20807 (diff)
vc4: Update for new kernel ABI with async execution and waits.
Our submits now return immediately and you have to manually wait for things to complete if you want to (like a normal driver).
-rw-r--r--src/gallium/drivers/vc4/Makefile.sources1
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.c65
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.h10
-rw-r--r--src/gallium/drivers/vc4/vc4_context.c10
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h3
-rw-r--r--src/gallium/drivers/vc4/vc4_drm.h38
-rw-r--r--src/gallium/drivers/vc4/vc4_fence.c108
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.h13
9 files changed, 250 insertions, 3 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 23365659c3a..6ec48ab36be 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -9,6 +9,7 @@ C_SOURCES := \
vc4_draw.c \
vc4_drm.h \
vc4_emit.c \
+ vc4_fence.c \
vc4_formats.c \
vc4_opt_algebraic.c \
vc4_opt_copy_propagation.c \
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 33592e84527..3b73ac80bf6 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -152,8 +152,57 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
return true;
}
+/**
+ * Waits for the render job identified by seqno to complete.
+ *
+ * @param screen      screen whose DRM fd is used for the wait ioctl.
+ * @param seqno       seqno to wait for.
+ * @param timeout_ns  timeout in nanoseconds; 0 means "don't block, just
+ *                    return the status".
+ * @return true if the wait succeeded, false if it timed out.  Any other
+ *         ioctl failure is treated as fatal and aborts the process.
+ *
+ * With USE_VC4_SIMULATOR defined no ioctl is issued and true is returned.
+ */
+bool
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+{
+#ifndef USE_VC4_SIMULATOR
+        struct drm_vc4_wait_seqno wait;
+        memset(&wait, 0, sizeof(wait));
+        wait.seqno = seqno;
+        wait.timeout_ns = timeout_ns;
+
+        /* drmIoctl() reports failure by returning -1 with the cause in
+         * errno; it never returns a negative errno value.  A timeout
+         * therefore has to be recognised through errno, otherwise every
+         * timed-out wait (including zero-timeout polls) would abort.
+         */
+        if (drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait) != 0) {
+                if (errno == ETIME)
+                        return false;
+
+                fprintf(stderr, "wait failed\n");
+                abort();
+        }
+
+        /* Remember the completed seqno so later fence checks can skip the
+         * syscall.
+         */
+        screen->finished_seqno = wait.seqno;
+        return true;
+#else
+        return true;
+#endif
+}
+
+/**
+ * Waits for rendering to a BO to complete, using the WAIT_BO ioctl.
+ *
+ * @param bo          buffer object to wait on; its screen supplies the
+ *                    DRM fd.
+ * @param timeout_ns  timeout in nanoseconds passed through to the kernel.
+ * @return true if the wait succeeded, false if it timed out.  Any other
+ *         ioctl failure is treated as fatal and aborts the process.
+ *
+ * With USE_VC4_SIMULATOR defined no ioctl is issued and true is returned.
+ */
+bool
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+{
+#ifndef USE_VC4_SIMULATOR
+        struct vc4_screen *screen = bo->screen;
+
+        struct drm_vc4_wait_bo wait;
+        memset(&wait, 0, sizeof(wait));
+        wait.handle = bo->handle;
+        wait.timeout_ns = timeout_ns;
+
+        /* drmIoctl() reports failure by returning -1 with the cause in
+         * errno; it never returns a negative errno value, so a timeout
+         * must be detected through errno rather than the return value.
+         */
+        if (drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait) != 0) {
+                if (errno == ETIME)
+                        return false;
+
+                fprintf(stderr, "wait failed\n");
+                abort();
+        }
+
+        return true;
+#else
+        return true;
+#endif
+}
+
void *
-vc4_bo_map(struct vc4_bo *bo)
+vc4_bo_map_unsynchronized(struct vc4_bo *bo)
{
int ret;
@@ -179,3 +228,17 @@ vc4_bo_map(struct vc4_bo *bo)
return bo->map;
}
+
+/**
+ * Synchronized map: obtains the mapping from vc4_bo_map_unsynchronized()
+ * and then waits, with an infinite timeout, for vc4_bo_wait() to succeed
+ * on the BO before handing the pointer back.
+ *
+ * A failed wait is fatal and aborts the process.
+ */
+void *
+vc4_bo_map(struct vc4_bo *bo)
+{
+        void *ptr = vc4_bo_map_unsynchronized(bo);
+
+        if (vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE))
+                return ptr;
+
+        fprintf(stderr, "BO wait for map failed\n");
+        abort();
+}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index 00ea149bd5f..4a1d4a4ef0d 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -78,9 +78,17 @@ vc4_bo_unreference(struct vc4_bo **bo)
*bo = NULL;
}
-
void *
vc4_bo_map(struct vc4_bo *bo);
+void *
+vc4_bo_map_unsynchronized(struct vc4_bo *bo);
+
+bool
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+
+bool
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+
#endif /* VC4_BUFMGR_H */
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index a6becaf73fc..bb30c0e8ca7 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -322,6 +322,8 @@ vc4_flush(struct pipe_context *pctx)
}
}
+ vc4->last_emit_seqno = submit.seqno;
+
vc4_reset_cl(&vc4->bcl);
vc4_reset_cl(&vc4->rcl);
vc4_reset_cl(&vc4->shader_rec);
@@ -350,7 +352,15 @@ static void
vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
{
+ struct vc4_context *vc4 = vc4_context(pctx);
+
vc4_flush(pctx);
+
+ if (fence) {
+ struct vc4_fence *f = vc4_fence_create(vc4->screen,
+ vc4->last_emit_seqno);
+ *fence = (struct pipe_fence_handle *)f;
+ }
}
/**
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 6a82d8fe5a4..207a7b4e672 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -219,6 +219,9 @@ struct vc4_context {
uint8_t prim_mode;
+ /** Seqno of the last CL flush's job. */
+ uint64_t last_emit_seqno;
+
/** @{ Current pipeline state objects */
struct pipe_scissor_state scissor;
struct pipe_blend_state *blend;
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index 7d440742191..34f5a88fa01 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -27,8 +27,13 @@
#include <drm.h>
#define DRM_VC4_SUBMIT_CL 0x00
+#define DRM_VC4_WAIT_SEQNO 0x01
+#define DRM_VC4_WAIT_BO 0x02
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
+
/**
* struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
@@ -109,6 +114,39 @@ struct drm_vc4_submit_cl {
/* Number of BO handles passed in (size is that times 4). */
uint32_t bo_handle_count;
+
+ uint32_t pad;
+
+ /* Returned value of the seqno of this render job (for the
+ * wait ioctl).
+ */
+ uint64_t seqno;
+};
+
+/**
+ * struct drm_vc4_wait_seqno - ioctl argument for waiting for
+ * DRM_VC4_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc4_wait_seqno {
+ /* Seqno to wait for, as returned in drm_vc4_submit_cl.seqno. */
+ uint64_t seqno;
+ /* Timeout in nanoseconds; 0 means don't block, just report status. */
+ uint64_t timeout_ns;
+};
+
+/**
+ * struct drm_vc4_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC4_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc4_wait_bo {
+ /* Handle of the BO to wait on. */
+ uint32_t handle;
+ /* NOTE(review): presumably explicit padding so timeout_ns is 64-bit
+  * aligned; userspace zeroes the whole struct before use -- confirm
+  * against the kernel side.
+  */
+ uint32_t pad;
+ /* Timeout in nanoseconds. */
+ uint64_t timeout_ns;
};
#endif /* _UAPI_VC4_DRM_H_ */
diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c
new file mode 100644
index 00000000000..c081d51b16f
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_fence.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc4_fence.c
+ *
+ * Seqno-based fence management.
+ *
+ * We have two mechanisms for waiting in our kernel API: You can wait on a BO
+ * to have all rendering to it from any process be completed, or wait on a
+ * seqno for that particular seqno to be passed. The fence API we're
+ * implementing is based on waiting for all rendering in the context to have
+ * completed (with no reference to what other processes might be doing with
+ * the same BOs), so we can just use the seqno of the last rendering we'd
+ * fired off as our fence marker.
+ */
+
+#include "util/u_inlines.h"
+
+#include "vc4_screen.h"
+#include "vc4_bufmgr.h"
+
+/** Reference-counted fence identified by the seqno of a submitted job. */
+struct vc4_fence {
+ /* Refcount; initialized to 1 by vc4_fence_create(). */
+ struct pipe_reference reference;
+ /* Seqno that must be passed for this fence to count as signalled. */
+ uint64_t seqno;
+};
+
+/**
+ * fence_reference screen hook: makes the handle at *pp refer to pf.
+ *
+ * The reference previously held through *pp (if any) is dropped and the old
+ * fence is freed once its refcount reaches zero; pf (if non-NULL) gains a
+ * reference.  Passing pf == NULL simply releases *pp.
+ *
+ * @param pscreen  unused.
+ * @param pp       location of the fence handle to update.
+ * @param pf       fence to store in *pp, or NULL.
+ */
+static void
+vc4_fence_reference(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle **pp,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc4_fence **p = (struct vc4_fence **)pp;
+        struct vc4_fence *f = (struct vc4_fence *)pf;
+        struct vc4_fence *old = *p;
+
+        /* 'reference' is the first member of struct vc4_fence, so a NULL
+         * fence yields a NULL pipe_reference pointer here.
+         * NOTE(review): relies on pipe_reference() accepting NULL for
+         * either argument -- confirm against util/u_inlines.h.
+         */
+        if (pipe_reference(&(*p)->reference, &f->reference)) {
+                free(old);
+        }
+
+        /* Actually retarget the caller's handle.  Without this store *pp
+         * would keep pointing at the old fence, which may just have been
+         * freed above.
+         */
+        *p = f;
+}
+
+/**
+ * fence_signalled screen hook: non-blocking query of whether the fence's
+ * seqno has been passed.
+ *
+ * The screen's cached finished_seqno is consulted first; only if that is
+ * not far enough along is the kernel polled with a zero timeout.
+ */
+static boolean
+vc4_fence_signalled(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc4_fence *fence = (struct vc4_fence *)pf;
+        struct vc4_screen *vc4s = vc4_screen(pscreen);
+
+        return (vc4s->finished_seqno >= fence->seqno ||
+                vc4_wait_seqno(vc4s, fence->seqno, 0));
+}
+
+/**
+ * fence_finish screen hook: waits up to timeout_ns nanoseconds for the
+ * fence's seqno to be passed.
+ *
+ * Returns true right away when the screen's cached finished_seqno already
+ * covers the fence; otherwise returns the result of vc4_wait_seqno().
+ */
+static boolean
+vc4_fence_finish(struct pipe_screen *pscreen,
+                 struct pipe_fence_handle *pf,
+                 uint64_t timeout_ns)
+{
+        struct vc4_fence *fence = (struct vc4_fence *)pf;
+        struct vc4_screen *vc4s = vc4_screen(pscreen);
+
+        if (fence->seqno > vc4s->finished_seqno)
+                return vc4_wait_seqno(vc4s, fence->seqno, timeout_ns);
+
+        return true;
+}
+
+/**
+ * Allocates a fence for the given seqno with an initial refcount of 1.
+ *
+ * @param screen  currently unused.
+ * @param seqno   seqno the fence will wait on.
+ * @return the new fence, or NULL if allocation failed.
+ */
+struct vc4_fence *
+vc4_fence_create(struct vc4_screen *screen, uint64_t seqno)
+{
+        struct vc4_fence *fence = calloc(1, sizeof(*fence));
+
+        if (fence != NULL) {
+                pipe_reference_init(&fence->reference, 1);
+                fence->seqno = seqno;
+        }
+
+        return fence;
+}
+
+/** Installs the vc4 fence hooks into the screen's base function table. */
+void
+vc4_fence_init(struct vc4_screen *screen)
+{
+        screen->base.fence_finish = vc4_fence_finish;
+        screen->base.fence_signalled = vc4_fence_signalled;
+        screen->base.fence_reference = vc4_fence_reference;
+}
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index b02e2899329..a00ce71049b 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -126,7 +126,10 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
* need to do syncing stuff here yet.
*/
- buf = vc4_bo_map(rsc->bo);
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+ buf = vc4_bo_map_unsynchronized(rsc->bo);
+ else
+ buf = vc4_bo_map(rsc->bo);
if (!buf) {
fprintf(stderr, "Failed to map bo\n");
goto fail;
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 470cb06e2bf..ba07490fc94 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -47,6 +47,13 @@ struct vc4_screen {
void *simulator_mem_base;
uint32_t simulator_mem_size;
+
+ /** The last seqno we've completed a wait for.
+ *
+ * This lets us slightly optimize our waits by skipping wait syscalls
+ * if we know the job's already done.
+ */
+ uint64_t finished_seqno;
};
static inline struct vc4_screen *
@@ -67,4 +74,10 @@ vc4_screen_bo_from_handle(struct pipe_screen *pscreen,
extern uint32_t vc4_debug;
+void
+vc4_fence_init(struct vc4_screen *screen);
+
+struct vc4_fence *
+vc4_fence_create(struct vc4_screen *screen, uint64_t seqno);
+
#endif /* VC4_SCREEN_H */