diff options
author | Eric Anholt <[email protected]> | 2014-11-19 17:39:04 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2014-11-20 13:07:07 -0800 |
commit | 21577571b37e68edc0422fbf80932588a4614abc (patch) | |
tree | 031a8f9d5adbb2c2bab7d0fd65137e1d7396cd36 | |
parent | 390799c496d363e7476afb0dbb8f28cbc6e20807 (diff) |
vc4: Update for new kernel ABI with async execution and waits.
Our submits now return immediately, and you have to manually wait for
things to complete if you want to (like a normal driver).
-rw-r--r-- | src/gallium/drivers/vc4/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_bufmgr.c | 65 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_bufmgr.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_context.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_context.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_drm.h | 38 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_fence.c | 108 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_resource.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_screen.h | 13 |
9 files changed, 250 insertions, 3 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 23365659c3a..6ec48ab36be 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -9,6 +9,7 @@ C_SOURCES := \ vc4_draw.c \ vc4_drm.h \ vc4_emit.c \ + vc4_fence.c \ vc4_formats.c \ vc4_opt_algebraic.c \ vc4_opt_copy_propagation.c \ diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 33592e84527..3b73ac80bf6 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -152,8 +152,57 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name) return true; } +bool +vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns) +{ +#ifndef USE_VC4_SIMULATOR + struct drm_vc4_wait_seqno wait; + memset(&wait, 0, sizeof(wait)); + wait.seqno = seqno; + wait.timeout_ns = timeout_ns; + + int ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait); + if (ret == -ETIME) { + return false; + } else if (ret != 0) { + fprintf(stderr, "wait failed\n"); + abort(); + } else { + screen->finished_seqno = wait.seqno; + return true; + } +#else + return true; +#endif +} + +bool +vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns) +{ +#ifndef USE_VC4_SIMULATOR + struct vc4_screen *screen = bo->screen; + + struct drm_vc4_wait_bo wait; + memset(&wait, 0, sizeof(wait)); + wait.handle = bo->handle; + wait.timeout_ns = timeout_ns; + + int ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait); + if (ret == -ETIME) { + return false; + } else if (ret != 0) { + fprintf(stderr, "wait failed\n"); + abort(); + } else { + return true; + } +#else + return true; +#endif +} + void * -vc4_bo_map(struct vc4_bo *bo) +vc4_bo_map_unsynchronized(struct vc4_bo *bo) { int ret; @@ -179,3 +228,17 @@ vc4_bo_map(struct vc4_bo *bo) return bo->map; } + +void * +vc4_bo_map(struct vc4_bo *bo) +{ + void *map = vc4_bo_map_unsynchronized(bo); + + bool ok = vc4_bo_wait(bo, 
PIPE_TIMEOUT_INFINITE); + if (!ok) { + fprintf(stderr, "BO wait for map failed\n"); + abort(); + } + + return map; +} diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h index 00ea149bd5f..4a1d4a4ef0d 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.h +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -78,9 +78,17 @@ vc4_bo_unreference(struct vc4_bo **bo) *bo = NULL; } - void * vc4_bo_map(struct vc4_bo *bo); +void * +vc4_bo_map_unsynchronized(struct vc4_bo *bo); + +bool +vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns); + +bool +vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns); + #endif /* VC4_BUFMGR_H */ diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index a6becaf73fc..bb30c0e8ca7 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -322,6 +322,8 @@ vc4_flush(struct pipe_context *pctx) } } + vc4->last_emit_seqno = submit.seqno; + vc4_reset_cl(&vc4->bcl); vc4_reset_cl(&vc4->rcl); vc4_reset_cl(&vc4->shader_rec); @@ -350,7 +352,15 @@ static void vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, unsigned flags) { + struct vc4_context *vc4 = vc4_context(pctx); + vc4_flush(pctx); + + if (fence) { + struct vc4_fence *f = vc4_fence_create(vc4->screen, + vc4->last_emit_seqno); + *fence = (struct pipe_fence_handle *)f; + } } /** diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 6a82d8fe5a4..207a7b4e672 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -219,6 +219,9 @@ struct vc4_context { uint8_t prim_mode; + /** Seqno of the last CL flush's job. 
*/ + uint64_t last_emit_seqno; + /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; struct pipe_blend_state *blend; diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h index 7d440742191..34f5a88fa01 100644 --- a/src/gallium/drivers/vc4/vc4_drm.h +++ b/src/gallium/drivers/vc4/vc4_drm.h @@ -27,8 +27,13 @@ #include <drm.h> #define DRM_VC4_SUBMIT_CL 0x00 +#define DRM_VC4_WAIT_SEQNO 0x01 +#define DRM_VC4_WAIT_BO 0x02 #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) +#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) +#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) + /** * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D @@ -109,6 +114,39 @@ struct drm_vc4_submit_cl { /* Number of BO handles passed in (size is that times 4). */ uint32_t bo_handle_count; + + uint32_t pad; + + /* Returned value of the seqno of this render job (for the + * wait ioctl). + */ + uint64_t seqno; +}; + +/** + * struct drm_vc4_wait_seqno - ioctl argument for waiting for + * DRM_VC4_SUBMIT_CL completion using its returned seqno. + * + * timeout_ns is the timeout in nanoseconds, where "0" means "don't + * block, just return the status." + */ +struct drm_vc4_wait_seqno { + uint64_t seqno; + uint64_t timeout_ns; +}; + +/** + * struct drm_vc4_wait_bo - ioctl argument for waiting for + * completion of the last DRM_VC4_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. 
+ */ +struct drm_vc4_wait_bo { + uint32_t handle; + uint32_t pad; + uint64_t timeout_ns; }; #endif /* _UAPI_VC4_DRM_H_ */ diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c new file mode 100644 index 00000000000..c081d51b16f --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_fence.c @@ -0,0 +1,108 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc4_fence.c + * + * Seqno-based fence management. + * + * We have two mechanisms for waiting in our kernel API: You can wait on a BO + * to have all rendering to it from any process completed, or wait on a + * seqno for that particular seqno to be passed. 
The fence API we're + * implementing is based on waiting for all rendering in the context to have + * completed (with no reference to what other processes might be doing with + * the same BOs), so we can just use the seqno of the last rendering we'd + * fired off as our fence marker. + */ + +#include "util/u_inlines.h" + +#include "vc4_screen.h" +#include "vc4_bufmgr.h" + +struct vc4_fence { + struct pipe_reference reference; + uint64_t seqno; +}; + +static void +vc4_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **pp, + struct pipe_fence_handle *pf) +{ + struct vc4_fence **p = (struct vc4_fence **)pp; + struct vc4_fence *f = (struct vc4_fence *)pf; + struct vc4_fence *old = *p; + + if (pipe_reference(&(*p)->reference, &f->reference)) { + free(old); + } +} + +static boolean +vc4_fence_signalled(struct pipe_screen *pscreen, + struct pipe_fence_handle *pf) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + struct vc4_fence *f = (struct vc4_fence *)pf; + + if (screen->finished_seqno >= f->seqno) + return true; + + return vc4_wait_seqno(screen, f->seqno, 0); +} + +static boolean +vc4_fence_finish(struct pipe_screen *pscreen, + struct pipe_fence_handle *pf, + uint64_t timeout_ns) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + struct vc4_fence *f = (struct vc4_fence *)pf; + + if (screen->finished_seqno >= f->seqno) + return true; + + return vc4_wait_seqno(screen, f->seqno, timeout_ns); +} + +struct vc4_fence * +vc4_fence_create(struct vc4_screen *screen, uint64_t seqno) +{ + struct vc4_fence *f = calloc(1, sizeof(*f)); + + if (!f) + return NULL; + + pipe_reference_init(&f->reference, 1); + f->seqno = seqno; + + return f; +} + +void +vc4_fence_init(struct vc4_screen *screen) +{ + screen->base.fence_reference = vc4_fence_reference; + screen->base.fence_signalled = vc4_fence_signalled; + screen->base.fence_finish = vc4_fence_finish; +} diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 
b02e2899329..a00ce71049b 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -126,7 +126,10 @@ vc4_resource_transfer_map(struct pipe_context *pctx, * need to do syncing stuff here yet. */ - buf = vc4_bo_map(rsc->bo); + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + buf = vc4_bo_map_unsynchronized(rsc->bo); + else + buf = vc4_bo_map(rsc->bo); if (!buf) { fprintf(stderr, "Failed to map bo\n"); goto fail; diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 470cb06e2bf..ba07490fc94 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -47,6 +47,13 @@ struct vc4_screen { void *simulator_mem_base; uint32_t simulator_mem_size; + + /** The last seqno we've completed a wait for. + * + * This lets us slightly optimize our waits by skipping wait syscalls + * if we know the job's already done. + */ + uint64_t finished_seqno; }; static inline struct vc4_screen * @@ -67,4 +74,10 @@ vc4_screen_bo_from_handle(struct pipe_screen *pscreen, extern uint32_t vc4_debug; +void +vc4_fence_init(struct vc4_screen *screen); + +struct vc4_fence * +vc4_fence_create(struct vc4_screen *screen, uint64_t seqno); + #endif /* VC4_SCREEN_H */ |