diff options
author | Nicolai Hähnle <[email protected]> | 2017-10-22 17:38:50 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2017-11-09 14:00:42 +0100 |
commit | 609a230375cae7710b2a9f40a42341b4cd853a3e (patch) | |
tree | 1b404da87aa1ac457c8d8320b7e62081becc52c0 /src/gallium/drivers/radeonsi | |
parent | 11b380ed0c61344b461ad2d1b66bf81e2c46b84b (diff) |
gallium/u_threaded: implement asynchronous flushes
This requires out-of-band creation of fences, and will be signaled to
the pipe_context::flush implementation by a special TC_FLUSH_ASYNC flag.
v2:
- remove an incorrect assertion
- handle fence_server_sync for unsubmitted fences by
relying on the improved cs_add_fence_dependency
- only implement asynchronous flushes on amdgpu
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_fence.c | 104 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 2 |
3 files changed, 84 insertions, 25 deletions
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index b416c47aa30..701e8df9cfc 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -26,6 +26,7 @@ #include "util/os_time.h" #include "util/u_memory.h" +#include "util/u_queue.h" #include "si_pipe.h" @@ -33,6 +34,8 @@ struct si_multi_fence { struct pipe_reference reference; struct pipe_fence_handle *gfx; struct pipe_fence_handle *sdma; + struct tc_unflushed_batch_token *tc_token; + struct util_queue_fence ready; /* If the context wasn't flushed at fence creation, this is non-NULL. */ struct { @@ -62,32 +65,48 @@ static void si_fence_reference(struct pipe_screen *screen, if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { ws->fence_reference(&(*rdst)->gfx, NULL); ws->fence_reference(&(*rdst)->sdma, NULL); + tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL); FREE(*rdst); } *rdst = rsrc; } +static struct si_multi_fence *si_create_multi_fence() +{ + struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence); + if (!fence) + return NULL; + + pipe_reference_init(&fence->reference, 1); + util_queue_fence_init(&fence->ready); + + return fence; +} + +struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx, + struct tc_unflushed_batch_token *tc_token) +{ + struct si_multi_fence *fence = si_create_multi_fence(); + if (!fence) + return NULL; + + util_queue_fence_reset(&fence->ready); + tc_unflushed_batch_token_reference(&fence->tc_token, tc_token); + + return (struct pipe_fence_handle *)fence; +} + static void si_fence_server_sync(struct pipe_context *ctx, struct pipe_fence_handle *fence) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct si_multi_fence *rfence = (struct si_multi_fence *)fence; - /* Only amdgpu needs to handle fence dependencies (for fence imports). - * radeon synchronizes all rings by default and will not implement - * fence imports. - */ - if (rctx->screen->info.drm_major == 2) - return; + util_queue_fence_wait(&rfence->ready); - /* Only imported fences need to be handled by fence_server_sync, - * because the winsys handles synchronizations automatically for BOs - * within the process. - * - * Simply skip unflushed fences here, and the winsys will drop no-op - * dependencies (i.e. dependencies within the same ring). - */ - if (rfence->gfx_unflushed.ctx) + /* Unflushed fences from the same context are no-ops. */ + if (rfence->gfx_unflushed.ctx && + rfence->gfx_unflushed.ctx == rctx) return; /* All unflushed commands will not start execution before @@ -114,6 +133,30 @@ static boolean si_fence_finish(struct pipe_screen *screen, ctx = threaded_context_unwrap_sync(ctx); rctx = ctx ? (struct r600_common_context*)ctx : NULL; + if (!util_queue_fence_is_signalled(&rfence->ready)) { + if (!timeout) + return false; + + if (rfence->tc_token) { + /* Ensure that si_flush_from_st will be called for + * this fence, but only if we're in the API thread + * where the context is current. + * + * Note that the batch containing the flush may already + * be in flight in the driver thread, so the fence + * may not be ready yet when this call returns. + */ + threaded_context_flush(ctx, rfence->tc_token); + } + + if (timeout == PIPE_TIMEOUT_INFINITE) { + util_queue_fence_wait(&rfence->ready); + } else { + if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout)) + return false; + } + } + if (rfence->sdma) { if (!rws->fence_wait(rws, rfence->sdma, timeout)) return false; @@ -160,11 +203,10 @@ static void si_create_fence_fd(struct pipe_context *ctx, if (!rscreen->info.has_sync_file) return; - rfence = CALLOC_STRUCT(si_multi_fence); + rfence = si_create_multi_fence(); if (!rfence) return; - pipe_reference_init(&rfence->reference, 1); rfence->gfx = ws->fence_import_sync_file(ws, fd); if (!rfence->gfx) { FREE(rfence); @@ -185,6 +227,8 @@ static int si_fence_get_fd(struct pipe_screen *screen, if (!rscreen->info.has_sync_file) return -1; + util_queue_fence_wait(&rfence->ready); + /* Deferred fences aren't supported. */ assert(!rfence->gfx_unflushed.ctx); if (rfence->gfx_unflushed.ctx) @@ -260,15 +304,23 @@ static void si_flush_from_st(struct pipe_context *ctx, /* Both engines can signal out of order, so we need to keep both fences. */ if (fence) { - struct si_multi_fence *multi_fence = - CALLOC_STRUCT(si_multi_fence); - if (!multi_fence) { - ws->fence_reference(&sdma_fence, NULL); - ws->fence_reference(&gfx_fence, NULL); - goto finish; + struct si_multi_fence *multi_fence; + + if (flags & TC_FLUSH_ASYNC) { + multi_fence = (struct si_multi_fence *)*fence; + assert(multi_fence); + } else { + multi_fence = si_create_multi_fence(); + if (!multi_fence) { + ws->fence_reference(&sdma_fence, NULL); + ws->fence_reference(&gfx_fence, NULL); + goto finish; + } + + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle*)multi_fence; } - multi_fence->reference.count = 1; /* If both fences are NULL, fence_finish will always return true. */ multi_fence->gfx = gfx_fence; multi_fence->sdma = sdma_fence; @@ -278,8 +330,10 @@ static void si_flush_from_st(struct pipe_context *ctx, multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; } - screen->fence_reference(screen, fence, NULL); - *fence = (struct pipe_fence_handle*)multi_fence; + if (flags & TC_FLUSH_ASYNC) { + util_queue_fence_signal(&multi_fence->ready); + tc_unflushed_batch_token_reference(&multi_fence->tc_token, NULL); + } } finish: if (!(flags & PIPE_FLUSH_DEFERRED)) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index a299ab7aff7..ef5cddda0eb 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -397,8 +397,11 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, if (sscreen->b.debug_flags & DBG_ALL_SHADERS) return ctx; + /* Use asynchronous flushes only on amdgpu, since the radeon + * implementation for fence_server_sync is incomplete. */ return threaded_context_create(ctx, &sscreen->b.pool_transfers, si_replace_buffer_storage, + sscreen->b.info.drm_major >= 3 ? si_create_fence : NULL, &((struct si_context*)ctx)->b.tc); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 84e8a4c8673..de089e351ce 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -604,6 +604,8 @@ void si_init_dma_functions(struct si_context *sctx); /* si_fence.c */ void si_init_fence_functions(struct si_context *ctx); void si_init_screen_fence_functions(struct si_screen *screen); +struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx, + struct tc_unflushed_batch_token *tc_token); /* si_hw_context.c */ void si_destroy_saved_cs(struct si_saved_cs *scs); |