diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/meson.build | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_fence.c | 304 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 4 |
5 files changed, 312 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 2d746ab4bae..8dd33f60a8b 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -12,6 +12,7 @@ C_SOURCES := \ si_debug.c \ si_descriptors.c \ si_dma.c \ + si_fence.c \ si_hw_context.c \ si_pipe.c \ si_pipe.h \ diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index b0089c0d56e..4392184dbb6 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -28,6 +28,7 @@ files_libradeonsi = files( 'si_debug.c', 'si_descriptors.c', 'si_dma.c', + 'si_fence.c', 'si_hw_context.c', 'si_pipe.c', 'si_pipe.h', diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c new file mode 100644 index 00000000000..b416c47aa30 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -0,0 +1,304 @@ +/* + * Copyright 2013-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <libsync.h> + +#include "util/os_time.h" +#include "util/u_memory.h" + +#include "si_pipe.h" + +struct si_multi_fence { + struct pipe_reference reference; + struct pipe_fence_handle *gfx; + struct pipe_fence_handle *sdma; + + /* If the context wasn't flushed at fence creation, this is non-NULL. */ + struct { + struct r600_common_context *ctx; + unsigned ib_index; + } gfx_unflushed; +}; + +static void si_add_fence_dependency(struct r600_common_context *rctx, + struct pipe_fence_handle *fence) +{ + struct radeon_winsys *ws = rctx->ws; + + if (rctx->dma.cs) + ws->cs_add_fence_dependency(rctx->dma.cs, fence); + ws->cs_add_fence_dependency(rctx->gfx.cs, fence); +} + +static void si_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **dst, + struct pipe_fence_handle *src) +{ + struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; + struct si_multi_fence **rdst = (struct si_multi_fence **)dst; + struct si_multi_fence *rsrc = (struct si_multi_fence *)src; + + if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { + ws->fence_reference(&(*rdst)->gfx, NULL); + ws->fence_reference(&(*rdst)->sdma, NULL); + FREE(*rdst); + } + *rdst = rsrc; +} + +static void si_fence_server_sync(struct pipe_context *ctx, + struct pipe_fence_handle *fence) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + + /* Only amdgpu needs to handle fence dependencies (for fence imports). + * radeon synchronizes all rings by default and will not implement + * fence imports. + */ + if (rctx->screen->info.drm_major == 2) + return; + + /* Only imported fences need to be handled by fence_server_sync, + * because the winsys handles synchronizations automatically for BOs + * within the process. + * + * Simply skip unflushed fences here, and the winsys will drop no-op + * dependencies (i.e. dependencies within the same ring). + */ + if (rfence->gfx_unflushed.ctx) + return; + + /* All unflushed commands will not start execution before + * this fence dependency is signalled. + * + * Should we flush the context to allow more GPU parallelism? + */ + if (rfence->sdma) + si_add_fence_dependency(rctx, rfence->sdma); + if (rfence->gfx) + si_add_fence_dependency(rctx, rfence->gfx); +} + +static boolean si_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; + struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct r600_common_context *rctx; + int64_t abs_timeout = os_time_get_absolute_timeout(timeout); + + ctx = threaded_context_unwrap_sync(ctx); + rctx = ctx ? (struct r600_common_context*)ctx : NULL; + + if (rfence->sdma) { + if (!rws->fence_wait(rws, rfence->sdma, timeout)) + return false; + + /* Recompute the timeout after waiting. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? abs_timeout - time : 0; + } + } + + if (!rfence->gfx) + return true; + + /* Flush the gfx IB if it hasn't been flushed yet. */ + if (rctx && + rfence->gfx_unflushed.ctx == rctx && + rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { + rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); + rfence->gfx_unflushed.ctx = NULL; + + if (!timeout) + return false; + + /* Recompute the timeout after all that. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? abs_timeout - time : 0; + } + } + + return rws->fence_wait(rws, rfence->gfx, timeout); +} + +static void si_create_fence_fd(struct pipe_context *ctx, + struct pipe_fence_handle **pfence, int fd) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; + struct radeon_winsys *ws = rscreen->ws; + struct si_multi_fence *rfence; + + *pfence = NULL; + + if (!rscreen->info.has_sync_file) + return; + + rfence = CALLOC_STRUCT(si_multi_fence); + if (!rfence) + return; + + pipe_reference_init(&rfence->reference, 1); + rfence->gfx = ws->fence_import_sync_file(ws, fd); + if (!rfence->gfx) { + FREE(rfence); + return; + } + + *pfence = (struct pipe_fence_handle*)rfence; +} + +static int si_fence_get_fd(struct pipe_screen *screen, + struct pipe_fence_handle *fence) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct radeon_winsys *ws = rscreen->ws; + struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + int gfx_fd = -1, sdma_fd = -1; + + if (!rscreen->info.has_sync_file) + return -1; + + /* Deferred fences aren't supported. */ + assert(!rfence->gfx_unflushed.ctx); + if (rfence->gfx_unflushed.ctx) + return -1; + + if (rfence->sdma) { + sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma); + if (sdma_fd == -1) + return -1; + } + if (rfence->gfx) { + gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx); + if (gfx_fd == -1) { + if (sdma_fd != -1) + close(sdma_fd); + return -1; + } + } + + /* If we don't have FDs at this point, it means we don't have fences + * either. */ + if (sdma_fd == -1) + return gfx_fd; + if (gfx_fd == -1) + return sdma_fd; + + /* Get a fence that will be a combination of both fences. */ + sync_accumulate("radeonsi", &gfx_fd, sdma_fd); + close(sdma_fd); + return gfx_fd; +} + +static void si_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct pipe_screen *screen = ctx->screen; + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct radeon_winsys *ws = rctx->ws; + struct pipe_fence_handle *gfx_fence = NULL; + struct pipe_fence_handle *sdma_fence = NULL; + bool deferred_fence = false; + unsigned rflags = RADEON_FLUSH_ASYNC; + + if (flags & PIPE_FLUSH_END_OF_FRAME) + rflags |= RADEON_FLUSH_END_OF_FRAME; + + /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ + if (rctx->dma.cs) + rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); + + if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { + if (fence) + ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); + if (!(flags & PIPE_FLUSH_DEFERRED)) + ws->cs_sync_flush(rctx->gfx.cs); + } else { + /* Instead of flushing, create a deferred fence. Constraints: + * - The state tracker must allow a deferred flush. + * - The state tracker must request a fence. + * - fence_get_fd is not allowed. + * Thread safety in fence_finish must be ensured by the state tracker. + */ + if (flags & PIPE_FLUSH_DEFERRED && + !(flags & PIPE_FLUSH_FENCE_FD) && + fence) { + gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); + deferred_fence = true; + } else { + rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); + } + } + + /* Both engines can signal out of order, so we need to keep both fences. */ + if (fence) { + struct si_multi_fence *multi_fence = + CALLOC_STRUCT(si_multi_fence); + if (!multi_fence) { + ws->fence_reference(&sdma_fence, NULL); + ws->fence_reference(&gfx_fence, NULL); + goto finish; + } + + multi_fence->reference.count = 1; + /* If both fences are NULL, fence_finish will always return true. */ + multi_fence->gfx = gfx_fence; + multi_fence->sdma = sdma_fence; + + if (deferred_fence) { + multi_fence->gfx_unflushed.ctx = rctx; + multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; + } + + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle*)multi_fence; + } +finish: + if (!(flags & PIPE_FLUSH_DEFERRED)) { + if (rctx->dma.cs) + ws->cs_sync_flush(rctx->dma.cs); + ws->cs_sync_flush(rctx->gfx.cs); + } +} + +void si_init_fence_functions(struct si_context *ctx) +{ + ctx->b.b.flush = si_flush_from_st; + ctx->b.b.create_fence_fd = si_create_fence_fd; + ctx->b.b.fence_server_sync = si_fence_server_sync; +} + +void si_init_screen_fence_functions(struct si_screen *screen) +{ + screen->b.b.fence_finish = si_fence_finish; + screen->b.b.fence_reference = si_fence_reference; + screen->b.b.fence_get_fd = si_fence_get_fd; +} diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 7c50ffae6a3..a299ab7aff7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -248,6 +248,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, goto fail; si_init_all_descriptors(sctx); + si_init_fence_functions(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); si_init_viewport_functions(sctx); @@ -1001,6 +1002,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid; sscreen->b.b.resource_create = si_resource_create_common; + si_init_screen_fence_functions(sscreen); si_init_screen_state_functions(sscreen); /* Set these flags in debug_flags early, so that the shader cache takes diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6b42a54f00e..84e8a4c8673 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -601,6 +601,10 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary); /* si_dma.c */ void si_init_dma_functions(struct si_context *sctx); +/* si_fence.c */ +void si_init_fence_functions(struct si_context *ctx); +void si_init_screen_fence_functions(struct si_screen *screen); + /* si_hw_context.c */ void si_destroy_saved_cs(struct si_saved_cs *scs); void si_context_gfx_flush(void *context, unsigned flags, |