| author | Nicolai Hähnle <[email protected]> | 2016-09-07 11:01:17 +0200 |
|---|---|---|
| committer | Nicolai Hähnle <[email protected]> | 2016-09-27 16:45:11 +0200 |
| commit | 5af9eef719cda5e45b69a0cd1fde135df5dd3fbc (patch) | |
| tree | 0c55df13f4118cb640557ecdc3c7740268122f57 /src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | |
| parent | 6d89a4067627fdf568c6c4e3d9a201fd45d5352b (diff) | |
winsys/amdgpu: do not synchronize unsynchronized buffers
When a buffer is added to a CS without the SYNCHRONIZED usage flag, we now
no longer add a dependency on the buffer's fence(s).
However, we still need to add a fence to the buffer during flush, so that
cache reclaim works correctly (and in the hypothetical case that the buffer
is later added to a CS _with_ the SYNCHRONIZED flag).
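The two behaviors described above can be pictured with a small, self-contained model. This is only a sketch: the names (`struct buffer`, `USAGE_SYNCHRONIZED`, `add_dependencies`, `add_flush_fence`) are stand-ins for the real RADEON_USAGE_SYNCHRONIZED handling in amdgpu_cs.c, which is not part of the diff shown below.

```c
#include <stdio.h>

struct fence { int seqno; };                /* stand-in for pipe_fence_handle */

#define USAGE_SYNCHRONIZED 1u               /* stand-in for RADEON_USAGE_SYNCHRONIZED */

struct buffer {
   struct fence *fences[8];                 /* FIFO of fences still protecting the buffer */
   unsigned num_fences;
   unsigned usage;                          /* flags given when the buffer was added to the CS */
};

/* While building the submission: only SYNCHRONIZED buffers contribute waits. */
static void add_dependencies(const struct buffer *bo)
{
   if (!(bo->usage & USAGE_SYNCHRONIZED))
      return;                               /* unsynchronized: do not wait on earlier work */
   for (unsigned i = 0; i < bo->num_fences; ++i)
      printf("submission waits on fence %d\n", bo->fences[i]->seqno);
}

/* At flush: the new submission fence is recorded on every buffer regardless,
 * so cache reclaim and a later SYNCHRONIZED use still have something to check. */
static void add_flush_fence(struct buffer *bo, struct fence *submission_fence)
{
   if (bo->num_fences < 8)
      bo->fences[bo->num_fences++] = submission_fence;
}

int main(void)
{
   struct fence prev = { 1 }, next = { 2 };
   struct buffer bo = { { &prev }, 1, 0 };  /* added without USAGE_SYNCHRONIZED */

   add_dependencies(&bo);                   /* prints nothing: no dependency taken */
   add_flush_fence(&bo, &next);             /* the fence is still tracked on the buffer */
   printf("buffer now tracks %u fence(s)\n", bo.num_fences);
   return 0;
}
```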
It is now possible that the submissions referring to a buffer are no longer
linearly ordered, and so we may have to keep multiple fences around. We keep
the fences in a FIFO. It should usually stay quite short (# of contexts * 2,
for gfx + dma rings).
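As a rough sketch of such a per-buffer fence FIFO (illustrative only; the helper names below are made up and the real code lives elsewhere in the winsys), the array grows at the tail and is compacted from the front once fences are known to be idle, just as the amdgpu_bo_wait() hunk below does:

```c
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct fence;                    /* opaque; stand-in for pipe_fence_handle */

struct buffer {
   struct fence **fences;        /* FIFO: oldest fence first */
   unsigned num_fences;          /* fences currently tracked */
   unsigned max_fences;          /* allocated length of the array */
};

/* Append the newest fence at the tail, growing the array on demand. */
static bool fence_fifo_push(struct buffer *bo, struct fence *fence)
{
   if (bo->num_fences >= bo->max_fences) {
      unsigned new_max = bo->max_fences ? bo->max_fences * 2 : 4;
      struct fence **grown = realloc(bo->fences, new_max * sizeof(*grown));
      if (!grown)
         return false;
      bo->fences = grown;
      bo->max_fences = new_max;
   }
   bo->fences[bo->num_fences++] = fence;
   return true;
}

/* Drop the first 'idle' entries once they are known to have signalled;
 * this is the same front-of-FIFO compaction the amdgpu_bo_wait() hunk
 * below performs under ws->bo_fence_lock. */
static void fence_fifo_pop_idle(struct buffer *bo, unsigned idle)
{
   memmove(&bo->fences[0], &bo->fences[idle],
           (bo->num_fences - idle) * sizeof(*bo->fences));
   bo->num_fences -= idle;
}
```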
While we're at it, extract amdgpu_add_fence_dependency for a single buffer,
which will make adding the distinction between real buffer and slab cases
easier.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/winsys/amdgpu/drm/amdgpu_bo.c')
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 73 |
1 file changed, 49 insertions, 24 deletions
```diff
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 0dbd0fb28de..37a7ba1a1bb 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -73,43 +73,58 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
    }
 
    if (timeout == 0) {
+      unsigned idle_fences;
+      bool buffer_idle;
+
       pipe_mutex_lock(ws->bo_fence_lock);
-      if (bo->fence) {
-         if (amdgpu_fence_wait(bo->fence, 0, false)) {
-            /* Release the idle fence to avoid checking it again later. */
-            amdgpu_fence_reference(&bo->fence, NULL);
-         } else {
-            pipe_mutex_unlock(ws->bo_fence_lock);
-            return false;
-         }
+
+      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
+         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
+            break;
       }
+
+      /* Release the idle fences to avoid checking them again later. */
+      for (unsigned i = 0; i < idle_fences; ++i)
+         amdgpu_fence_reference(&bo->fences[i], NULL);
+
+      memmove(&bo->fences[0], &bo->fences[idle_fences],
+              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
+      bo->num_fences -= idle_fences;
+
+      buffer_idle = !bo->num_fences;
       pipe_mutex_unlock(ws->bo_fence_lock);
 
-      return true;
+      return buffer_idle;
    } else {
-      struct pipe_fence_handle *fence = NULL;
-      bool fence_idle = false;
       bool buffer_idle = true;
 
-      /* Take a reference to the fences, so that we can wait for it
-       * without the lock. */
       pipe_mutex_lock(ws->bo_fence_lock);
-      amdgpu_fence_reference(&fence, bo->fence);
-      pipe_mutex_unlock(ws->bo_fence_lock);
+      while (bo->num_fences && buffer_idle) {
+         struct pipe_fence_handle *fence = NULL;
+         bool fence_idle = false;
 
-      /* Now wait for the fence. */
-      if (fence) {
+         amdgpu_fence_reference(&fence, bo->fences[0]);
+
+         /* Wait for the fence. */
+         pipe_mutex_unlock(ws->bo_fence_lock);
          if (amdgpu_fence_wait(fence, abs_timeout, true))
             fence_idle = true;
          else
             buffer_idle = false;
-      }
+         pipe_mutex_lock(ws->bo_fence_lock);
+
+         /* Release an idle fence to avoid checking it again later, keeping in
+          * mind that the fence array may have been modified by other threads.
+          */
+         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
+            amdgpu_fence_reference(&bo->fences[0], NULL);
+            memmove(&bo->fences[0], &bo->fences[1],
+                    (bo->num_fences - 1) * sizeof(*bo->fences));
+            bo->num_fences--;
+         }
 
-      /* Release idle fences to avoid checking them again later. */
-      pipe_mutex_lock(ws->bo_fence_lock);
-      if (fence == bo->fence && fence_idle)
-         amdgpu_fence_reference(&bo->fence, NULL);
-      amdgpu_fence_reference(&fence, NULL);
+         amdgpu_fence_reference(&fence, NULL);
+      }
       pipe_mutex_unlock(ws->bo_fence_lock);
 
       return buffer_idle;
@@ -122,6 +137,16 @@ static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
    return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
 }
 
+static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
+{
+   for (unsigned i = 0; i < bo->num_fences; ++i)
+      amdgpu_fence_reference(&bo->fences[i], NULL);
+
+   FREE(bo->fences);
+   bo->num_fences = 0;
+   bo->max_fences = 0;
+}
+
 void amdgpu_bo_destroy(struct pb_buffer *_buf)
 {
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
@@ -135,7 +160,7 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
    amdgpu_va_range_free(bo->va_handle);
    amdgpu_bo_free(bo->bo);
 
-   amdgpu_fence_reference(&bo->fence, NULL);
+   amdgpu_bo_remove_fences(bo);
 
    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
       bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
```
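Note that the hunks above read bo->fences, bo->num_fences and bo->max_fences, which are not declared in this file; presumably the companion change to struct amdgpu_winsys_bo in amdgpu_bo.h (excluded by the diffstat filter) replaces the old single bo->fence pointer with fields along these lines. A sketch, not the actual header:

```c
/* Hypothetical sketch of the per-buffer fields the code above relies on;
 * the real declaration lives in amdgpu_bo.h, which this filtered diff omits. */
struct pipe_fence_handle;

struct amdgpu_winsys_bo_fence_state {
   struct pipe_fence_handle **fences;   /* FIFO of fences, oldest first */
   unsigned num_fences;                 /* fences currently in flight */
   unsigned max_fences;                 /* allocated length of fences[] */
};
```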