| author | Nicolai Hähnle <[email protected]> | 2016-09-07 11:01:17 +0200 |
|---|---|---|
| committer | Nicolai Hähnle <[email protected]> | 2016-09-27 16:45:11 +0200 |
| commit | 5af9eef719cda5e45b69a0cd1fde135df5dd3fbc (patch) | |
| tree | 0c55df13f4118cb640557ecdc3c7740268122f57 /src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | |
| parent | 6d89a4067627fdf568c6c4e3d9a201fd45d5352b (diff) | |
winsys/amdgpu: do not synchronize unsynchronized buffers
When a buffer is added to a CS without the SYNCHRONIZED usage flag, we now
no longer add a dependency on the buffer's fence(s).
However, we still need to add a fence to the buffer during flush, so that
cache reclaim works correctly (and in the hypothetical case that the buffer
is later added to a CS _with_ the SYNCHRONIZED flag).
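The two behaviors described above can be pictured with a small, self-contained model. This is only a sketch: the names (`struct buffer`, `USAGE_SYNCHRONIZED`, `add_dependencies`, `add_flush_fence`) are stand-ins for the real RADEON_USAGE_SYNCHRONIZED handling in amdgpu_cs.c, which is not part of the diff shown below.

```c
#include <stdio.h>

struct fence { int seqno; };                /* stand-in for pipe_fence_handle */

#define USAGE_SYNCHRONIZED 1u               /* stand-in for RADEON_USAGE_SYNCHRONIZED */

struct buffer {
   struct fence *fences[8];                 /* FIFO of fences still protecting the buffer */
   unsigned num_fences;
   unsigned usage;                          /* flags given when the buffer was added to the CS */
};

/* While building the submission: only SYNCHRONIZED buffers contribute waits. */
static void add_dependencies(const struct buffer *bo)
{
   if (!(bo->usage & USAGE_SYNCHRONIZED))
      return;                               /* unsynchronized: do not wait on earlier work */
   for (unsigned i = 0; i < bo->num_fences; ++i)
      printf("submission waits on fence %d\n", bo->fences[i]->seqno);
}

/* At flush: the new submission fence is recorded on every buffer regardless,
 * so cache reclaim and a later SYNCHRONIZED use still have something to check. */
static void add_flush_fence(struct buffer *bo, struct fence *submission_fence)
{
   if (bo->num_fences < 8)
      bo->fences[bo->num_fences++] = submission_fence;
}

int main(void)
{
   struct fence prev = { 1 }, next = { 2 };
   struct buffer bo = { { &prev }, 1, 0 };  /* added without USAGE_SYNCHRONIZED */

   add_dependencies(&bo);                   /* prints nothing: no dependency taken */
   add_flush_fence(&bo, &next);             /* the fence is still tracked on the buffer */
   printf("buffer now tracks %u fence(s)\n", bo.num_fences);
   return 0;
}
```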
It is now possible that the submissions referring to a buffer are no longer
linearly ordered, and so we may have to keep multiple fences around. We keep
the fences in a FIFO. It should usually stay quite short (# of contexts * 2,
for gfx + dma rings).
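As a rough sketch of such a per-buffer fence FIFO (illustrative only; the helper names below are made up and the real code lives elsewhere in the winsys), the array grows at the tail and is compacted from the front once fences are known to be idle, just as the amdgpu_bo_wait() hunk below does:

```c
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct fence;                    /* opaque; stand-in for pipe_fence_handle */

struct buffer {
   struct fence **fences;        /* FIFO: oldest fence first */
   unsigned num_fences;          /* fences currently tracked */
   unsigned max_fences;          /* allocated length of the array */
};

/* Append the newest fence at the tail, growing the array on demand. */
static bool fence_fifo_push(struct buffer *bo, struct fence *fence)
{
   if (bo->num_fences >= bo->max_fences) {
      unsigned new_max = bo->max_fences ? bo->max_fences * 2 : 4;
      struct fence **grown = realloc(bo->fences, new_max * sizeof(*grown));
      if (!grown)
         return false;
      bo->fences = grown;
      bo->max_fences = new_max;
   }
   bo->fences[bo->num_fences++] = fence;
   return true;
}

/* Drop the first 'idle' entries once they are known to have signalled;
 * this is the same front-of-FIFO compaction the amdgpu_bo_wait() hunk
 * below performs under ws->bo_fence_lock. */
static void fence_fifo_pop_idle(struct buffer *bo, unsigned idle)
{
   memmove(&bo->fences[0], &bo->fences[idle],
           (bo->num_fences - idle) * sizeof(*bo->fences));
   bo->num_fences -= idle;
}
```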
While we're at it, extract amdgpu_add_fence_dependency for a single buffer,
which will make adding the distinction between real buffer and slab cases
easier.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/winsys/amdgpu/drm/amdgpu_bo.c')
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 73 |
1 file changed, 49 insertions, 24 deletions
```diff
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 0dbd0fb28de..37a7ba1a1bb 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -73,43 +73,58 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
    }
 
    if (timeout == 0) {
+      unsigned idle_fences;
+      bool buffer_idle;
+
       pipe_mutex_lock(ws->bo_fence_lock);
-      if (bo->fence) {
-         if (amdgpu_fence_wait(bo->fence, 0, false)) {
-            /* Release the idle fence to avoid checking it again later. */
-            amdgpu_fence_reference(&bo->fence, NULL);
-         } else {
-            pipe_mutex_unlock(ws->bo_fence_lock);
-            return false;
-         }
+
+      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
+         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
+            break;
       }
+
+      /* Release the idle fences to avoid checking them again later. */
+      for (unsigned i = 0; i < idle_fences; ++i)
+         amdgpu_fence_reference(&bo->fences[i], NULL);
+
+      memmove(&bo->fences[0], &bo->fences[idle_fences],
+              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
+      bo->num_fences -= idle_fences;
+
+      buffer_idle = !bo->num_fences;
       pipe_mutex_unlock(ws->bo_fence_lock);
 
-      return true;
+      return buffer_idle;
    } else {
-      struct pipe_fence_handle *fence = NULL;
-      bool fence_idle = false;
       bool buffer_idle = true;
 
-      /* Take a reference to the fences, so that we can wait for it
-       * without the lock. */
       pipe_mutex_lock(ws->bo_fence_lock);
-      amdgpu_fence_reference(&fence, bo->fence);
-      pipe_mutex_unlock(ws->bo_fence_lock);
+      while (bo->num_fences && buffer_idle) {
+         struct pipe_fence_handle *fence = NULL;
+         bool fence_idle = false;
 
-      /* Now wait for the fence. */
-      if (fence) {
+         amdgpu_fence_reference(&fence, bo->fences[0]);
+
+         /* Wait for the fence. */
+         pipe_mutex_unlock(ws->bo_fence_lock);
          if (amdgpu_fence_wait(fence, abs_timeout, true))
             fence_idle = true;
          else
             buffer_idle = false;
-      }
+         pipe_mutex_lock(ws->bo_fence_lock);
+
+         /* Release an idle fence to avoid checking it again later, keeping in
+          * mind that the fence array may have been modified by other threads.
+          */
+         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
+            amdgpu_fence_reference(&bo->fences[0], NULL);
+            memmove(&bo->fences[0], &bo->fences[1],
+                    (bo->num_fences - 1) * sizeof(*bo->fences));
+            bo->num_fences--;
+         }
 
-      /* Release idle fences to avoid checking them again later. */
-      pipe_mutex_lock(ws->bo_fence_lock);
-      if (fence == bo->fence && fence_idle)
-         amdgpu_fence_reference(&bo->fence, NULL);
-      amdgpu_fence_reference(&fence, NULL);
+         amdgpu_fence_reference(&fence, NULL);
+      }
       pipe_mutex_unlock(ws->bo_fence_lock);
 
       return buffer_idle;
@@ -122,6 +137,16 @@ static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
    return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
 }
 
+static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
+{
+   for (unsigned i = 0; i < bo->num_fences; ++i)
+      amdgpu_fence_reference(&bo->fences[i], NULL);
+
+   FREE(bo->fences);
+   bo->num_fences = 0;
+   bo->max_fences = 0;
+}
+
 void amdgpu_bo_destroy(struct pb_buffer *_buf)
 {
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
@@ -135,7 +160,7 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
    amdgpu_va_range_free(bo->va_handle);
    amdgpu_bo_free(bo->bo);
 
-   amdgpu_fence_reference(&bo->fence, NULL);
+   amdgpu_bo_remove_fences(bo);
 
    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
       bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
```
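Note that the hunks above read bo->fences, bo->num_fences and bo->max_fences, which are not declared in this file; presumably the companion change to struct amdgpu_winsys_bo in amdgpu_bo.h (excluded by the diffstat filter) replaces the old single bo->fence pointer with fields along these lines. A sketch, not the actual header:

```c
/* Hypothetical sketch of the per-buffer fields the code above relies on;
 * the real declaration lives in amdgpu_bo.h, which this filtered diff omits. */
struct pipe_fence_handle;

struct amdgpu_winsys_bo_fence_state {
   struct pipe_fence_handle **fences;   /* FIFO of fences, oldest first */
   unsigned num_fences;                 /* fences currently in flight */
   unsigned max_fences;                 /* allocated length of fences[] */
};
```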