summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bas Nieuwenhuizen <[email protected]> 2018-02-26 23:48:27 +0100
committer Bas Nieuwenhuizen <[email protected]> 2018-03-01 01:07:18 +0100
commit 34bd5e2e2e8d9c213b051152f7a8b731151d9be5 (patch)
tree 02635b72eebcb6d1417c4bd8332358196142f709
parent 6968d782d3063c639e80dbcf6df944902d72692f (diff)
radv: Implement more efficient !waitAll fence waiting.
Reviewed-by: Dave Airlie <[email protected]>
-rw-r--r-- src/amd/vulkan/radv_device.c 36
-rw-r--r-- src/amd/vulkan/radv_radeon_winsys.h 5
-rw-r--r-- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 34
3 files changed, 75 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 8eadd8f2037..21ccfa679f8 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2907,6 +2907,17 @@ static uint64_t radv_get_absolute_timeout(uint64_t timeout)
return current_time + timeout;
}
+/* Returns true only when every fence in pFences has neither syncobj nor temp_syncobj set and is already signalled or submitted (also true for fenceCount == 0); radv_WaitForFences uses it to gate the fences_wait() path. */
+static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
+{
+ for (uint32_t i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]); /* presumably declares 'fence' as the radv_fence behind the handle; macro is defined outside this patch */
+ if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted)) /* a single unsuitable fence rejects the whole set */
+ return false;
+ }
+ return true;
+}
+
VkResult radv_WaitForFences(
VkDevice _device,
uint32_t fenceCount,
@@ -2918,6 +2929,31 @@ VkResult radv_WaitForFences(
timeout = radv_get_absolute_timeout(timeout);
if (!waitAll && fenceCount > 1) {
+ /* Not doing this by default for waitAll, due to needing to allocate twice. */
+ if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
+ uint32_t wait_count = 0;
+ struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
+ if (!fences)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ for (uint32_t i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+
+ if (fence->signalled) {
+ free(fences);
+ return VK_SUCCESS;
+ }
+
+ fences[wait_count++] = fence->fence;
+ }
+
+ bool success = device->ws->fences_wait(device->ws, fences, wait_count,
+ waitAll, timeout - radv_get_current_time());
+
+ free(fences);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
+ }
+
while(radv_get_current_time() <= timeout) {
for (uint32_t i = 0; i < fenceCount; ++i) {
if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4c306692e55..643d76a826f 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -270,6 +270,11 @@ struct radeon_winsys {
struct radeon_winsys_fence *fence,
bool absolute,
uint64_t timeout);
+ bool (*fences_wait)(struct radeon_winsys *ws,
+ struct radeon_winsys_fence *const *fences,
+ uint32_t fence_count,
+ bool wait_all,
+ uint64_t timeout);
/* old semaphores - non shareable */
struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 5632b1d4ee2..d2b33546cc4 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -154,6 +154,39 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
return false;
}
+/* Waits on fence_count winsys fences with a single amdgpu_cs_wait_fences() call; returns true only if that call returns 0 and reports a non-zero 'expired', and false on allocation failure, wait error, or when nothing expired. */
+static bool radv_amdgpu_fences_wait(struct radeon_winsys *_ws,
+ struct radeon_winsys_fence *const *_fences,
+ uint32_t fence_count,
+ bool wait_all,
+ uint64_t timeout)
+{
+ struct amdgpu_cs_fence *fences = malloc(sizeof(struct amdgpu_cs_fence) * fence_count); /* scratch array in the type the wait call takes; freed right after the call */
+ int r;
+ uint32_t expired = 0, first = 0; /* outputs of amdgpu_cs_wait_fences(); 'first' is not read afterwards */
+
+ if (!fences)
+ return false; /* allocation failure is reported the same way as "nothing signalled" */
+
+ for (uint32_t i = 0; i < fence_count; ++i)
+ fences[i] = ((struct radv_amdgpu_fence *)_fences[i])->fence; /* copy the 'fence' member out of each winsys fence */
+
+ /* Now use the libdrm query. */
+ r = amdgpu_cs_wait_fences(fences, fence_count, wait_all,
+ timeout, &expired, &first); /* wait_all and timeout are forwarded unchanged */
+
+ free(fences);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_cs_wait_fences failed.\n");
+ return false; /* a wait error is also reported as false */
+ }
+
+ if (expired)
+ return true;
+
+ return false;
+}
+
static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
@@ -1387,4 +1420,5 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
ws->base.fence_wait = radv_amdgpu_fence_wait;
+ ws->base.fences_wait = radv_amdgpu_fences_wait;
}