anv/query: Busy-wait for available query entries

Previously, we only checked whether the user asked us to wait and, if so, waited on the query pool BO. Some clients, such as the Serious Engine, use a single query pool for hundreds of individual query results, where the writes for those queries may be split across several command buffers. In this scenario, the individual query we're looking for may become available long before the BO is idle, so waiting for the whole query pool BO to finish is wasteful. This commit makes us busy-loop on each query until it is available instead. This significantly reduces pipeline bubbles and improves performance of The Talos Principle on medium settings (where the GPU isn't overloaded with drawing) by around 20% on my Skylake GT4.

Reviewed-by: Chris Wilson <[email protected]>
Tested-by: Eero Tamminen <[email protected]>
Tested-by: Grazvydas Ignotas <[email protected]>
author: Jason Ekstrand <[email protected]> 2017-04-04 14:36:46 -0700
committer: Jason Ekstrand <[email protected]> 2017-04-05 21:17:11 -0700
commit: b2c97bc789198427043cd902bc76e194e7e81c7d (patch)
tree: 152b9e6c5d646ae0c5c64488978f687e95650eae /src/intel
parent: f195d40eca49800799d85d110939a125041f4028 (diff)
1 files changed, 56 insertions, 6 deletions
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 7ea94044b12..235da8b8b6e 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -131,6 +131,54 @@ cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
    }
 }
 
+static bool /* true once the slot's leading 64-bit word is non-zero */
+query_is_available(struct anv_device *device, uint64_t *slot)
+{
+   if (!device->info.has_llc) /* no LLC: flush the slot's cache line first */
+      __builtin_ia32_clflush(slot);
+   /* Volatile access so the load is actually performed on every call. */
+   return *(volatile uint64_t *)slot;
+}
+
+static VkResult /* VK_SUCCESS when available, else an error or VK_NOT_READY */
+wait_for_available(struct anv_device *device,
+                   struct anv_query_pool *pool, uint64_t *slot)
+{
+   while (true) { /* spin until available, the BO is idle, or the check fails */
+      if (query_is_available(device, slot))
+         return VK_SUCCESS;
+      /* Not available yet: ask whether the pool's BO is still busy. */
+      int ret = anv_gem_busy(device, pool->bo.gem_handle);
+      if (ret == 1) {
+         /* The BO is still busy, keep waiting. */
+         continue;
+      } else if (ret == -1) {
+         /* We don't know the real error. */
+         device->lost = true; /* flag the device as lost before reporting */
+         return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
+      } else {
+         assert(ret == 0);
+         /* The BO is no longer busy. */
+         if (query_is_available(device, slot)) { /* one last look at the slot */
+            return VK_SUCCESS;
+         } else {
+            VkResult status = anv_device_query_status(device);
+            if (status != VK_SUCCESS)
+               return status; /* hand the device status back unchanged */
+
+            /* If we haven't seen availability yet, then we never will.  This
+             * can only happen if we have a client error where they call
+             * GetQueryPoolResults on a query that they haven't submitted to
+             * the GPU yet.  The spec allows us to do anything in this case,
+             * but returning VK_SUCCESS doesn't seem right and we shouldn't
+             * just keep spinning.
+             */
+            return VK_NOT_READY;
+         }
+      }
+   }
+}
+
 VkResult genX(GetQueryPoolResults)(
     VkDevice                                    _device,
     VkQueryPool                                 queryPool,
@@ -154,12 +202,6 @@ VkResult genX(GetQueryPoolResults)(
    if (pData == NULL)
       return VK_SUCCESS;
 
-   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
-      VkResult result = anv_device_wait(device, &pool->bo, INT64_MAX);
-      if (result != VK_SUCCESS)
-         return result;
-   }
-
    void *data_end = pData + dataSize;
 
    if (!device->info.has_llc) {
@@ -176,6 +218,14 @@ VkResult genX(GetQueryPoolResults)(
       /* Availability is always at the start of the slot */
       bool available = slot[0];
 
+      if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
+         status = wait_for_available(device, pool, slot);
+         if (status != VK_SUCCESS)
+            return status;
+
+         available = true;
+      }
+
       /* From the Vulkan 1.0.42 spec:
        *
        *    "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
author	Jason Ekstrand <[email protected]>	2017-04-04 14:36:46 -0700
committer	Jason Ekstrand <[email protected]>	2017-04-05 21:17:11 -0700
commit	b2c97bc789198427043cd902bc76e194e7e81c7d (patch)
tree	152b9e6c5d646ae0c5c64488978f687e95650eae /src/intel
parent	f195d40eca49800799d85d110939a125041f4028 (diff)