anv/query: Use snooping on !LLC platforms

Commit b2c97bc789198427043cd902bc76e194e7e81c7d, which made us start using a busy-wait for individual query results, also messed up cache flushing on !LLC platforms. For one thing, I forgot the mfence after the clflush, so memory access wasn't properly getting fenced. More importantly, however, we were clflushing the whole query range, then waiting for individual queries, and then trying to read the results without clflushing again. Getting the clflushing both correct and efficient is very subtle and painful. Instead, let's side-step the problem by just snooping.

Reviewed-by: Chris Wilson <[email protected]>
author: Jason Ekstrand <[email protected]> 2017-04-06 13:34:38 -0700
committer: Jason Ekstrand <[email protected]> 2017-04-07 12:17:20 -0700
commit: 4e17b59f6cea79df078f553376b0392fbbf1ae0a (patch)
tree: 260d9bdfc9a3c58ea95d2c4684d78c92c0d21471 /src
parent: 5318d1ff94b474103a69e16af25bf641dad20a7b (diff)
1 files changed, 11 insertions, 13 deletions
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 235da8b8b6e..2c70b4f528e 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -90,6 +90,14 @@ VkResult genX(CreateQueryPool)(
    if (result != VK_SUCCESS)
       goto fail;
 
+   /* For query pools, we set the caching mode to I915_CACHING_CACHED.  On LLC
+    * platforms, this does nothing.  On non-LLC platforms, this means snooping
+    * which comes at a slight cost.  However, the buffers aren't big, won't be
+    * written frequently, and trying to handle the flushing manually without
+    * doing too much flushing is extremely painful.
+    */
+   anv_gem_set_caching(device, pool->bo.gem_handle, I915_CACHING_CACHED);
+
    pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
 
    *pQueryPool = anv_query_pool_to_handle(pool);
@@ -132,11 +140,8 @@ cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
 }
 
 static bool
-query_is_available(struct anv_device *device, uint64_t *slot)
+query_is_available(uint64_t *slot)
 {
-   if (!device->info.has_llc)
-      __builtin_ia32_clflush(slot);
-
    return *(volatile uint64_t *)slot;
 }
 
@@ -145,7 +150,7 @@ wait_for_available(struct anv_device *device,
                    struct anv_query_pool *pool, uint64_t *slot)
 {
    while (true) {
-      if (query_is_available(device, slot))
+      if (query_is_available(slot))
          return VK_SUCCESS;
 
       int ret = anv_gem_busy(device, pool->bo.gem_handle);
@@ -159,7 +164,7 @@ wait_for_available(struct anv_device *device,
       } else {
          assert(ret == 0);
          /* The BO is no longer busy. */
-         if (query_is_available(device, slot)) {
+         if (query_is_available(slot)) {
             return VK_SUCCESS;
          } else {
             VkResult status = anv_device_query_status(device);
@@ -204,13 +209,6 @@ VkResult genX(GetQueryPoolResults)(
 
    void *data_end = pData + dataSize;
 
-   if (!device->info.has_llc) {
-      uint64_t offset = firstQuery * pool->stride;
-      uint64_t size = queryCount * pool->stride;
-      anv_invalidate_range(pool->bo.map + offset,
-                           MIN2(size, pool->bo.size - offset));
-   }
-
    VkResult status = VK_SUCCESS;
    for (uint32_t i = 0; i < queryCount; i++) {
       uint64_t *slot = pool->bo.map + (firstQuery + i) * pool->stride;
author	Jason Ekstrand <[email protected]>	2017-04-06 13:34:38 -0700
committer	Jason Ekstrand <[email protected]>	2017-04-07 12:17:20 -0700
commit	4e17b59f6cea79df078f553376b0392fbbf1ae0a (patch)
tree	260d9bdfc9a3c58ea95d2c4684d78c92c0d21471 /src
parent	5318d1ff94b474103a69e16af25bf641dad20a7b (diff)