-rw-r--r--  src/intel/Makefile.sources    |   1
-rw-r--r--  src/intel/vulkan/anv_device.c | 484
-rw-r--r--  src/intel/vulkan/anv_queue.c  | 515
3 files changed, 516 insertions, 484 deletions
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 0d446614c62..e9a39a659ac 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -202,6 +202,7 @@ VULKAN_FILES := \
vulkan/anv_pipeline.c \
vulkan/anv_pipeline_cache.c \
vulkan/anv_private.h \
+ vulkan/anv_queue.c \
vulkan/anv_util.c \
vulkan/anv_wsi.c \
vulkan/vk_format_info.h
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 62974f19d80..cf32df66d2d 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -981,62 +981,6 @@ anv_device_init_border_colors(struct anv_device *device)
border_colors);
}
-VkResult
-anv_device_submit_simple_batch(struct anv_device *device,
- struct anv_batch *batch)
-{
- struct drm_i915_gem_execbuffer2 execbuf;
- struct drm_i915_gem_exec_object2 exec2_objects[1];
- struct anv_bo bo, *exec_bos[1];
- VkResult result = VK_SUCCESS;
- uint32_t size;
-
- /* Kernel driver requires 8 byte aligned batch length */
- size = align_u32(batch->next - batch->start, 8);
- result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
- if (result != VK_SUCCESS)
- return result;
-
- memcpy(bo.map, batch->start, size);
- if (!device->info.has_llc)
- anv_flush_range(bo.map, size);
-
- exec_bos[0] = &bo;
- exec2_objects[0].handle = bo.gem_handle;
- exec2_objects[0].relocation_count = 0;
- exec2_objects[0].relocs_ptr = 0;
- exec2_objects[0].alignment = 0;
- exec2_objects[0].offset = bo.offset;
- exec2_objects[0].flags = 0;
- exec2_objects[0].rsvd1 = 0;
- exec2_objects[0].rsvd2 = 0;
-
- execbuf.buffers_ptr = (uintptr_t) exec2_objects;
- execbuf.buffer_count = 1;
- execbuf.batch_start_offset = 0;
- execbuf.batch_len = size;
- execbuf.cliprects_ptr = 0;
- execbuf.num_cliprects = 0;
- execbuf.DR1 = 0;
- execbuf.DR4 = 0;
-
- execbuf.flags =
- I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
- execbuf.rsvd1 = device->context_id;
- execbuf.rsvd2 = 0;
-
- result = anv_device_execbuf(device, &execbuf, exec_bos);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = anv_device_wait(device, &bo, INT64_MAX);
-
- fail:
- anv_bo_pool_free(&device->batch_bo_pool, &bo);
-
- return result;
-}
-
VkResult anv_CreateDevice(
VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
@@ -1350,26 +1294,6 @@ void anv_GetDeviceQueue(
}
VkResult
-anv_device_execbuf(struct anv_device *device,
- struct drm_i915_gem_execbuffer2 *execbuf,
- struct anv_bo **execbuf_bos)
-{
- int ret = anv_gem_execbuffer(device, execbuf);
- if (ret != 0) {
- /* We don't know the real error. */
- device->lost = true;
- return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
- }
-
- struct drm_i915_gem_exec_object2 *objects =
- (void *)(uintptr_t)execbuf->buffers_ptr;
- for (uint32_t k = 0; k < execbuf->buffer_count; k++)
- execbuf_bos[k]->offset = objects[k].offset;
-
- return VK_SUCCESS;
-}
-
-VkResult
anv_device_query_status(struct anv_device *device)
{
/* This isn't likely as most of the callers of this function already check
@@ -1446,119 +1370,6 @@ anv_device_wait(struct anv_device *device, struct anv_bo *bo,
return anv_device_query_status(device);
}
-VkResult anv_QueueSubmit(
- VkQueue _queue,
- uint32_t submitCount,
- const VkSubmitInfo* pSubmits,
- VkFence _fence)
-{
- ANV_FROM_HANDLE(anv_queue, queue, _queue);
- ANV_FROM_HANDLE(anv_fence, fence, _fence);
- struct anv_device *device = queue->device;
-
- /* Query for device status prior to submitting. Technically, we don't need
- * to do this. However, if we have a client that's submitting piles of
- * garbage, we would rather break as early as possible to keep the GPU
- * hanging contained. If we don't check here, we'll either be waiting for
- * the kernel to kick us or we'll have to wait until the client waits on a
- * fence before we actually know whether or not we've hung.
- */
- VkResult result = anv_device_query_status(device);
- if (result != VK_SUCCESS)
- return result;
-
- /* We lock around QueueSubmit for three main reasons:
- *
- * 1) When a block pool is resized, we create a new gem handle with a
- * different size and, in the case of surface states, possibly a
- * different center offset but we re-use the same anv_bo struct when
- * we do so. If this happens in the middle of setting up an execbuf,
- * we could end up with our list of BOs out of sync with our list of
- * gem handles.
- *
- * 2) The algorithm we use for building the list of unique buffers isn't
- * thread-safe. While the client is supposed to syncronize around
- * QueueSubmit, this would be extremely difficult to debug if it ever
- * came up in the wild due to a broken app. It's better to play it
- * safe and just lock around QueueSubmit.
- *
- * 3) The anv_cmd_buffer_execbuf function may perform relocations in
- * userspace. Due to the fact that the surface state buffer is shared
- * between batches, we can't afford to have that happen from multiple
- * threads at the same time. Even though the user is supposed to
- * ensure this doesn't happen, we play it safe as in (2) above.
- *
- * Since the only other things that ever take the device lock such as block
- * pool resize only rarely happen, this will almost never be contended so
- * taking a lock isn't really an expensive operation in this case.
- */
- pthread_mutex_lock(&device->mutex);
-
- for (uint32_t i = 0; i < submitCount; i++) {
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
- pSubmits[i].pCommandBuffers[j]);
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- assert(!anv_batch_has_error(&cmd_buffer->batch));
-
- result = anv_cmd_buffer_execbuf(device, cmd_buffer);
- if (result != VK_SUCCESS)
- goto out;
- }
- }
-
- if (fence) {
- struct anv_bo *fence_bo = &fence->bo;
- result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
- if (result != VK_SUCCESS)
- goto out;
-
- /* Update the fence and wake up any waiters */
- assert(fence->state == ANV_FENCE_STATE_RESET);
- fence->state = ANV_FENCE_STATE_SUBMITTED;
- pthread_cond_broadcast(&device->queue_submit);
- }
-
-out:
- if (result != VK_SUCCESS) {
- /* In the case that something has gone wrong we may end up with an
- * inconsistent state from which it may not be trivial to recover.
- * For example, we might have computed address relocations and
- * any future attempt to re-submit this job will need to know about
- * this and avoid computing relocation addresses again.
- *
- * To avoid this sort of issues, we assume that if something was
- * wrong during submission we must already be in a really bad situation
- * anyway (such us being out of memory) and return
- * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to
- * submit the same job again to this device.
- */
- result = VK_ERROR_DEVICE_LOST;
- device->lost = true;
-
- /* If we return VK_ERROR_DEVICE LOST here, we need to ensure that
- * vkWaitForFences() and vkGetFenceStatus() return a valid result
- * (VK_SUCCESS or VK_ERROR_DEVICE_LOST) in a finite amount of time.
- * Setting the fence status to SIGNALED ensures this will happen in
- * any case.
- */
- if (fence)
- fence->state = ANV_FENCE_STATE_SIGNALED;
- }
-
- pthread_mutex_unlock(&device->mutex);
-
- return result;
-}
-
-VkResult anv_QueueWaitIdle(
- VkQueue _queue)
-{
- ANV_FROM_HANDLE(anv_queue, queue, _queue);
-
- return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
-}
-
VkResult anv_DeviceWaitIdle(
VkDevice _device)
{
@@ -1953,301 +1764,6 @@ VkResult anv_QueueBindSparse(
return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
}
-VkResult anv_CreateFence(
- VkDevice _device,
- const VkFenceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkFence* pFence)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- struct anv_bo fence_bo;
- struct anv_fence *fence;
- struct anv_batch batch;
- VkResult result;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
-
- result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
- if (result != VK_SUCCESS)
- return result;
-
- /* Fences are small. Just store the CPU data structure in the BO. */
- fence = fence_bo.map;
- fence->bo = fence_bo;
-
- /* Place the batch after the CPU data but on its own cache line. */
- const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
- batch.next = batch.start = fence->bo.map + batch_offset;
- batch.end = fence->bo.map + fence->bo.size;
- anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
- anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
-
- if (!device->info.has_llc) {
- assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
- assert(batch.next - batch.start <= CACHELINE_SIZE);
- __builtin_ia32_mfence();
- __builtin_ia32_clflush(batch.start);
- }
-
- fence->exec2_objects[0].handle = fence->bo.gem_handle;
- fence->exec2_objects[0].relocation_count = 0;
- fence->exec2_objects[0].relocs_ptr = 0;
- fence->exec2_objects[0].alignment = 0;
- fence->exec2_objects[0].offset = fence->bo.offset;
- fence->exec2_objects[0].flags = 0;
- fence->exec2_objects[0].rsvd1 = 0;
- fence->exec2_objects[0].rsvd2 = 0;
-
- fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
- fence->execbuf.buffer_count = 1;
- fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
- fence->execbuf.batch_len = batch.next - batch.start;
- fence->execbuf.cliprects_ptr = 0;
- fence->execbuf.num_cliprects = 0;
- fence->execbuf.DR1 = 0;
- fence->execbuf.DR4 = 0;
-
- fence->execbuf.flags =
- I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
- fence->execbuf.rsvd1 = device->context_id;
- fence->execbuf.rsvd2 = 0;
-
- if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
- fence->state = ANV_FENCE_STATE_SIGNALED;
- } else {
- fence->state = ANV_FENCE_STATE_RESET;
- }
-
- *pFence = anv_fence_to_handle(fence);
-
- return VK_SUCCESS;
-}
-
-void anv_DestroyFence(
- VkDevice _device,
- VkFence _fence,
- const VkAllocationCallbacks* pAllocator)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_fence, fence, _fence);
-
- if (!fence)
- return;
-
- assert(fence->bo.map == fence);
- anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
-}
-
-VkResult anv_ResetFences(
- VkDevice _device,
- uint32_t fenceCount,
- const VkFence* pFences)
-{
- for (uint32_t i = 0; i < fenceCount; i++) {
- ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- fence->state = ANV_FENCE_STATE_RESET;
- }
-
- return VK_SUCCESS;
-}
-
-VkResult anv_GetFenceStatus(
- VkDevice _device,
- VkFence _fence)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_fence, fence, _fence);
-
- if (unlikely(device->lost))
- return VK_ERROR_DEVICE_LOST;
-
- switch (fence->state) {
- case ANV_FENCE_STATE_RESET:
- /* If it hasn't even been sent off to the GPU yet, it's not ready */
- return VK_NOT_READY;
-
- case ANV_FENCE_STATE_SIGNALED:
- /* It's been signaled, return success */
- return VK_SUCCESS;
-
- case ANV_FENCE_STATE_SUBMITTED: {
- VkResult result = anv_device_bo_busy(device, &fence->bo);
- if (result == VK_SUCCESS) {
- fence->state = ANV_FENCE_STATE_SIGNALED;
- return VK_SUCCESS;
- } else {
- return result;
- }
- }
- default:
- unreachable("Invalid fence status");
- }
-}
-
-#define NSEC_PER_SEC 1000000000
-#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
-
-VkResult anv_WaitForFences(
- VkDevice _device,
- uint32_t fenceCount,
- const VkFence* pFences,
- VkBool32 waitAll,
- uint64_t _timeout)
-{
- ANV_FROM_HANDLE(anv_device, device, _device);
- int ret;
-
- if (unlikely(device->lost))
- return VK_ERROR_DEVICE_LOST;
-
- /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
- * to block indefinitely timeouts <= 0. Unfortunately, this was broken
- * for a couple of kernel releases. Since there's no way to know
- * whether or not the kernel we're using is one of the broken ones, the
- * best we can do is to clamp the timeout to INT64_MAX. This limits the
- * maximum timeout from 584 years to 292 years - likely not a big deal.
- */
- int64_t timeout = MIN2(_timeout, INT64_MAX);
-
- VkResult result = VK_SUCCESS;
- uint32_t pending_fences = fenceCount;
- while (pending_fences) {
- pending_fences = 0;
- bool signaled_fences = false;
- for (uint32_t i = 0; i < fenceCount; i++) {
- ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- switch (fence->state) {
- case ANV_FENCE_STATE_RESET:
- /* This fence hasn't been submitted yet, we'll catch it the next
- * time around. Yes, this may mean we dead-loop but, short of
- * lots of locking and a condition variable, there's not much that
- * we can do about that.
- */
- pending_fences++;
- continue;
-
- case ANV_FENCE_STATE_SIGNALED:
- /* This fence is not pending. If waitAll isn't set, we can return
- * early. Otherwise, we have to keep going.
- */
- if (!waitAll) {
- result = VK_SUCCESS;
- goto done;
- }
- continue;
-
- case ANV_FENCE_STATE_SUBMITTED:
- /* These are the fences we really care about. Go ahead and wait
- * on it until we hit a timeout.
- */
- result = anv_device_wait(device, &fence->bo, timeout);
- switch (result) {
- case VK_SUCCESS:
- fence->state = ANV_FENCE_STATE_SIGNALED;
- signaled_fences = true;
- if (!waitAll)
- goto done;
- break;
-
- case VK_TIMEOUT:
- goto done;
-
- default:
- return result;
- }
- }
- }
-
- if (pending_fences && !signaled_fences) {
- /* If we've hit this then someone decided to vkWaitForFences before
- * they've actually submitted any of them to a queue. This is a
- * fairly pessimal case, so it's ok to lock here and use a standard
- * pthreads condition variable.
- */
- pthread_mutex_lock(&device->mutex);
-
- /* It's possible that some of the fences have changed state since the
- * last time we checked. Now that we have the lock, check for
- * pending fences again and don't wait if it's changed.
- */
- uint32_t now_pending_fences = 0;
- for (uint32_t i = 0; i < fenceCount; i++) {
- ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- if (fence->state == ANV_FENCE_STATE_RESET)
- now_pending_fences++;
- }
- assert(now_pending_fences <= pending_fences);
-
- if (now_pending_fences == pending_fences) {
- struct timespec before;
- clock_gettime(CLOCK_MONOTONIC, &before);
-
- uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
- uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
- (timeout / NSEC_PER_SEC);
- abs_nsec %= NSEC_PER_SEC;
-
- /* Avoid roll-over in tv_sec on 32-bit systems if the user
- * provided timeout is UINT64_MAX
- */
- struct timespec abstime;
- abstime.tv_nsec = abs_nsec;
- abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
-
- ret = pthread_cond_timedwait(&device->queue_submit,
- &device->mutex, &abstime);
- assert(ret != EINVAL);
-
- struct timespec after;
- clock_gettime(CLOCK_MONOTONIC, &after);
- uint64_t time_elapsed =
- ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
- ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
-
- if (time_elapsed >= timeout) {
- pthread_mutex_unlock(&device->mutex);
- result = VK_TIMEOUT;
- goto done;
- }
-
- timeout -= time_elapsed;
- }
-
- pthread_mutex_unlock(&device->mutex);
- }
- }
-
-done:
- if (unlikely(device->lost))
- return VK_ERROR_DEVICE_LOST;
-
- return result;
-}
-
-// Queue semaphore functions
-
-VkResult anv_CreateSemaphore(
- VkDevice device,
- const VkSemaphoreCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSemaphore* pSemaphore)
-{
- /* The DRM execbuffer ioctl always execute in-oder, even between different
- * rings. As such, there's nothing to do for the user space semaphore.
- */
-
- *pSemaphore = (VkSemaphore)1;
-
- return VK_SUCCESS;
-}
-
-void anv_DestroySemaphore(
- VkDevice device,
- VkSemaphore semaphore,
- const VkAllocationCallbacks* pAllocator)
-{
-}
-
// Event functions
VkResult anv_CreateEvent(
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
new file mode 100644
index 00000000000..5a22ff7fe60
--- /dev/null
+++ b/src/intel/vulkan/anv_queue.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * This file implements VkQueue, VkFence, and VkSemaphore
+ */
+
+#include "anv_private.h"
+#include "util/vk_util.h"
+
+#include "genxml/gen7_pack.h"
+
+VkResult
+anv_device_execbuf(struct anv_device *device,
+ struct drm_i915_gem_execbuffer2 *execbuf,
+ struct anv_bo **execbuf_bos)
+{
+ int ret = anv_gem_execbuffer(device, execbuf);
+ if (ret != 0) {
+ /* We don't know the real error. */
+ device->lost = true;
+ return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
+ }
+
+ struct drm_i915_gem_exec_object2 *objects =
+ (void *)(uintptr_t)execbuf->buffers_ptr;
+ for (uint32_t k = 0; k < execbuf->buffer_count; k++)
+ execbuf_bos[k]->offset = objects[k].offset;
+
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_device_submit_simple_batch(struct anv_device *device,
+ struct anv_batch *batch)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec2_objects[1];
+ struct anv_bo bo, *exec_bos[1];
+ VkResult result = VK_SUCCESS;
+ uint32_t size;
+
+ /* Kernel driver requires 8 byte aligned batch length */
+ size = align_u32(batch->next - batch->start, 8);
+ result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
+ if (result != VK_SUCCESS)
+ return result;
+
+ memcpy(bo.map, batch->start, size);
+ if (!device->info.has_llc)
+ anv_flush_range(bo.map, size);
+
+ exec_bos[0] = &bo;
+ exec2_objects[0].handle = bo.gem_handle;
+ exec2_objects[0].relocation_count = 0;
+ exec2_objects[0].relocs_ptr = 0;
+ exec2_objects[0].alignment = 0;
+ exec2_objects[0].offset = bo.offset;
+ exec2_objects[0].flags = 0;
+ exec2_objects[0].rsvd1 = 0;
+ exec2_objects[0].rsvd2 = 0;
+
+ execbuf.buffers_ptr = (uintptr_t) exec2_objects;
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = size;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+
+ execbuf.flags =
+ I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
+ execbuf.rsvd1 = device->context_id;
+ execbuf.rsvd2 = 0;
+
+ result = anv_device_execbuf(device, &execbuf, exec_bos);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = anv_device_wait(device, &bo, INT64_MAX);
+
+ fail:
+ anv_bo_pool_free(&device->batch_bo_pool, &bo);
+
+ return result;
+}
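
The kernel requires the batch length passed through execbuf to be 8-byte aligned, which is why the size is rounded up with align_u32() before the batch is copied into the pool BO. Below is a standalone, illustrative round-up helper equivalent in spirit to that call; the name align_up and the example values are assumptions for illustration, not the driver's own definition.

    #include <assert.h>
    #include <stdint.h>

    /* Round value up to the next multiple of alignment; alignment must be a
     * power of two. align_up(30, 8) == 32, align_up(32, 8) == 32. */
    static inline uint32_t
    align_up(uint32_t value, uint32_t alignment)
    {
       assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
       return (value + alignment - 1) & ~(alignment - 1);
    }
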
+
+VkResult anv_QueueSubmit(
+ VkQueue _queue,
+ uint32_t submitCount,
+ const VkSubmitInfo* pSubmits,
+ VkFence _fence)
+{
+ ANV_FROM_HANDLE(anv_queue, queue, _queue);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+ struct anv_device *device = queue->device;
+
+ /* Query for device status prior to submitting. Technically, we don't need
+ * to do this. However, if we have a client that's submitting piles of
+ * garbage, we would rather break as early as possible to keep the GPU
+ * hanging contained. If we don't check here, we'll either be waiting for
+ * the kernel to kick us or we'll have to wait until the client waits on a
+ * fence before we actually know whether or not we've hung.
+ */
+ VkResult result = anv_device_query_status(device);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* We lock around QueueSubmit for three main reasons:
+ *
+ * 1) When a block pool is resized, we create a new gem handle with a
+ * different size and, in the case of surface states, possibly a
+ * different center offset but we re-use the same anv_bo struct when
+ * we do so. If this happens in the middle of setting up an execbuf,
+ * we could end up with our list of BOs out of sync with our list of
+ * gem handles.
+ *
+ * 2) The algorithm we use for building the list of unique buffers isn't
+ * thread-safe. While the client is supposed to synchronize around
+ * QueueSubmit, this would be extremely difficult to debug if it ever
+ * came up in the wild due to a broken app. It's better to play it
+ * safe and just lock around QueueSubmit.
+ *
+ * 3) The anv_cmd_buffer_execbuf function may perform relocations in
+ * userspace. Due to the fact that the surface state buffer is shared
+ * between batches, we can't afford to have that happen from multiple
+ * threads at the same time. Even though the user is supposed to
+ * ensure this doesn't happen, we play it safe as in (2) above.
+ *
+ * Since the only other things that ever take the device lock, such as block
+ * pool resizes, only rarely happen, this will almost never be contended, so
+ * taking a lock isn't really an expensive operation in this case.
+ */
+ pthread_mutex_lock(&device->mutex);
+
+ for (uint32_t i = 0; i < submitCount; i++) {
+ for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
+ pSubmits[i].pCommandBuffers[j]);
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+ assert(!anv_batch_has_error(&cmd_buffer->batch));
+
+ result = anv_cmd_buffer_execbuf(device, cmd_buffer);
+ if (result != VK_SUCCESS)
+ goto out;
+ }
+ }
+
+ if (fence) {
+ struct anv_bo *fence_bo = &fence->bo;
+ result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
+ if (result != VK_SUCCESS)
+ goto out;
+
+ /* Update the fence and wake up any waiters */
+ assert(fence->state == ANV_FENCE_STATE_RESET);
+ fence->state = ANV_FENCE_STATE_SUBMITTED;
+ pthread_cond_broadcast(&device->queue_submit);
+ }
+
+out:
+ if (result != VK_SUCCESS) {
+ /* In the case that something has gone wrong we may end up with an
+ * inconsistent state from which it may not be trivial to recover.
+ * For example, we might have computed address relocations and
+ * any future attempt to re-submit this job will need to know about
+ * this and avoid computing relocation addresses again.
+ *
+ * To avoid this sort of issue, we assume that if something went
+ * wrong during submission we must already be in a really bad situation
+ * anyway (such as being out of memory) and return
+ * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to
+ * submit the same job again to this device.
+ */
+ result = VK_ERROR_DEVICE_LOST;
+ device->lost = true;
+
+ /* If we return VK_ERROR_DEVICE_LOST here, we need to ensure that
+ * vkWaitForFences() and vkGetFenceStatus() return a valid result
+ * (VK_SUCCESS or VK_ERROR_DEVICE_LOST) in a finite amount of time.
+ * Setting the fence status to SIGNALED ensures this will happen in
+ * any case.
+ */
+ if (fence)
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ }
+
+ pthread_mutex_unlock(&device->mutex);
+
+ return result;
+}
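
For reference, this entry point is reached through the ordinary vkQueueSubmit() call. A minimal application-side sketch (core Vulkan API only, not driver internals) that submits one primary command buffer with a fence, giving the pthread_cond_broadcast() above a waiter to wake; it assumes the caller already created the queue, recorded the command buffer, and created the fence unsignaled.

    #include <vulkan/vulkan.h>

    static VkResult
    submit_one(VkQueue queue, VkCommandBuffer cmd, VkFence fence)
    {
       const VkSubmitInfo submit_info = {
          .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
          .commandBufferCount = 1,
          .pCommandBuffers = &cmd,
       };

       /* On failure the driver marks the device lost and signals the fence so
        * later waits still return in finite time. */
       return vkQueueSubmit(queue, 1, &submit_info, fence);
    }
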
+
+VkResult anv_QueueWaitIdle(
+ VkQueue _queue)
+{
+ ANV_FROM_HANDLE(anv_queue, queue, _queue);
+
+ return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
+}
+
+VkResult anv_CreateFence(
+ VkDevice _device,
+ const VkFenceCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkFence* pFence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_bo fence_bo;
+ struct anv_fence *fence;
+ struct anv_batch batch;
+ VkResult result;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
+
+ result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* Fences are small. Just store the CPU data structure in the BO. */
+ fence = fence_bo.map;
+ fence->bo = fence_bo;
+
+ /* Place the batch after the CPU data but on its own cache line. */
+ const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
+ batch.next = batch.start = fence->bo.map + batch_offset;
+ batch.end = fence->bo.map + fence->bo.size;
+ anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
+ anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
+
+ if (!device->info.has_llc) {
+ assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
+ assert(batch.next - batch.start <= CACHELINE_SIZE);
+ __builtin_ia32_mfence();
+ __builtin_ia32_clflush(batch.start);
+ }
+
+ fence->exec2_objects[0].handle = fence->bo.gem_handle;
+ fence->exec2_objects[0].relocation_count = 0;
+ fence->exec2_objects[0].relocs_ptr = 0;
+ fence->exec2_objects[0].alignment = 0;
+ fence->exec2_objects[0].offset = fence->bo.offset;
+ fence->exec2_objects[0].flags = 0;
+ fence->exec2_objects[0].rsvd1 = 0;
+ fence->exec2_objects[0].rsvd2 = 0;
+
+ fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
+ fence->execbuf.buffer_count = 1;
+ fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
+ fence->execbuf.batch_len = batch.next - batch.start;
+ fence->execbuf.cliprects_ptr = 0;
+ fence->execbuf.num_cliprects = 0;
+ fence->execbuf.DR1 = 0;
+ fence->execbuf.DR4 = 0;
+
+ fence->execbuf.flags =
+ I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
+ fence->execbuf.rsvd1 = device->context_id;
+ fence->execbuf.rsvd2 = 0;
+
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ } else {
+ fence->state = ANV_FENCE_STATE_RESET;
+ }
+
+ *pFence = anv_fence_to_handle(fence);
+
+ return VK_SUCCESS;
+}
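
The whole fence lives in a single 4096-byte BO: the anv_fence struct sits at offset 0 and the tiny MI_BATCH_BUFFER_END batch starts on the next cache line. From the application's point of view all of this is hidden behind vkCreateFence(); the sketch below only exercises the VK_FENCE_CREATE_SIGNALED_BIT path handled above (standard API, assumes a valid VkDevice; the helper name is made up for illustration).

    #include <stdbool.h>
    #include <vulkan/vulkan.h>

    static VkResult
    create_fence(VkDevice device, bool start_signaled, VkFence *fence)
    {
       const VkFenceCreateInfo info = {
          .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
          .flags = start_signaled ? VK_FENCE_CREATE_SIGNALED_BIT : 0,
       };

       /* The driver keeps its bookkeeping in the fence BO itself, so no host
        * allocation callbacks are passed here. */
       return vkCreateFence(device, &info, NULL, fence);
    }
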
+
+void anv_DestroyFence(
+ VkDevice _device,
+ VkFence _fence,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+
+ if (!fence)
+ return;
+
+ assert(fence->bo.map == fence);
+ anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
+}
+
+VkResult anv_ResetFences(
+ VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence* pFences)
+{
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ fence->state = ANV_FENCE_STATE_RESET;
+ }
+
+ return VK_SUCCESS;
+}
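
Resetting simply drops a fence back to the RESET state so it can be reused on a later submit. A hedged sketch of the usual application pattern, waiting first so a still-pending fence is never reset (standard API; UINT64_MAX means wait without timing out):

    #include <stdint.h>
    #include <vulkan/vulkan.h>

    static VkResult
    recycle_fence(VkDevice device, VkFence fence)
    {
       /* Wait for the previous submission to retire... */
       VkResult result = vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
       if (result != VK_SUCCESS)
          return result;

       /* ...then put the fence back into the unsignaled (RESET) state. */
       return vkResetFences(device, 1, &fence);
    }
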
+
+VkResult anv_GetFenceStatus(
+ VkDevice _device,
+ VkFence _fence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+
+ if (unlikely(device->lost))
+ return VK_ERROR_DEVICE_LOST;
+
+ switch (fence->state) {
+ case ANV_FENCE_STATE_RESET:
+ /* If it hasn't even been sent off to the GPU yet, it's not ready */
+ return VK_NOT_READY;
+
+ case ANV_FENCE_STATE_SIGNALED:
+ /* It's been signaled, return success */
+ return VK_SUCCESS;
+
+ case ANV_FENCE_STATE_SUBMITTED: {
+ VkResult result = anv_device_bo_busy(device, &fence->bo);
+ if (result == VK_SUCCESS) {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ return VK_SUCCESS;
+ } else {
+ return result;
+ }
+ }
+ default:
+ unreachable("Invalid fence status");
+ }
+}
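
The fence is a small state machine: RESET until it is submitted, SUBMITTED while its BO is still busy, and SIGNALED once the BO goes idle (or the fence was created pre-signaled). A small application-side sketch that polls that state through vkGetFenceStatus(); real code would normally prefer vkWaitForFences() over spinning.

    #include <vulkan/vulkan.h>

    /* Spin until the fence is no longer pending. Returns VK_SUCCESS once
     * signaled, or VK_ERROR_DEVICE_LOST if the device was lost. */
    static VkResult
    poll_fence(VkDevice device, VkFence fence)
    {
       VkResult status;

       do {
          status = vkGetFenceStatus(device, fence);
       } while (status == VK_NOT_READY);

       return status;
    }
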
+
+#define NSEC_PER_SEC 1000000000
+#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
+
+VkResult anv_WaitForFences(
+ VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence* pFences,
+ VkBool32 waitAll,
+ uint64_t _timeout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ int ret;
+
+ if (unlikely(device->lost))
+ return VK_ERROR_DEVICE_LOST;
+
+ /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64-bit timeout and is supposed
+ * to block indefinitely for timeouts <= 0. Unfortunately, this was broken
+ * for a couple of kernel releases. Since there's no way to know
+ * whether or not the kernel we're using is one of the broken ones, the
+ * best we can do is to clamp the timeout to INT64_MAX. This limits the
+ * maximum timeout from 584 years to 292 years - likely not a big deal.
+ */
+ int64_t timeout = MIN2(_timeout, INT64_MAX);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t pending_fences = fenceCount;
+ while (pending_fences) {
+ pending_fences = 0;
+ bool signaled_fences = false;
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ switch (fence->state) {
+ case ANV_FENCE_STATE_RESET:
+ /* This fence hasn't been submitted yet, we'll catch it the next
+ * time around. Yes, this may mean we dead-loop but, short of
+ * lots of locking and a condition variable, there's not much that
+ * we can do about that.
+ */
+ pending_fences++;
+ continue;
+
+ case ANV_FENCE_STATE_SIGNALED:
+ /* This fence is not pending. If waitAll isn't set, we can return
+ * early. Otherwise, we have to keep going.
+ */
+ if (!waitAll) {
+ result = VK_SUCCESS;
+ goto done;
+ }
+ continue;
+
+ case ANV_FENCE_STATE_SUBMITTED:
+ /* These are the fences we really care about. Go ahead and wait
+ * on them until we hit a timeout.
+ */
+ result = anv_device_wait(device, &fence->bo, timeout);
+ switch (result) {
+ case VK_SUCCESS:
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ signaled_fences = true;
+ if (!waitAll)
+ goto done;
+ break;
+
+ case VK_TIMEOUT:
+ goto done;
+
+ default:
+ return result;
+ }
+ }
+ }
+
+ if (pending_fences && !signaled_fences) {
+ /* If we've hit this then someone decided to vkWaitForFences before
+ * they've actually submitted any of them to a queue. This is a
+ * fairly pessimal case, so it's ok to lock here and use a standard
+ * pthreads condition variable.
+ */
+ pthread_mutex_lock(&device->mutex);
+
+ /* It's possible that some of the fences have changed state since the
+ * last time we checked. Now that we have the lock, check for
+ * pending fences again and don't wait if it's changed.
+ */
+ uint32_t now_pending_fences = 0;
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ if (fence->state == ANV_FENCE_STATE_RESET)
+ now_pending_fences++;
+ }
+ assert(now_pending_fences <= pending_fences);
+
+ if (now_pending_fences == pending_fences) {
+ struct timespec before;
+ clock_gettime(CLOCK_MONOTONIC, &before);
+
+ uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
+ uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
+ (timeout / NSEC_PER_SEC);
+ abs_nsec %= NSEC_PER_SEC;
+
+ /* Avoid roll-over in tv_sec on 32-bit systems if the user
+ * provided timeout is UINT64_MAX
+ */
+ struct timespec abstime;
+ abstime.tv_nsec = abs_nsec;
+ abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
+
+ ret = pthread_cond_timedwait(&device->queue_submit,
+ &device->mutex, &abstime);
+ assert(ret != EINVAL);
+
+ struct timespec after;
+ clock_gettime(CLOCK_MONOTONIC, &after);
+ uint64_t time_elapsed =
+ ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
+ ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
+
+ if (time_elapsed >= timeout) {
+ pthread_mutex_unlock(&device->mutex);
+ result = VK_TIMEOUT;
+ goto done;
+ }
+
+ timeout -= time_elapsed;
+ }
+
+ pthread_mutex_unlock(&device->mutex);
+ }
+ }
+
+done:
+ if (unlikely(device->lost))
+ return VK_ERROR_DEVICE_LOST;
+
+ return result;
+}
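
The only subtle part above is converting the remaining relative timeout into an absolute CLOCK_MONOTONIC deadline for pthread_cond_timedwait() without overflowing tv_sec, hence the INT_TYPE_MAX clamp (2^31 - 1 when tv_sec is 32 bits wide). Below is a standalone sketch of that conversion, assuming a non-negative relative timeout and a condition variable initialized with a CLOCK_MONOTONIC clock attribute so the clocks match; the saturation at INT32_MAX is a conservative stand-in for the width-dependent clamp used above.

    #include <stdint.h>
    #include <time.h>

    #define NSEC_PER_SEC 1000000000ull

    /* Turn a relative timeout in nanoseconds into an absolute CLOCK_MONOTONIC
     * deadline, saturating tv_sec instead of letting it wrap. */
    static struct timespec
    absolute_deadline(int64_t relative_ns)
    {
       struct timespec now, deadline;
       clock_gettime(CLOCK_MONOTONIC, &now);

       uint64_t nsec = (uint64_t)now.tv_nsec + (uint64_t)relative_ns % NSEC_PER_SEC;
       uint64_t sec = (uint64_t)now.tv_sec + (uint64_t)relative_ns / NSEC_PER_SEC +
                      nsec / NSEC_PER_SEC;

       deadline.tv_nsec = nsec % NSEC_PER_SEC;
       deadline.tv_sec = sec > INT32_MAX ? INT32_MAX : (time_t)sec;
       return deadline;
    }
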
+
+// Queue semaphore functions
+
+VkResult anv_CreateSemaphore(
+ VkDevice device,
+ const VkSemaphoreCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkSemaphore* pSemaphore)
+{
+ /* The DRM execbuffer ioctl always executes in-order, even between different
+ * rings. As such, there's nothing to do for the user space semaphore.
+ */
+
+ *pSemaphore = (VkSemaphore)1;
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroySemaphore(
+ VkDevice device,
+ VkSemaphore semaphore,
+ const VkAllocationCallbacks* pAllocator)
+{
+}
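
Because the i915 execbuffer ioctl executes submissions in order even across rings, queue semaphores need no GPU-side state here and the driver hands back a placeholder handle. Applications still go through the normal entry points; a minimal sketch (standard API, assumes a valid VkDevice, helper name invented for illustration):

    #include <vulkan/vulkan.h>

    static VkResult
    create_and_destroy_semaphore(VkDevice device)
    {
       const VkSemaphoreCreateInfo info = {
          .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
       };
       VkSemaphore semaphore;

       VkResult result = vkCreateSemaphore(device, &info, NULL, &semaphore);
       if (result != VK_SUCCESS)
          return result;

       /* With this implementation the handle is just a placeholder and destroy
        * is a no-op, but the calls are still required for portability. */
       vkDestroySemaphore(device, semaphore, NULL);
       return VK_SUCCESS;
    }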