-rw-r--r--   src/intel/vulkan/anv_batch_chain.c   327
-rw-r--r--   src/intel/vulkan/anv_private.h        54
-rw-r--r--   src/intel/vulkan/anv_queue.c         574
-rw-r--r--   src/intel/vulkan/genX_query.c         37
4 files changed, 616 insertions(+), 376 deletions(-)
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 5720859a674..297cb641f47 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1047,10 +1047,6 @@ struct anv_execbuf {
    const VkAllocationCallbacks *             alloc;
    VkSystemAllocationScope                   alloc_scope;
-
-   uint32_t                                  fence_count;
-   uint32_t                                  fence_array_length;
-   struct drm_i915_gem_exec_fence *          fences;
-   struct anv_syncobj **                     syncobjs;
 };
 
 static void
@@ -1064,8 +1060,6 @@ anv_execbuf_finish(struct anv_execbuf *exec)
 {
    vk_free(exec->alloc, exec->objects);
    vk_free(exec->alloc, exec->bos);
-   vk_free(exec->alloc, exec->fences);
-   vk_free(exec->alloc, exec->syncobjs);
 }
 
 static VkResult
@@ -1195,34 +1189,6 @@ anv_execbuf_add_bo_bitset(struct anv_device *device,
    return VK_SUCCESS;
 }
 
-static VkResult
-anv_execbuf_add_syncobj(struct anv_execbuf *exec,
-                        uint32_t handle, uint32_t flags)
-{
-   assert(flags != 0);
-
-   if (exec->fence_count >= exec->fence_array_length) {
-      uint32_t new_len = MAX2(exec->fence_array_length * 2, 64);
-
-      exec->fences = vk_realloc(exec->alloc, exec->fences,
-                                new_len * sizeof(*exec->fences),
-                                8, exec->alloc_scope);
-      if (exec->fences == NULL)
-         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-      exec->fence_array_length = new_len;
-   }
-
-   exec->fences[exec->fence_count] = (struct drm_i915_gem_exec_fence) {
-      .handle = handle,
-      .flags = flags,
-   };
-
-   exec->fence_count++;
-
-   return VK_SUCCESS;
-}
-
 static void
 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_reloc_list *list)
@@ -1614,241 +1580,132 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
 }
 
 VkResult
-anv_cmd_buffer_execbuf(struct anv_queue *queue,
-                       struct anv_cmd_buffer *cmd_buffer,
-                       const VkSemaphore *in_semaphores,
-                       uint32_t num_in_semaphores,
-                       const VkSemaphore *out_semaphores,
-                       uint32_t num_out_semaphores,
-                       VkFence _fence)
+anv_queue_execbuf(struct anv_queue *queue,
+                  struct anv_queue_submit *submit)
 {
-   ANV_FROM_HANDLE(anv_fence, fence, _fence);
    struct anv_device *device = queue->device;
-   UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
-
    struct anv_execbuf execbuf;
    anv_execbuf_init(&execbuf);
-   execbuf.alloc = &device->alloc;
-   execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND;
+   execbuf.alloc = submit->alloc;
+   execbuf.alloc_scope = submit->alloc_scope;
 
-   int in_fence = -1;
-   VkResult result = VK_SUCCESS;
-   for (uint32_t i = 0; i < num_in_semaphores; i++) {
-      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
-      struct anv_semaphore_impl *impl =
-         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
-         &semaphore->temporary : &semaphore->permanent;
-
-      switch (impl->type) {
-      case ANV_SEMAPHORE_TYPE_BO:
-         assert(!pdevice->has_syncobj);
-         result = anv_execbuf_add_bo(device, &execbuf, impl->bo, NULL, 0);
-         if (result != VK_SUCCESS)
-            return result;
-         break;
+   VkResult result;
 
-      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
-         assert(!pdevice->has_syncobj);
-         if (in_fence == -1) {
-            in_fence = impl->fd;
-            if (in_fence == -1)
-               return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-            impl->fd = -1;
-         } else {
-            int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd);
-            if (merge == -1)
-               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
-
-            close(impl->fd);
-            close(in_fence);
-            impl->fd = -1;
-            in_fence = merge;
-         }
-         break;
+   /* We lock around execbuf for three main reasons:
+    *
+    * 1) When a block pool is resized, we create a new gem handle with a
+    *    different size and, in the case of surface states, possibly a
+    *    different center offset but we re-use the same anv_bo struct when
+    *    we do so. If this happens in the middle of setting up an execbuf,
+    *    we could end up with our list of BOs out of sync with our list of
+    *    gem handles.
+    *
+    * 2) The algorithm we use for building the list of unique buffers isn't
+    *    thread-safe. While the client is supposed to synchronize around
+    *    QueueSubmit, this would be extremely difficult to debug if it ever
+    *    came up in the wild due to a broken app. It's better to play it
+    *    safe and just lock around QueueSubmit.
+    *
+    * 3) The anv_cmd_buffer_execbuf function may perform relocations in
+    *    userspace. Due to the fact that the surface state buffer is shared
+    *    between batches, we can't afford to have that happen from multiple
+    *    threads at the same time. Even though the user is supposed to
+    *    ensure this doesn't happen, we play it safe as in (2) above.
+    *
+    * Since the only other things that ever take the device lock, such as
+    * block pool resizes, happen only rarely, this will almost never be
+    * contended, so taking the lock isn't really an expensive operation in
+    * this case.
+    */
+   pthread_mutex_lock(&device->mutex);
 
-      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
-         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
-                                          I915_EXEC_FENCE_WAIT);
-         if (result != VK_SUCCESS)
-            return result;
-         break;
+   for (uint32_t i = 0; i < submit->fence_bo_count; i++) {
+      int signaled;
+      struct anv_bo *bo = anv_unpack_ptr(submit->fence_bos[i], 1, &signaled);
 
-      default:
-         break;
-      }
+      result = anv_execbuf_add_bo(device, &execbuf, bo, NULL,
+                                  signaled ? EXEC_OBJECT_WRITE : 0);
+      if (result != VK_SUCCESS)
+         goto error;
    }
 
-   bool need_out_fence = false;
-   for (uint32_t i = 0; i < num_out_semaphores; i++) {
-      ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
-
-      /* Under most circumstances, out fences won't be temporary. However,
-       * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
-       *
-       *    "If the import is temporary, the implementation must restore the
-       *    semaphore to its prior permanent state after submitting the next
-       *    semaphore wait operation."
-       *
-       * The spec says nothing whatsoever about signal operations on
-       * temporarily imported semaphores so it appears they are allowed.
-       * There are also CTS tests that require this to work.
-       */
-      struct anv_semaphore_impl *impl =
-         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
-         &semaphore->temporary : &semaphore->permanent;
-
-      switch (impl->type) {
-      case ANV_SEMAPHORE_TYPE_BO:
-         assert(!pdevice->has_syncobj);
-         result = anv_execbuf_add_bo(device, &execbuf, impl->bo, NULL,
-                                     EXEC_OBJECT_WRITE);
-         if (result != VK_SUCCESS)
-            return result;
-         break;
-
-      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
-         assert(!pdevice->has_syncobj);
-         need_out_fence = true;
-         break;
-
-      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
-         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
-                                          I915_EXEC_FENCE_SIGNAL);
-         if (result != VK_SUCCESS)
-            return result;
-         break;
-
-      default:
-         break;
-      }
+   if (submit->cmd_buffer) {
+      result = setup_execbuf_for_cmd_buffer(&execbuf, submit->cmd_buffer);
+   } else if (submit->simple_bo) {
+      result = anv_execbuf_add_bo(device, &execbuf, submit->simple_bo, NULL, 0);
+      if (result != VK_SUCCESS)
+         goto error;
+
+      execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
+         .buffers_ptr = (uintptr_t) execbuf.objects,
+         .buffer_count = execbuf.bo_count,
+         .batch_start_offset = 0,
+         .batch_len = submit->simple_bo_size,
+         .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
+         .rsvd1 = device->context_id,
+         .rsvd2 = 0,
+      };
+   } else {
+      result = setup_empty_execbuf(&execbuf, queue->device);
    }
 
-   if (fence) {
-      /* Under most circumstances, out fences won't be temporary. However,
-       * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
-       *
-       *    "If the import is temporary, the implementation must restore the
-       *    semaphore to its prior permanent state after submitting the next
-       *    semaphore wait operation."
-       *
-       * The spec says nothing whatsoever about signal operations on
-       * temporarily imported semaphores so it appears they are allowed.
-       * There are also CTS tests that require this to work.
-       */
-      struct anv_fence_impl *impl =
-         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
-         &fence->temporary : &fence->permanent;
-
-      switch (impl->type) {
-      case ANV_FENCE_TYPE_BO:
-         assert(!pdevice->has_syncobj_wait);
-         result = anv_execbuf_add_bo(device, &execbuf, impl->bo.bo, NULL,
-                                     EXEC_OBJECT_WRITE);
-         if (result != VK_SUCCESS)
-            return result;
-         break;
-
-      case ANV_FENCE_TYPE_SYNCOBJ:
-         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
-                                          I915_EXEC_FENCE_SIGNAL);
-         if (result != VK_SUCCESS)
-            return result;
-         break;
-
-      default:
-         unreachable("Invalid fence type");
-      }
-   }
+   if (result != VK_SUCCESS)
+      goto error;
 
-   if (cmd_buffer) {
-      if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
-         struct anv_batch_bo **bo = u_vector_tail(&cmd_buffer->seen_bbos);
+   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
+      if (submit->cmd_buffer) {
+         struct anv_batch_bo **bo = u_vector_tail(&submit->cmd_buffer->seen_bbos);
 
-         device->cmd_buffer_being_decoded = cmd_buffer;
+         device->cmd_buffer_being_decoded = submit->cmd_buffer;
          gen_print_batch(&device->decoder_ctx, (*bo)->bo->map,
                          (*bo)->bo->size, (*bo)->bo->offset, false);
          device->cmd_buffer_being_decoded = NULL;
+      } else if (submit->simple_bo) {
+         gen_print_batch(&device->decoder_ctx, submit->simple_bo->map,
                         submit->simple_bo->size, submit->simple_bo->offset, false);
+      } else {
+         gen_print_batch(&device->decoder_ctx,
+                         device->trivial_batch_bo->map,
+                         device->trivial_batch_bo->size,
+                         device->trivial_batch_bo->offset, false);
       }
-
-      result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
-   } else {
-      result = setup_empty_execbuf(&execbuf, device);
    }
 
-   if (result != VK_SUCCESS)
-      return result;
-
-   if (execbuf.fence_count > 0) {
+   if (submit->fence_count > 0) {
       assert(device->instance->physicalDevice.has_syncobj);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.execbuf.num_cliprects = execbuf.fence_count;
-      execbuf.execbuf.cliprects_ptr = (uintptr_t) execbuf.fences;
+      execbuf.execbuf.num_cliprects = submit->fence_count;
+      execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
    }
 
-   if (in_fence != -1) {
+   if (submit->in_fence != -1) {
       execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
-      execbuf.execbuf.rsvd2 |= (uint32_t)in_fence;
+      execbuf.execbuf.rsvd2 |= (uint32_t)submit->in_fence;
    }
 
-   if (need_out_fence)
+   if (submit->need_out_fence)
      execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
 
-   result = anv_queue_execbuf(queue, &execbuf.execbuf, execbuf.bos);
-
-   /* Execbuf does not consume the in_fence. It's our job to close it. */
-   if (in_fence != -1)
-      close(in_fence);
-
-   for (uint32_t i = 0; i < num_in_semaphores; i++) {
-      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
-      /* From the Vulkan 1.0.53 spec:
-       *
-       *    "If the import is temporary, the implementation must restore the
-       *    semaphore to its prior permanent state after submitting the next
-       *    semaphore wait operation."
-       *
-       * This has to happen after the execbuf in case we close any syncobjs in
-       * the process.
-       */
-      anv_semaphore_reset_temporary(device, semaphore);
+   int ret = queue->device->no_hw ? 0 :
+      anv_gem_execbuffer(queue->device, &execbuf.execbuf);
+   if (ret) {
+      result = anv_queue_set_lost(queue,
+                                  "execbuf2 failed: %s",
+                                  strerror(ret));
    }
 
-   if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
-      assert(!pdevice->has_syncobj_wait);
-      /* BO fences can't be shared, so they can't be temporary. */
-      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
-
-      /* Once the execbuf has returned, we need to set the fence state to
-       * SUBMITTED. We can't do this before calling execbuf because
-       * anv_GetFenceStatus does take the global device lock before checking
-       * fence->state.
-       *
-       * We set the fence state to SUBMITTED regardless of whether or not the
-       * execbuf succeeds because we need to ensure that vkWaitForFences() and
-       * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
-       * VK_SUCCESS) in a finite amount of time even if execbuf fails.
-       */
-      fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
+   struct drm_i915_gem_exec_object2 *objects = execbuf.objects;
+   for (uint32_t k = 0; k < execbuf.bo_count; k++) {
+      if (execbuf.bos[k]->flags & EXEC_OBJECT_PINNED)
+         assert(execbuf.bos[k]->offset == objects[k].offset);
+      execbuf.bos[k]->offset = objects[k].offset;
    }
 
-   if (result == VK_SUCCESS && need_out_fence) {
-      assert(!pdevice->has_syncobj_wait);
-      int out_fence = execbuf.execbuf.rsvd2 >> 32;
-      for (uint32_t i = 0; i < num_out_semaphores; i++) {
-         ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
-         /* Out fences can't have temporary state because that would imply
-          * that we imported a sync file and are trying to signal it.
-          */
-         assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
-         struct anv_semaphore_impl *impl = &semaphore->permanent;
+   if (result == VK_SUCCESS && submit->need_out_fence)
+      submit->out_fence = execbuf.execbuf.rsvd2 >> 32;
 
-         if (impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE) {
-            assert(impl->fd == -1);
-            impl->fd = dup(out_fence);
-         }
-      }
-      close(out_fence);
-   }
+ error:
+   pthread_cond_broadcast(&device->queue_submit);
+   pthread_mutex_unlock(&queue->device->mutex);
 
    anv_execbuf_finish(&execbuf);
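The FENCE_IN/FENCE_OUT handling above leans on an easy-to-miss detail of the execbuffer2 ABI: both sync-file fds travel through the single rsvd2 field, the input fd in the low 32 bits and, after the ioctl returns, the output fd in the high 32 bits. A minimal sketch of that packing in isolation (illustrative only, assuming the usual <drm/i915_drm.h> definitions; not part of the patch):

   /* Illustrative sketch of the rsvd2 fence-fd packing used above. */
   #include <stdint.h>
   #include <drm/i915_drm.h>

   static void set_in_fence(struct drm_i915_gem_execbuffer2 *eb, int fd)
   {
      eb->flags |= I915_EXEC_FENCE_IN;
      /* Low 32 bits: sync-file fd the kernel waits on before executing. */
      eb->rsvd2 = (eb->rsvd2 & ~0xffffffffull) | (uint32_t)fd;
   }

   static int get_out_fence(const struct drm_i915_gem_execbuffer2 *eb)
   {
      /* High 32 bits: only valid after the ioctl, and only when
       * I915_EXEC_FENCE_OUT was set. The caller owns (and must close)
       * the returned fd. */
      return (int)(eb->rsvd2 >> 32);
   }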
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 3aae01e6b2f..67049cc37fe 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -300,6 +300,20 @@ vk_to_isl_color(VkClearColorValue color)
    };
 }
 
+static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
+{
+   uintptr_t mask = (1ull << bits) - 1;
+   *flags = ptr & mask;
+   return (void *) (ptr & ~mask);
+}
+
+static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
+{
+   uintptr_t value = (uintptr_t) ptr;
+   uintptr_t mask = (1ull << bits) - 1;
+   return value | (mask & flags);
+}
+
 #define for_each_bit(b, dword)                          \
    for (uint32_t __dword = (dword);                     \
         (b) = __builtin_ffs(__dword) - 1, __dword;      \
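These two helpers are plain pointer tagging: any allocation aligned to at least 2^bits bytes has that many low address bits guaranteed zero, so they can carry flags. A hypothetical round-trip, just to spell out the contract (assumes the driver headers above; not part of the patch):

   /* Hypothetical usage sketch of the pointer-tagging helpers. */
   #include <assert.h>
   #include <stdint.h>

   void tag_example(struct anv_bo *bo)
   {
      /* anv_bo allocations are at least 8-byte aligned, so bit 0 is free. */
      uintptr_t tagged = anv_pack_ptr(bo, 1, 1 /* signal */);

      int signal;
      struct anv_bo *untagged = anv_unpack_ptr(tagged, 1, &signal);
      assert(untagged == bo);
      assert(signal == 1);
   }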
@@ -1050,6 +1064,42 @@ uint32_t anv_physical_device_api_version(struct anv_physical_device *dev);
 bool anv_physical_device_extension_supported(struct anv_physical_device *dev,
                                              const char *name);
 
+struct anv_queue_submit {
+   struct anv_cmd_buffer *                   cmd_buffer;
+
+   uint32_t                                  fence_count;
+   uint32_t                                  fence_array_length;
+   struct drm_i915_gem_exec_fence *          fences;
+
+   uint32_t                                  temporary_semaphore_count;
+   uint32_t                                  temporary_semaphore_array_length;
+   struct anv_semaphore_impl *               temporary_semaphores;
+
+   /* Semaphores to be signaled with a SYNC_FD. */
+   struct anv_semaphore **                   sync_fd_semaphores;
+   uint32_t                                  sync_fd_semaphore_count;
+   uint32_t                                  sync_fd_semaphore_array_length;
+
+   int                                       in_fence;
+   bool                                      need_out_fence;
+   int                                       out_fence;
+
+   uint32_t                                  fence_bo_count;
+   uint32_t                                  fence_bo_array_length;
+   /* An array of struct anv_bo pointers, with the lower bit used as a flag
+    * to mark the BOs this submission will signal (see anv_(un)pack_ptr).
+    */
+   uintptr_t *                               fence_bos;
+
+   const VkAllocationCallbacks *             alloc;
+   VkSystemAllocationScope                   alloc_scope;
+
+   struct anv_bo *                           simple_bo;
+   uint32_t                                  simple_bo_size;
+
+   struct list_head                          link;
+};
+
 struct anv_queue {
    VK_LOADER_DATA                            _loader_data;
 
@@ -1318,9 +1368,7 @@ VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue);
 void anv_queue_finish(struct anv_queue *queue);
 
-VkResult anv_queue_execbuf(struct anv_queue *queue,
-                           struct drm_i915_gem_execbuffer2 *execbuf,
-                           struct anv_bo **execbuf_bos);
+VkResult anv_queue_execbuf(struct anv_queue *queue, struct anv_queue_submit *submit);
 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
                                        struct anv_batch *batch);
 
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index 74f3a3c4254..91eed4eeeed 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -76,27 +76,54 @@ static int64_t anv_get_relative_timeout(uint64_t abs_timeout)
    return rel_timeout;
 }
 
-VkResult
-anv_queue_execbuf(struct anv_queue *queue,
-                  struct drm_i915_gem_execbuffer2 *execbuf,
-                  struct anv_bo **execbuf_bos)
+static struct anv_semaphore *anv_semaphore_ref(struct anv_semaphore *semaphore);
+static void anv_semaphore_unref(struct anv_device *device, struct anv_semaphore *semaphore);
+static void anv_semaphore_impl_cleanup(struct anv_device *device,
+                                       struct anv_semaphore_impl *impl);
+
+static void
+anv_queue_submit_free(struct anv_device *device,
+                      struct anv_queue_submit *submit)
 {
-   struct anv_device *device = queue->device;
-   int ret = device->no_hw ? 0 : anv_gem_execbuffer(device, execbuf);
-   if (ret != 0) {
-      /* We don't know the real error. */
-      return anv_queue_set_lost(queue, "execbuf2 failed: %m");
-   }
+   const VkAllocationCallbacks *alloc = submit->alloc;
+
+   for (uint32_t i = 0; i < submit->temporary_semaphore_count; i++)
+      anv_semaphore_impl_cleanup(device, &submit->temporary_semaphores[i]);
+   for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
+      anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
+   /* Execbuf does not consume the in_fence. It's our job to close it. */
+   if (submit->in_fence != -1)
+      close(submit->in_fence);
+   if (submit->out_fence != -1)
+      close(submit->out_fence);
+   vk_free(alloc, submit->fences);
+   vk_free(alloc, submit->temporary_semaphores);
+   vk_free(alloc, submit->fence_bos);
+   vk_free(alloc, submit);
+}
 
-   struct drm_i915_gem_exec_object2 *objects =
-      (void *)(uintptr_t)execbuf->buffers_ptr;
-   for (uint32_t k = 0; k < execbuf->buffer_count; k++) {
-      if (execbuf_bos[k]->flags & EXEC_OBJECT_PINNED)
-         assert(execbuf_bos[k]->offset == objects[k].offset);
-      execbuf_bos[k]->offset = objects[k].offset;
+static VkResult
+_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
+{
+   struct anv_queue_submit *submit = *_submit;
+   VkResult result = anv_queue_execbuf(queue, submit);
+
+   if (result == VK_SUCCESS) {
+      /* Update signaled semaphores backed by syncfd. */
+      for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
+         struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
+         /* Out fences can't have temporary state because that would imply
+          * that we imported a sync file and are trying to signal it.
+          */
+         assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
+         struct anv_semaphore_impl *impl = &semaphore->permanent;
+
+         assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
+         impl->fd = dup(submit->out_fence);
+      }
    }
 
-   return VK_SUCCESS;
+   return result;
 }
 
 VkResult
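Together with the append helpers added below, this gives submissions a simple object lifecycle: allocate an anv_queue_submit, attach the things to wait on or signal, execute, then free. A condensed, hypothetical sketch of that flow (error paths trimmed; the helper names are the ones this patch introduces further down):

   /* Hypothetical sketch of the new submit lifecycle; not driver code. */
   VkResult submit_wait_then_signal_bo(struct anv_queue *queue,
                                       struct anv_bo *wait_bo,
                                       struct anv_bo *signal_bo)
   {
      struct anv_queue_submit *submit =
         anv_queue_submit_alloc(queue->device);
      if (!submit)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      /* Wait on one BO, signal another, then execute and clean up. */
      VkResult result = anv_queue_submit_add_fence_bo(submit, wait_bo,
                                                      false /* signal */);
      if (result == VK_SUCCESS)
         result = anv_queue_submit_add_fence_bo(submit, signal_bo,
                                                true /* signal */);
      if (result == VK_SUCCESS)
         result = anv_queue_execbuf(queue, submit);

      anv_queue_submit_free(queue->device, submit);
      return result;
   }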
@@ -114,69 +141,426 @@ anv_queue_finish(struct anv_queue *queue)
 {
 }
 
+static VkResult
+anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
+                              struct anv_bo *bo,
+                              bool signal)
+{
+   if (submit->fence_bo_count >= submit->fence_bo_array_length) {
+      uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
+
+      submit->fence_bos =
+         vk_realloc(submit->alloc,
+                    submit->fence_bos, new_len * sizeof(*submit->fence_bos),
+                    8, submit->alloc_scope);
+      if (submit->fence_bos == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->fence_bo_array_length = new_len;
+   }
+
+   /* Take advantage of the fact that anv_bo structs are allocated with
+    * 8-byte alignment, so we can use the lowest bit to store whether this
+    * is a BO we need to signal.
+    */
+   submit->fence_bos[submit->fence_bo_count++] = anv_pack_ptr(bo, 1, signal);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
+                             struct anv_device *device,
+                             uint32_t handle, uint32_t flags)
+{
+   assert(flags != 0);
+
+   if (submit->fence_count >= submit->fence_array_length) {
+      uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
+
+      submit->fences =
+         vk_realloc(submit->alloc,
+                    submit->fences, new_len * sizeof(*submit->fences),
+                    8, submit->alloc_scope);
+      if (submit->fences == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->fence_array_length = new_len;
+   }
+
+   submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
+      .handle = handle,
+      .flags = flags,
+   };
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+anv_queue_submit_add_sync_fd_fence(struct anv_queue_submit *submit,
+                                   struct anv_semaphore *semaphore)
+{
+   if (submit->sync_fd_semaphore_count >= submit->sync_fd_semaphore_array_length) {
+      uint32_t new_len = MAX2(submit->sync_fd_semaphore_array_length * 2, 64);
+      struct anv_semaphore **new_semaphores =
+         vk_realloc(submit->alloc, submit->sync_fd_semaphores,
+                    new_len * sizeof(*submit->sync_fd_semaphores), 8,
+                    submit->alloc_scope);
+      if (new_semaphores == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->sync_fd_semaphores = new_semaphores;
+      submit->sync_fd_semaphore_array_length = new_len;
+   }
+
+   submit->sync_fd_semaphores[submit->sync_fd_semaphore_count++] =
+      anv_semaphore_ref(semaphore);
+   submit->need_out_fence = true;
+
+   return VK_SUCCESS;
+}
+
+static struct anv_queue_submit *
+anv_queue_submit_alloc(struct anv_device *device)
+{
+   const VkAllocationCallbacks *alloc = &device->alloc;
+   VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND;
+
+   struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
+   if (!submit)
+      return NULL;
+
+   submit->alloc = alloc;
+   submit->alloc_scope = alloc_scope;
+   submit->in_fence = -1;
+   submit->out_fence = -1;
+
+   return submit;
+}
+
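All three append helpers above share one amortized-doubling shape: grow the array geometrically (with a floor of 64 entries) and, on allocation failure, leave the old array intact so cleanup still works. The same pattern in a generic, illustrative form (plain libc, not driver code):

   /* Generic, illustrative version of the growth pattern above. */
   #include <stdint.h>
   #include <stdlib.h>

   static int append_u32(uint32_t **array, uint32_t *count,
                         uint32_t *length, uint32_t value)
   {
      if (*count >= *length) {
         uint32_t new_len = *length * 2 > 64 ? *length * 2 : 64;
         uint32_t *new_array = realloc(*array, new_len * sizeof(**array));
         if (new_array == NULL)
            return -1;  /* old array is still valid, nothing is lost */
         *array = new_array;
         *length = new_len;
      }
      (*array)[(*count)++] = value;
      return 0;
   }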
 VkResult
 anv_queue_submit_simple_batch(struct anv_queue *queue,
                               struct anv_batch *batch)
 {
    struct anv_device *device = queue->device;
-   struct drm_i915_gem_execbuffer2 execbuf;
-   struct drm_i915_gem_exec_object2 exec2_objects[1];
-   struct anv_bo *bo;
-   VkResult result = VK_SUCCESS;
-   uint32_t size;
+   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
+   if (!submit)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   bool has_syncobj_wait = device->instance->physicalDevice.has_syncobj_wait;
+   VkResult result;
+   uint32_t syncobj;
+   struct anv_bo *batch_bo, *sync_bo;
+
+   if (has_syncobj_wait) {
+      syncobj = anv_gem_syncobj_create(device, 0);
+      if (!syncobj) {
+         result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         goto err_free_submit;
+      }
+
+      result = anv_queue_submit_add_syncobj(submit, device, syncobj,
+                                            I915_EXEC_FENCE_SIGNAL);
+   } else {
+      result = anv_device_alloc_bo(device, 4096,
+                                   ANV_BO_ALLOC_EXTERNAL |
+                                   ANV_BO_ALLOC_IMPLICIT_SYNC,
+                                   &sync_bo);
+      if (result != VK_SUCCESS)
+         goto err_free_submit;
+
+      result = anv_queue_submit_add_fence_bo(submit, sync_bo, true /* signal */);
+   }
+
+   if (result != VK_SUCCESS)
+      goto err_destroy_sync_primitive;
 
    if (batch) {
-      /* Kernel driver requires 8 byte aligned batch length */
-      size = align_u32(batch->next - batch->start, 8);
-      result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &bo);
+      uint32_t size = align_u32(batch->next - batch->start, 8);
+      result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &batch_bo);
       if (result != VK_SUCCESS)
-         return result;
+         goto err_destroy_sync_primitive;
 
-      memcpy(bo->map, batch->start, size);
+      memcpy(batch_bo->map, batch->start, size);
       if (!device->info.has_llc)
-         gen_flush_range(bo->map, size);
-   } else {
-      size = device->trivial_batch_bo->size;
-      bo = device->trivial_batch_bo;
+         gen_flush_range(batch_bo->map, size);
+
+      submit->simple_bo = batch_bo;
+      submit->simple_bo_size = size;
+   }
+
+   result = _anv_queue_submit(queue, &submit);
+
+   if (result == VK_SUCCESS) {
+      if (has_syncobj_wait) {
+         if (anv_gem_syncobj_wait(device, &syncobj, 1,
+                                  anv_get_absolute_timeout(INT64_MAX), true))
+            result = anv_device_set_lost(device, "anv_gem_syncobj_wait failed: %m");
+         anv_gem_syncobj_destroy(device, syncobj);
+      } else {
+         result = anv_device_wait(device, sync_bo,
+                                  anv_get_relative_timeout(INT64_MAX));
+         anv_device_release_bo(device, sync_bo);
+      }
+   }
+
+   if (batch)
+      anv_bo_pool_free(&device->batch_bo_pool, batch_bo);
+
+   if (submit)
+      anv_queue_submit_free(device, submit);
+
+   return result;
+
+ err_destroy_sync_primitive:
+   if (has_syncobj_wait)
+      anv_gem_syncobj_destroy(device, syncobj);
+   else
+      anv_device_release_bo(device, sync_bo);
+ err_free_submit:
+   if (submit)
+      anv_queue_submit_free(device, submit);
+
+   return result;
+}
 
+/* Transfer ownership of temporary semaphores from the VkSemaphore object to
+ * the anv_queue_submit object. Those temporary semaphores are then freed in
+ * anv_queue_submit_free() once the driver is finished with them.
+ */
+static VkResult
+maybe_transfer_temporary_semaphore(struct anv_queue_submit *submit,
+                                   struct anv_semaphore *semaphore,
+                                   struct anv_semaphore_impl **out_impl)
+{
+   struct anv_semaphore_impl *impl = &semaphore->temporary;
+
+   if (impl->type == ANV_SEMAPHORE_TYPE_NONE) {
+      *out_impl = &semaphore->permanent;
+      return VK_SUCCESS;
    }
 
-   exec2_objects[0].handle = bo->gem_handle;
-   exec2_objects[0].relocation_count = 0;
-   exec2_objects[0].relocs_ptr = 0;
-   exec2_objects[0].alignment = 0;
-   exec2_objects[0].offset = bo->offset;
-   exec2_objects[0].flags = bo->flags;
-   exec2_objects[0].rsvd1 = 0;
-   exec2_objects[0].rsvd2 = 0;
-
-   execbuf.buffers_ptr = (uintptr_t) exec2_objects;
-   execbuf.buffer_count = 1;
-   execbuf.batch_start_offset = 0;
-   execbuf.batch_len = size;
-   execbuf.cliprects_ptr = 0;
-   execbuf.num_cliprects = 0;
-   execbuf.DR1 = 0;
-   execbuf.DR4 = 0;
-
-   execbuf.flags =
-      I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
-   execbuf.rsvd1 = device->context_id;
-   execbuf.rsvd2 = 0;
-
-   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
-      gen_print_batch(&device->decoder_ctx, bo->map,
-                      bo->size, bo->offset, false);
+   /*
+    * There is a requirement to reset semaphores to their permanent state
+    * after submission. From the Vulkan 1.0.53 spec:
+    *
+    *    "If the import is temporary, the implementation must restore the
+    *    semaphore to its prior permanent state after submitting the next
+    *    semaphore wait operation."
+    *
+    * In the case where we defer the actual submission to a thread because of
+    * the wait-before-submit behavior required for timeline semaphores, we
+    * need to make copies of the temporary syncobj to ensure they stay alive
+    * until we do the actual execbuffer ioctl.
+    */
+   if (submit->temporary_semaphore_count >= submit->temporary_semaphore_array_length) {
+      uint32_t new_len = MAX2(submit->temporary_semaphore_array_length * 2, 8);
+      /* Make sure that if the realloc fails, we still have the old semaphore
+       * array around to properly clean things up on failure.
+       */
+      struct anv_semaphore_impl *new_array =
+         vk_realloc(submit->alloc,
+                    submit->temporary_semaphores,
+                    new_len * sizeof(*submit->temporary_semaphores),
+                    8, submit->alloc_scope);
+      if (new_array == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->temporary_semaphores = new_array;
+      submit->temporary_semaphore_array_length = new_len;
    }
 
-   result = anv_queue_execbuf(queue, &execbuf, &bo);
+   /* Copy anv_semaphore_impl into anv_queue_submit. */
+   submit->temporary_semaphores[submit->temporary_semaphore_count++] = *impl;
+   *out_impl = &submit->temporary_semaphores[submit->temporary_semaphore_count - 1];
+
+   /* Clear the incoming semaphore */
+   impl->type = ANV_SEMAPHORE_TYPE_NONE;
+
+   return VK_SUCCESS;
+}
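A hypothetical caller, to make the resulting ownership rules concrete (not part of the commit):

   /* Hypothetical usage of the transfer helper above. */
   static VkResult use_wait_semaphore(struct anv_queue_submit *submit,
                                      struct anv_semaphore *semaphore)
   {
      struct anv_semaphore_impl *impl;
      VkResult result =
         maybe_transfer_temporary_semaphore(submit, semaphore, &impl);
      if (result != VK_SUCCESS)
         return result;

      /* From here on, impl stays valid for the lifetime of the submit: it
       * either aliases semaphore->permanent or is a copy now owned by the
       * submit (and cleaned up in anv_queue_submit_free()), and
       * semaphore->temporary has been reset to ANV_SEMAPHORE_TYPE_NONE. */
      return VK_SUCCESS;
   }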
+
+static VkResult
+anv_queue_submit(struct anv_queue *queue,
+                 struct anv_cmd_buffer *cmd_buffer,
+                 const VkSemaphore *in_semaphores,
+                 uint32_t num_in_semaphores,
+                 const VkSemaphore *out_semaphores,
+                 uint32_t num_out_semaphores,
+                 VkFence _fence)
+{
+   ANV_FROM_HANDLE(anv_fence, fence, _fence);
+   struct anv_device *device = queue->device;
+   UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
+   if (!submit)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   submit->cmd_buffer = cmd_buffer;
+
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t i = 0; i < num_in_semaphores; i++) {
+      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
+      struct anv_semaphore_impl *impl;
+
+      result = maybe_transfer_temporary_semaphore(submit, semaphore, &impl);
+      if (result != VK_SUCCESS)
+         goto error;
+
+      switch (impl->type) {
+      case ANV_SEMAPHORE_TYPE_BO:
+         assert(!pdevice->has_syncobj);
+         result = anv_queue_submit_add_fence_bo(submit, impl->bo, false /* signal */);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
+      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
+         assert(!pdevice->has_syncobj);
+         if (submit->in_fence == -1) {
+            submit->in_fence = impl->fd;
+            if (submit->in_fence == -1) {
+               result = vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+               goto error;
+            }
+            impl->fd = -1;
+         } else {
+            int merge = anv_gem_sync_file_merge(device, submit->in_fence, impl->fd);
+            if (merge == -1) {
+               result = vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+               goto error;
+            }
+            close(impl->fd);
+            close(submit->in_fence);
+            impl->fd = -1;
+            submit->in_fence = merge;
+         }
+         break;
+
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
+         result = anv_queue_submit_add_syncobj(submit, device,
+                                               impl->syncobj,
+                                               I915_EXEC_FENCE_WAIT);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   for (uint32_t i = 0; i < num_out_semaphores; i++) {
+      ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
+
+      /* Under most circumstances, out fences won't be temporary. However,
+       * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
+       *
+       *    "If the import is temporary, the implementation must restore the
+       *    semaphore to its prior permanent state after submitting the next
+       *    semaphore wait operation."
+       *
+       * The spec says nothing whatsoever about signal operations on
+       * temporarily imported semaphores so it appears they are allowed.
+       * There are also CTS tests that require this to work.
+       */
+      struct anv_semaphore_impl *impl =
+         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+         &semaphore->temporary : &semaphore->permanent;
+
+      switch (impl->type) {
+      case ANV_SEMAPHORE_TYPE_BO:
+         assert(!pdevice->has_syncobj);
+         result = anv_queue_submit_add_fence_bo(submit, impl->bo, true /* signal */);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
+      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
+         assert(!pdevice->has_syncobj);
+         result = anv_queue_submit_add_sync_fd_fence(submit, semaphore);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
+         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+                                               I915_EXEC_FENCE_SIGNAL);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   if (fence) {
+      /* Under most circumstances, out fences won't be temporary. However,
+       * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
+       *
+       *    "If the import is temporary, the implementation must restore the
+       *    semaphore to its prior permanent state after submitting the next
+       *    semaphore wait operation."
+       *
+       * The spec says nothing whatsoever about signal operations on
+       * temporarily imported semaphores so it appears they are allowed.
+       * There are also CTS tests that require this to work.
+       */
+      struct anv_fence_impl *impl =
+         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+         &fence->temporary : &fence->permanent;
+
+      switch (impl->type) {
+      case ANV_FENCE_TYPE_BO:
+         result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
+      case ANV_FENCE_TYPE_SYNCOBJ: {
+         /*
+          * For the same reason we reset the signaled binary syncobj above,
+          * also reset the fence's syncobj so that they don't contain a
+          * signaled dma-fence.
+          */
+         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+                                               I915_EXEC_FENCE_SIGNAL);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+      }
+
+      default:
+         unreachable("Invalid fence type");
+      }
+   }
+
+   result = _anv_queue_submit(queue, &submit);
    if (result != VK_SUCCESS)
-      goto fail;
+      goto error;
 
-   result = anv_device_wait(device, bo, INT64_MAX);
+   if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
+      /* BO fences can't be shared, so they can't be temporary. */
+      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
 
- fail:
-   if (batch)
-      anv_bo_pool_free(&device->batch_bo_pool, bo);
+      /* Once the execbuf has returned, we need to set the fence state to
+       * SUBMITTED. We can't do this before calling execbuf because
+       * anv_GetFenceStatus does take the global device lock before checking
+       * fence->state.
+       *
+       * We set the fence state to SUBMITTED regardless of whether or not the
+       * execbuf succeeds because we need to ensure that vkWaitForFences() and
+       * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
+       * VK_SUCCESS) in a finite amount of time even if execbuf fails.
+       */
+      fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
+   }
+
+ error:
+   if (submit)
+      anv_queue_submit_free(device, submit);
 
    return result;
 }
 
@@ -188,7 +572,6 @@ VkResult anv_QueueSubmit(
     VkFence                                     fence)
 {
    ANV_FROM_HANDLE(anv_queue, queue, _queue);
-   struct anv_device *device = queue->device;
 
    /* Query for device status prior to submitting. Technically, we don't need
    * to do this. However, if we have a client that's submitting piles of
@@ -197,44 +580,17 @@ VkResult anv_QueueSubmit(
    * the kernel to kick us or we'll have to wait until the client waits on a
    * fence before we actually know whether or not we've hung.
    */
-   VkResult result = anv_device_query_status(device);
+   VkResult result = anv_device_query_status(queue->device);
    if (result != VK_SUCCESS)
       return result;
 
-   /* We lock around QueueSubmit for three main reasons:
-    *
-    * 1) When a block pool is resized, we create a new gem handle with a
-    *    different size and, in the case of surface states, possibly a
-    *    different center offset but we re-use the same anv_bo struct when
-    *    we do so. If this happens in the middle of setting up an execbuf,
-    *    we could end up with our list of BOs out of sync with our list of
-    *    gem handles.
-    *
-    * 2) The algorithm we use for building the list of unique buffers isn't
-    *    thread-safe. While the client is supposed to syncronize around
-    *    QueueSubmit, this would be extremely difficult to debug if it ever
-    *    came up in the wild due to a broken app. It's better to play it
-    *    safe and just lock around QueueSubmit.
-    *
-    * 3) The anv_cmd_buffer_execbuf function may perform relocations in
-    *    userspace. Due to the fact that the surface state buffer is shared
-    *    between batches, we can't afford to have that happen from multiple
-    *    threads at the same time. Even though the user is supposed to
-    *    ensure this doesn't happen, we play it safe as in (2) above.
-    *
-    * Since the only other things that ever take the device lock such as block
-    * pool resize only rarely happen, this will almost never be contended so
-    * taking a lock isn't really an expensive operation in this case.
-    */
-   pthread_mutex_lock(&device->mutex);
-
    if (fence && submitCount == 0) {
       /* If we don't have any command buffers, we need to submit a dummy
        * batch to give GEM something to wait on. We could, potentially,
        * come up with something more efficient but this shouldn't be a
        * common case.
        */
-      result = anv_cmd_buffer_execbuf(queue, NULL, NULL, 0, NULL, 0, fence);
+      result = anv_queue_submit(queue, NULL, NULL, 0, NULL, 0, fence);
       goto out;
    }
 
@@ -248,12 +604,12 @@ VkResult anv_QueueSubmit(
           * come up with something more efficient but this shouldn't be a
           * common case.
           */
-         result = anv_cmd_buffer_execbuf(queue, NULL,
-                                         pSubmits[i].pWaitSemaphores,
-                                         pSubmits[i].waitSemaphoreCount,
-                                         pSubmits[i].pSignalSemaphores,
-                                         pSubmits[i].signalSemaphoreCount,
-                                         submit_fence);
+         result = anv_queue_submit(queue, NULL,
+                                   pSubmits[i].pWaitSemaphores,
+                                   pSubmits[i].waitSemaphoreCount,
+                                   pSubmits[i].pSignalSemaphores,
+                                   pSubmits[i].signalSemaphoreCount,
+                                   submit_fence);
          if (result != VK_SUCCESS)
             goto out;
 
@@ -285,19 +641,17 @@ VkResult anv_QueueSubmit(
          num_out_semaphores = pSubmits[i].signalSemaphoreCount;
       }
 
-      result = anv_cmd_buffer_execbuf(queue, cmd_buffer,
-                                      in_semaphores, num_in_semaphores,
-                                      out_semaphores, num_out_semaphores,
-                                      execbuf_fence);
+      result = anv_queue_submit(queue, cmd_buffer,
+                                in_semaphores, num_in_semaphores,
+                                out_semaphores, num_out_semaphores,
+                                execbuf_fence);
       if (result != VK_SUCCESS)
          goto out;
       }
    }
 
-   pthread_cond_broadcast(&device->queue_submit);
-
 out:
-   if (result != VK_SUCCESS) {
+   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
       /* In the case that something has gone wrong we may end up with an
        * inconsistent state from which it may not be trivial to recover.
        * For example, we might have computed address relocations and
@@ -309,12 +663,14 @@ out:
        * anyway (such us being out of memory) and return
        * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to
       * submit the same job again to this device.
+       *
+       * We skip doing this on VK_ERROR_DEVICE_LOST because
+       * anv_device_set_lost() would have been called already by a callee of
+       * anv_queue_submit().
        */
-      result = anv_device_set_lost(device, "vkQueueSubmit() failed");
+      result = anv_device_set_lost(queue->device, "vkQueueSubmit() failed");
    }
 
-   pthread_mutex_unlock(&device->mutex);
-
    return result;
 }
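The submitCount == 0 path above exists because a fence-only vkQueueSubmit still has to reach the kernel for there to be anything to wait on. From the application side that pattern looks like this (standard Vulkan usage, assuming an already-created fence; not part of the patch):

   #include <vulkan/vulkan.h>

   /* Signal a fence with no work attached; the driver submits a dummy
    * batch internally so vkWaitForFences has something to wait on. */
   VkResult flush_point(VkDevice dev, VkQueue queue, VkFence fence)
   {
      VkResult result = vkQueueSubmit(queue, 0, NULL, fence);
      if (result != VK_SUCCESS)
         return result;
      return vkWaitForFences(dev, 1, &fence, VK_TRUE, UINT64_MAX);
   }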
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 4ff85f7ae66..0a295cebb87 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -240,38 +240,17 @@ static VkResult
 wait_for_available(struct anv_device *device,
                    struct anv_query_pool *pool, uint32_t query)
 {
-   while (true) {
+   uint64_t abs_timeout = anv_get_absolute_timeout(5 * NSEC_PER_SEC);
+
+   while (anv_gettime_ns() < abs_timeout) {
       if (query_is_available(pool, query))
          return VK_SUCCESS;
-
-      int ret = anv_gem_busy(device, pool->bo->gem_handle);
-      if (ret == 1) {
-         /* The BO is still busy, keep waiting. */
-         continue;
-      } else if (ret == -1) {
-         /* We don't know the real error. */
-         return anv_device_set_lost(device, "gem wait failed: %m");
-      } else {
-         assert(ret == 0);
-         /* The BO is no longer busy. */
-         if (query_is_available(pool, query)) {
-            return VK_SUCCESS;
-         } else {
-            VkResult status = anv_device_query_status(device);
-            if (status != VK_SUCCESS)
-               return status;
-
-            /* If we haven't seen availability yet, then we never will. This
-             * can only happen if we have a client error where they call
-             * GetQueryPoolResults on a query that they haven't submitted to
-             * the GPU yet. The spec allows us to do anything in this case,
-             * but returning VK_SUCCESS doesn't seem right and we shouldn't
-             * just keep spinning.
-             */
-            return VK_NOT_READY;
-         }
-      }
+
+      VkResult status = anv_device_query_status(device);
+      if (status != VK_SUCCESS)
+         return status;
    }
+
+   return anv_device_set_lost(device, "query timeout");
 }
 
 VkResult genX(GetQueryPoolResults)(
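The rewritten wait_for_available() swaps the old unbounded busy-wait for a deadline-bounded poll: compute an absolute timeout once, poll availability and device status, and declare the device lost if the deadline passes. The same shape in isolation (illustrative; the clock helper below is a stand-in for the driver's anv_gettime_ns()/anv_get_absolute_timeout()):

   /* Illustrative deadline-bounded polling loop, not driver code. */
   #include <stdbool.h>
   #include <stdint.h>
   #include <time.h>

   static uint64_t now_ns(void)
   {
      struct timespec ts;
      clock_gettime(CLOCK_MONOTONIC, &ts);
      return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
   }

   static bool poll_until(bool (*ready)(void *), void *data,
                          uint64_t timeout_ns)
   {
      uint64_t deadline = now_ns() + timeout_ns;
      while (now_ns() < deadline) {
         if (ready(data))
            return true;   /* condition reached before the deadline */
      }
      return false;         /* caller decides how to report the timeout */
   }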