diff options
author | Lionel Landwerlin <[email protected]> | 2018-10-16 17:44:31 -0500 |
---|---|---|
committer | Lionel Landwerlin <[email protected]> | 2019-11-11 21:46:51 +0000 |
commit | 34f32a6d6648073e2fda3fb78377124fb32bb288 (patch) | |
tree | 685e65044fb17a2e8a57066b6ff3a92800058b8b /src/intel | |
parent | 5a4f15ef2c0e3aeb0f7782296a29b1d6c1cba911 (diff) |
anv: implement VK_KHR_timeline_semaphore
v2: Fix inverted condition in vkGetPhysicalDeviceExternalSemaphoreProperties()
v3: Add anv_timeline_* helpers (Jason)
v4: Avoid variable shadowing (Jason)
Split timeline wait/signal device operations (Jason/Lionel)
v5: s/point/signal_value/ (Jason)
Drop piece of drm-syncobj timeline code (Jason)
v6: Add missing sync_fd semaphore signaling (Jason)
Signed-off-by: Lionel Landwerlin <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/vulkan/anv_batch_chain.c | 56 | ||||
-rw-r--r-- | src/intel/vulkan/anv_device.c | 14 | ||||
-rw-r--r-- | src/intel/vulkan/anv_extensions.py | 1 | ||||
-rw-r--r-- | src/intel/vulkan/anv_private.h | 50 | ||||
-rw-r--r-- | src/intel/vulkan/anv_queue.c | 685 |
5 files changed, 734 insertions, 72 deletions
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 297cb641f47..61ee1417b20 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1579,9 +1579,33 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device) return VK_SUCCESS; } +/* We lock around execbuf for three main reasons: + * + * 1) When a block pool is resized, we create a new gem handle with a + * different size and, in the case of surface states, possibly a different + * center offset but we re-use the same anv_bo struct when we do so. If + * this happens in the middle of setting up an execbuf, we could end up + * with our list of BOs out of sync with our list of gem handles. + * + * 2) The algorithm we use for building the list of unique buffers isn't + * thread-safe. While the client is supposed to syncronize around + * QueueSubmit, this would be extremely difficult to debug if it ever came + * up in the wild due to a broken app. It's better to play it safe and + * just lock around QueueSubmit. + * + * 3) The anv_cmd_buffer_execbuf function may perform relocations in + * userspace. Due to the fact that the surface state buffer is shared + * between batches, we can't afford to have that happen from multiple + * threads at the same time. Even though the user is supposed to ensure + * this doesn't happen, we play it safe as in (2) above. + * + * Since the only other things that ever take the device lock such as block + * pool resize only rarely happen, this will almost never be contended so + * taking a lock isn't really an expensive operation in this case. + */ VkResult -anv_queue_execbuf(struct anv_queue *queue, - struct anv_queue_submit *submit) +anv_queue_execbuf_locked(struct anv_queue *queue, + struct anv_queue_submit *submit) { struct anv_device *device = queue->device; struct anv_execbuf execbuf; @@ -1591,33 +1615,6 @@ anv_queue_execbuf(struct anv_queue *queue, VkResult result; - /* We lock around execbuf for three main reasons: - * - * 1) When a block pool is resized, we create a new gem handle with a - * different size and, in the case of surface states, possibly a - * different center offset but we re-use the same anv_bo struct when - * we do so. If this happens in the middle of setting up an execbuf, - * we could end up with our list of BOs out of sync with our list of - * gem handles. - * - * 2) The algorithm we use for building the list of unique buffers isn't - * thread-safe. While the client is supposed to syncronize around - * QueueSubmit, this would be extremely difficult to debug if it ever - * came up in the wild due to a broken app. It's better to play it - * safe and just lock around QueueSubmit. - * - * 3) The anv_cmd_buffer_execbuf function may perform relocations in - * userspace. Due to the fact that the surface state buffer is shared - * between batches, we can't afford to have that happen from multiple - * threads at the same time. Even though the user is supposed to - * ensure this doesn't happen, we play it safe as in (2) above. - * - * Since the only other things that ever take the device lock such as block - * pool resize only rarely happen, this will almost never be contended so - * taking a lock isn't really an expensive operation in this case. - */ - pthread_mutex_lock(&device->mutex); - for (uint32_t i = 0; i < submit->fence_bo_count; i++) { int signaled; struct anv_bo *bo = anv_unpack_ptr(submit->fence_bos[i], 1, &signaled); @@ -1705,7 +1702,6 @@ anv_queue_execbuf(struct anv_queue *queue, error: pthread_cond_broadcast(&device->queue_submit); - pthread_mutex_unlock(&queue->device->mutex); anv_execbuf_finish(&execbuf); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 61448d8dbdd..74cefbffbf1 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1245,6 +1245,13 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR: { + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features = + (VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext; + features->timelineSemaphore = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext; features->variablePointersStorageBuffer = true; @@ -1797,6 +1804,13 @@ void anv_GetPhysicalDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR: { + VkPhysicalDeviceTimelineSemaphorePropertiesKHR *props = + (VkPhysicalDeviceTimelineSemaphorePropertiesKHR *) ext; + props->maxTimelineSemaphoreValueDifference = UINT64_MAX; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: { VkPhysicalDeviceTransformFeedbackPropertiesEXT *props = (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext; diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index 1e0ab5f7142..2be33d4b694 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -119,6 +119,7 @@ EXTENSIONS = [ Extension('VK_KHR_surface', 25, 'ANV_HAS_SURFACE'), Extension('VK_KHR_surface_protected_capabilities', 1, 'ANV_HAS_SURFACE'), Extension('VK_KHR_swapchain', 70, 'ANV_HAS_SURFACE'), + Extension('VK_KHR_timeline_semaphore', 1, True), Extension('VK_KHR_uniform_buffer_standard_layout', 1, True), Extension('VK_KHR_variable_pointers', 1, True), Extension('VK_KHR_vulkan_memory_model', 3, True), diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3174e5ea236..b1586f4c32b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1080,6 +1080,17 @@ struct anv_queue_submit { uint32_t sync_fd_semaphore_count; uint32_t sync_fd_semaphore_array_length; + /* Allocated only with non shareable timelines. */ + struct anv_timeline ** wait_timelines; + uint32_t wait_timeline_count; + uint32_t wait_timeline_array_length; + uint64_t * wait_timeline_values; + + struct anv_timeline ** signal_timelines; + uint32_t signal_timeline_count; + uint32_t signal_timeline_array_length; + uint64_t * signal_timeline_values; + int in_fence; bool need_out_fence; int out_fence; @@ -1105,6 +1116,11 @@ struct anv_queue { struct anv_device * device; + /* + * A list of struct anv_queue_submit to be submitted to i915. + */ + struct list_head queued_submits; + VkDeviceQueueCreateFlags flags; }; @@ -1368,7 +1384,7 @@ VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo, VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue); void anv_queue_finish(struct anv_queue *queue); -VkResult anv_queue_execbuf(struct anv_queue *queue, struct anv_queue_submit *submit); +VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit); VkResult anv_queue_submit_simple_batch(struct anv_queue *queue, struct anv_batch *batch); @@ -2828,6 +2844,32 @@ enum anv_semaphore_type { ANV_SEMAPHORE_TYPE_BO, ANV_SEMAPHORE_TYPE_SYNC_FILE, ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ, + ANV_SEMAPHORE_TYPE_TIMELINE, +}; + +struct anv_timeline_point { + struct list_head link; + + uint64_t serial; + + /* Number of waiter on this point, when > 0 the point should not be garbage + * collected. + */ + int waiting; + + /* BO used for synchronization. */ + struct anv_bo *bo; +}; + +struct anv_timeline { + pthread_mutex_t mutex; + pthread_cond_t cond; + + uint64_t highest_past; + uint64_t highest_pending; + + struct list_head points; + struct list_head free_points; }; struct anv_semaphore_impl { @@ -2852,6 +2894,12 @@ struct anv_semaphore_impl { * import so we don't need to bother with a userspace cache. */ uint32_t syncobj; + + /* Non shareable timeline semaphore + * + * Used when kernel don't have support for timeline semaphores. + */ + struct anv_timeline timeline; }; }; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index dc476470b75..3dedbcade46 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -25,6 +25,7 @@ * This file implements VkQueue, VkFence, and VkSemaphore */ +#include <errno.h> #include <fcntl.h> #include <unistd.h> @@ -98,17 +99,191 @@ anv_queue_submit_free(struct anv_device *device, close(submit->out_fence); vk_free(alloc, submit->fences); vk_free(alloc, submit->temporary_semaphores); + vk_free(alloc, submit->wait_timelines); + vk_free(alloc, submit->wait_timeline_values); + vk_free(alloc, submit->signal_timelines); + vk_free(alloc, submit->signal_timeline_values); vk_free(alloc, submit->fence_bos); vk_free(alloc, submit); } +static bool +anv_queue_submit_ready_locked(struct anv_queue_submit *submit) +{ + for (uint32_t i = 0; i < submit->wait_timeline_count; i++) { + if (submit->wait_timeline_values[i] > submit->wait_timelines[i]->highest_pending) + return false; + } + + return true; +} + static VkResult -_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit) +anv_timeline_init(struct anv_device *device, + struct anv_timeline *timeline, + uint64_t initial_value) { - struct anv_queue_submit *submit = *_submit; - VkResult result = anv_queue_execbuf(queue, submit); + timeline->highest_past = + timeline->highest_pending = initial_value; + list_inithead(&timeline->points); + list_inithead(&timeline->free_points); + + return VK_SUCCESS; +} + +static void +anv_timeline_finish(struct anv_device *device, + struct anv_timeline *timeline) +{ + list_for_each_entry_safe(struct anv_timeline_point, point, + &timeline->free_points, link) { + list_del(&point->link); + anv_device_release_bo(device, point->bo); + vk_free(&device->alloc, point); + } + list_for_each_entry_safe(struct anv_timeline_point, point, + &timeline->points, link) { + list_del(&point->link); + anv_device_release_bo(device, point->bo); + vk_free(&device->alloc, point); + } +} + +static VkResult +anv_timeline_add_point_locked(struct anv_device *device, + struct anv_timeline *timeline, + uint64_t value, + struct anv_timeline_point **point) +{ + VkResult result = VK_SUCCESS; + + if (list_is_empty(&timeline->free_points)) { + *point = + vk_zalloc(&device->alloc, sizeof(**point), + 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!(*point)) + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (result == VK_SUCCESS) { + result = anv_device_alloc_bo(device, 4096, + ANV_BO_ALLOC_EXTERNAL | + ANV_BO_ALLOC_IMPLICIT_SYNC, + &(*point)->bo); + if (result != VK_SUCCESS) + vk_free(&device->alloc, *point); + } + } else { + *point = list_first_entry(&timeline->free_points, + struct anv_timeline_point, link); + list_del(&(*point)->link); + } + + if (result == VK_SUCCESS) { + (*point)->serial = value; + list_addtail(&(*point)->link, &timeline->points); + } + + return result; +} + +static VkResult +anv_timeline_gc_locked(struct anv_device *device, + struct anv_timeline *timeline) +{ + list_for_each_entry_safe(struct anv_timeline_point, point, + &timeline->points, link) { + /* timeline->higest_pending is only incremented once submission has + * happened. If this point has a greater serial, it means the point + * hasn't been submitted yet. + */ + if (point->serial > timeline->highest_pending) + return VK_SUCCESS; + + /* If someone is waiting on this time point, consider it busy and don't + * try to recycle it. There's a slim possibility that it's no longer + * busy by the time we look at it but we would be recycling it out from + * under a waiter and that can lead to weird races. + * + * We walk the list in-order so if this time point is still busy so is + * every following time point + */ + assert(point->waiting >= 0); + if (point->waiting) + return VK_SUCCESS; + + /* Garbage collect any signaled point. */ + VkResult result = anv_device_bo_busy(device, point->bo); + if (result == VK_NOT_READY) { + /* We walk the list in-order so if this time point is still busy so + * is every following time point + */ + return VK_SUCCESS; + } else if (result != VK_SUCCESS) { + return result; + } + + assert(timeline->highest_past < point->serial); + timeline->highest_past = point->serial; + + list_del(&point->link); + list_add(&point->link, &timeline->free_points); + } + + return VK_SUCCESS; +} + +static VkResult anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit, + struct anv_bo *bo, + bool signal); + +static VkResult +anv_queue_submit_timeline_locked(struct anv_queue *queue, + struct anv_queue_submit *submit) +{ + VkResult result; + + for (uint32_t i = 0; i < submit->wait_timeline_count; i++) { + struct anv_timeline *timeline = submit->wait_timelines[i]; + uint64_t wait_value = submit->wait_timeline_values[i]; + + if (timeline->highest_past >= wait_value) + continue; + + list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) { + if (point->serial < wait_value) + continue; + result = anv_queue_submit_add_fence_bo(submit, point->bo, false); + if (result != VK_SUCCESS) + return result; + break; + } + } + for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { + struct anv_timeline *timeline = submit->signal_timelines[i]; + uint64_t signal_value = submit->signal_timeline_values[i]; + struct anv_timeline_point *point; + + result = anv_timeline_add_point_locked(queue->device, timeline, + signal_value, &point); + if (result != VK_SUCCESS) + return result; + + result = anv_queue_submit_add_fence_bo(submit, point->bo, true); + if (result != VK_SUCCESS) + return result; + } + + result = anv_queue_execbuf_locked(queue, submit); if (result == VK_SUCCESS) { + /* Update the pending values in the timeline objects. */ + for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { + struct anv_timeline *timeline = submit->signal_timelines[i]; + uint64_t signal_value = submit->signal_timeline_values[i]; + + assert(signal_value > timeline->highest_pending); + timeline->highest_pending = signal_value; + } + /* Update signaled semaphores backed by syncfd. */ for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) { struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i]; @@ -121,11 +296,74 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit) assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE); impl->fd = dup(submit->out_fence); } + } else { + /* Unblock any waiter by signaling the points, the application will get + * a device lost error code. + */ + for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { + struct anv_timeline *timeline = submit->signal_timelines[i]; + uint64_t signal_value = submit->signal_timeline_values[i]; + + assert(signal_value > timeline->highest_pending); + timeline->highest_past = timeline->highest_pending = signal_value; + } } return result; } +static VkResult +anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance) +{ + VkResult result = VK_SUCCESS; + + /* Go through all the queued submissions and submit then until we find one + * that's waiting on a point that hasn't materialized yet. + */ + list_for_each_entry_safe(struct anv_queue_submit, submit, + &queue->queued_submits, link) { + if (!anv_queue_submit_ready_locked(submit)) + break; + + (*advance)++; + list_del(&submit->link); + + result = anv_queue_submit_timeline_locked(queue, submit); + + anv_queue_submit_free(queue->device, submit); + + if (result != VK_SUCCESS) + break; + } + + return result; +} + +static VkResult +anv_device_submit_deferred_locked(struct anv_device *device) +{ + uint32_t advance = 0; + return anv_queue_submit_deferred_locked(&device->queue, &advance); +} + +static VkResult +_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit) +{ + struct anv_queue_submit *submit = *_submit; + + /* Wait before signal behavior means we might keep alive the + * anv_queue_submit object a bit longer, so transfer the ownership to the + * anv_queue. + */ + *_submit = NULL; + + pthread_mutex_lock(&queue->device->mutex); + list_addtail(&submit->link, &queue->queued_submits); + VkResult result = anv_device_submit_deferred_locked(queue->device); + pthread_mutex_unlock(&queue->device->mutex); + return result; +} + VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue) { @@ -133,6 +371,8 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue) queue->device = device; queue->flags = 0; + list_inithead(&queue->queued_submits); + return VK_SUCCESS; } @@ -218,11 +458,81 @@ anv_queue_submit_add_sync_fd_fence(struct anv_queue_submit *submit, return VK_SUCCESS; } +static VkResult +anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit, + struct anv_device *device, + struct anv_timeline *timeline, + uint64_t value) +{ + if (submit->wait_timeline_count >= submit->wait_timeline_array_length) { + uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64); + + submit->wait_timelines = + vk_realloc(submit->alloc, + submit->wait_timelines, new_len * sizeof(*submit->wait_timelines), + 8, submit->alloc_scope); + if (submit->wait_timelines == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + submit->wait_timeline_values = + vk_realloc(submit->alloc, + submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values), + 8, submit->alloc_scope); + if (submit->wait_timeline_values == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + submit->wait_timeline_array_length = new_len; + } + + submit->wait_timelines[submit->wait_timeline_count] = timeline; + submit->wait_timeline_values[submit->wait_timeline_count] = value; + + submit->wait_timeline_count++; + + return VK_SUCCESS; +} + +static VkResult +anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit, + struct anv_device *device, + struct anv_timeline *timeline, + uint64_t value) +{ + assert(timeline->highest_pending < value); + + if (submit->signal_timeline_count >= submit->signal_timeline_array_length) { + uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64); + + submit->signal_timelines = + vk_realloc(submit->alloc, + submit->signal_timelines, new_len * sizeof(*submit->signal_timelines), + 8, submit->alloc_scope); + if (submit->signal_timelines == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + submit->signal_timeline_values = + vk_realloc(submit->alloc, + submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values), + 8, submit->alloc_scope); + if (submit->signal_timeline_values == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + submit->signal_timeline_array_length = new_len; + } + + submit->signal_timelines[submit->signal_timeline_count] = timeline; + submit->signal_timeline_values[submit->signal_timeline_count] = value; + + submit->signal_timeline_count++; + + return VK_SUCCESS; +} + static struct anv_queue_submit * anv_queue_submit_alloc(struct anv_device *device) { const VkAllocationCallbacks *alloc = &device->alloc; - VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND; + VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope); if (!submit) @@ -338,6 +648,9 @@ maybe_transfer_temporary_semaphore(struct anv_queue_submit *submit, return VK_SUCCESS; } + /* BO backed timeline semaphores cannot be temporary. */ + assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE); + /* * There is a requirement to reset semaphore to their permanent state after * submission. From the Vulkan 1.0.53 spec: @@ -447,6 +760,14 @@ anv_queue_submit(struct anv_queue *queue, break; } + case ANV_SEMAPHORE_TYPE_TIMELINE: + result = anv_queue_submit_add_timeline_wait(submit, device, + &impl->timeline, + in_values ? in_values[i] : 0); + if (result != VK_SUCCESS) + goto error; + break; + default: break; } @@ -493,6 +814,14 @@ anv_queue_submit(struct anv_queue *queue, break; } + case ANV_SEMAPHORE_TYPE_TIMELINE: + result = anv_queue_submit_add_timeline_signal(submit, device, + &impl->timeline, + out_values ? out_values[i] : 0); + if (result != VK_SUCCESS) + goto error; + break; + default: break; } @@ -1309,6 +1638,56 @@ VkResult anv_GetFenceFdKHR( // Queue semaphore functions +static VkSemaphoreTypeKHR +get_semaphore_type(const void *pNext, uint64_t *initial_value) +{ + const VkSemaphoreTypeCreateInfoKHR *type_info = + vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR); + + if (!type_info) + return VK_SEMAPHORE_TYPE_BINARY_KHR; + + if (initial_value) + *initial_value = type_info->initialValue; + return type_info->semaphoreType; +} + +static VkResult +binary_semaphore_create(struct anv_device *device, + struct anv_semaphore_impl *impl, + bool exportable) +{ + if (device->instance->physicalDevice.has_syncobj) { + impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ; + impl->syncobj = anv_gem_syncobj_create(device, 0); + if (!impl->syncobj) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return VK_SUCCESS; + } else { + impl->type = ANV_SEMAPHORE_TYPE_BO; + VkResult result = + anv_device_alloc_bo(device, 4096, + ANV_BO_ALLOC_EXTERNAL | + ANV_BO_ALLOC_IMPLICIT_SYNC, + &impl->bo); + /* If we're going to use this as a fence, we need to *not* have the + * EXEC_OBJECT_ASYNC bit set. + */ + assert(!(impl->bo->flags & EXEC_OBJECT_ASYNC)); + return result; + } +} + +static VkResult +timeline_semaphore_create(struct anv_device *device, + struct anv_semaphore_impl *impl, + uint64_t initial_value) +{ + impl->type = ANV_SEMAPHORE_TYPE_TIMELINE; + anv_timeline_init(device, &impl->timeline, initial_value); + return VK_SUCCESS; +} + VkResult anv_CreateSemaphore( VkDevice _device, const VkSemaphoreCreateInfo* pCreateInfo, @@ -1320,6 +1699,9 @@ VkResult anv_CreateSemaphore( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO); + uint64_t timeline_value = 0; + VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value); + semaphore = vk_alloc(&device->alloc, sizeof(*semaphore), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (semaphore == NULL) @@ -1331,15 +1713,28 @@ VkResult anv_CreateSemaphore( vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); VkExternalSemaphoreHandleTypeFlags handleTypes = export ? export->handleTypes : 0; + VkResult result; if (handleTypes == 0) { - /* The DRM execbuffer ioctl always execute in-oder so long as you stay - * on the same ring. Since we don't expose the blit engine as a DMA - * queue, a dummy no-op semaphore is a perfectly valid implementation. - */ - semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DUMMY; + if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR) + result = binary_semaphore_create(device, &semaphore->permanent, false); + else + result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value); + if (result != VK_SUCCESS) { + vk_free2(&device->alloc, pAllocator, semaphore); + return result; + } } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT); + assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR); + result = binary_semaphore_create(device, &semaphore->permanent, true); + if (result != VK_SUCCESS) { + vk_free2(&device->alloc, pAllocator, semaphore); + return result; + } + } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) { + assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT); + assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR); if (device->instance->physicalDevice.has_syncobj) { semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ; semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0); @@ -1348,27 +1743,6 @@ VkResult anv_CreateSemaphore( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } } else { - semaphore->permanent.type = ANV_SEMAPHORE_TYPE_BO; - VkResult result = anv_device_alloc_bo(device, 4096, - ANV_BO_ALLOC_EXTERNAL | - ANV_BO_ALLOC_IMPLICIT_SYNC, - &semaphore->permanent.bo); - if (result != VK_SUCCESS) { - vk_free2(&device->alloc, pAllocator, semaphore); - return result; - } - - /* If we're going to use this as a fence, we need to *not* have the - * EXEC_OBJECT_ASYNC bit set. - */ - assert(!(semaphore->permanent.bo->flags & EXEC_OBJECT_ASYNC)); - } - } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) { - assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT); - if (device->instance->physicalDevice.has_syncobj) { - semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ; - semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0); - } else { semaphore->permanent.type = ANV_SEMAPHORE_TYPE_SYNC_FILE; semaphore->permanent.fd = -1; } @@ -1403,6 +1777,10 @@ anv_semaphore_impl_cleanup(struct anv_device *device, close(impl->fd); break; + case ANV_SEMAPHORE_TYPE_TIMELINE: + anv_timeline_finish(device, &impl->timeline); + break; + case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: anv_gem_syncobj_destroy(device, impl->syncobj); break; @@ -1464,8 +1842,14 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties( { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + VkSemaphoreTypeKHR sem_type = + get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL); + switch (pExternalSemaphoreInfo->handleType) { case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: + /* Timeline semaphores are not exportable. */ + if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR) + break; pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; pExternalSemaphoreProperties->compatibleHandleTypes = @@ -1476,17 +1860,18 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties( return; case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: - if (device->has_exec_fence) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = - VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; - pExternalSemaphoreProperties->compatibleHandleTypes = - VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; - pExternalSemaphoreProperties->externalSemaphoreFeatures = - VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; - return; - } - break; + if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR) + break; + if (!device->has_exec_fence) + break; + pExternalSemaphoreProperties->exportFromImportedHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->externalSemaphoreFeatures = + VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; + return; default: break; @@ -1684,3 +2069,221 @@ VkResult anv_GetSemaphoreFdKHR( return VK_SUCCESS; } + +VkResult anv_GetSemaphoreCounterValueKHR( + VkDevice _device, + VkSemaphore _semaphore, + uint64_t* pValue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore); + + struct anv_semaphore_impl *impl = + semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ? + &semaphore->temporary : &semaphore->permanent; + + switch (impl->type) { + case ANV_SEMAPHORE_TYPE_TIMELINE: { + pthread_mutex_lock(&device->mutex); + *pValue = impl->timeline.highest_past; + pthread_mutex_unlock(&device->mutex); + return VK_SUCCESS; + } + + default: + unreachable("Invalid semaphore type"); + } +} + +static VkResult +anv_timeline_wait_locked(struct anv_device *device, + struct anv_timeline *timeline, + uint64_t serial, uint64_t abs_timeout_ns) +{ + /* Wait on the queue_submit condition variable until the timeline has a + * time point pending that's at least as high as serial. + */ + while (timeline->highest_pending < serial) { + struct timespec abstime = { + .tv_sec = abs_timeout_ns / NSEC_PER_SEC, + .tv_nsec = abs_timeout_ns % NSEC_PER_SEC, + }; + + int ret = pthread_cond_timedwait(&device->queue_submit, + &device->mutex, &abstime); + assert(ret != EINVAL); + if (anv_gettime_ns() >= abs_timeout_ns && + timeline->highest_pending < serial) + return VK_TIMEOUT; + } + + while (1) { + VkResult result = anv_timeline_gc_locked(device, timeline); + if (result != VK_SUCCESS) + return result; + + if (timeline->highest_past >= serial) + return VK_SUCCESS; + + /* If we got here, our earliest time point has a busy BO */ + struct anv_timeline_point *point = + list_first_entry(&timeline->points, + struct anv_timeline_point, link); + + /* Drop the lock while we wait. */ + point->waiting++; + pthread_mutex_unlock(&device->mutex); + + result = anv_device_wait(device, point->bo, + anv_get_relative_timeout(abs_timeout_ns)); + + /* Pick the mutex back up */ + pthread_mutex_lock(&device->mutex); + point->waiting--; + + /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */ + if (result != VK_SUCCESS) + return result; + } +} + +static VkResult +anv_timelines_wait(struct anv_device *device, + struct anv_timeline **timelines, + const uint64_t *serials, + uint32_t n_timelines, + bool wait_all, + uint64_t abs_timeout_ns) +{ + if (!wait_all && n_timelines > 1) { + while (1) { + VkResult result; + pthread_mutex_lock(&device->mutex); + for (uint32_t i = 0; i < n_timelines; i++) { + result = + anv_timeline_wait_locked(device, timelines[i], serials[i], 0); + if (result != VK_TIMEOUT) + break; + } + + if (result != VK_TIMEOUT || + anv_gettime_ns() >= abs_timeout_ns) { + pthread_mutex_unlock(&device->mutex); + return result; + } + + /* If none of them are ready do a short wait so we don't completely + * spin while holding the lock. The 10us is completely arbitrary. + */ + uint64_t abs_short_wait_ns = + anv_get_absolute_timeout( + MIN2((anv_gettime_ns() - abs_timeout_ns) / 10, 10 * 1000)); + struct timespec abstime = { + .tv_sec = abs_short_wait_ns / NSEC_PER_SEC, + .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC, + }; + ASSERTED int ret; + ret = pthread_cond_timedwait(&device->queue_submit, + &device->mutex, &abstime); + assert(ret != EINVAL); + } + } else { + VkResult result = VK_SUCCESS; + pthread_mutex_lock(&device->mutex); + for (uint32_t i = 0; i < n_timelines; i++) { + result = + anv_timeline_wait_locked(device, timelines[i], + serials[i], abs_timeout_ns); + if (result != VK_SUCCESS) + break; + } + pthread_mutex_unlock(&device->mutex); + return result; + } +} + +VkResult anv_WaitSemaphoresKHR( + VkDevice _device, + const VkSemaphoreWaitInfoKHR* pWaitInfo, + uint64_t timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_timeline **timelines = + vk_alloc(&device->alloc, + pWaitInfo->semaphoreCount * sizeof(*timelines), + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!timelines) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint64_t *values = vk_alloc(&device->alloc, + pWaitInfo->semaphoreCount * sizeof(*values), + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!values) { + vk_free(&device->alloc, timelines); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t handle_count = 0; + for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) { + ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]); + struct anv_semaphore_impl *impl = + semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ? + &semaphore->temporary : &semaphore->permanent; + + assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE); + + if (pWaitInfo->pValues[i] == 0) + continue; + + timelines[handle_count] = &impl->timeline; + values[handle_count] = pWaitInfo->pValues[i]; + handle_count++; + } + + VkResult result = VK_SUCCESS; + if (handle_count > 0) { + result = anv_timelines_wait(device, timelines, values, handle_count, + !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR), + timeout); + } + + vk_free(&device->alloc, timelines); + vk_free(&device->alloc, values); + + return result; +} + +VkResult anv_SignalSemaphoreKHR( + VkDevice _device, + const VkSemaphoreSignalInfoKHR* pSignalInfo) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore); + + struct anv_semaphore_impl *impl = + semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ? + &semaphore->temporary : &semaphore->permanent; + + switch (impl->type) { + case ANV_SEMAPHORE_TYPE_TIMELINE: { + pthread_mutex_lock(&device->mutex); + + VkResult result = anv_timeline_gc_locked(device, &impl->timeline); + + assert(pSignalInfo->value > impl->timeline.highest_pending); + + impl->timeline.highest_pending = impl->timeline.highest_past = pSignalInfo->value; + + if (result == VK_SUCCESS) + result = anv_device_submit_deferred_locked(device); + + pthread_cond_broadcast(&device->queue_submit); + pthread_mutex_unlock(&device->mutex); + return result; + } + + default: + unreachable("Invalid semaphore type"); + } +} |