author     Lionel Landwerlin <[email protected]>  2018-10-16 17:44:31 -0500
committer  Lionel Landwerlin <[email protected]>  2019-11-11 21:46:51 +0000
commit     34f32a6d6648073e2fda3fb78377124fb32bb288 (patch)
tree       685e65044fb17a2e8a57066b6ff3a92800058b8b /src/intel
parent     5a4f15ef2c0e3aeb0f7782296a29b1d6c1cba911 (diff)
anv: implement VK_KHR_timeline_semaphore
v2: Fix inverted condition in vkGetPhysicalDeviceExternalSemaphoreProperties()
v3: Add anv_timeline_* helpers (Jason)
v4: Avoid variable shadowing (Jason)
    Split timeline wait/signal device operations (Jason/Lionel)
v5: s/point/signal_value/ (Jason)
    Drop piece of drm-syncobj timeline code (Jason)
v6: Add missing sync_fd semaphore signaling (Jason)

Signed-off-by: Lionel Landwerlin <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
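For context, a minimal sketch of the application-side API this patch implements. This is illustrative usage of VK_KHR_timeline_semaphore, not code from the patch; the device handle is assumed to come from the usual instance/device setup.

/* Create a timeline semaphore with an initial value of 0. */
VkSemaphoreTypeCreateInfoKHR type_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
   .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
   .initialValue = 0,
};
VkSemaphoreCreateInfo create_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
   .pNext = &type_info,
};
VkSemaphore semaphore;
vkCreateSemaphore(device, &create_info, NULL, &semaphore);

/* Block on the host until the timeline reaches 1. */
uint64_t wait_value = 1;
VkSemaphoreWaitInfoKHR wait_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
   .semaphoreCount = 1,
   .pSemaphores = &semaphore,
   .pValues = &wait_value,
};
vkWaitSemaphoresKHR(device, &wait_info, UINT64_MAX);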
Diffstat (limited to 'src/intel')
-rw-r--r--  src/intel/vulkan/anv_batch_chain.c   56
-rw-r--r--  src/intel/vulkan/anv_device.c        14
-rw-r--r--  src/intel/vulkan/anv_extensions.py    1
-rw-r--r--  src/intel/vulkan/anv_private.h       50
-rw-r--r--  src/intel/vulkan/anv_queue.c        685
5 files changed, 734 insertions, 72 deletions
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 297cb641f47..61ee1417b20 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1579,9 +1579,33 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
return VK_SUCCESS;
}
+/* We lock around execbuf for three main reasons:
+ *
+ * 1) When a block pool is resized, we create a new gem handle with a
+ * different size and, in the case of surface states, possibly a different
+ * center offset but we re-use the same anv_bo struct when we do so. If
+ * this happens in the middle of setting up an execbuf, we could end up
+ * with our list of BOs out of sync with our list of gem handles.
+ *
+ * 2) The algorithm we use for building the list of unique buffers isn't
+ * thread-safe. While the client is supposed to synchronize around
+ * QueueSubmit, this would be extremely difficult to debug if it ever came
+ * up in the wild due to a broken app. It's better to play it safe and
+ * just lock around QueueSubmit.
+ *
+ * 3) The anv_cmd_buffer_execbuf function may perform relocations in
+ * userspace. Because the surface state buffer is shared between
+ * batches, we can't afford to have that happen from multiple
+ * threads at the same time. Even though the user is supposed to ensure
+ * this doesn't happen, we play it safe as in (2) above.
+ *
+ * Since the only other operations that ever take the device lock, such
+ * as block pool resizes, happen rarely, the lock is almost never
+ * contended and taking it isn't an expensive operation in this case.
+ */
VkResult
-anv_queue_execbuf(struct anv_queue *queue,
- struct anv_queue_submit *submit)
+anv_queue_execbuf_locked(struct anv_queue *queue,
+ struct anv_queue_submit *submit)
{
struct anv_device *device = queue->device;
struct anv_execbuf execbuf;
@@ -1591,33 +1615,6 @@ anv_queue_execbuf(struct anv_queue *queue,
VkResult result;
- /* We lock around execbuf for three main reasons:
- *
- * 1) When a block pool is resized, we create a new gem handle with a
- * different size and, in the case of surface states, possibly a
- * different center offset but we re-use the same anv_bo struct when
- * we do so. If this happens in the middle of setting up an execbuf,
- * we could end up with our list of BOs out of sync with our list of
- * gem handles.
- *
- * 2) The algorithm we use for building the list of unique buffers isn't
- * thread-safe. While the client is supposed to syncronize around
- * QueueSubmit, this would be extremely difficult to debug if it ever
- * came up in the wild due to a broken app. It's better to play it
- * safe and just lock around QueueSubmit.
- *
- * 3) The anv_cmd_buffer_execbuf function may perform relocations in
- * userspace. Due to the fact that the surface state buffer is shared
- * between batches, we can't afford to have that happen from multiple
- * threads at the same time. Even though the user is supposed to
- * ensure this doesn't happen, we play it safe as in (2) above.
- *
- * Since the only other things that ever take the device lock such as block
- * pool resize only rarely happen, this will almost never be contended so
- * taking a lock isn't really an expensive operation in this case.
- */
- pthread_mutex_lock(&device->mutex);
-
for (uint32_t i = 0; i < submit->fence_bo_count; i++) {
int signaled;
struct anv_bo *bo = anv_unpack_ptr(submit->fence_bos[i], 1, &signaled);
@@ -1705,7 +1702,6 @@ anv_queue_execbuf(struct anv_queue *queue,
error:
pthread_cond_broadcast(&device->queue_submit);
- pthread_mutex_unlock(&queue->device->mutex);
anv_execbuf_finish(&execbuf);
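The rename above reflects that the device mutex is now taken by the caller rather than inside execbuf itself. A minimal sketch of the resulting calling pattern; the wrapper name below is hypothetical and not part of this patch:

static VkResult
submit_one_example(struct anv_queue *queue, struct anv_queue_submit *submit)
{
   struct anv_device *device = queue->device;

   /* The device mutex now guards execbuf setup from the outside. */
   pthread_mutex_lock(&device->mutex);
   VkResult result = anv_queue_execbuf_locked(queue, submit);
   pthread_mutex_unlock(&device->mutex);

   return result;
}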
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 61448d8dbdd..74cefbffbf1 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1245,6 +1245,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR: {
+ VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
+ (VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
+ features->timelineSemaphore = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
features->variablePointersStorageBuffer = true;
@@ -1797,6 +1804,13 @@ void anv_GetPhysicalDeviceProperties2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR: {
+ VkPhysicalDeviceTimelineSemaphorePropertiesKHR *props =
+ (VkPhysicalDeviceTimelineSemaphorePropertiesKHR *) ext;
+ props->maxTimelineSemaphoreValueDifference = UINT64_MAX;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index 1e0ab5f7142..2be33d4b694 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -119,6 +119,7 @@ EXTENSIONS = [
Extension('VK_KHR_surface', 25, 'ANV_HAS_SURFACE'),
Extension('VK_KHR_surface_protected_capabilities', 1, 'ANV_HAS_SURFACE'),
Extension('VK_KHR_swapchain', 70, 'ANV_HAS_SURFACE'),
+ Extension('VK_KHR_timeline_semaphore', 1, True),
Extension('VK_KHR_uniform_buffer_standard_layout', 1, True),
Extension('VK_KHR_variable_pointers', 1, True),
Extension('VK_KHR_vulkan_memory_model', 3, True),
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 3174e5ea236..b1586f4c32b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1080,6 +1080,17 @@ struct anv_queue_submit {
uint32_t sync_fd_semaphore_count;
uint32_t sync_fd_semaphore_array_length;
+ /* Allocated only with non-shareable timelines. */
+ struct anv_timeline ** wait_timelines;
+ uint32_t wait_timeline_count;
+ uint32_t wait_timeline_array_length;
+ uint64_t * wait_timeline_values;
+
+ struct anv_timeline ** signal_timelines;
+ uint32_t signal_timeline_count;
+ uint32_t signal_timeline_array_length;
+ uint64_t * signal_timeline_values;
+
int in_fence;
bool need_out_fence;
int out_fence;
@@ -1105,6 +1116,11 @@ struct anv_queue {
struct anv_device * device;
+ /*
+ * A list of struct anv_queue_submit to be submitted to i915.
+ */
+ struct list_head queued_submits;
+
VkDeviceQueueCreateFlags flags;
};
@@ -1368,7 +1384,7 @@ VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue);
void anv_queue_finish(struct anv_queue *queue);
-VkResult anv_queue_execbuf(struct anv_queue *queue, struct anv_queue_submit *submit);
+VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
struct anv_batch *batch);
@@ -2828,6 +2844,32 @@ enum anv_semaphore_type {
ANV_SEMAPHORE_TYPE_BO,
ANV_SEMAPHORE_TYPE_SYNC_FILE,
ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
+ ANV_SEMAPHORE_TYPE_TIMELINE,
+};
+
+struct anv_timeline_point {
+ struct list_head link;
+
+ uint64_t serial;
+
+ /* Number of waiters on this point; when > 0 the point should not be
+ * garbage collected.
+ */
+ int waiting;
+
+ /* BO used for synchronization. */
+ struct anv_bo *bo;
+};
+
+struct anv_timeline {
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ uint64_t highest_past;
+ uint64_t highest_pending;
+
+ struct list_head points;
+ struct list_head free_points;
};
struct anv_semaphore_impl {
@@ -2852,6 +2894,12 @@ struct anv_semaphore_impl {
* import so we don't need to bother with a userspace cache.
*/
uint32_t syncobj;
+
+ /* Non-shareable timeline semaphore
+ *
+ * Used when the kernel doesn't have support for timeline semaphores.
+ */
+ struct anv_timeline timeline;
};
};
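The two counters in struct anv_timeline bracket the timeline's progress: highest_pending advances when a signaling submission is queued to the kernel, highest_past once the corresponding BO is observed idle. A hypothetical helper, not in this patch, showing how a completion check reads while the device mutex is held:

/* Hypothetical helper, assuming the device mutex is held. */
static inline bool
anv_timeline_is_past(const struct anv_timeline *timeline, uint64_t value)
{
   /* A point can never complete before its submission is pending. */
   assert(timeline->highest_past <= timeline->highest_pending);
   return timeline->highest_past >= value;
}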
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index dc476470b75..3dedbcade46 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -25,6 +25,7 @@
* This file implements VkQueue, VkFence, and VkSemaphore
*/
+#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
@@ -98,17 +99,191 @@ anv_queue_submit_free(struct anv_device *device,
close(submit->out_fence);
vk_free(alloc, submit->fences);
vk_free(alloc, submit->temporary_semaphores);
+ vk_free(alloc, submit->wait_timelines);
+ vk_free(alloc, submit->wait_timeline_values);
+ vk_free(alloc, submit->signal_timelines);
+ vk_free(alloc, submit->signal_timeline_values);
vk_free(alloc, submit->fence_bos);
vk_free(alloc, submit);
}
+static bool
+anv_queue_submit_ready_locked(struct anv_queue_submit *submit)
+{
+ for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
+ if (submit->wait_timeline_values[i] > submit->wait_timelines[i]->highest_pending)
+ return false;
+ }
+
+ return true;
+}
+
static VkResult
-_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
+anv_timeline_init(struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t initial_value)
{
- struct anv_queue_submit *submit = *_submit;
- VkResult result = anv_queue_execbuf(queue, submit);
+ timeline->highest_past =
+ timeline->highest_pending = initial_value;
+ list_inithead(&timeline->points);
+ list_inithead(&timeline->free_points);
+
+ return VK_SUCCESS;
+}
+
+static void
+anv_timeline_finish(struct anv_device *device,
+ struct anv_timeline *timeline)
+{
+ list_for_each_entry_safe(struct anv_timeline_point, point,
+ &timeline->free_points, link) {
+ list_del(&point->link);
+ anv_device_release_bo(device, point->bo);
+ vk_free(&device->alloc, point);
+ }
+ list_for_each_entry_safe(struct anv_timeline_point, point,
+ &timeline->points, link) {
+ list_del(&point->link);
+ anv_device_release_bo(device, point->bo);
+ vk_free(&device->alloc, point);
+ }
+}
+
+static VkResult
+anv_timeline_add_point_locked(struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t value,
+ struct anv_timeline_point **point)
+{
+ VkResult result = VK_SUCCESS;
+
+ if (list_is_empty(&timeline->free_points)) {
+ *point =
+ vk_zalloc(&device->alloc, sizeof(**point),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!(*point))
+ result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ if (result == VK_SUCCESS) {
+ result = anv_device_alloc_bo(device, 4096,
+ ANV_BO_ALLOC_EXTERNAL |
+ ANV_BO_ALLOC_IMPLICIT_SYNC,
+ &(*point)->bo);
+ if (result != VK_SUCCESS)
+ vk_free(&device->alloc, *point);
+ }
+ } else {
+ *point = list_first_entry(&timeline->free_points,
+ struct anv_timeline_point, link);
+ list_del(&(*point)->link);
+ }
+
+ if (result == VK_SUCCESS) {
+ (*point)->serial = value;
+ list_addtail(&(*point)->link, &timeline->points);
+ }
+
+ return result;
+}
+
+static VkResult
+anv_timeline_gc_locked(struct anv_device *device,
+ struct anv_timeline *timeline)
+{
+ list_for_each_entry_safe(struct anv_timeline_point, point,
+ &timeline->points, link) {
+ /* timeline->highest_pending is only incremented once submission has
+ * happened. If this point has a greater serial, it means the point
+ * hasn't been submitted yet.
+ */
+ if (point->serial > timeline->highest_pending)
+ return VK_SUCCESS;
+
+ /* If someone is waiting on this time point, consider it busy and don't
+ * try to recycle it. There's a slim possibility that it's no longer
+ * busy by the time we look at it, but we would be recycling it out from
+ * under a waiter and that can lead to weird races.
+ *
+ * We walk the list in order, so if this time point is still busy, so is
+ * every following time point.
+ */
+ assert(point->waiting >= 0);
+ if (point->waiting)
+ return VK_SUCCESS;
+
+ /* Garbage collect any signaled point. */
+ VkResult result = anv_device_bo_busy(device, point->bo);
+ if (result == VK_NOT_READY) {
+ /* We walk the list in order, so if this time point is still busy, so
+ * is every following time point.
+ */
+ return VK_SUCCESS;
+ } else if (result != VK_SUCCESS) {
+ return result;
+ }
+
+ assert(timeline->highest_past < point->serial);
+ timeline->highest_past = point->serial;
+
+ list_del(&point->link);
+ list_add(&point->link, &timeline->free_points);
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
+ struct anv_bo *bo,
+ bool signal);
+
+static VkResult
+anv_queue_submit_timeline_locked(struct anv_queue *queue,
+ struct anv_queue_submit *submit)
+{
+ VkResult result;
+
+ for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->wait_timelines[i];
+ uint64_t wait_value = submit->wait_timeline_values[i];
+
+ if (timeline->highest_past >= wait_value)
+ continue;
+
+ list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) {
+ if (point->serial < wait_value)
+ continue;
+ result = anv_queue_submit_add_fence_bo(submit, point->bo, false);
+ if (result != VK_SUCCESS)
+ return result;
+ break;
+ }
+ }
+ for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->signal_timelines[i];
+ uint64_t signal_value = submit->signal_timeline_values[i];
+ struct anv_timeline_point *point;
+
+ result = anv_timeline_add_point_locked(queue->device, timeline,
+ signal_value, &point);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = anv_queue_submit_add_fence_bo(submit, point->bo, true);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ result = anv_queue_execbuf_locked(queue, submit);
if (result == VK_SUCCESS) {
+ /* Update the pending values in the timeline objects. */
+ for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->signal_timelines[i];
+ uint64_t signal_value = submit->signal_timeline_values[i];
+
+ assert(signal_value > timeline->highest_pending);
+ timeline->highest_pending = signal_value;
+ }
+
/* Update signaled semaphores backed by syncfd. */
for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
@@ -121,11 +296,74 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
impl->fd = dup(submit->out_fence);
}
+ } else {
+ /* Unblock any waiters by signaling the points; the application will
+ * get a device lost error code.
+ */
+ for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->signal_timelines[i];
+ uint64_t signal_value = submit->signal_timeline_values[i];
+
+ assert(signal_value > timeline->highest_pending);
+ timeline->highest_past = timeline->highest_pending = signal_value;
+ }
}
return result;
}
+static VkResult
+anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance)
+{
+ VkResult result = VK_SUCCESS;
+
+ /* Go through all the queued submissions and submit them until we find one
+ * that's waiting on a point that hasn't materialized yet.
+ */
+ list_for_each_entry_safe(struct anv_queue_submit, submit,
+ &queue->queued_submits, link) {
+ if (!anv_queue_submit_ready_locked(submit))
+ break;
+
+ (*advance)++;
+ list_del(&submit->link);
+
+ result = anv_queue_submit_timeline_locked(queue, submit);
+
+ anv_queue_submit_free(queue->device, submit);
+
+ if (result != VK_SUCCESS)
+ break;
+ }
+
+ return result;
+}
+
+static VkResult
+anv_device_submit_deferred_locked(struct anv_device *device)
+{
+ uint32_t advance = 0;
+ return anv_queue_submit_deferred_locked(&device->queue, &advance);
+}
+
+static VkResult
+_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
+{
+ struct anv_queue_submit *submit = *_submit;
+
+ /* Wait-before-signal behavior means we might keep the anv_queue_submit
+ * object alive a bit longer, so transfer its ownership to the
+ * anv_queue.
+ */
+ *_submit = NULL;
+
+ pthread_mutex_lock(&queue->device->mutex);
+ list_addtail(&submit->link, &queue->queued_submits);
+ VkResult result = anv_device_submit_deferred_locked(queue->device);
+ pthread_mutex_unlock(&queue->device->mutex);
+ return result;
+}
+
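/* Illustrative sketch, not part of this patch: wait-before-signal at the
 * API level. The submission below waits on a timeline value nothing has
 * signaled yet, so it parks on queued_submits until a later signal makes
 * it ready. `semaphore`, `queue` and `cmd_buffer` are hypothetical
 * handles set up elsewhere.
 */
uint64_t wait_value = 2;
VkTimelineSemaphoreSubmitInfoKHR timeline_info = {
   .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
   .waitSemaphoreValueCount = 1,
   .pWaitSemaphoreValues = &wait_value,
};
VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkSubmitInfo submit_info = {
   .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
   .pNext = &timeline_info,
   .waitSemaphoreCount = 1,
   .pWaitSemaphores = &semaphore,
   .pWaitDstStageMask = &wait_stage,
   .commandBufferCount = 1,
   .pCommandBuffers = &cmd_buffer,
};
vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);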
VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue)
{
@@ -133,6 +371,8 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue)
queue->device = device;
queue->flags = 0;
+ list_inithead(&queue->queued_submits);
+
return VK_SUCCESS;
}
@@ -218,11 +458,81 @@ anv_queue_submit_add_sync_fd_fence(struct anv_queue_submit *submit,
return VK_SUCCESS;
}
+static VkResult
+anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit,
+ struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t value)
+{
+ if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+ uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+
+ submit->wait_timelines =
+ vk_realloc(submit->alloc,
+ submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
+ 8, submit->alloc_scope);
+ if (submit->wait_timelines == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->wait_timeline_values =
+ vk_realloc(submit->alloc,
+ submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+ 8, submit->alloc_scope);
+ if (submit->wait_timeline_values == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->wait_timeline_array_length = new_len;
+ }
+
+ submit->wait_timelines[submit->wait_timeline_count] = timeline;
+ submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+ submit->wait_timeline_count++;
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit,
+ struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t value)
+{
+ assert(timeline->highest_pending < value);
+
+ if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
+ uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
+
+ submit->signal_timelines =
+ vk_realloc(submit->alloc,
+ submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
+ 8, submit->alloc_scope);
+ if (submit->signal_timelines == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->signal_timeline_values =
+ vk_realloc(submit->alloc,
+ submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
+ 8, submit->alloc_scope);
+ if (submit->signal_timeline_values == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->signal_timeline_array_length = new_len;
+ }
+
+ submit->signal_timelines[submit->signal_timeline_count] = timeline;
+ submit->signal_timeline_values[submit->signal_timeline_count] = value;
+
+ submit->signal_timeline_count++;
+
+ return VK_SUCCESS;
+}
+
static struct anv_queue_submit *
anv_queue_submit_alloc(struct anv_device *device)
{
const VkAllocationCallbacks *alloc = &device->alloc;
- VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND;
+ VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
if (!submit)
@@ -338,6 +648,9 @@ maybe_transfer_temporary_semaphore(struct anv_queue_submit *submit,
return VK_SUCCESS;
}
+ /* BO-backed timeline semaphores cannot be temporary. */
+ assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);
+
/*
* There is a requirement to reset semaphore to their permanent state after
* submission. From the Vulkan 1.0.53 spec:
@@ -447,6 +760,14 @@ anv_queue_submit(struct anv_queue *queue,
break;
}
+ case ANV_SEMAPHORE_TYPE_TIMELINE:
+ result = anv_queue_submit_add_timeline_wait(submit, device,
+ &impl->timeline,
+ in_values ? in_values[i] : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
default:
break;
}
@@ -493,6 +814,14 @@ anv_queue_submit(struct anv_queue *queue,
break;
}
+ case ANV_SEMAPHORE_TYPE_TIMELINE:
+ result = anv_queue_submit_add_timeline_signal(submit, device,
+ &impl->timeline,
+ out_values ? out_values[i] : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
default:
break;
}
@@ -1309,6 +1638,56 @@ VkResult anv_GetFenceFdKHR(
// Queue semaphore functions
+static VkSemaphoreTypeKHR
+get_semaphore_type(const void *pNext, uint64_t *initial_value)
+{
+ const VkSemaphoreTypeCreateInfoKHR *type_info =
+ vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
+
+ if (!type_info)
+ return VK_SEMAPHORE_TYPE_BINARY_KHR;
+
+ if (initial_value)
+ *initial_value = type_info->initialValue;
+ return type_info->semaphoreType;
+}
+
+static VkResult
+binary_semaphore_create(struct anv_device *device,
+ struct anv_semaphore_impl *impl,
+ bool exportable)
+{
+ if (device->instance->physicalDevice.has_syncobj) {
+ impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+ impl->syncobj = anv_gem_syncobj_create(device, 0);
+ if (!impl->syncobj)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ return VK_SUCCESS;
+ } else {
+ impl->type = ANV_SEMAPHORE_TYPE_BO;
+ VkResult result =
+ anv_device_alloc_bo(device, 4096,
+ ANV_BO_ALLOC_EXTERNAL |
+ ANV_BO_ALLOC_IMPLICIT_SYNC,
+ &impl->bo);
+ /* If we're going to use this as a fence, we need to *not* have the
+ * EXEC_OBJECT_ASYNC bit set.
+ */
+ assert(!(impl->bo->flags & EXEC_OBJECT_ASYNC));
+ return result;
+ }
+}
+
+static VkResult
+timeline_semaphore_create(struct anv_device *device,
+ struct anv_semaphore_impl *impl,
+ uint64_t initial_value)
+{
+ impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
+ anv_timeline_init(device, &impl->timeline, initial_value);
+ return VK_SUCCESS;
+}
+
VkResult anv_CreateSemaphore(
VkDevice _device,
const VkSemaphoreCreateInfo* pCreateInfo,
@@ -1320,6 +1699,9 @@ VkResult anv_CreateSemaphore(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
+ uint64_t timeline_value = 0;
+ VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
+
semaphore = vk_alloc(&device->alloc, sizeof(*semaphore), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (semaphore == NULL)
@@ -1331,15 +1713,28 @@ VkResult anv_CreateSemaphore(
vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
VkExternalSemaphoreHandleTypeFlags handleTypes =
export ? export->handleTypes : 0;
+ VkResult result;
if (handleTypes == 0) {
- /* The DRM execbuffer ioctl always execute in-oder so long as you stay
- * on the same ring. Since we don't expose the blit engine as a DMA
- * queue, a dummy no-op semaphore is a perfectly valid implementation.
- */
- semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DUMMY;
+ if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+ result = binary_semaphore_create(device, &semaphore->permanent, false);
+ else
+ result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->alloc, pAllocator, semaphore);
+ return result;
+ }
} else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+ assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
+ result = binary_semaphore_create(device, &semaphore->permanent, true);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->alloc, pAllocator, semaphore);
+ return result;
+ }
+ } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+ assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
+ assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
if (device->instance->physicalDevice.has_syncobj) {
semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
@@ -1348,27 +1743,6 @@ VkResult anv_CreateSemaphore(
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
} else {
- semaphore->permanent.type = ANV_SEMAPHORE_TYPE_BO;
- VkResult result = anv_device_alloc_bo(device, 4096,
- ANV_BO_ALLOC_EXTERNAL |
- ANV_BO_ALLOC_IMPLICIT_SYNC,
- &semaphore->permanent.bo);
- if (result != VK_SUCCESS) {
- vk_free2(&device->alloc, pAllocator, semaphore);
- return result;
- }
-
- /* If we're going to use this as a fence, we need to *not* have the
- * EXEC_OBJECT_ASYNC bit set.
- */
- assert(!(semaphore->permanent.bo->flags & EXEC_OBJECT_ASYNC));
- }
- } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
- assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
- if (device->instance->physicalDevice.has_syncobj) {
- semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
- semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
- } else {
semaphore->permanent.type = ANV_SEMAPHORE_TYPE_SYNC_FILE;
semaphore->permanent.fd = -1;
}
@@ -1403,6 +1777,10 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
close(impl->fd);
break;
+ case ANV_SEMAPHORE_TYPE_TIMELINE:
+ anv_timeline_finish(device, &impl->timeline);
+ break;
+
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
anv_gem_syncobj_destroy(device, impl->syncobj);
break;
@@ -1464,8 +1842,14 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
+ VkSemaphoreTypeKHR sem_type =
+ get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
+
switch (pExternalSemaphoreInfo->handleType) {
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ /* Timeline semaphores are not exportable. */
+ if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+ break;
pExternalSemaphoreProperties->exportFromImportedHandleTypes =
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
pExternalSemaphoreProperties->compatibleHandleTypes =
@@ -1476,17 +1860,18 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
return;
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- if (device->has_exec_fence) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes =
- VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes =
- VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures =
- VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- return;
- }
- break;
+ if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+ break;
+ if (!device->has_exec_fence)
+ break;
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ return;
default:
break;
@@ -1684,3 +2069,221 @@ VkResult anv_GetSemaphoreFdKHR(
return VK_SUCCESS;
}
+
+VkResult anv_GetSemaphoreCounterValueKHR(
+ VkDevice _device,
+ VkSemaphore _semaphore,
+ uint64_t* pValue)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
+
+ struct anv_semaphore_impl *impl =
+ semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+ &semaphore->temporary : &semaphore->permanent;
+
+ switch (impl->type) {
+ case ANV_SEMAPHORE_TYPE_TIMELINE: {
+ pthread_mutex_lock(&device->mutex);
+ *pValue = impl->timeline.highest_past;
+ pthread_mutex_unlock(&device->mutex);
+ return VK_SUCCESS;
+ }
+
+ default:
+ unreachable("Invalid semaphore type");
+ }
+}
+
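/* Illustrative usage of the entry point above, not part of this patch;
 * vkWaitSemaphoresKHR is preferable to polling in real code. `device`,
 * `semaphore` and `target_value` are hypothetical.
 */
uint64_t current_value = 0;
do {
   vkGetSemaphoreCounterValueKHR(device, semaphore, &current_value);
} while (current_value < target_value);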
+static VkResult
+anv_timeline_wait_locked(struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t serial, uint64_t abs_timeout_ns)
+{
+ /* Wait on the queue_submit condition variable until the timeline has a
+ * time point pending that's at least as high as serial.
+ */
+ while (timeline->highest_pending < serial) {
+ struct timespec abstime = {
+ .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
+ .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
+ };
+
+ int ret = pthread_cond_timedwait(&device->queue_submit,
+ &device->mutex, &abstime);
+ assert(ret != EINVAL);
+ if (anv_gettime_ns() >= abs_timeout_ns &&
+ timeline->highest_pending < serial)
+ return VK_TIMEOUT;
+ }
+
+ while (1) {
+ VkResult result = anv_timeline_gc_locked(device, timeline);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (timeline->highest_past >= serial)
+ return VK_SUCCESS;
+
+ /* If we got here, our earliest time point has a busy BO */
+ struct anv_timeline_point *point =
+ list_first_entry(&timeline->points,
+ struct anv_timeline_point, link);
+
+ /* Drop the lock while we wait. */
+ point->waiting++;
+ pthread_mutex_unlock(&device->mutex);
+
+ result = anv_device_wait(device, point->bo,
+ anv_get_relative_timeout(abs_timeout_ns));
+
+ /* Pick the mutex back up */
+ pthread_mutex_lock(&device->mutex);
+ point->waiting--;
+
+ /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
+ if (result != VK_SUCCESS)
+ return result;
+ }
+}
+
+static VkResult
+anv_timelines_wait(struct anv_device *device,
+ struct anv_timeline **timelines,
+ const uint64_t *serials,
+ uint32_t n_timelines,
+ bool wait_all,
+ uint64_t abs_timeout_ns)
+{
+ if (!wait_all && n_timelines > 1) {
+ while (1) {
+ VkResult result;
+ pthread_mutex_lock(&device->mutex);
+ for (uint32_t i = 0; i < n_timelines; i++) {
+ result =
+ anv_timeline_wait_locked(device, timelines[i], serials[i], 0);
+ if (result != VK_TIMEOUT)
+ break;
+ }
+
+ if (result != VK_TIMEOUT ||
+ anv_gettime_ns() >= abs_timeout_ns) {
+ pthread_mutex_unlock(&device->mutex);
+ return result;
+ }
+
+ /* If none of them are ready, do a short wait so we don't completely
+ * spin while holding the lock. The 10us is completely arbitrary.
+ */
+ uint64_t abs_short_wait_ns =
+ anv_get_absolute_timeout(
+ MIN2((abs_timeout_ns - anv_gettime_ns()) / 10, 10 * 1000));
+ struct timespec abstime = {
+ .tv_sec = abs_short_wait_ns / NSEC_PER_SEC,
+ .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC,
+ };
+ ASSERTED int ret;
+ ret = pthread_cond_timedwait(&device->queue_submit,
+ &device->mutex, &abstime);
+ assert(ret != EINVAL);
+ }
+ } else {
+ VkResult result = VK_SUCCESS;
+ pthread_mutex_lock(&device->mutex);
+ for (uint32_t i = 0; i < n_timelines; i++) {
+ result =
+ anv_timeline_wait_locked(device, timelines[i],
+ serials[i], abs_timeout_ns);
+ if (result != VK_SUCCESS)
+ break;
+ }
+ pthread_mutex_unlock(&device->mutex);
+ return result;
+ }
+}
+
+VkResult anv_WaitSemaphoresKHR(
+ VkDevice _device,
+ const VkSemaphoreWaitInfoKHR* pWaitInfo,
+ uint64_t timeout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ struct anv_timeline **timelines =
+ vk_alloc(&device->alloc,
+ pWaitInfo->semaphoreCount * sizeof(*timelines),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (!timelines)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ uint64_t *values = vk_alloc(&device->alloc,
+ pWaitInfo->semaphoreCount * sizeof(*values),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (!values) {
+ vk_free(&device->alloc, timelines);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ uint32_t handle_count = 0;
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
+ ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ struct anv_semaphore_impl *impl =
+ semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+ &semaphore->temporary : &semaphore->permanent;
+
+ assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
+
+ if (pWaitInfo->pValues[i] == 0)
+ continue;
+
+ timelines[handle_count] = &impl->timeline;
+ values[handle_count] = pWaitInfo->pValues[i];
+ handle_count++;
+ }
+
+ VkResult result = VK_SUCCESS;
+ if (handle_count > 0) {
+ result = anv_timelines_wait(device, timelines, values, handle_count,
+ !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+ timeout);
+ }
+
+ vk_free(&device->alloc, timelines);
+ vk_free(&device->alloc, values);
+
+ return result;
+}
+
+VkResult anv_SignalSemaphoreKHR(
+ VkDevice _device,
+ const VkSemaphoreSignalInfoKHR* pSignalInfo)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore);
+
+ struct anv_semaphore_impl *impl =
+ semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+ &semaphore->temporary : &semaphore->permanent;
+
+ switch (impl->type) {
+ case ANV_SEMAPHORE_TYPE_TIMELINE: {
+ pthread_mutex_lock(&device->mutex);
+
+ VkResult result = anv_timeline_gc_locked(device, &impl->timeline);
+
+ assert(pSignalInfo->value > impl->timeline.highest_pending);
+
+ impl->timeline.highest_pending = impl->timeline.highest_past = pSignalInfo->value;
+
+ if (result == VK_SUCCESS)
+ result = anv_device_submit_deferred_locked(device);
+
+ pthread_cond_broadcast(&device->queue_submit);
+ pthread_mutex_unlock(&device->mutex);
+ return result;
+ }
+
+ default:
+ unreachable("Invalid semaphore type");
+ }
+}
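Finally, an illustrative host-side signal matching the entry point above (not code from this patch; handles are assumed from earlier setup). Signaling from the host bumps the timeline and lets anv_SignalSemaphoreKHR flush any deferred submissions that were waiting on the value:

VkSemaphoreSignalInfoKHR signal_info = {
   .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR,
   .semaphore = semaphore,
   .value = 2,
};
vkSignalSemaphoreKHR(device, &signal_info);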