summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/vulkan/radv_device.c 350
1 files changed, 187 insertions, 163 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 1d2addcd1d0..239409adada 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3516,172 +3516,211 @@ radv_alloc_sem_info(struct radv_instance *instance,
return ret;
}
-/* Signals fence as soon as all the work currently put on queue is done. */
-static VkResult radv_signal_fence(struct radv_queue *queue,
- struct radv_fence *fence)
-{
- int ret;
- VkResult result;
- struct radv_winsys_sem_info sem_info;
-
- result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
- radv_fence_to_handle(fence));
- if (result != VK_SUCCESS)
- return result;
-
- ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL, &sem_info, NULL,
- false, fence->fence);
- radv_free_sem_info(&sem_info);
-
- if (ret)
- return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
-
- return VK_SUCCESS;
-}
-
-VkResult radv_QueueSubmit(
- VkQueue _queue,
- uint32_t submitCount,
- const VkSubmitInfo* pSubmits,
- VkFence _fence)
+static VkResult
+radv_get_preambles(struct radv_queue *queue,
+ const VkCommandBuffer *cmd_buffers,
+ uint32_t cmd_buffer_count,
+ struct radeon_cmdbuf **initial_full_flush_preamble_cs,
+ struct radeon_cmdbuf **initial_preamble_cs,
+ struct radeon_cmdbuf **continue_preamble_cs)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
- struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
- int ret;
- uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
uint32_t scratch_size = 0;
uint32_t compute_scratch_size = 0;
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
- struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
- VkResult result;
- bool fence_emitted = false;
bool tess_rings_needed = false;
bool gds_needed = false;
bool sample_positions_needed = false;
- /* Do this first so failing to allocate scratch buffers can't result in
- * partially executed submissions. */
- for (uint32_t i = 0; i < submitCount; i++) {
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
- pSubmits[i].pCommandBuffers[j]);
+ for (uint32_t j = 0; j < cmd_buffer_count; j++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
+ cmd_buffers[j]);
- scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
- compute_scratch_size = MAX2(compute_scratch_size,
- cmd_buffer->compute_scratch_size_needed);
- esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
- gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
- tess_rings_needed |= cmd_buffer->tess_rings_needed;
- gds_needed |= cmd_buffer->gds_needed;
- sample_positions_needed |= cmd_buffer->sample_positions_needed;
- }
+ scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
+ compute_scratch_size = MAX2(compute_scratch_size,
+ cmd_buffer->compute_scratch_size_needed);
+ esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
+ gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
+ tess_rings_needed |= cmd_buffer->tess_rings_needed;
+ gds_needed |= cmd_buffer->gds_needed;
+ sample_positions_needed |= cmd_buffer->sample_positions_needed;
}
- result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
+ return radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
- gds_needed, sample_positions_needed,
- &initial_flush_preamble_cs,
- &initial_preamble_cs, &continue_preamble_cs);
+ gds_needed, sample_positions_needed,
+ initial_full_flush_preamble_cs,
+ initial_preamble_cs, continue_preamble_cs);
+}
+
+
+struct radv_queue_submission {
+ const VkCommandBuffer *cmd_buffers;
+ uint32_t cmd_buffer_count;
+ bool flush_caches;
+ VkPipelineStageFlags wait_dst_stage_mask;
+ const VkSemaphore *wait_semaphores;
+ uint32_t wait_semaphore_count;
+ const VkSemaphore *signal_semaphores;
+ uint32_t signal_semaphore_count;
+ VkFence fence;
+};
+
+static VkResult
+radv_queue_submit(struct radv_queue *queue,
+ const struct radv_queue_submission *submission)
+{
+ RADV_FROM_HANDLE(radv_fence, fence, submission->fence);
+ struct radeon_cmdbuf **cs_array;
+ struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+ uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
+ struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
+ bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
+ bool can_patch = true;
+ uint32_t advance;
+ struct radv_winsys_sem_info sem_info;
+ VkResult result;
+ int ret;
+ struct radeon_cmdbuf *initial_preamble_cs = NULL;
+ struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
+ struct radeon_cmdbuf *continue_preamble_cs = NULL;
+
+ result = radv_get_preambles(queue, submission->cmd_buffers,
+ submission->cmd_buffer_count,
+ &initial_preamble_cs,
+ &initial_flush_preamble_cs,
+ &continue_preamble_cs);
if (result != VK_SUCCESS)
return result;
- for (uint32_t i = 0; i < submitCount; i++) {
- struct radeon_cmdbuf **cs_array;
- bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
- bool can_patch = true;
- uint32_t advance;
- struct radv_winsys_sem_info sem_info;
-
- result = radv_alloc_sem_info(queue->device->instance,
- &sem_info,
- pSubmits[i].waitSemaphoreCount,
- pSubmits[i].pWaitSemaphores,
- pSubmits[i].signalSemaphoreCount,
- pSubmits[i].pSignalSemaphores,
- _fence);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_alloc_sem_info(queue->device->instance,
+ &sem_info,
+ submission->wait_semaphore_count,
+ submission->wait_semaphores,
+ submission->signal_semaphore_count,
+ submission->signal_semaphores,
+ submission->fence);
+ if (result != VK_SUCCESS)
+ return result;
- if (!pSubmits[i].commandBufferCount) {
- if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- &sem_info, NULL,
- false, base_fence);
- if (ret) {
- radv_loge("failed to submit CS %d\n", i);
- abort();
- }
- fence_emitted = true;
- }
- radv_free_sem_info(&sem_info);
- continue;
+ if (!submission->cmd_buffer_count) {
+ ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
+ &queue->device->empty_cs[queue->queue_family_index],
+ 1, NULL, NULL,
+ &sem_info, NULL,
+ false, base_fence);
+ if (ret) {
+ radv_loge("failed to submit CS\n");
+ abort();
}
+ radv_free_sem_info(&sem_info);
+ return VK_SUCCESS;
+ }
- cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
- (pSubmits[i].commandBufferCount));
+ cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
+ (submission->cmd_buffer_count));
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
- pSubmits[i].pCommandBuffers[j]);
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+ for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- cs_array[j] = cmd_buffer->cs;
- if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
- can_patch = false;
+ cs_array[j] = cmd_buffer->cs;
+ if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
+ can_patch = false;
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
- }
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
+ }
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
- struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
- const struct radv_winsys_bo_list *bo_list = NULL;
+ for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
+ struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
+ const struct radv_winsys_bo_list *bo_list = NULL;
- advance = MIN2(max_cs_submission,
- pSubmits[i].commandBufferCount - j);
+ advance = MIN2(max_cs_submission,
+ submission->cmd_buffer_count - j);
- if (queue->device->trace_bo)
- *queue->device->trace_id_ptr = 0;
+ if (queue->device->trace_bo)
+ *queue->device->trace_id_ptr = 0;
- sem_info.cs_emit_wait = j == 0;
- sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
+ sem_info.cs_emit_wait = j == 0;
+ sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
- if (unlikely(queue->device->use_global_bo_list)) {
- pthread_mutex_lock(&queue->device->bo_list.mutex);
- bo_list = &queue->device->bo_list.list;
- }
+ if (unlikely(queue->device->use_global_bo_list)) {
+ pthread_mutex_lock(&queue->device->bo_list.mutex);
+ bo_list = &queue->device->bo_list.list;
+ }
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
- advance, initial_preamble, continue_preamble_cs,
- &sem_info, bo_list,
- can_patch, base_fence);
+ ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
+ advance, initial_preamble, continue_preamble_cs,
+ &sem_info, bo_list,
+ can_patch, base_fence);
- if (unlikely(queue->device->use_global_bo_list))
- pthread_mutex_unlock(&queue->device->bo_list.mutex);
+ if (unlikely(queue->device->use_global_bo_list))
+ pthread_mutex_unlock(&queue->device->bo_list.mutex);
- if (ret) {
- radv_loge("failed to submit CS %d\n", i);
- abort();
- }
- fence_emitted = true;
- if (queue->device->trace_bo) {
- radv_check_gpu_hangs(queue, cs_array[j]);
- }
+ if (ret) {
+ radv_loge("failed to submit CS\n");
+ abort();
+ }
+ if (queue->device->trace_bo) {
+ radv_check_gpu_hangs(queue, cs_array[j]);
}
+ }
- radv_free_temp_syncobjs(queue->device,
- pSubmits[i].waitSemaphoreCount,
- pSubmits[i].pWaitSemaphores);
- radv_free_sem_info(&sem_info);
- free(cs_array);
+ radv_free_temp_syncobjs(queue->device,
+ submission->wait_semaphore_count,
+ submission->wait_semaphores);
+ radv_free_sem_info(&sem_info);
+ free(cs_array);
+ return VK_SUCCESS;
+}
+
+/* Signals fence as soon as all the work currently put on queue is done. */
+static VkResult radv_signal_fence(struct radv_queue *queue,
+ VkFence fence)
+{
+ return radv_queue_submit(queue, &(struct radv_queue_submission) {
+ .fence = fence
+ });
+}
+
+VkResult radv_QueueSubmit(
+ VkQueue _queue,
+ uint32_t submitCount,
+ const VkSubmitInfo* pSubmits,
+ VkFence fence)
+{
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ VkResult result;
+ bool fence_emitted = false;
+
+ for (uint32_t i = 0; i < submitCount; i++) {
+ if (!pSubmits[i].commandBufferCount &&
+ !pSubmits[i].waitSemaphoreCount &&
+ !pSubmits[i].signalSemaphoreCount)
+ continue;
+
+ VkPipelineStageFlags wait_dst_stage_mask = 0;
+ for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
+ wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
+ }
+
+ result = radv_queue_submit(queue, &(struct radv_queue_submission) {
+ .cmd_buffers = pSubmits[i].pCommandBuffers,
+ .cmd_buffer_count = pSubmits[i].commandBufferCount,
+ .wait_dst_stage_mask = wait_dst_stage_mask,
+ .flush_caches = !fence_emitted,
+ .wait_semaphores = pSubmits[i].pWaitSemaphores,
+ .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
+ .signal_semaphores = pSubmits[i].pSignalSemaphores,
+ .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
+ .fence = fence
+ });
+ if (result != VK_SUCCESS)
+ return result;
+
+ fence_emitted = true;
}
- if (fence) {
+ if (fence != VK_NULL_HANDLE) {
if (!fence_emitted) {
result = radv_signal_fence(queue, fence);
if (result != VK_SUCCESS)
@@ -4308,17 +4347,13 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
VkQueue _queue,
uint32_t bindInfoCount,
const VkBindSparseInfo* pBindInfo,
- VkFence _fence)
+ VkFence fence)
{
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
RADV_FROM_HANDLE(radv_queue, queue, _queue);
- struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
bool fence_emitted = false;
VkResult result;
- int ret;
for (uint32_t i = 0; i < bindInfoCount; ++i) {
- struct radv_winsys_sem_info sem_info;
for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
radv_sparse_buffer_bind_memory(queue->device,
pBindInfo[i].pBufferBinds + j);
@@ -4329,36 +4364,25 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
pBindInfo[i].pImageOpaqueBinds + j);
}
- VkResult result;
- result = radv_alloc_sem_info(queue->device->instance,
- &sem_info,
- pBindInfo[i].waitSemaphoreCount,
- pBindInfo[i].pWaitSemaphores,
- pBindInfo[i].signalSemaphoreCount,
- pBindInfo[i].pSignalSemaphores,
- _fence);
- if (result != VK_SUCCESS)
- return result;
-
- if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
- ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- &sem_info, NULL,
- false, base_fence);
- if (ret) {
- radv_loge("failed to submit CS %d\n", i);
- abort();
- }
+ if (!pBindInfo[i].waitSemaphoreCount &&
+ !pBindInfo[i].signalSemaphoreCount)
+ continue;
- fence_emitted = true;
- }
+ VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
+ .wait_semaphores = pBindInfo[i].pWaitSemaphores,
+ .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
+ .signal_semaphores = pBindInfo[i].pSignalSemaphores,
+ .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
+ .fence = fence
+ });
- radv_free_sem_info(&sem_info);
+ if (result != VK_SUCCESS)
+ return result;
+ fence_emitted = true;
}
- if (fence) {
+ if (fence != VK_NULL_HANDLE) {
if (!fence_emitted) {
result = radv_signal_fence(queue, fence);
if (result != VK_SUCCESS)