diff options
-rw-r--r-- | src/amd/vulkan/radv_device.c | 250 | ||||
-rw-r--r-- | src/amd/vulkan/radv_entrypoints_gen.py | 3 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 16 | ||||
-rw-r--r-- | src/amd/vulkan/radv_radeon_winsys.h | 21 | ||||
-rw-r--r-- | src/amd/vulkan/radv_wsi.c | 30 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 110 |
6 files changed, 359 insertions, 71 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index d87be66da8b..81053ac2164 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -102,6 +102,10 @@ static const VkExtensionProperties instance_extensions[] = { .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, .specVersion = 1, }, + { + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + .specVersion = 1, + }, }; static const VkExtensionProperties common_device_extensions[] = { @@ -162,6 +166,16 @@ static const VkExtensionProperties common_device_extensions[] = { .specVersion = 1, }, }; +static const VkExtensionProperties ext_sema_device_extensions[] = { + { + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + .specVersion = 1, + }, + { + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, + .specVersion = 1, + }, +}; static VkResult radv_extensions_register(struct radv_instance *instance, @@ -312,6 +326,15 @@ radv_physical_device_init(struct radv_physical_device *device, if (result != VK_SUCCESS) goto fail; + if (device->rad_info.has_syncobj) { + result = radv_extensions_register(instance, + &device->extensions, + ext_sema_device_extensions, + ARRAY_SIZE(ext_sema_device_extensions)); + if (result != VK_SUCCESS) + goto fail; + } + fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); device->name = get_chip_name(device->rad_info.family); @@ -1885,6 +1908,89 @@ fail: return VK_ERROR_OUT_OF_DEVICE_MEMORY; } +static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts, + int num_sems, + const VkSemaphore *sems, + bool reset_temp) +{ + int syncobj_idx = 0, sem_idx = 0; + + if (num_sems == 0) + return VK_SUCCESS; + for (uint32_t i = 0; i < num_sems; i++) { + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); + + if (sem->temp_syncobj || sem->syncobj) + counts->syncobj_count++; + else + counts->sem_count++; + } + + if (counts->syncobj_count) { + counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count); + if (!counts->syncobj) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + if (counts->sem_count) { + counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count); + if (!counts->sem) { + free(counts->syncobj); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + + for (uint32_t i = 0; i < num_sems; i++) { + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); + + if (sem->temp_syncobj) { + counts->syncobj[syncobj_idx++] = sem->temp_syncobj; + if (reset_temp) { + /* after we wait on a temp import - drop it */ + sem->temp_syncobj = 0; + } + } + else if (sem->syncobj) + counts->syncobj[syncobj_idx++] = sem->syncobj; + else { + assert(sem->sem); + counts->sem[sem_idx++] = sem->sem; + } + } + + return VK_SUCCESS; +} + +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info) +{ + free(sem_info->wait.syncobj); + free(sem_info->wait.sem); + free(sem_info->signal.syncobj); + free(sem_info->signal.sem); +} + +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info, + int num_wait_sems, + const VkSemaphore *wait_sems, + int num_signal_sems, + const VkSemaphore *signal_sems) +{ + VkResult ret; + memset(sem_info, 0, sizeof(*sem_info)); + + ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true); + if (ret) + return ret; + ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false); + if (ret) + radv_free_sem_info(sem_info); + + /* caller can override these */ + sem_info->cs_emit_wait = true; + sem_info->cs_emit_signal = true; + return ret; +} + VkResult radv_QueueSubmit( VkQueue _queue, uint32_t submitCount, @@ -1935,16 +2041,22 @@ VkResult radv_QueueSubmit( bool do_flush = !i || pSubmits[i].pWaitDstStageMask; bool can_patch = !do_flush; uint32_t advance; + struct radv_winsys_sem_info sem_info; + + result = radv_alloc_sem_info(&sem_info, + pSubmits[i].waitSemaphoreCount, + pSubmits[i].pWaitSemaphores, + pSubmits[i].signalSemaphoreCount, + pSubmits[i].pSignalSemaphores); + if (result != VK_SUCCESS) + return result; if (!pSubmits[i].commandBufferCount) { if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], 1, NULL, NULL, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - pSubmits[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - pSubmits[i].signalSemaphoreCount, + &sem_info, false, base_fence); if (ret) { radv_loge("failed to submit CS %d\n", i); @@ -1952,6 +2064,7 @@ VkResult radv_QueueSubmit( } fence_emitted = true; } + radv_free_sem_info(&sem_info); continue; } @@ -1976,18 +2089,16 @@ VkResult radv_QueueSubmit( for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) { advance = MIN2(max_cs_submission, pSubmits[i].commandBufferCount + do_flush - j); - bool b = j == 0; - bool e = j + advance == pSubmits[i].commandBufferCount + do_flush; if (queue->device->trace_bo) *queue->device->trace_id_ptr = 0; + sem_info.cs_emit_wait = j == 0; + sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount + do_flush; + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance, initial_preamble_cs, continue_preamble_cs, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - b ? pSubmits[i].waitSemaphoreCount : 0, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - e ? pSubmits[i].signalSemaphoreCount : 0, + &sem_info, can_patch, base_fence); if (ret) { @@ -2008,16 +2119,19 @@ VkResult radv_QueueSubmit( } } } + + radv_free_sem_info(&sem_info); free(cs_array); } if (fence) { - if (!fence_emitted) + if (!fence_emitted) { + struct radv_winsys_sem_info sem_info = {0}; ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, NULL, 0, NULL, 0, + 1, NULL, NULL, &sem_info, false, base_fence); - + } fence->submitted = true; } @@ -2445,6 +2559,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, bool fence_emitted = false; for (uint32_t i = 0; i < bindInfoCount; ++i) { + struct radv_winsys_sem_info sem_info; for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) { radv_sparse_buffer_bind_memory(queue->device, pBindInfo[i].pBufferBinds + j); @@ -2455,19 +2570,28 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, pBindInfo[i].pImageOpaqueBinds + j); } + VkResult result; + result = radv_alloc_sem_info(&sem_info, + pBindInfo[i].waitSemaphoreCount, + pBindInfo[i].pWaitSemaphores, + pBindInfo[i].signalSemaphoreCount, + pBindInfo[i].pSignalSemaphores); + if (result != VK_SUCCESS) + return result; + if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) { queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], 1, NULL, NULL, - (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores, - pBindInfo[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores, - pBindInfo[i].signalSemaphoreCount, + &sem_info, false, base_fence); fence_emitted = true; if (fence) fence->submitted = true; } + + radv_free_sem_info(&sem_info); + } if (fence && !fence_emitted) { @@ -2604,13 +2728,38 @@ VkResult radv_CreateSemaphore( VkSemaphore* pSemaphore) { RADV_FROM_HANDLE(radv_device, device, _device); - struct radeon_winsys_sem *sem; + const VkExportSemaphoreCreateInfoKHR *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR); + VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = + export ? export->handleTypes : 0; - sem = device->ws->create_sem(device->ws); + struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator, + sizeof(*sem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!sem) return VK_ERROR_OUT_OF_HOST_MEMORY; - *pSemaphore = radeon_winsys_sem_to_handle(sem); + sem->temp_syncobj = 0; + /* create a syncobject if we are going to export this semaphore */ + if (handleTypes) { + assert (device->physical_device->rad_info.has_syncobj); + assert (handleTypes == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); + int ret = device->ws->create_syncobj(device->ws, &sem->syncobj); + if (ret) { + vk_free2(&device->alloc, pAllocator, sem); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + sem->sem = NULL; + } else { + sem->sem = device->ws->create_sem(device->ws); + if (!sem->sem) { + vk_free2(&device->alloc, pAllocator, sem); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + sem->syncobj = 0; + } + + *pSemaphore = radv_semaphore_to_handle(sem); return VK_SUCCESS; } @@ -2620,11 +2769,15 @@ void radv_DestroySemaphore( const VkAllocationCallbacks* pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore); + RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore); if (!_semaphore) return; - device->ws->destroy_sem(sem); + if (sem->syncobj) + device->ws->destroy_syncobj(device->ws, sem->syncobj); + else + device->ws->destroy_sem(sem->sem); + vk_free2(&device->alloc, pAllocator, sem); } VkResult radv_CreateEvent( @@ -3409,3 +3562,56 @@ VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device, */ return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; } + +VkResult radv_ImportSemaphoreFdKHR(VkDevice _device, + const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); + uint32_t syncobj_handle = 0; + assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); + + int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle); + if (ret != 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + + if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) { + sem->temp_syncobj = syncobj_handle; + } else { + sem->syncobj = syncobj_handle; + } + close(pImportSemaphoreFdInfo->fd); + return VK_SUCCESS; +} + +VkResult radv_GetSemaphoreFdKHR(VkDevice _device, + const VkSemaphoreGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore); + int ret; + uint32_t syncobj_handle; + + assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); + if (sem->temp_syncobj) + syncobj_handle = sem->temp_syncobj; + else + syncobj_handle = sem->syncobj; + ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); + if (ret) + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return VK_SUCCESS; +} + +void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, + VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) +{ + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; + +} diff --git a/src/amd/vulkan/radv_entrypoints_gen.py b/src/amd/vulkan/radv_entrypoints_gen.py index 61b23281501..9f5a4f3ee39 100644 --- a/src/amd/vulkan/radv_entrypoints_gen.py +++ b/src/amd/vulkan/radv_entrypoints_gen.py @@ -49,6 +49,9 @@ supported_extensions = [ 'VK_KHR_external_memory_fd', 'VK_KHR_storage_buffer_storage_class', 'VK_KHR_variable_pointers', + 'VK_KHR_external_semaphore_capabilities', + 'VK_KHR_external_semaphore', + 'VK_KHR_external_semaphore_fd' ] # We generate a static hash table for entry point lookup diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 891b34ef138..8cd5ec00463 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1470,6 +1470,20 @@ struct radv_query_pool { uint32_t pipeline_stats_mask; }; +struct radv_semaphore { + /* use a winsys sem for non-exportable */ + struct radeon_winsys_sem *sem; + uint32_t syncobj; + uint32_t temp_syncobj; +}; + +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info, + int num_wait_sems, + const VkSemaphore *wait_sems, + int num_signal_sems, + const VkSemaphore *signal_sems); +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info); + void radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, @@ -1563,6 +1577,6 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule) -RADV_DEFINE_NONDISP_HANDLE_CASTS(radeon_winsys_sem, VkSemaphore) +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore) #endif /* RADV_PRIVATE_H */ diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 2f3990c891c..215ef0bfc15 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -131,9 +131,23 @@ struct radeon_bo_metadata { uint32_t metadata[64]; }; +uint32_t syncobj_handle; struct radeon_winsys_bo; struct radeon_winsys_fence; -struct radeon_winsys_sem; + +struct radv_winsys_sem_counts { + uint32_t syncobj_count; + uint32_t sem_count; + uint32_t *syncobj; + struct radeon_winsys_sem **sem; +}; + +struct radv_winsys_sem_info { + bool cs_emit_signal; + bool cs_emit_wait; + struct radv_winsys_sem_counts wait; + struct radv_winsys_sem_counts signal; +}; struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); @@ -191,10 +205,7 @@ struct radeon_winsys { unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, struct radeon_winsys_cs *continue_preamble_cs, - struct radeon_winsys_sem **wait_sem, - unsigned wait_sem_count, - struct radeon_winsys_sem **signal_sem, - unsigned signal_sem_count, + struct radv_winsys_sem_info *sem_info, bool can_patch, struct radeon_winsys_fence *fence); diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index ab3dcd67d5f..adc43111122 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -442,7 +442,6 @@ VkResult radv_AcquireNextImageKHR( fence->submitted = true; fence->signalled = true; } - return result; } @@ -452,7 +451,6 @@ VkResult radv_QueuePresentKHR( { RADV_FROM_HANDLE(radv_queue, queue, _queue); VkResult result = VK_SUCCESS; - const VkPresentRegionsKHR *regions = vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR); @@ -461,6 +459,20 @@ VkResult radv_QueuePresentKHR( struct radeon_winsys_cs *cs; const VkPresentRegionKHR *region = NULL; VkResult item_result; + struct radv_winsys_sem_info sem_info; + + item_result = radv_alloc_sem_info(&sem_info, + pPresentInfo->waitSemaphoreCount, + pPresentInfo->pWaitSemaphores, + 0, + NULL); + if (pPresentInfo->pResults != NULL) + pPresentInfo->pResults[i] = item_result; + result = result == VK_SUCCESS ? item_result : result; + if (item_result != VK_SUCCESS) { + radv_free_sem_info(&sem_info); + continue; + } assert(radv_device_from_handle(swapchain->device) == queue->device); if (swapchain->fences[0] == VK_NULL_HANDLE) { @@ -472,8 +484,10 @@ VkResult radv_QueuePresentKHR( if (pPresentInfo->pResults != NULL) pPresentInfo->pResults[i] = item_result; result = result == VK_SUCCESS ? item_result : result; - if (item_result != VK_SUCCESS) + if (item_result != VK_SUCCESS) { + radv_free_sem_info(&sem_info); continue; + } } else { radv_ResetFences(radv_device_to_handle(queue->device), 1, &swapchain->fences[0]); @@ -487,11 +501,12 @@ VkResult radv_QueuePresentKHR( RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]); struct radeon_winsys_fence *base_fence = fence->fence; struct radeon_winsys_ctx *ctx = queue->hw_ctx; + queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL, NULL, - (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores, - pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); + &sem_info, + false, base_fence); fence->submitted = true; if (regions && regions->pRegions) @@ -504,8 +519,10 @@ VkResult radv_QueuePresentKHR( if (pPresentInfo->pResults != NULL) pPresentInfo->pResults[i] = item_result; result = result == VK_SUCCESS ? item_result : result; - if (item_result != VK_SUCCESS) + if (item_result != VK_SUCCESS) { + radv_free_sem_info(&sem_info); continue; + } VkFence last = swapchain->fences[2]; swapchain->fences[2] = swapchain->fences[1]; @@ -517,6 +534,7 @@ VkResult radv_QueuePresentKHR( 1, &last, true, 1); } + radv_free_sem_info(&sem_info); } return VK_SUCCESS; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 6ed8f32604f..bc4d460cf44 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -75,13 +75,6 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base) return (struct radv_amdgpu_cs*)base; } -struct radv_amdgpu_sem_info { - int wait_sem_count; - struct radeon_winsys_sem **wait_sems; - int signal_sem_count; - struct radeon_winsys_sem **signal_sems; -}; - static int ring_to_hw_ip(enum ring_type ring) { switch (ring) { @@ -99,10 +92,10 @@ static int ring_to_hw_ip(enum ring_type ring) static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, uint32_t ip_type, uint32_t ring, - struct radv_amdgpu_sem_info *sem_info); + struct radv_winsys_sem_info *sem_info); static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct amdgpu_cs_request *request, - struct radv_amdgpu_sem_info *sem_info); + struct radv_winsys_sem_info *sem_info); static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_fence *fence, @@ -662,7 +655,7 @@ static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, int queue_idx, - struct radv_amdgpu_sem_info *sem_info, + struct radv_winsys_sem_info *sem_info, struct radeon_winsys_cs **cs_array, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, @@ -740,7 +733,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, int queue_idx, - struct radv_amdgpu_sem_info *sem_info, + struct radv_winsys_sem_info *sem_info, struct radeon_winsys_cs **cs_array, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, @@ -752,7 +745,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; amdgpu_bo_list_handle bo_list; struct amdgpu_cs_request request; - + bool emit_signal_sem = sem_info->cs_emit_signal; assert(cs_count); for (unsigned i = 0; i < cs_count;) { @@ -792,6 +785,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, } } + sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false; r = radv_amdgpu_cs_submit(ctx, &request, sem_info); if (r) { if (r == -ENOMEM) @@ -818,7 +812,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_idx, - struct radv_amdgpu_sem_info *sem_info, + struct radv_winsys_sem_info *sem_info, struct radeon_winsys_cs **cs_array, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, @@ -833,6 +827,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, amdgpu_bo_list_handle bo_list; struct amdgpu_cs_request request; uint32_t pad_word = 0xffff1000U; + bool emit_signal_sem = sem_info->cs_emit_signal; if (radv_amdgpu_winsys(ws)->info.chip_class == SI) pad_word = 0x80000000; @@ -898,6 +893,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, request.ibs = &ib; request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false; r = radv_amdgpu_cs_submit(ctx, &request, sem_info); if (r) { if (r == -ENOMEM) @@ -929,35 +925,27 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, struct radeon_winsys_cs *continue_preamble_cs, - struct radeon_winsys_sem **wait_sem, - unsigned wait_sem_count, - struct radeon_winsys_sem **signal_sem, - unsigned signal_sem_count, + struct radv_winsys_sem_info *sem_info, bool can_patch, struct radeon_winsys_fence *_fence) { struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]); struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); int ret; - struct radv_amdgpu_sem_info sem_info = {0}; - - sem_info.wait_sems = wait_sem; - sem_info.wait_sem_count = wait_sem_count; - sem_info.signal_sems = signal_sem; - sem_info.signal_sem_count = signal_sem_count; + assert(sem_info); if (!cs->ws->use_ib_bos) { - ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, &sem_info, cs_array, + ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) { - ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, &sem_info, cs_array, + ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { - ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, &sem_info, cs_array, + ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } - radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, &sem_info); + radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, sem_info); return ret; } @@ -1072,10 +1060,10 @@ static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem) static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, uint32_t ip_type, uint32_t ring, - struct radv_amdgpu_sem_info *sem_info) + struct radv_winsys_sem_info *sem_info) { - for (unsigned i = 0; i < sem_info->signal_sem_count; i++) { - struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)sem_info->signal_sems[i]; + for (unsigned i = 0; i < sem_info->signal.sem_count; i++) { + struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)(sem_info->signal.sem)[i]; if (sem->context) return -EINVAL; @@ -1085,9 +1073,27 @@ static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, return 0; } +static struct drm_amdgpu_cs_chunk_sem *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts, + struct drm_amdgpu_cs_chunk *chunk, int chunk_id) +{ + struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count); + if (!syncobj) + return NULL; + + for (unsigned i = 0; i < counts->syncobj_count; i++) { + struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i]; + sem->handle = counts->syncobj[i]; + } + + chunk->chunk_id = chunk_id; + chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count; + chunk->chunk_data = (uint64_t)(uintptr_t)syncobj; + return syncobj; +} + static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct amdgpu_cs_request *request, - struct radv_amdgpu_sem_info *sem_info) + struct radv_winsys_sem_info *sem_info) { int r; int num_chunks; @@ -1096,10 +1102,12 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct drm_amdgpu_cs_chunk *chunks; struct drm_amdgpu_cs_chunk_data *chunk_data; struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL; + struct drm_amdgpu_cs_chunk_sem *wait_syncobj = NULL, *signal_syncobj = NULL; int i; struct amdgpu_cs_fence *sem; + user_fence = (request->fence_info.handle != NULL); - size = request->number_of_ibs + (user_fence ? 2 : 1) + 1; + size = request->number_of_ibs + (user_fence ? 2 : 1) + 3; chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size); @@ -1136,15 +1144,30 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, &chunk_data[i]); } - if (sem_info->wait_sem_count) { - sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_info->wait_sem_count); + if (sem_info->wait.syncobj_count && sem_info->cs_emit_wait) { + wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait, + &chunks[num_chunks], + AMDGPU_CHUNK_ID_SYNCOBJ_IN); + if (!wait_syncobj) { + r = -ENOMEM; + goto error_out; + } + num_chunks++; + + if (sem_info->wait.sem_count == 0) + sem_info->cs_emit_wait = false; + + } + + if (sem_info->wait.sem_count && sem_info->cs_emit_wait) { + sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_info->wait.sem_count); if (!sem_dependencies) { r = -ENOMEM; goto error_out; } int sem_count = 0; - for (unsigned j = 0; j < sem_info->wait_sem_count; j++) { - sem = (struct amdgpu_cs_fence *)sem_info->wait_sems[j]; + for (unsigned j = 0; j < sem_info->wait.sem_count; j++) { + sem = (struct amdgpu_cs_fence *)sem_info->wait.sem[j]; if (!sem->context) continue; struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++]; @@ -1158,7 +1181,18 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count; chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies; - sem_info->wait_sem_count = 0; + sem_info->cs_emit_wait = false; + } + + if (sem_info->signal.syncobj_count && sem_info->cs_emit_signal) { + signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal, + &chunks[num_chunks], + AMDGPU_CHUNK_ID_SYNCOBJ_OUT); + if (!signal_syncobj) { + r = -ENOMEM; + goto error_out; + } + num_chunks++; } r = amdgpu_cs_submit_raw(ctx->ws->dev, @@ -1169,6 +1203,8 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, &request->seq_no); error_out: free(sem_dependencies); + free(wait_syncobj); + free(signal_syncobj); return r; } |