diff options
-rw-r--r-- | src/amd/vulkan/radv_device.c | 125 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 8 | ||||
-rw-r--r-- | src/amd/vulkan/radv_radeon_winsys.h | 6 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 46 |
4 files changed, 146 insertions, 39 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index b8313b26ebf..fda21fa998a 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1221,6 +1221,55 @@ radv_queue_finish(struct radv_queue *queue) } static void +radv_bo_list_init(struct radv_bo_list *bo_list) +{ + pthread_mutex_init(&bo_list->mutex, NULL); + bo_list->list.count = bo_list->capacity = 0; + bo_list->list.bos = NULL; +} + +static void +radv_bo_list_finish(struct radv_bo_list *bo_list) +{ + free(bo_list->list.bos); + pthread_mutex_destroy(&bo_list->mutex); +} + +static VkResult radv_bo_list_add(struct radv_bo_list *bo_list, struct radeon_winsys_bo *bo) +{ + pthread_mutex_lock(&bo_list->mutex); + if (bo_list->list.count == bo_list->capacity) { + unsigned capacity = MAX2(4, bo_list->capacity * 2); + void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*)); + + if (!data) { + pthread_mutex_unlock(&bo_list->mutex); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + bo_list->list.bos = (struct radeon_winsys_bo**)data; + bo_list->capacity = capacity; + } + + bo_list->list.bos[bo_list->list.count++] = bo; + pthread_mutex_unlock(&bo_list->mutex); + return VK_SUCCESS; +} + +static void radv_bo_list_remove(struct radv_bo_list *bo_list, struct radeon_winsys_bo *bo) +{ + pthread_mutex_lock(&bo_list->mutex); + for(unsigned i = 0; i < bo_list->list.count; ++i) { + if (bo_list->list.bos[i] == bo) { + bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1]; + --bo_list->list.count; + break; + } + } + pthread_mutex_unlock(&bo_list->mutex); +} + +static void radv_device_init_gs_info(struct radv_device *device) { switch (device->physical_device->rad_info.family) { @@ -1320,6 +1369,8 @@ VkResult radv_CreateDevice( mtx_init(&device->shader_slab_mutex, mtx_plain); list_inithead(&device->shader_slabs); + radv_bo_list_init(&device->bo_list); + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; uint32_t qfi = queue_create->queueFamilyIndex; @@ -1452,6 +1503,8 @@ VkResult radv_CreateDevice( fail_meta: radv_device_finish_meta(device); fail: + radv_bo_list_finish(&device->bo_list); + if (device->trace_bo) device->ws->buffer_destroy(device->trace_bo); @@ -1499,6 +1552,7 @@ void radv_DestroyDevice( radv_destroy_shader_slabs(device); + radv_bo_list_finish(&device->bo_list); vk_free(&device->alloc, device); } @@ -2269,7 +2323,7 @@ static VkResult radv_signal_fence(struct radv_queue *queue, ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, &sem_info, + 1, NULL, NULL, &sem_info, NULL, false, fence->fence); radv_free_sem_info(&sem_info); @@ -2346,7 +2400,7 @@ VkResult radv_QueueSubmit( ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], 1, NULL, NULL, - &sem_info, + &sem_info, NULL, false, base_fence); if (ret) { radv_loge("failed to submit CS %d\n", i); @@ -2384,11 +2438,15 @@ VkResult radv_QueueSubmit( sem_info.cs_emit_wait = j == 0; sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount; + pthread_mutex_lock(&queue->device->bo_list.mutex); + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance, initial_preamble, continue_preamble_cs, - &sem_info, + &sem_info, &queue->device->bo_list.list, can_patch, base_fence); + pthread_mutex_unlock(&queue->device->bo_list.mutex); + if (ret) { radv_loge("failed to submit CS %d\n", i); abort(); @@ -2594,11 +2652,8 @@ static VkResult radv_alloc_memory(struct radv_device *device, goto fail; } else { close(import_info->fd); - goto out_success; } - } - - if (host_ptr_info) { + } else if (host_ptr_info) { assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED); mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, @@ -2608,41 +2663,46 @@ static VkResult radv_alloc_memory(struct radv_device *device, goto fail; } else { mem->user_ptr = host_ptr_info->pHostPointer; - goto out_success; } - } - - uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); - if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE || - mem_type_index == RADV_MEM_TYPE_GTT_CACHED) - domain = RADEON_DOMAIN_GTT; - else - domain = RADEON_DOMAIN_VRAM; + } else { + uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE || + mem_type_index == RADV_MEM_TYPE_GTT_CACHED) + domain = RADEON_DOMAIN_GTT; + else + domain = RADEON_DOMAIN_VRAM; - if (mem_type_index == RADV_MEM_TYPE_VRAM) - flags |= RADEON_FLAG_NO_CPU_ACCESS; - else - flags |= RADEON_FLAG_CPU_ACCESS; + if (mem_type_index == RADV_MEM_TYPE_VRAM) + flags |= RADEON_FLAG_NO_CPU_ACCESS; + else + flags |= RADEON_FLAG_CPU_ACCESS; - if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE) - flags |= RADEON_FLAG_GTT_WC; + if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE) + flags |= RADEON_FLAG_GTT_WC; - if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) - flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; + if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) + flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; - mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, - domain, flags); + mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, + domain, flags); - if (!mem->bo) { - result = VK_ERROR_OUT_OF_DEVICE_MEMORY; - goto fail; + if (!mem->bo) { + result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + goto fail; + } + mem->type_index = mem_type_index; } - mem->type_index = mem_type_index; -out_success: + + result = radv_bo_list_add(&device->bo_list, mem->bo); + if (result != VK_SUCCESS) + goto fail_bo; + *pMem = radv_device_memory_to_handle(mem); return VK_SUCCESS; +fail_bo: + device->ws->buffer_destroy(mem->bo); fail: vk_free2(&device->alloc, pAllocator, mem); @@ -2670,6 +2730,7 @@ void radv_FreeMemory( if (mem == NULL) return; + radv_bo_list_remove(&device->bo_list, mem->bo); device->ws->buffer_destroy(mem->bo); mem->bo = NULL; @@ -2989,7 +3050,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], 1, NULL, NULL, - &sem_info, + &sem_info, NULL, false, base_fence); fence_emitted = true; if (fence) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 1869604e9e2..35452b6f16d 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -598,6 +598,12 @@ struct radv_queue { struct radeon_winsys_cs *continue_preamble_cs; }; +struct radv_bo_list { + struct radv_winsys_bo_list list; + unsigned capacity; + pthread_mutex_t mutex; +}; + struct radv_device { VK_LOADER_DATA _loader_data; @@ -660,6 +666,8 @@ struct radv_device { uint64_t dmesg_timestamp; struct radv_device_extension_table enabled_extensions; + + struct radv_bo_list bo_list; }; struct radv_device_memory { diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index ba16bf332ab..7f19934ab80 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -178,6 +178,11 @@ struct radv_winsys_sem_info { struct radv_winsys_sem_counts signal; }; +struct radv_winsys_bo_list { + struct radeon_winsys_bo **bos; + unsigned count; +}; + struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); @@ -246,6 +251,7 @@ struct radeon_winsys { struct radeon_winsys_cs *initial_preamble_cs, struct radeon_winsys_cs *continue_preamble_cs, struct radv_winsys_sem_info *sem_info, + const struct radv_winsys_bo_list *bo_list, /* optional */ bool can_patch, struct radeon_winsys_fence *fence); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 9921d38ab0f..c4b2232ce9e 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -552,6 +552,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, unsigned count, struct radv_amdgpu_winsys_bo *extra_bo, struct radeon_winsys_cs *extra_cs, + const struct radv_winsys_bo_list *radv_bo_list, amdgpu_bo_list_handle *bo_list) { int r = 0; @@ -579,7 +580,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, bo_list); free(handles); pthread_mutex_unlock(&ws->global_bo_list_lock); - } else if (count == 1 && !extra_bo && !extra_cs && + } else if (count == 1 && !extra_bo && !extra_cs && !radv_bo_list && !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; if (cs->num_buffers == 0) { @@ -601,6 +602,11 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (extra_cs) { total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers; } + + if (radv_bo_list) { + total_buffer_count += radv_bo_list->count; + } + if (total_buffer_count == 0) { *bo_list = 0; return 0; @@ -674,6 +680,27 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, } } + if (radv_bo_list) { + unsigned unique_bo_so_far = unique_bo_count; + const unsigned default_bo_priority = 7; + for (unsigned i = 0; i < radv_bo_list->count; ++i) { + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(radv_bo_list->bos[i]); + bool found = false; + for (unsigned j = 0; j < unique_bo_so_far; ++j) { + if (bo->bo == handles[j]) { + found = true; + priorities[j] = MAX2(priorities[j], default_bo_priority); + break; + } + } + if (!found) { + handles[unique_bo_count] = bo->bo; + priorities[unique_bo_count] = default_bo_priority; + ++unique_bo_count; + } + } + } + if (unique_bo_count > 0) { r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles, priorities, bo_list); @@ -709,6 +736,7 @@ static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radv_winsys_sem_info *sem_info, + const struct radv_winsys_bo_list *radv_bo_list, struct radeon_winsys_cs **cs_array, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, @@ -745,7 +773,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, } } - r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list); + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, + radv_bo_list, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed for the " "chained submission(%d)\n", r); @@ -789,6 +818,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radv_winsys_sem_info *sem_info, + const struct radv_winsys_bo_list *radv_bo_list, struct radeon_winsys_cs **cs_array, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, @@ -813,7 +843,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, memset(&request, 0, sizeof(request)); r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, - preamble_cs, &bo_list); + preamble_cs, radv_bo_list, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed " "for the fallback submission (%d)\n", r); @@ -870,6 +900,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radv_winsys_sem_info *sem_info, + const struct radv_winsys_bo_list *radv_bo_list, struct radeon_winsys_cs **cs_array, unsigned cs_count, struct radeon_winsys_cs *initial_preamble_cs, @@ -939,7 +970,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, (struct radv_amdgpu_winsys_bo*)bo, - preamble_cs, &bo_list); + preamble_cs, radv_bo_list, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed " "for the sysmem submission (%d)\n", r); @@ -990,6 +1021,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, struct radeon_winsys_cs *initial_preamble_cs, struct radeon_winsys_cs *continue_preamble_cs, struct radv_winsys_sem_info *sem_info, + const struct radv_winsys_bo_list *bo_list, bool can_patch, struct radeon_winsys_fence *_fence) { @@ -999,13 +1031,13 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, assert(sem_info); if (!cs->ws->use_ib_bos) { - ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array, + ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) { - ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array, + ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { - ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array, + ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } |