diff options
author | Dave Airlie <[email protected]> | 2017-02-10 00:20:44 +0000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2017-02-13 20:00:28 +0000 |
commit | 8b47b97215af7157bc15676167cab73aa5a61a76 (patch) | |
tree | 451ff29b67e73101485e0d4a90238965e71ead45 /src/amd/vulkan/radv_device.c | |
parent | d49d275c415d60ae08dc3e52d8db11f19a44010f (diff) |
radv: detect command buffers that do no work and drop them (v2)
If a command buffer contains nothing but flushes, don't bother submitting
it: we already flush on command buffer submission anyway.
This will reduce some CPU overhead on dota2, which submits a fair
few command streams that don't end up drawing anything.
v2: reorganise loop to count first then malloc,
rename some vars (Bas)
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd/vulkan/radv_device.c')
-rw-r--r-- | src/amd/vulkan/radv_device.c | 27 |
1 file changed, 20 insertions, 7 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index fff31259028..9be09af1795 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1452,8 +1452,18 @@ VkResult radv_QueueSubmit(
 		struct radeon_winsys_cs **cs_array;
 		bool can_patch = true;
 		uint32_t advance;
+		int draw_cmd_buffers_count = 0;
 
-		if (!pSubmits[i].commandBufferCount) {
+		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
+					 pSubmits[i].pCommandBuffers[j]);
+			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+			if (cmd_buffer->no_draws == true)
+				continue;
+			draw_cmd_buffers_count++;
+		}
+
+		if (!draw_cmd_buffers_count) {
 			if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
 				ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
 								   &queue->device->empty_cs[queue->queue_family_index],
@@ -1472,24 +1482,27 @@ VkResult radv_QueueSubmit(
 			continue;
 		}
 
-		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
-					        pSubmits[i].commandBufferCount);
+		cs_array = malloc(sizeof(struct radeon_winsys_cs *) * draw_cmd_buffers_count);
 
+		int draw_cmd_buffer_idx = 0;
 		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
 			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
 					 pSubmits[i].pCommandBuffers[j]);
 			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+			if (cmd_buffer->no_draws == true)
+				continue;
 
-			cs_array[j] = cmd_buffer->cs;
+			cs_array[draw_cmd_buffer_idx] = cmd_buffer->cs;
+			draw_cmd_buffer_idx++;
 			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
 				can_patch = false;
 		}
 
-		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+		for (uint32_t j = 0; j < draw_cmd_buffers_count; j += advance) {
 			advance = MIN2(max_cs_submission,
-				       pSubmits[i].commandBufferCount - j);
+				       draw_cmd_buffers_count - j);
 			bool b = j == 0;
-			bool e = j + advance == pSubmits[i].commandBufferCount;
+			bool e = j + advance == draw_cmd_buffers_count;
 
 			if (queue->device->trace_bo)
 				*queue->device->trace_id_ptr = 0;