diff options
author | Samuel Pitoiset <[email protected]> | 2018-11-15 11:29:53 +0100 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2018-11-19 16:32:04 +0100 |
commit | 4d30f2c6f42c9653f9ff49af6c5be0218f8964f3 (patch) | |
tree | 46d30c3d7264ba001ee4ace2e79836543d2c6daf /src/amd/vulkan/winsys | |
parent | 8ca8a6a7b13798fa494c10890b2336c08c9e44aa (diff) |
radv/winsys: remove the max IBs per submit limit for the fallback path
The chained submission is the fastest path and it should now
be used more often than before. This removes some EOP events.
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/vulkan/winsys')
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 103 |
1 file changed, 55 insertions, 48 deletions
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index abc4f3903d3..f2d07a54db3 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -865,66 +865,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request; - bool emit_signal_sem = sem_info->cs_emit_signal; + struct amdgpu_cs_request request = {}; + struct amdgpu_cs_ib_info *ibs; + struct radv_amdgpu_cs *cs0; + unsigned number_of_ibs; + assert(cs_count); + cs0 = radv_amdgpu_cs(cs_array[0]); - for (unsigned i = 0; i < cs_count;) { - struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; - struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs; - unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs, - cs_count - i); + /* Compute the number of IBs for this submit. */ + number_of_ibs = cs_count + !!initial_preamble_cs; - memset(&request, 0, sizeof(request)); + /* Create a buffer object list. 
*/ + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, + initial_preamble_cs, radv_bo_list, + &bo_list); + if (r) { + fprintf(stderr, "amdgpu: buffer list creation failed " + "for the fallback submission (%d)\n", r); + return r; + } - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0, - preamble_cs, radv_bo_list, &bo_list); - if (r) { - fprintf(stderr, "amdgpu: buffer list creation failed " - "for the fallback submission (%d)\n", r); - return r; - } + ibs = malloc(number_of_ibs * sizeof(*ibs)); + if (!ibs) { + if (bo_list) + amdgpu_bo_list_destroy(bo_list); + return -ENOMEM; + } - request.ip_type = cs0->hw_ip; - request.ring = queue_idx; - request.resources = bo_list; - request.number_of_ibs = cnt + !!preamble_cs; - request.ibs = ibs; - request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + /* Configure the CS request. */ + if (initial_preamble_cs) + ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib; - if (preamble_cs) { - ibs[0] = radv_amdgpu_cs(preamble_cs)->ib; - } + for (unsigned i = 0; i < cs_count; i++) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); - for (unsigned j = 0; j < cnt; ++j) { - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); - ibs[j + !!preamble_cs] = cs->ib; + ibs[i + !!initial_preamble_cs] = cs->ib; - if (cs->is_chained) { - *cs->ib_size_ptr -= 4; - cs->is_chained = false; - } + if (cs->is_chained) { + *cs->ib_size_ptr -= 4; + cs->is_chained = false; } + } - sem_info->cs_emit_signal = (i == cs_count - cnt) ? 
emit_signal_sem : false; - r = radv_amdgpu_cs_submit(ctx, &request, sem_info); - if (r) { - if (r == -ENOMEM) - fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); - else - fprintf(stderr, "amdgpu: The CS has been rejected, " - "see dmesg for more information.\n"); - } + request.ip_type = cs0->hw_ip; + request.ring = queue_idx; + request.resources = bo_list; + request.number_of_ibs = number_of_ibs; + request.ibs = ibs; + request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + /* Submit the CS. */ + r = radv_amdgpu_cs_submit(ctx, &request, sem_info); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } - if (r) - return r; + if (bo_list) + amdgpu_bo_list_destroy(bo_list); + free(ibs); + + if (r) + return r; - i += cnt; - } if (fence) radv_amdgpu_request_to_fence(ctx, fence, &request); @@ -1131,7 +1138,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, if (!cs->ws->use_ib_bos) { ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); - } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) { + } else if (can_patch && cs->ws->batchchain) { ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { |