summaryrefslogtreecommitdiffstats
path: root/src/amd/vulkan/winsys
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2018-11-15 11:29:53 +0100
committerSamuel Pitoiset <[email protected]>2018-11-19 16:32:04 +0100
commit4d30f2c6f42c9653f9ff49af6c5be0218f8964f3 (patch)
tree46d30c3d7264ba001ee4ace2e79836543d2c6daf /src/amd/vulkan/winsys
parent8ca8a6a7b13798fa494c10890b2336c08c9e44aa (diff)
radv/winsys: remove the max IBs per submit limit for the fallback path
The chained submission is the fastest path and it should now be used more often than before. This removes some EOP events. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/vulkan/winsys')
-rw-r--r--src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c103
1 file changed, 55 insertions, 48 deletions
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index abc4f3903d3..f2d07a54db3 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -865,66 +865,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
amdgpu_bo_list_handle bo_list;
- struct amdgpu_cs_request request;
- bool emit_signal_sem = sem_info->cs_emit_signal;
+ struct amdgpu_cs_request request = {};
+ struct amdgpu_cs_ib_info *ibs;
+ struct radv_amdgpu_cs *cs0;
+ unsigned number_of_ibs;
+
assert(cs_count);
+ cs0 = radv_amdgpu_cs(cs_array[0]);
- for (unsigned i = 0; i < cs_count;) {
- struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
- struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
- struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
- unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
- cs_count - i);
+ /* Compute the number of IBs for this submit. */
+ number_of_ibs = cs_count + !!initial_preamble_cs;
- memset(&request, 0, sizeof(request));
+ /* Create a buffer object list. */
+ r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
+ initial_preamble_cs, radv_bo_list,
+ &bo_list);
+ if (r) {
+ fprintf(stderr, "amdgpu: buffer list creation failed "
+ "for the fallback submission (%d)\n", r);
+ return r;
+ }
- r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
- preamble_cs, radv_bo_list, &bo_list);
- if (r) {
- fprintf(stderr, "amdgpu: buffer list creation failed "
- "for the fallback submission (%d)\n", r);
- return r;
- }
+ ibs = malloc(number_of_ibs * sizeof(*ibs));
+ if (!ibs) {
+ if (bo_list)
+ amdgpu_bo_list_destroy(bo_list);
+ return -ENOMEM;
+ }
- request.ip_type = cs0->hw_ip;
- request.ring = queue_idx;
- request.resources = bo_list;
- request.number_of_ibs = cnt + !!preamble_cs;
- request.ibs = ibs;
- request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
+ /* Configure the CS request. */
+ if (initial_preamble_cs)
+ ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
- if (preamble_cs) {
- ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
- }
+ for (unsigned i = 0; i < cs_count; i++) {
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
- for (unsigned j = 0; j < cnt; ++j) {
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
- ibs[j + !!preamble_cs] = cs->ib;
+ ibs[i + !!initial_preamble_cs] = cs->ib;
- if (cs->is_chained) {
- *cs->ib_size_ptr -= 4;
- cs->is_chained = false;
- }
+ if (cs->is_chained) {
+ *cs->ib_size_ptr -= 4;
+ cs->is_chained = false;
}
+ }
- sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
- r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
- if (r) {
- if (r == -ENOMEM)
- fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
- else
- fprintf(stderr, "amdgpu: The CS has been rejected, "
- "see dmesg for more information.\n");
- }
+ request.ip_type = cs0->hw_ip;
+ request.ring = queue_idx;
+ request.resources = bo_list;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+ request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
- if (bo_list)
- amdgpu_bo_list_destroy(bo_list);
+ /* Submit the CS. */
+ r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+ if (r) {
+ if (r == -ENOMEM)
+ fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
+ else
+ fprintf(stderr, "amdgpu: The CS has been rejected, "
+ "see dmesg for more information.\n");
+ }
- if (r)
- return r;
+ if (bo_list)
+ amdgpu_bo_list_destroy(bo_list);
+ free(ibs);
+
+ if (r)
+ return r;
- i += cnt;
- }
if (fence)
radv_amdgpu_request_to_fence(ctx, fence, &request);
@@ -1131,7 +1138,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
if (!cs->ws->use_ib_bos) {
ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
- } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
+ } else if (can_patch && cs->ws->batchchain) {
ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else {