diff options
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 35 | ||||
-rw-r--r-- | src/amd/vulkan/radv_device.c | 82 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 5 | ||||
-rw-r--r-- | src/amd/vulkan/radv_radeon_winsys.h | 2 | ||||
-rw-r--r-- | src/amd/vulkan/si_cmd_buffer.c | 5 | ||||
-rw-r--r-- | src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30 |
6 files changed, 150 insertions, 9 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index fdb35a0060d..651b1dd452e 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -32,6 +32,8 @@ #include "vk_format.h" #include "radv_meta.h" +#include "ac_debug.h" + static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, @@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, return true; } +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) +{ + struct radv_device *device = cmd_buffer->device; + struct radeon_winsys_cs *cs = cmd_buffer->cs; + uint64_t va; + + if (!device->trace_bo) + return; + + va = device->ws->buffer_get_va(device->trace_bo); + + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7); + + ++cmd_buffer->state.trace_id; + device->ws->cs_add_buffer(cs, device->trace_bo, 8); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, cmd_buffer->state.trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); +} + static void radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) @@ -1929,6 +1957,8 @@ void radv_CmdDraw( S_0287F0_USE_OPAQUE(0)); assert(cmd_buffer->cs->cdw <= cdw_max); + + radv_cmd_buffer_trace_emit(cmd_buffer); } static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer) @@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed( radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); assert(cmd_buffer->cs->cdw <= cdw_max); + radv_cmd_buffer_trace_emit(cmd_buffer); } static void @@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, count_va >> 32); radeon_emit(cs, stride); /* stride */ radeon_emit(cs, di_src_sel); + radv_cmd_buffer_trace_emit(cmd_buffer); } static void @@ -2188,6 +2220,7 @@ void radv_CmdDispatch( radeon_emit(cmd_buffer->cs, 1); assert(cmd_buffer->cs->cdw <= cdw_max); + radv_cmd_buffer_trace_emit(cmd_buffer); } void radv_CmdDispatchIndirect( @@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect( } assert(cmd_buffer->cs->cdw <= cdw_max); + radv_cmd_buffer_trace_emit(cmd_buffer); } void radv_unaligned_dispatch( @@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch( S_00B800_PARTIAL_TG_EN(1)); assert(cmd_buffer->cs->cdw <= cdw_max); + radv_cmd_buffer_trace_emit(cmd_buffer); } void radv_CmdEndRenderPass( diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index e57a419cfaf..ef8ca1a3755 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -760,16 +760,34 @@ VkResult radv_CreateDevice( device->ws->cs_finalize(device->empty_cs[family]); } + if (getenv("RADV_TRACE_FILE")) { + device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8, + RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); + if (!device->trace_bo) + goto fail; + + device->trace_id_ptr = device->ws->buffer_map(device->trace_bo); + if (!device->trace_id_ptr) + goto fail; + } + *pDevice = radv_device_to_handle(device); return VK_SUCCESS; fail: + if (device->trace_bo) + device->ws->buffer_destroy(device->trace_bo); + for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { for (unsigned q = 0; q < device->queue_count[i]; q++) radv_queue_finish(&device->queues[i][q]); if (device->queue_count[i]) vk_free(&device->alloc, device->queues[i]); } + + if (device->hw_ctx) + device->ws->ctx_destroy(device->hw_ctx); + vk_free(&device->alloc, device); return result; } @@ -780,6 +798,9 @@ void radv_DestroyDevice( { RADV_FROM_HANDLE(radv_device, device, _device); + if (device->trace_bo) + device->ws->buffer_destroy(device->trace_bo); + device->ws->ctx_destroy(device->hw_ctx); for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { for (unsigned q = 0; q < device->queue_count[i]; q++) @@ -869,6 +890,21 @@ void radv_GetDeviceQueue( *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]); } +static void radv_dump_trace(struct radv_device *device, + struct radeon_winsys_cs *cs) +{ + const char *filename = getenv("RADV_TRACE_FILE"); + FILE *f = fopen(filename, "w"); + if (!f) { + fprintf(stderr, "Failed to write trace dump to %s\n", filename); + return; + } + + fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); + device->ws->cs_dump(cs, f, *device->trace_id_ptr); + fclose(f); +} + VkResult radv_QueueSubmit( VkQueue _queue, uint32_t submitCount, @@ -880,10 +916,12 @@ VkResult radv_QueueSubmit( struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; struct radeon_winsys_ctx *ctx = queue->device->hw_ctx; int ret; + uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX; for (uint32_t i = 0; i < submitCount; i++) { struct radeon_winsys_cs **cs_array; bool can_patch = true; + uint32_t advance; if (!pSubmits[i].commandBufferCount) continue; @@ -900,15 +938,41 @@ VkResult radv_QueueSubmit( if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) can_patch = false; } - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array, - pSubmits[i].commandBufferCount, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - pSubmits[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - pSubmits[i].signalSemaphoreCount, - can_patch, base_fence); - if (ret) - radv_loge("failed to submit CS %d\n", i); + + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { + advance = MIN2(max_cs_submission, + pSubmits[i].commandBufferCount - j); + bool b = j == 0; + bool e = j + advance == pSubmits[i].commandBufferCount; + + if (queue->device->trace_bo) + *queue->device->trace_id_ptr = 0; + + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array, + pSubmits[i].commandBufferCount, + (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, + b ? pSubmits[i].waitSemaphoreCount : 0, + (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, + e ? pSubmits[i].signalSemaphoreCount : 0, + can_patch, base_fence); + + if (ret) { + radv_loge("failed to submit CS %d\n", i); + abort(); + } + if (queue->device->trace_bo) { + bool success = queue->device->ws->ctx_wait_idle( + queue->device->hw_ctx, + radv_queue_family_to_ring( + queue->queue_family_index), + queue->queue_idx); + + if (!success) { /* Hang */ + radv_dump_trace(queue->device, cs_array[j]); + abort(); + } + } + } free(cs_array); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index d6ea0e32471..40ee7942585 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -481,6 +481,9 @@ struct radv_device { float sample_locations_4x[4][2]; float sample_locations_8x[8][2]; float sample_locations_16x[16][2]; + + struct radeon_winsys_bo *trace_bo; + uint32_t *trace_id_ptr; }; struct radv_device_memory { @@ -671,6 +674,7 @@ struct radv_cmd_state { unsigned active_occlusion_queries; float offset_scale; uint32_t descriptors_dirty; + uint32_t trace_id; }; struct radv_cmd_pool { @@ -765,6 +769,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value); +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); /* * Takes x,y,z as exact numbers of invocations, instead of blocks. diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 4b738b8cf46..a0b5092e300 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -319,6 +319,8 @@ struct radeon_winsys { void (*cs_execute_secondary)(struct radeon_winsys_cs *parent, struct radeon_winsys_cs *child); + void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id); + int (*surface_init)(struct radeon_winsys *ws, struct radeon_surf *surf); diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e3f883f50b6..a483ad9fd39 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) } } + if (cmd_buffer->state.flush_bits) + radv_cmd_buffer_trace_emit(cmd_buffer); cmd_buffer->state.flush_bits = 0; } @@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, 0); } + + radv_cmd_buffer_trace_emit(cmd_buffer); } /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */ @@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, 0); } + radv_cmd_buffer_trace_emit(cmd_buffer); } static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index b24aa997495..99b16192bcd 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -27,6 +27,7 @@ #include <amdgpu_drm.h> #include <assert.h> +#include "ac_debug.h" #include "amdgpu_id.h" #include "radv_radeon_winsys.h" #include "radv_amdgpu_cs.h" @@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, return ret; } + +static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr) +{ + void *ret = NULL; + for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) { + struct radv_amdgpu_winsys_bo *bo; + + bo = (struct radv_amdgpu_winsys_bo*) + (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]); + if (addr >= bo->va && addr - bo->va < bo->size) { + if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) + return (char *)ret + (addr - bo->va); + } + } + return ret; +} + +static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs, + FILE* file, + uint32_t trace_id) +{ + struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs; + + ac_parse_ib(file, + radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address), + cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class); +} + static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); @@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws) ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer; ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary; ws->base.cs_submit = radv_amdgpu_winsys_cs_submit; + ws->base.cs_dump = radv_amdgpu_winsys_cs_dump; ws->base.create_fence = radv_amdgpu_create_fence; ws->base.destroy_fence = radv_amdgpu_destroy_fence; ws->base.create_sem = radv_amdgpu_create_sem; |