summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/amd/vulkan/radv_cmd_buffer.c | 35
-rw-r--r-- src/amd/vulkan/radv_device.c | 82
-rw-r--r-- src/amd/vulkan/radv_private.h | 5
-rw-r--r-- src/amd/vulkan/radv_radeon_winsys.h | 2
-rw-r--r-- src/amd/vulkan/si_cmd_buffer.c | 5
-rw-r--r-- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30
6 files changed, 150 insertions, 9 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fdb35a0060d..651b1dd452e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -32,6 +32,8 @@
#include "vk_format.h"
#include "radv_meta.h"
+#include "ac_debug.h"
+
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageLayout src_layout,
@@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
return true;
}
+/**
+ * Emit a trace marker into the command stream of @cmd_buffer.
+ *
+ * Does nothing when the device has no trace buffer (device->trace_bo is
+ * NULL). Otherwise the command buffer's trace id is incremented and two
+ * packets are emitted:
+ *   - a WRITE_DATA packet that writes the new id to the GPU address of the
+ *     trace buffer, and
+ *   - a NOP packet whose payload is AC_ENCODE_TRACE_POINT(id), so the same
+ *     id is also present in the command stream itself.
+ */
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_device *device = cmd_buffer->device;
+	struct radeon_winsys_cs *cs = cmd_buffer->cs;
+	uint64_t va;
+
+	if (!device->trace_bo)
+		return;
+
+	va = device->ws->buffer_get_va(device->trace_bo);
+
+	/* 5 dwords for WRITE_DATA (header + 4) plus 2 for the NOP marker. */
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(device->ws, cs, 7);
+
+	++cmd_buffer->state.trace_id;
+	device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+			S_370_WR_CONFIRM(1) |
+			S_370_ENGINE_SEL(V_370_ME));
+	radeon_emit(cs, va);
+	radeon_emit(cs, va >> 32);
+	radeon_emit(cs, cmd_buffer->state.trace_id);
+
+	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+	radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
+
+	/* Same post-emit check the draw/dispatch entry points perform. */
+	assert(cs->cdw <= cdw_max);
+}
+
static void
radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
@@ -1929,6 +1957,8 @@ void radv_CmdDraw(
S_0287F0_USE_OPAQUE(0));
assert(cmd_buffer->cs->cdw <= cdw_max);
+
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
@@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
@@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, count_va >> 32);
radeon_emit(cs, stride); /* stride */
radeon_emit(cs, di_src_sel);
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
@@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
radeon_emit(cmd_buffer->cs, 1);
assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_CmdDispatchIndirect(
@@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
}
assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_unaligned_dispatch(
@@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
S_00B800_PARTIAL_TG_EN(1));
assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
void radv_CmdEndRenderPass(
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index e57a419cfaf..ef8ca1a3755 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
device->ws->cs_finalize(device->empty_cs[family]);
}
+ if (getenv("RADV_TRACE_FILE")) {
+ device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
+ RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
+ if (!device->trace_bo)
+ goto fail;
+
+ device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
+ if (!device->trace_id_ptr)
+ goto fail;
+ }
+
*pDevice = radv_device_to_handle(device);
return VK_SUCCESS;
fail:
+ if (device->trace_bo)
+ device->ws->buffer_destroy(device->trace_bo);
+
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
radv_queue_finish(&device->queues[i][q]);
if (device->queue_count[i])
vk_free(&device->alloc, device->queues[i]);
}
+
+ if (device->hw_ctx)
+ device->ws->ctx_destroy(device->hw_ctx);
+
vk_free(&device->alloc, device);
return result;
}
@@ -780,6 +798,9 @@ void radv_DestroyDevice(
{
RADV_FROM_HANDLE(radv_device, device, _device);
+ if (device->trace_bo)
+ device->ws->buffer_destroy(device->trace_bo);
+
device->ws->ctx_destroy(device->hw_ctx);
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
@@ -869,6 +890,21 @@ void radv_GetDeviceQueue(
*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}
+/**
+ * Dump command stream @cs to the file named by the RADV_TRACE_FILE
+ * environment variable.
+ *
+ * The file starts with a "Trace ID" line holding the value currently stored
+ * at device->trace_id_ptr; the same value is passed to the winsys cs_dump
+ * hook together with @cs. Failures to resolve or open the file are reported
+ * on stderr and the function returns without dumping.
+ */
+static void radv_dump_trace(struct radv_device *device,
+			    struct radeon_winsys_cs *cs)
+{
+	const char *filename = getenv("RADV_TRACE_FILE");
+	if (!filename) {
+		/* The variable may have been unset since device creation;
+		 * passing NULL to fopen() or "%s" is undefined behaviour. */
+		fprintf(stderr, "Failed to write trace dump: RADV_TRACE_FILE is not set\n");
+		return;
+	}
+
+	FILE *f = fopen(filename, "w");
+	if (!f) {
+		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
+		return;
+	}
+
+	/* Read the id once so the header line and the value handed to
+	 * cs_dump always agree. */
+	uint32_t trace_id = *device->trace_id_ptr;
+
+	fprintf(f, "Trace ID: %x\n", trace_id);
+	device->ws->cs_dump(cs, f, trace_id);
+	fclose(f);
+}
+
VkResult radv_QueueSubmit(
VkQueue _queue,
uint32_t submitCount,
@@ -880,10 +916,12 @@ VkResult radv_QueueSubmit(
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
int ret;
+ uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
bool can_patch = true;
+ uint32_t advance;
if (!pSubmits[i].commandBufferCount)
continue;
@@ -900,15 +938,41 @@ VkResult radv_QueueSubmit(
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
can_patch = false;
}
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
- pSubmits[i].commandBufferCount,
- (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
- pSubmits[i].waitSemaphoreCount,
- (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
- pSubmits[i].signalSemaphoreCount,
- can_patch, base_fence);
- if (ret)
- radv_loge("failed to submit CS %d\n", i);
+
+ for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+ advance = MIN2(max_cs_submission,
+ pSubmits[i].commandBufferCount - j);
+ bool b = j == 0;
+ bool e = j + advance == pSubmits[i].commandBufferCount;
+
+ if (queue->device->trace_bo)
+ *queue->device->trace_id_ptr = 0;
+
+ ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
+ pSubmits[i].commandBufferCount,
+ (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
+ b ? pSubmits[i].waitSemaphoreCount : 0,
+ (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
+ e ? pSubmits[i].signalSemaphoreCount : 0,
+ can_patch, base_fence);
+
+ if (ret) {
+ radv_loge("failed to submit CS %d\n", i);
+ abort();
+ }
+ if (queue->device->trace_bo) {
+ bool success = queue->device->ws->ctx_wait_idle(
+ queue->device->hw_ctx,
+ radv_queue_family_to_ring(
+ queue->queue_family_index),
+ queue->queue_idx);
+
+ if (!success) { /* Hang */
+ radv_dump_trace(queue->device, cs_array[j]);
+ abort();
+ }
+ }
+ }
free(cs_array);
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index d6ea0e32471..40ee7942585 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -481,6 +481,9 @@ struct radv_device {
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
+
+ struct radeon_winsys_bo *trace_bo;
+ uint32_t *trace_id_ptr;
};
struct radv_device_memory {
@@ -671,6 +674,7 @@ struct radv_cmd_state {
unsigned active_occlusion_queries;
float offset_scale;
uint32_t descriptors_dirty;
+ uint32_t trace_id;
};
struct radv_cmd_pool {
@@ -765,6 +769,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radeon_winsys_bo *bo,
uint64_t offset, uint64_t size, uint32_t value);
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
/*
* Takes x,y,z as exact numbers of invocations, instead of blocks.
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4b738b8cf46..a0b5092e300 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -319,6 +319,8 @@ struct radeon_winsys {
void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
struct radeon_winsys_cs *child);
+ void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
+
int (*surface_init)(struct radeon_winsys *ws,
struct radeon_surf *surf);
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index e3f883f50b6..a483ad9fd39 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
}
}
+ if (cmd_buffer->state.flush_bits)
+ radv_cmd_buffer_trace_emit(cmd_buffer);
cmd_buffer->state.flush_bits = 0;
}
@@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
+
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index b24aa997495..99b16192bcd 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,6 +27,7 @@
#include <amdgpu_drm.h>
#include <assert.h>
+#include "ac_debug.h"
#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
@@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
return ret;
}
+
+/*
+ * Translate GPU virtual address @addr into a CPU pointer by searching the
+ * IB buffers owned by @cs.
+ *
+ * The old IB buffers are checked first, then the current one. The first
+ * buffer whose [va, va + size) range contains @addr and that can be CPU
+ * mapped yields the result; a buffer that fails to map is skipped. If no
+ * buffer qualifies, the initial NULL value is returned.
+ */
+static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr)
+{
+	void *mapping = NULL;
+
+	for (unsigned idx = 0; idx <= cs->num_old_ib_buffers; ++idx) {
+		struct radv_amdgpu_winsys_bo *candidate;
+
+		/* Index num_old_ib_buffers stands for the current IB buffer. */
+		if (idx == cs->num_old_ib_buffers)
+			candidate = (struct radv_amdgpu_winsys_bo *)cs->ib_buffer;
+		else
+			candidate = (struct radv_amdgpu_winsys_bo *)cs->old_ib_buffers[idx];
+
+		/* Skip buffers whose address range does not contain addr. */
+		if (addr < candidate->va || addr - candidate->va >= candidate->size)
+			continue;
+
+		if (amdgpu_bo_cpu_map(candidate->bo, &mapping) != 0)
+			continue;
+
+		return (char *)mapping + (addr - candidate->va);
+	}
+	return mapping;
+}
+
+/*
+ * winsys cs_dump hook: parse the main IB of @_cs with ac_parse_ib() and
+ * write the result to @file, passing @trace_id through to the parser.
+ *
+ * If the IB's GPU address cannot be resolved to a CPU pointer, a short
+ * note is written to @file instead and nothing is parsed.
+ */
+static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
+				       FILE* file,
+				       uint32_t trace_id)
+{
+	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	void *ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
+
+	/* The lookup returns NULL when the address is in none of the CS's
+	 * buffers or the mapping fails; do not hand that to the parser. */
+	if (!ib) {
+		fprintf(file, "Failed to map the main IB for dumping\n");
+		return;
+	}
+
+	ac_parse_ib(file, ib, cs->ib.size, trace_id, "main IB",
+		    cs->ws->info.chip_class);
+}
+
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+ ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
ws->base.create_fence = radv_amdgpu_create_fence;
ws->base.destroy_fence = radv_amdgpu_destroy_fence;
ws->base.create_sem = radv_amdgpu_create_sem;