summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2014-10-14 12:35:47 +0100
committerEric Anholt <[email protected]>2015-12-15 12:05:48 -0800
commit3858722740185c644bdea1d6f27ff4960d4c49c5 (patch)
treee08726e4dba8ef17df94bab1974d9f8fb4ce0ac3 /src
parent07570edb98dc9e3b637a8057264c7953eb92a652 (diff)
vc4: Add support for dumping executed commands to a file.
The VC4_DEBUG=cl,qpu output is nice and all, but I want to be able to get more detailed dumps, and to replay the exact same commands in simulation. For that I need a dump with all of the VBOs, shaders, shader recs, etc. This dump can be parsed by vc4-gpu-tools. For now this is only doable from simulator mode, because otherwise we don't have access to the RCL contents generated by the kernel.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.h1
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator.c89
3 files changed, 94 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 090579c2c76..8ddf0865d21 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -57,6 +57,10 @@ static const struct debug_named_value debug_options[] = {
"Flush after each draw call" },
{ "always_sync", VC4_DEBUG_ALWAYS_SYNC,
"Wait for finish after each flush" },
+#if USE_VC4_SIMULATOR
+ { "dump", VC4_DEBUG_DUMP,
+ "Write a GPU command stream trace file" },
+#endif
{ NULL }
};
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 5992e371093..03f76b257e3 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -41,6 +41,7 @@ struct vc4_bo;
#define VC4_DEBUG_ALWAYS_FLUSH 0x0080
#define VC4_DEBUG_ALWAYS_SYNC 0x0100
#define VC4_DEBUG_NIR 0x0200
+#define VC4_DEBUG_DUMP 0x0400
#define VC4_MAX_MIP_LEVELS 12
#define VC4_MAX_TEXTURE_SAMPLERS 16
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 4b1df9234b6..521ef50f814 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -131,6 +131,93 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
return 0;
}
+/**
+ * Writes the state of a job to a new "vc4-dri-<N>.dump" file, where <N> is
+ * a counter that increments on every dump attempt.
+ *
+ * Does nothing unless VC4_DEBUG_DUMP is set in vc4_debug.
+ *
+ * File layout, in order:
+ *  - the dump version word (currently 0),
+ *  - a struct drm_vc4_get_hang_state holding the CL start/end addresses,
+ *  - one struct drm_vc4_get_hang_state_bo per dumped BO,
+ *  - the raw contents of each dumped BO, in the same order.
+ *
+ * The dumped BOs are exec->bo[], then every BO on exec->unref_list, then a
+ * zero-filled OVERFLOW_SIZE region standing in for the overflow area.
+ *
+ * Failures (allocation, open, write, close) are reported on stderr and the
+ * dump is abandoned; nothing is propagated to the caller.
+ *
+ * \param exec  The job whose BOs and control list addresses are dumped.
+ */
+static void
+vc4_dump_to_file(struct vc4_exec_info *exec)
+{
+        static int dumpno = 0;
+        struct drm_vc4_get_hang_state *state = NULL;
+        struct drm_vc4_get_hang_state_bo *bo_state = NULL;
+        unsigned int dump_version = 0;
+        char *filename = NULL;
+        void *overflow = NULL;
+        FILE *f = NULL;
+        int unref_count = 0;
+        int i;
+
+        if (!(vc4_debug & VC4_DEBUG_DUMP))
+                return;
+
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                unref_count++;
+        }
+
+        state = calloc(1, sizeof(*state));
+        if (!state) {
+                fprintf(stderr, "Couldn't allocate dump state\n");
+                goto out;
+        }
+
+        /* Add one more for the overflow area that isn't wrapped in a BO. */
+        state->bo_count = exec->bo_count + unref_count + 1;
+        bo_state = calloc(state->bo_count, sizeof(*bo_state));
+        overflow = calloc(1, OVERFLOW_SIZE);
+        if (!bo_state || !overflow) {
+                fprintf(stderr, "Couldn't allocate dump BO state\n");
+                goto out;
+        }
+
+        if (asprintf(&filename, "vc4-dri-%d.dump", dumpno++) < 0) {
+                /* The pointer's contents are undefined after a failure. */
+                filename = NULL;
+                fprintf(stderr, "Couldn't allocate dump filename\n");
+                goto out;
+        }
+
+        f = fopen(filename, "w+");
+        if (!f) {
+                fprintf(stderr, "Couldn't open %s: %s\n", filename,
+                        strerror(errno));
+                goto out;
+        }
+
+        fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+        state->ct0ca = exec->ct0ca;
+        state->ct0ea = exec->ct0ea;
+        state->ct1ca = exec->ct1ca;
+        state->ct1ea = exec->ct1ea;
+        state->start_bin = exec->ct0ca;
+        state->start_render = exec->ct1ca;
+        fwrite(state, sizeof(*state), 1, f);
+
+        /* Build the BO table: exec BOs, unref'd BOs, then overflow. */
+        for (i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                bo_state[i].handle = i; /* Not used by the parser. */
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+        }
+
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                bo_state[i].handle = 0;
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+                i++;
+        }
+
+        /* Add the static overflow memory area. */
+        bo_state[i].handle = exec->bo_count;
+        bo_state[i].paddr = 0;
+        bo_state[i].size = OVERFLOW_SIZE;
+
+        fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+        /* BO contents follow, in the same order as the table above. */
+        for (i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        fwrite(overflow, 1, OVERFLOW_SIZE, f);
+
+        /* A short write sets the stream's error flag; report it once. */
+        if (ferror(f))
+                fprintf(stderr, "Error writing %s\n", filename);
+
+out:
+        /* f is only non-NULL once filename has been successfully built. */
+        if (f && fclose(f) != 0) {
+                fprintf(stderr, "Couldn't close %s: %s\n", filename,
+                        strerror(errno));
+        }
+        free(overflow);
+        free(bo_state);
+        free(state);
+        free(filename);
+}
+
+
int
vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
{
@@ -183,6 +270,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
exec.ct1ea - exec.ct1ca, true);
}
+ vc4_dump_to_file(&exec);
+
if (exec.ct0ca != exec.ct0ea) {
int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
if (bfc != 1) {