diff options
author | Eric Anholt <[email protected]> | 2014-10-14 12:35:47 +0100 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2015-12-15 12:05:48 -0800 |
commit | 3858722740185c644bdea1d6f27ff4960d4c49c5 (patch) | |
tree | e08726e4dba8ef17df94bab1974d9f8fb4ce0ac3 /src | |
parent | 07570edb98dc9e3b637a8057264c7953eb92a652 (diff) |
vc4: Add support for dumping executed commands to a file.
The VC4_DEBUG=cl,qpu output is nice and all, but I want to be able to get more
detailed dumps, and to replay the exact same commands in simulation. For
that I need a dump with all of the VBOs, shaders, shader recs, etc. This
dump can be parsed by vc4-gpu-tools.
For now this is only doable from simulator mode, because otherwise we
don't have access to the RCL contents generated by the kernel.
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_screen.c | 4 |
-rw-r--r-- | src/gallium/drivers/vc4/vc4_screen.h | 1 |
-rw-r--r-- | src/gallium/drivers/vc4/vc4_simulator.c | 89 |
3 files changed, 94 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 090579c2c76..8ddf0865d21 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -57,6 +57,10 @@ static const struct debug_named_value debug_options[] = { "Flush after each draw call" }, { "always_sync", VC4_DEBUG_ALWAYS_SYNC, "Wait for finish after each flush" }, +#if USE_VC4_SIMULATOR + { "dump", VC4_DEBUG_DUMP, + "Write a GPU command stream trace file" }, +#endif { NULL } }; diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 5992e371093..03f76b257e3 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -41,6 +41,7 @@ struct vc4_bo; #define VC4_DEBUG_ALWAYS_FLUSH 0x0080 #define VC4_DEBUG_ALWAYS_SYNC 0x0100 #define VC4_DEBUG_NIR 0x0200 +#define VC4_DEBUG_DUMP 0x0400 #define VC4_MAX_MIP_LEVELS 12 #define VC4_MAX_TEXTURE_SAMPLERS 16 diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 4b1df9234b6..521ef50f814 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -131,6 +131,93 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec) return 0; } +static void +vc4_dump_to_file(struct vc4_exec_info *exec) +{ + static int dumpno = 0; + struct drm_vc4_get_hang_state *state; + struct drm_vc4_get_hang_state_bo *bo_state; + unsigned int dump_version = 0; + + if (!(vc4_debug & VC4_DEBUG_DUMP)) + return; + + state = calloc(1, sizeof(*state)); + + int unref_count = 0; + list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list, + unref_head) { + unref_count++; + } + + /* Add one more for the overflow area that isn't wrapped in a BO. 
*/ + state->bo_count = exec->bo_count + unref_count + 1; + bo_state = calloc(state->bo_count, sizeof(*bo_state)); + + char *filename = NULL; + asprintf(&filename, "vc4-dri-%d.dump", dumpno++); + FILE *f = fopen(filename, "w+"); + if (!f) { + fprintf(stderr, "Couldn't open %s: %s", filename, + strerror(errno)); + return; + } + + fwrite(&dump_version, sizeof(dump_version), 1, f); + + state->ct0ca = exec->ct0ca; + state->ct0ea = exec->ct0ea; + state->ct1ca = exec->ct1ca; + state->ct1ea = exec->ct1ea; + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + fwrite(state, sizeof(*state), 1, f); + + int i; + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + bo_state[i].handle = i; /* Not used by the parser. */ + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + } + + list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + bo_state[i].handle = 0; + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + i++; + } + + /* Add the static overflow memory area. 
*/ + bo_state[i].handle = exec->bo_count; + bo_state[i].paddr = 0; + bo_state[i].size = OVERFLOW_SIZE; + i++; + + fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + + for (int i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + void *overflow = calloc(1, OVERFLOW_SIZE); + fwrite(overflow, 1, OVERFLOW_SIZE, f); + free(overflow); + + free(state); + free(bo_state); + fclose(f); +} + int vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) { @@ -183,6 +270,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) exec.ct1ea - exec.ct1ca, true); } + vc4_dump_to_file(&exec); + if (exec.ct0ca != exec.ct0ea) { int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea); if (bfc != 1) { |