diff options
author | Nicolai Hähnle <[email protected]> | 2017-10-22 17:38:59 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2017-11-09 14:01:03 +0100 |
commit | c9fefa062b369056eb4c3ef82b529b0acc4cc88a (patch) | |
tree | 234689a8cd621e19f3febc3c942e12753906c102 /src/gallium/drivers/ddebug/dd_screen.c | |
parent | e8bb8758ddfa884b55abf8648af9cb7239bc1f66 (diff) |
ddebug: rewrite to always use a threaded approach
This patch has multiple goals:
1. Off-load the writing of records in 'always' mode to another thread
for performance.
2. Allow using ddebug with threaded contexts. This really forces us to
move some of the "after_draw" handling into another thread.
3. Simplify the different modes of ddebug, both in the code and in
the user interface, i.e. GALLIUM_DDEBUG. In particular, there's
no 'pipelined' anymore, since we're always pipelined; and 'noflush'
is replaced by 'flush', since we no longer flush by default.
4. Fix the fences in pipelining mode. They previously relied on writes
via pipe_context::clear_buffer. However, on radeonsi, those could
(quite reasonably) end up in the SDMA buffer. So we use the newly
added PIPE_FLUSH_{TOP,BOTTOM}_OF_PIPE fences instead.
5. Improve pipelined mode overall, using the finer grained information
provided by the new fences.
Overall, the result is that pipelined mode should be more useful, and
using ddebug in default mode is much less invasive, in the sense that
it changes the overall driver behavior less (which is kind of crucial
for a driver debugging tool).
An example of the new hang debug output:
Gallium debugger active.
Hang detection timeout is 1000ms.
GPU hang detected, collecting information...
Draw #   driver   prev BOP   TOP   BOP   dump file
-------------------------------------------------------------
2        YES      YES        YES   NO    /home/nha/ddebug_dumps/shader_runner_19919_00000000
3        YES      NO         YES   NO    /home/nha/ddebug_dumps/shader_runner_19919_00000001
4        YES      NO         YES   NO    /home/nha/ddebug_dumps/shader_runner_19919_00000002
5        YES      NO         YES   NO    /home/nha/ddebug_dumps/shader_runner_19919_00000003
Done.
We can see that there were almost certainly 4 draws in flight when
the hang happened: the top-of-pipe fence was signaled for all 4 draws,
the bottom-of-pipe fence for none of them. In virtually all cases,
we'd expect the first draw in the list to be at fault, but due to the
GPU parallelism, it's possible (though highly unlikely) that one of
the later draws causes a component to get stuck in a way that prevents
the earlier draws from making progress as well.
(In the above example, there were actually only 3 draws truly in flight:
the last draw is a blit that waits for the earlier draws; however, its
top-of-pipe fence is emitted before the cache flush and wait, and so
the fact that the draw hasn't truly started yet can only be seen from a
closer inspection of GPU state.)
Acked-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/ddebug/dd_screen.c')
-rw-r--r-- | src/gallium/drivers/ddebug/dd_screen.c | 161 |
1 files changed, 110 insertions, 51 deletions
diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c index caf31f6df0f..11d1d8c1e9c 100644 --- a/src/gallium/drivers/ddebug/dd_screen.c +++ b/src/gallium/drivers/ddebug/dd_screen.c @@ -28,6 +28,7 @@ #include "dd_pipe.h" #include "dd_public.h" #include "util/u_memory.h" +#include <ctype.h> #include <stdio.h> @@ -381,15 +382,55 @@ dd_screen_destroy(struct pipe_screen *_screen) FREE(dscreen); } +static void +skip_space(const char **p) +{ + while (isspace(**p)) + (*p)++; +} + +static bool +match_word(const char **cur, const char *word) +{ + size_t len = strlen(word); + if (strncmp(*cur, word, len) != 0) + return false; + + const char *p = *cur + len; + if (*p) { + if (!isspace(*p)) + return false; + + *cur = p + 1; + } else { + *cur = p; + } + + return true; +} + +static bool +match_uint(const char **cur, unsigned *value) +{ + char *end; + unsigned v = strtoul(*cur, &end, 0); + if (end == *cur || (*end && !isspace(*end))) + return false; + *cur = end; + *value = v; + return true; +} + struct pipe_screen * ddebug_screen_create(struct pipe_screen *screen) { struct dd_screen *dscreen; const char *option; - bool no_flush; - unsigned timeout = 0; + bool flush = false; + bool verbose = false; + unsigned timeout = 1000; unsigned apitrace_dump_call = 0; - enum dd_mode mode; + enum dd_dump_mode mode = DD_DUMP_ONLY_HANGS; option = debug_get_option("GALLIUM_DDEBUG", NULL); if (!option) @@ -400,53 +441,70 @@ ddebug_screen_create(struct pipe_screen *screen) puts(""); puts("Usage:"); puts(""); - puts(" GALLIUM_DDEBUG=\"always [noflush] [verbose]\""); - puts(" Flush and dump context and driver information after every draw call into"); - puts(" $HOME/"DD_DIR"/."); + puts(" GALLIUM_DDEBUG=\"[<timeout in ms>] [(always|apitrace <call#)] [flush] [verbose]\""); + puts(" GALLIUM_DDEBUG_SKIP=[count]"); puts(""); - puts(" GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\""); - puts(" Flush and detect a device hang after every draw call based on the 
given"); - puts(" fence timeout and dump context and driver information into"); - puts(" $HOME/"DD_DIR"/ when a hang is detected."); + puts("Dump context and driver information of draw calls into"); + puts("$HOME/"DD_DIR"/. By default, watch for GPU hangs and only dump information"); + puts("about draw calls related to the hang."); puts(""); - puts(" GALLIUM_DDEBUG=\"pipelined [timeout in ms] [verbose]\""); - puts(" Detect a device hang after every draw call based on the given fence"); - puts(" timeout without flushes and dump context and driver information into"); - puts(" $HOME/"DD_DIR"/ when a hang is detected."); + puts("<timeout in ms>"); + puts(" Change the default timeout for GPU hang detection (default=1000ms)."); + puts(" Setting this to 0 will disable GPU hang detection entirely."); puts(""); - puts(" GALLIUM_DDEBUG=\"apitrace [call#] [verbose]\""); - puts(" Dump apitrace draw call information into $HOME/"DD_DIR"/. Implies 'noflush'."); + puts("always"); + puts(" Dump information about all draw calls."); puts(""); - puts(" If 'noflush' is specified, do not flush on every draw call. 
In hang"); - puts(" detection mode, this only detect hangs in pipe->flush."); - puts(" If 'verbose' is specified, additional information is written to stderr."); + puts("apitrace <call#>"); + puts(" Dump information about the draw call corresponding to the given"); + puts(" apitrace call number and exit."); puts(""); - puts(" GALLIUM_DDEBUG_SKIP=[count]"); - puts(" Skip flush and hang detection for the given initial number of draw calls."); + puts("flush"); + puts(" Flush after every draw call."); + puts(""); + puts("verbose"); + puts(" Write additional information to stderr."); + puts(""); + puts("GALLIUM_DDEBUG_SKIP=count"); + puts(" Skip dumping on the first count draw calls (only relevant with 'always')."); puts(""); exit(0); } - no_flush = strstr(option, "noflush") != NULL; - - if (!strncmp(option, "always", 6)) { - mode = DD_DUMP_ALL_CALLS; - } else if (!strncmp(option, "apitrace", 8)) { - mode = DD_DUMP_APITRACE_CALL; - no_flush = true; - - if (sscanf(option+8, "%u", &apitrace_dump_call) != 1) - return screen; - } else if (!strncmp(option, "pipelined", 9)) { - mode = DD_DETECT_HANGS_PIPELINED; - - if (sscanf(option+10, "%u", &timeout) != 1) - return screen; - } else { - mode = DD_DETECT_HANGS; - - if (sscanf(option, "%u", &timeout) != 1) - return screen; + for (;;) { + skip_space(&option); + if (!*option) + break; + + if (match_word(&option, "always")) { + if (mode == DD_DUMP_APITRACE_CALL) { + printf("ddebug: both 'always' and 'apitrace' specified\n"); + exit(1); + } + + mode = DD_DUMP_ALL_CALLS; + } else if (match_word(&option, "flush")) { + flush = true; + } else if (match_word(&option, "verbose")) { + verbose = true; + } else if (match_word(&option, "apitrace")) { + if (mode != DD_DUMP_ONLY_HANGS) { + printf("ddebug: 'apitrace' can only appear once and not mixed with 'always'\n"); + exit(1); + } + + if (!match_uint(&option, &apitrace_dump_call)) { + printf("ddebug: expected call number after 'apitrace'\n"); + exit(1); + } + + mode = 
DD_DUMP_APITRACE_CALL; + } else if (match_uint(&option, &timeout)) { + /* no-op */ + } else { + printf("ddebug: bad options: %s\n", option); + exit(1); + } } dscreen = CALLOC_STRUCT(dd_screen); @@ -496,27 +554,28 @@ ddebug_screen_create(struct pipe_screen *screen) dscreen->screen = screen; dscreen->timeout_ms = timeout; - dscreen->mode = mode; - dscreen->no_flush = no_flush; - dscreen->verbose = strstr(option, "verbose") != NULL; + dscreen->dump_mode = mode; + dscreen->flush_always = flush; + dscreen->verbose = verbose; dscreen->apitrace_dump_call = apitrace_dump_call; - switch (dscreen->mode) { + switch (dscreen->dump_mode) { case DD_DUMP_ALL_CALLS: fprintf(stderr, "Gallium debugger active. Logging all calls.\n"); break; - case DD_DETECT_HANGS: - case DD_DETECT_HANGS_PIPELINED: - fprintf(stderr, "Gallium debugger active. " - "The hang detection timeout is %i ms.\n", timeout); - break; case DD_DUMP_APITRACE_CALL: fprintf(stderr, "Gallium debugger active. Going to dump an apitrace call.\n"); break; default: - assert(0); + fprintf(stderr, "Gallium debugger active.\n"); + break; } + if (dscreen->timeout_ms > 0) + fprintf(stderr, "Hang detection timeout is %ums.\n", dscreen->timeout_ms); + else + fprintf(stderr, "Hang detection is disabled.\n"); + dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0); if (dscreen->skip_count > 0) { fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n", |