diff options
author | Marek Olšák <[email protected]> | 2016-07-19 18:28:24 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-07-26 23:06:46 +0200 |
commit | b47727a83ad6b5386f27b2c174a5c0f110a61038 (patch) | |
tree | 162d39a05a9467160d23cc903bc49f2786193330 /src/gallium/drivers/ddebug/dd_pipe.h | |
parent | 0795a3d54f7748d27020851f2e9b4b7637597590 (diff) |
ddebug: implement pipelined hang detection mode
For good performance while being able to generate decent hang reports.
The report doesn't contain the parsed IB and the buffer list, but it
isolates the draw call and dumps shaders while not having to flush
the context.
This is for GPU hangs that are harder to reproduce and require interactive
playing for minutes or even hours.
dd_pipe.h explains some implementation details. Initializing, copying
(recording) and clearing states is most of the code.
The performance should be at least 50% of the normal performance depending
on the circumstances. (i.e. 50% is expected to be the worst case scenario,
not the best case) The majority of time is spent in
dump_debug_state(PIPE_DUMP_CURRENT_SHADERS) and that's after all
the optimizations in later patches. There is no obvious way to optimize
that further.
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/ddebug/dd_pipe.h')
-rw-r--r-- | src/gallium/drivers/ddebug/dd_pipe.h | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h index 5ba1b7a6d75..08cd1e3b9f1 100644 --- a/src/gallium/drivers/ddebug/dd_pipe.h +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ -32,9 +32,11 @@ #include "pipe/p_state.h" #include "pipe/p_screen.h" #include "dd_util.h" +#include "os/os_thread.h" enum dd_mode { DD_DETECT_HANGS, + DD_DETECT_HANGS_PIPELINED, DD_DUMP_ALL_CALLS, DD_DUMP_APITRACE_CALL, }; @@ -181,6 +183,33 @@ struct dd_draw_state unsigned apitrace_call_number; }; +struct dd_draw_state_copy +{ + struct dd_draw_state base; + + /* dd_draw_state_copy does not reference real CSOs. Instead, it points to + * these variables, which serve as storage. + */ + struct dd_query render_cond; + struct dd_state shaders[PIPE_SHADER_TYPES]; + struct dd_state sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + struct dd_state velems; + struct dd_state rs; + struct dd_state dsa; + struct dd_state blend; +}; + +struct dd_draw_record { + struct dd_draw_record *next; + + int64_t timestamp; + uint32_t sequence_no; + + struct dd_call call; + struct dd_draw_state_copy draw_state; + char *driver_state_log; +}; + struct dd_context { struct pipe_context base; @@ -188,6 +217,32 @@ struct dd_context struct dd_draw_state draw_state; unsigned num_draw_calls; + + /* Pipelined hang detection. + * + * This is without unnecessary flushes and waits. There is a memory-based + * fence that is incremented by clear_buffer every draw call. Driver fences + * are not used. + * + * After each draw call, a new dd_draw_record is created that contains + * a copy of all states, the output of pipe_context::dump_debug_state, + * and it has a fence number assigned. That's done without knowing whether + * that draw call is problematic or not. The record is added into the list + * of all records. + * + * An independent, separate thread loops over the list of records and checks + * their fences. Records with signalled fences are freed. On fence timeout, + * the thread dumps the record of the oldest unsignalled fence. + */ + pipe_thread thread; + pipe_mutex mutex; + int kill_thread; + struct pipe_resource *fence; + struct pipe_transfer *fence_transfer; + uint32_t *mapped_fence; + uint32_t sequence_no; + struct dd_draw_record *records; + int max_log_buffer_size; }; @@ -196,6 +251,7 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe); void dd_init_draw_functions(struct dd_context *dctx); +PIPE_THREAD_ROUTINE(dd_thread_pipelined_hang_detect, input); static inline struct dd_context * |