summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/ddebug/dd_pipe.h
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-07-19 18:28:24 +0200
committerMarek Olšák <[email protected]>2016-07-26 23:06:46 +0200
commitb47727a83ad6b5386f27b2c174a5c0f110a61038 (patch)
tree162d39a05a9467160d23cc903bc49f2786193330 /src/gallium/drivers/ddebug/dd_pipe.h
parent0795a3d54f7748d27020851f2e9b4b7637597590 (diff)
ddebug: implement pipelined hang detection mode
For good performance while being able to generate decent hang reports. The report doesn't contain the parsed IB and the buffer list, but it isolates the draw call and dumps shaders while not having to flush the context. This is for GPU hangs that are harder to reproduce and require interactive playing for minutes or even hours. dd_pipe.h explains some implementation details. Initializing, copying (recording) and clearing states is most of the code. The performance should be at least 50% of the normal performance depending on the circumstances. (i.e. 50% is expected to be the worst case scenario, not the best case) The majority of time is spent in dump_debug_state(PIPE_DUMP_CURRENT_SHADERS) and that's after all the optimizations in later patches. There is no obvious way to optimize that further. Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/ddebug/dd_pipe.h')
-rw-r--r--src/gallium/drivers/ddebug/dd_pipe.h56
1 files changed, 56 insertions, 0 deletions
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index 5ba1b7a6d75..08cd1e3b9f1 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -32,9 +32,11 @@
#include "pipe/p_state.h"
#include "pipe/p_screen.h"
#include "dd_util.h"
+#include "os/os_thread.h"
enum dd_mode {
DD_DETECT_HANGS,
+ DD_DETECT_HANGS_PIPELINED,
DD_DUMP_ALL_CALLS,
DD_DUMP_APITRACE_CALL,
};
@@ -181,6 +183,33 @@ struct dd_draw_state
unsigned apitrace_call_number;
};
+struct dd_draw_state_copy
+{
+ struct dd_draw_state base;
+
+ /* dd_draw_state_copy does not reference real CSOs. Instead, it points to
+ * these variables, which serve as storage.
+ */
+ struct dd_query render_cond;
+ struct dd_state shaders[PIPE_SHADER_TYPES];
+ struct dd_state sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ struct dd_state velems;
+ struct dd_state rs;
+ struct dd_state dsa;
+ struct dd_state blend;
+};
+
+struct dd_draw_record {
+ struct dd_draw_record *next;
+
+ int64_t timestamp;
+ uint32_t sequence_no;
+
+ struct dd_call call;
+ struct dd_draw_state_copy draw_state;
+ char *driver_state_log;
+};
+
struct dd_context
{
struct pipe_context base;
@@ -188,6 +217,32 @@ struct dd_context
struct dd_draw_state draw_state;
unsigned num_draw_calls;
+
+ /* Pipelined hang detection.
+ *
+ * This is without unnecessary flushes and waits. There is a memory-based
+ * fence that is incremented by clear_buffer every draw call. Driver fences
+ * are not used.
+ *
+ * After each draw call, a new dd_draw_record is created that contains
+ * a copy of all states, the output of pipe_context::dump_debug_state,
+ * and it has a fence number assigned. That's done without knowing whether
+ * that draw call is problematic or not. The record is added into the list
+ * of all records.
+ *
+ * An independent, separate thread loops over the list of records and checks
+ * their fences. Records with signalled fences are freed. On fence timeout,
+ * the thread dumps the record of the oldest unsignalled fence.
+ */
+ pipe_thread thread;
+ pipe_mutex mutex;
+ int kill_thread;
+ struct pipe_resource *fence;
+ struct pipe_transfer *fence_transfer;
+ uint32_t *mapped_fence;
+ uint32_t sequence_no;
+ struct dd_draw_record *records;
+ int max_log_buffer_size;
};
@@ -196,6 +251,7 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
void
dd_init_draw_functions(struct dd_context *dctx);
+PIPE_THREAD_ROUTINE(dd_thread_pipelined_hang_detect, input);
static inline struct dd_context *