diff options
-rw-r--r-- | src/gallium/drivers/ddebug/dd_context.c | 55 | ||||
-rw-r--r-- | src/gallium/drivers/ddebug/dd_draw.c | 490 | ||||
-rw-r--r-- | src/gallium/drivers/ddebug/dd_pipe.h | 56 | ||||
-rw-r--r-- | src/gallium/drivers/ddebug/dd_screen.c | 11 |
4 files changed, 608 insertions, 4 deletions
diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c
index 96fea3a80c3..4423e904dc1 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -27,6 +27,7 @@
 
 #include "dd_pipe.h"
 #include "tgsi/tgsi_parse.h"
+#include "util/u_inlines.h"
 #include "util/u_memory.h"
 
 
@@ -589,6 +590,19 @@ dd_context_destroy(struct pipe_context *_pipe)
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
 
+   if (dctx->thread) {
+      pipe_mutex_lock(dctx->mutex);
+      dctx->kill_thread = 1;
+      pipe_mutex_unlock(dctx->mutex);
+      pipe_thread_wait(dctx->thread);
+      pipe_mutex_destroy(dctx->mutex);
+      assert(!dctx->records);
+   }
+
+   if (dctx->fence) {
+      pipe->transfer_unmap(pipe, dctx->fence_transfer);
+      pipe_resource_reference(&dctx->fence, NULL);
+   }
    pipe->destroy(pipe);
    FREE(dctx);
 }
@@ -731,10 +745,8 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
       return NULL;
 
    dctx = CALLOC_STRUCT(dd_context);
-   if (!dctx) {
-      pipe->destroy(pipe);
-      return NULL;
-   }
+   if (!dctx)
+      goto fail;
 
    dctx->pipe = pipe;
    dctx->base.priv = pipe->priv; /* expose wrapped priv data */
@@ -826,5 +838,40 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
    dd_init_draw_functions(dctx);
 
    dctx->draw_state.sample_mask = ~0;
+
+   if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
+      dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
+                                       PIPE_USAGE_STAGING, 4);
+      if (!dctx->fence)
+         goto fail;
+
+      dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
+                                           PIPE_TRANSFER_READ_WRITE |
+                                           PIPE_TRANSFER_PERSISTENT |
+                                           PIPE_TRANSFER_COHERENT,
+                                           &dctx->fence_transfer);
+      if (!dctx->mapped_fence)
+         goto fail;
+
+      *dctx->mapped_fence = 0;
+
+      pipe_mutex_init(dctx->mutex);
+      dctx->thread = pipe_thread_create(dd_thread_pipelined_hang_detect, dctx);
+      if (!dctx->thread) {
+         pipe_mutex_destroy(dctx->mutex);
+         goto fail;
+      }
+   }
+
    return &dctx->base;
+
+fail:
+   if (dctx) {
+      if (dctx->mapped_fence)
+         pipe_transfer_unmap(pipe, dctx->fence_transfer);
+      pipe_resource_reference(&dctx->fence, NULL);
+      FREE(dctx);
+   }
+   pipe->destroy(pipe);
+   return NULL;
 }
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c
index 836f812296c..a54cecb5f08 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -29,7 +29,14 @@
 
 #include "util/u_dump.h"
 #include "util/u_format.h"
+#include "util/u_framebuffer.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
+#include "os/os_time.h"
+#include <inttypes.h>
 
 
 static FILE *
@@ -570,6 +577,485 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
 }
 
+/**
+ * Drop the references that dd_copy_call took for a copied call.
+ */
 static void
+dd_unreference_copy_of_call(struct dd_call *dst)
+{
+   switch (dst->type) {
+   case CALL_DRAW_VBO:
+      pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output, NULL);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect, NULL);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect_params, NULL);
+      break;
+   case CALL_LAUNCH_GRID:
+      pipe_resource_reference(&dst->info.launch_grid.indirect, NULL);
+      break;
+   case CALL_RESOURCE_COPY_REGION:
+      pipe_resource_reference(&dst->info.resource_copy_region.dst, NULL);
+      pipe_resource_reference(&dst->info.resource_copy_region.src, NULL);
+      break;
+   case CALL_BLIT:
+      pipe_resource_reference(&dst->info.blit.dst.resource, NULL);
+      pipe_resource_reference(&dst->info.blit.src.resource, NULL);
+      break;
+   case CALL_FLUSH_RESOURCE:
+      pipe_resource_reference(&dst->info.flush_resource, NULL);
+      break;
+   case CALL_CLEAR:
+      break;
+   case CALL_CLEAR_BUFFER:
+      pipe_resource_reference(&dst->info.clear_buffer.res, NULL);
+      break;
+   case CALL_CLEAR_RENDER_TARGET:
+      break;
+   case CALL_CLEAR_DEPTH_STENCIL:
+      break;
+   case CALL_GENERATE_MIPMAP:
+      pipe_resource_reference(&dst->info.generate_mipmap.res, NULL);
+      break;
+   }
+}
+
+/**
+ * Copy \p src to \p dst and take a reference to each gallium object that
+ * the call points to. The caller zeroes \p dst beforehand.
+ */
+static void
+dd_copy_call(struct dd_call *dst, struct dd_call *src)
+{
+   dst->type = src->type;
+
+   switch (src->type) {
+   case CALL_DRAW_VBO:
+      pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output,
+                               src->info.draw_vbo.count_from_stream_output);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect,
+                              src->info.draw_vbo.indirect);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect_params,
+                              src->info.draw_vbo.indirect_params);
+      dst->info.draw_vbo = src->info.draw_vbo;
+      break;
+   case CALL_LAUNCH_GRID:
+      pipe_resource_reference(&dst->info.launch_grid.indirect,
+                              src->info.launch_grid.indirect);
+      dst->info.launch_grid = src->info.launch_grid;
+      break;
+   case CALL_RESOURCE_COPY_REGION:
+      pipe_resource_reference(&dst->info.resource_copy_region.dst,
+                              src->info.resource_copy_region.dst);
+      pipe_resource_reference(&dst->info.resource_copy_region.src,
+                              src->info.resource_copy_region.src);
+      dst->info.resource_copy_region = src->info.resource_copy_region;
+      break;
+   case CALL_BLIT:
+      pipe_resource_reference(&dst->info.blit.dst.resource,
+                              src->info.blit.dst.resource);
+      pipe_resource_reference(&dst->info.blit.src.resource,
+                              src->info.blit.src.resource);
+      dst->info.blit = src->info.blit;
+      break;
+   case CALL_FLUSH_RESOURCE:
+      pipe_resource_reference(&dst->info.flush_resource,
+                              src->info.flush_resource);
+      break;
+   case CALL_CLEAR:
+      dst->info.clear = src->info.clear;
+      break;
+   case CALL_CLEAR_BUFFER:
+      pipe_resource_reference(&dst->info.clear_buffer.res,
+                              src->info.clear_buffer.res);
+      dst->info.clear_buffer = src->info.clear_buffer;
+      break;
+   case CALL_CLEAR_RENDER_TARGET:
+      break;
+   case CALL_CLEAR_DEPTH_STENCIL:
+      break;
+   case CALL_GENERATE_MIPMAP:
+      pipe_resource_reference(&dst->info.generate_mipmap.res,
+                              src->info.generate_mipmap.res);
+      dst->info.generate_mipmap = src->info.generate_mipmap;
+      break;
+   }
+}
+
+/**
+ * Prepare \p state for dd_copy_draw_state: clear the pointers to gallium
+ * objects and point the CSO pointers at the storage inside \p state.
+ */
+static void
+dd_init_copy_of_draw_state(struct dd_draw_state_copy *state)
+{
+   unsigned i,j;
+
+   /* Just clear pointers to gallium objects. Don't clear the whole structure,
+    * because it would kill performance with its size of 130 KB.
+    */
+   memset(&state->base.index_buffer, 0,
+          sizeof(state->base.index_buffer));
+   memset(state->base.vertex_buffers, 0,
+          sizeof(state->base.vertex_buffers));
+   memset(state->base.so_targets, 0,
+          sizeof(state->base.so_targets));
+   memset(state->base.constant_buffers, 0,
+          sizeof(state->base.constant_buffers));
+   memset(state->base.sampler_views, 0,
+          sizeof(state->base.sampler_views));
+   memset(state->base.shader_images, 0,
+          sizeof(state->base.shader_images));
+   memset(state->base.shader_buffers, 0,
+          sizeof(state->base.shader_buffers));
+   memset(&state->base.framebuffer_state, 0,
+          sizeof(state->base.framebuffer_state));
+
+   memset(state->shaders, 0, sizeof(state->shaders));
+
+   state->base.render_cond.query = &state->render_cond;
+
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+      state->base.shaders[i] = &state->shaders[i];
+      for (j = 0; j < PIPE_MAX_SAMPLERS; j++)
+         state->base.sampler_states[i][j] = &state->sampler_states[i][j];
+   }
+
+   state->base.velems = &state->velems;
+   state->base.rs = &state->rs;
+   state->base.dsa = &state->dsa;
+   state->base.blend = &state->blend;
+}
+
+/**
+ * Release what a draw state copy holds: the references to gallium objects
+ * and the duplicated shader tokens.
+ */
+static void
+dd_unreference_copy_of_draw_state(struct dd_draw_state_copy *state)
+{
+   struct dd_draw_state *dst = &state->base;
+   unsigned i,j;
+
+   util_set_index_buffer(&dst->index_buffer, NULL);
+
+   for (i = 0; i < ARRAY_SIZE(dst->vertex_buffers); i++)
+      pipe_resource_reference(&dst->vertex_buffers[i].buffer, NULL);
+   for (i = 0; i < ARRAY_SIZE(dst->so_targets); i++)
+      pipe_so_target_reference(&dst->so_targets[i], NULL);
+
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+      if (dst->shaders[i])
+         tgsi_free_tokens(dst->shaders[i]->state.shader.tokens);
+
+      for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++)
+         pipe_resource_reference(&dst->constant_buffers[i][j].buffer, NULL);
+      for (j = 0; j < PIPE_MAX_SAMPLERS; j++)
+         pipe_sampler_view_reference(&dst->sampler_views[i][j], NULL);
+      for (j = 0; j < PIPE_MAX_SHADER_IMAGES; j++)
+         pipe_resource_reference(&dst->shader_images[i][j].resource, NULL);
+      for (j = 0; j < PIPE_MAX_SHADER_BUFFERS; j++)
+         pipe_resource_reference(&dst->shader_buffers[i][j].buffer, NULL);
+   }
+
+   util_unreference_framebuffer_state(&dst->framebuffer_state);
+}
+
+/**
+ * Copy the draw state \p src to \p dst. CSO contents are written through the
+ * pointers already stored in \p dst, so \p dst must have been prepared by
+ * dd_init_copy_of_draw_state.
+ */
+static void
+dd_copy_draw_state(struct dd_draw_state *dst, struct dd_draw_state *src)
+{
+   unsigned i,j;
+
+   if (src->render_cond.query) {
+      *dst->render_cond.query = *src->render_cond.query;
+      dst->render_cond.condition = src->render_cond.condition;
+      dst->render_cond.mode = src->render_cond.mode;
+   } else {
+      dst->render_cond.query = NULL;
+   }
+
+   util_set_index_buffer(&dst->index_buffer, &src->index_buffer);
+
+   for (i = 0; i < ARRAY_SIZE(src->vertex_buffers); i++) {
+      pipe_resource_reference(&dst->vertex_buffers[i].buffer,
+                              src->vertex_buffers[i].buffer);
+      memcpy(&dst->vertex_buffers[i], &src->vertex_buffers[i],
+             sizeof(src->vertex_buffers[i]));
+   }
+
+   dst->num_so_targets = src->num_so_targets;
+   for (i = 0; i < ARRAY_SIZE(src->so_targets); i++)
+      pipe_so_target_reference(&dst->so_targets[i], src->so_targets[i]);
+   memcpy(dst->so_offsets, src->so_offsets, sizeof(src->so_offsets));
+
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+      if (!src->shaders[i]) {
+         dst->shaders[i] = NULL;
+         continue;
+      }
+
+      dst->shaders[i]->state.shader = src->shaders[i]->state.shader;
+      dst->shaders[i]->state.shader.tokens =
+         tgsi_dup_tokens(src->shaders[i]->state.shader.tokens);
+
+      for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++) {
+         pipe_resource_reference(&dst->constant_buffers[i][j].buffer,
+                                 src->constant_buffers[i][j].buffer);
+         memcpy(&dst->constant_buffers[i][j], &src->constant_buffers[i][j],
+                sizeof(src->constant_buffers[i][j]));
+      }
+
+      for (j = 0; j < PIPE_MAX_SAMPLERS; j++) {
+         pipe_sampler_view_reference(&dst->sampler_views[i][j],
+                                     src->sampler_views[i][j]);
+         if (src->sampler_states[i][j])
+            dst->sampler_states[i][j]->state.sampler =
+               src->sampler_states[i][j]->state.sampler;
+         else
+            dst->sampler_states[i][j] = NULL;
+      }
+      /* TODO: shader buffers & images */
+   }
+
+   if (src->velems)
+      dst->velems->state.velems = src->velems->state.velems;
+   else
+      dst->velems = NULL;
+
+   if (src->rs)
+      dst->rs->state.rs = src->rs->state.rs;
+   else
+      dst->rs = NULL;
+
+   if (src->dsa)
+      dst->dsa->state.dsa = src->dsa->state.dsa;
+   else
+      dst->dsa = NULL;
+
+   if (src->blend)
+      dst->blend->state.blend = src->blend->state.blend;
+   else
+      dst->blend = NULL;
+
+   dst->blend_color = src->blend_color;
+   dst->stencil_ref = src->stencil_ref;
+   dst->sample_mask = src->sample_mask;
+   dst->min_samples = src->min_samples;
+   dst->clip_state = src->clip_state;
+   util_copy_framebuffer_state(&dst->framebuffer_state, &src->framebuffer_state);
+   memcpy(dst->scissors, src->scissors, sizeof(src->scissors));
+   memcpy(dst->viewports, src->viewports, sizeof(src->viewports));
+   memcpy(dst->tess_default_levels, src->tess_default_levels,
+          sizeof(src->tess_default_levels));
+   dst->apitrace_call_number = src->apitrace_call_number;
+}
+
+/**
+ * Free the record that \p record points to and store its successor there,
+ * which unlinks the record from the list.
+ */
+static void
+dd_free_record(struct dd_draw_record **record)
+{
+   struct dd_draw_record *next = (*record)->next;
+
+   dd_unreference_copy_of_call(&(*record)->call);
+   dd_unreference_copy_of_draw_state(&(*record)->draw_state);
+   FREE((*record)->driver_state_log);
+   FREE(*record);
+   *record = next;
+}
+
+/**
+ * Write the hang report for \p record to a dump file: sequence numbers,
+ * elapsed time, the call, the saved driver log, device registers and dmesg.
+ */
+static void
+dd_dump_record(struct dd_context *dctx, struct dd_draw_record *record,
+               uint32_t hw_sequence_no, int64_t now)
+{
+   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
+                                record->draw_state.base.apitrace_call_number);
+   if (!f)
+      return;
+
+   fprintf(f, "Draw call sequence # = %u\n", record->sequence_no);
+   fprintf(f, "HW reached sequence # = %u\n", hw_sequence_no);
+   fprintf(f, "Elapsed time = %"PRIi64" ms\n\n",
+           (now - record->timestamp) / 1000);
+
+   dd_dump_call(f, &record->draw_state.base, &record->call);
+   fprintf(f, "%s\n", record->driver_state_log);
+
+   dctx->pipe->dump_debug_state(dctx->pipe, f,
+                                PIPE_DUMP_DEVICE_STATUS_REGISTERS);
+   dd_dump_dmesg(f);
+   fclose(f);
+}
+
+/**
+ * Hang detection thread. Roughly every 10 ms it walks the record list, frees
+ * the records whose fence value has been reached, and on a fence timeout
+ * dumps the oldest unsignalled record and kills the process.
+ */
+PIPE_THREAD_ROUTINE(dd_thread_pipelined_hang_detect, input)
+{
+   struct dd_context *dctx = (struct dd_context *)input;
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+
+   pipe_mutex_lock(dctx->mutex);
+
+   while (!dctx->kill_thread) {
+      struct dd_draw_record **record = &dctx->records;
+
+      /* Loop over all records. */
+      while (*record) {
+         int64_t now;
+
+         /* If the fence has been signalled, release the record and all older
+          * records.
+          */
+         if (*dctx->mapped_fence >= (*record)->sequence_no) {
+            while (*record)
+               dd_free_record(record);
+            break;
+         }
+
+         /* The fence hasn't been signalled. Check the timeout. */
+         now = os_time_get();
+         if (os_time_timeout((*record)->timestamp,
+                             (*record)->timestamp + dscreen->timeout_ms * 1000,
+                             now)) {
+            fprintf(stderr, "GPU hang detected.\n");
+
+            /* Get the oldest unsignalled draw call. */
+            while ((*record)->next &&
+                   *dctx->mapped_fence < (*record)->next->sequence_no)
+               record = &(*record)->next;
+
+            dd_dump_record(dctx, *record, *dctx->mapped_fence, now);
+            dd_kill_process();
+         }
+
+         record = &(*record)->next;
+      }
+
+      /* Unlock and sleep before starting all over again. */
+      pipe_mutex_unlock(dctx->mutex);
+      os_time_sleep(10000); /* 10 ms */
+      pipe_mutex_lock(dctx->mutex);
+   }
+
+   /* Thread termination. */
+   while (dctx->records)
+      dd_free_record(&dctx->records);
+
+   pipe_mutex_unlock(dctx->mutex);
+   return 0;
+}
+
+/**
+ * Dump the current shaders of the driver into a malloc'ed string.
+ *
+ * \return the string, or NULL if malloc or fmemopen fails.
+ */
+static char *
+dd_get_driver_shader_log(struct dd_context *dctx)
+{
+   FILE *f;
+   char *buf;
+   int written_bytes;
+
+   if (!dctx->max_log_buffer_size)
+      dctx->max_log_buffer_size = 16 * 1024;
+
+   /* Keep increasing the buffer size until there is enough space.
+    *
+    * open_memstream can resize automatically, but it's VERY SLOW.
+    * fmemopen is much faster.
+    */
+   while (1) {
+      buf = malloc(dctx->max_log_buffer_size);
+      if (!buf)
+         return NULL;
+      buf[0] = 0;
+
+      f = fmemopen(buf, dctx->max_log_buffer_size, "a");
+      if (!f) {
+         free(buf);
+         return NULL;
+      }
+
+      dd_dump_driver_state(dctx, f, PIPE_DUMP_CURRENT_SHADERS);
+      written_bytes = ftell(f);
+      fclose(f);
+
+      /* Return if the backing buffer is large enough. */
+      if (written_bytes < dctx->max_log_buffer_size - 1)
+         break;
+
+      /* Try again. */
+      free(buf);
+      dctx->max_log_buffer_size *= 2;
+   }
+
+   return buf;
+}
+
+/**
+ * Record a draw call for the hang detection thread: increment the fence
+ * value, make the GPU write it with clear_buffer, and add a copy of the call
+ * and of the current draw state to the record list.
+ */
+static void
+dd_pipelined_process_draw(struct dd_context *dctx, struct dd_call *call)
+{
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_draw_record *record;
+   char *log;
+
+   /* Make a record of the draw call. */
+   record = MALLOC_STRUCT(dd_draw_record);
+   if (!record)
+      return;
+
+   /* Create the log. */
+   log = dd_get_driver_shader_log(dctx);
+   if (!log) {
+      FREE(record);
+      return;
+   }
+
+   /* Update the fence with the GPU.
+    *
+    * radeonsi/clear_buffer waits in the command processor until shaders are
+    * idle before writing to memory. That's a necessary condition for isolating
+    * draw calls.
+    */
+   dctx->sequence_no++;
+   pipe->clear_buffer(pipe, dctx->fence, 0, 4, &dctx->sequence_no, 4);
+
+   /* Initialize the record. */
+   record->timestamp = os_time_get();
+   record->sequence_no = dctx->sequence_no;
+   record->driver_state_log = log;
+
+   memset(&record->call, 0, sizeof(record->call));
+   dd_copy_call(&record->call, call);
+
+   dd_init_copy_of_draw_state(&record->draw_state);
+   dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);
+
+   /* Add the record to the list. */
+   pipe_mutex_lock(dctx->mutex);
+   record->next = dctx->records;
+   dctx->records = record;
+   pipe_mutex_unlock(dctx->mutex);
+}
+
+static void
 dd_context_flush(struct pipe_context *_pipe,
                  struct pipe_fence_handle **fence, unsigned flags)
 {
@@ -581,6 +1067,7 @@ dd_context_flush(struct pipe_context *_pipe,
       dd_flush_and_handle_hang(dctx, fence, flags,
                                "GPU hang detected in pipe->flush()");
       break;
+   case DD_DETECT_HANGS_PIPELINED: /* nothing to do here */
    case DD_DUMP_ALL_CALLS:
    case DD_DUMP_APITRACE_CALL:
       pipe->flush(pipe, fence, flags);
@@ -625,6 +1112,9 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
          dd_kill_process();
       }
       break;
+   case DD_DETECT_HANGS_PIPELINED:
+      dd_pipelined_process_draw(dctx, call);
+      break;
    case DD_DUMP_ALL_CALLS:
       if (!dscreen->no_flush)
          pipe->flush(pipe, NULL, 0);
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index 5ba1b7a6d75..08cd1e3b9f1 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -32,9 +32,11 @@
 #include "pipe/p_state.h"
 #include "pipe/p_screen.h"
 #include "dd_util.h"
+#include "os/os_thread.h"
 
 enum dd_mode {
    DD_DETECT_HANGS,
+   DD_DETECT_HANGS_PIPELINED,
    DD_DUMP_ALL_CALLS,
    DD_DUMP_APITRACE_CALL,
 };
@@ -181,6 +183,33 @@ struct dd_draw_state
    unsigned apitrace_call_number;
 };
 
+struct dd_draw_state_copy
+{
+   struct dd_draw_state base;
+
+   /* dd_draw_state_copy does not reference real CSOs. Instead, it points to
+    * these variables, which serve as storage.
+    */
+   struct dd_query render_cond;
+   struct dd_state shaders[PIPE_SHADER_TYPES];
+   struct dd_state sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct dd_state velems;
+   struct dd_state rs;
+   struct dd_state dsa;
+   struct dd_state blend;
+};
+
+struct dd_draw_record {
+   struct dd_draw_record *next;
+
+   int64_t timestamp;
+   uint32_t sequence_no;
+
+   struct dd_call call;
+   struct dd_draw_state_copy draw_state;
+   char *driver_state_log;
+};
+
 struct dd_context
 {
    struct pipe_context base;
@@ -188,6 +217,32 @@ struct dd_context
 
    struct dd_draw_state draw_state;
    unsigned num_draw_calls;
+
+   /* Pipelined hang detection.
+    *
+    * This is without unnecessary flushes and waits. There is a memory-based
+    * fence that is incremented by clear_buffer every draw call. Driver fences
+    * are not used.
+    *
+    * After each draw call, a new dd_draw_record is created that contains
+    * a copy of all states, the output of pipe_context::dump_debug_state,
+    * and it has a fence number assigned. That's done without knowing whether
+    * that draw call is problematic or not. The record is added into the list
+    * of all records.
+    *
+    * An independent, separate thread loops over the list of records and checks
+    * their fences. Records with signalled fences are freed. On fence timeout,
+    * the thread dumps the record of the oldest unsignalled fence.
+    */
+   pipe_thread thread;
+   pipe_mutex mutex;
+   int kill_thread;
+   struct pipe_resource *fence;
+   struct pipe_transfer *fence_transfer;
+   uint32_t *mapped_fence;
+   uint32_t sequence_no;
+   struct dd_draw_record *records;
+   int max_log_buffer_size;
 };
 
 
@@ -196,6 +251,7 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
 
 void
 dd_init_draw_functions(struct dd_context *dctx);
+PIPE_THREAD_ROUTINE(dd_thread_pipelined_hang_detect, input);
 
 
 static inline struct dd_context *
diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c
index 46869ab63f2..412ea365ce2 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -314,6 +314,11 @@ ddebug_screen_create(struct pipe_screen *screen)
          puts("    fence timeout and dump context and driver information into");
          puts("    $HOME/"DD_DIR"/ when a hang is detected.");
          puts("");
+         puts("  GALLIUM_DDEBUG=\"pipelined [timeout in ms] [verbose]\"");
+         puts("    Detect a device hang after every draw call based on the given fence");
+         puts("    timeout without flushes and dump context and driver information into");
+         puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+         puts("");
          puts("  GALLIUM_DDEBUG=\"apitrace [call#] [verbose]\"");
          puts("    Dump apitrace draw call information into $HOME/"DD_DIR"/. Implies 'noflush'.");
          puts("");
@@ -337,6 +342,11 @@ ddebug_screen_create(struct pipe_screen *screen)
 
          if (sscanf(option+8, "%u", &apitrace_dump_call) != 1)
             return screen;
+      } else if (!strncmp(option, "pipelined", 9)) {
+         mode = DD_DETECT_HANGS_PIPELINED;
+
+         if (sscanf(option+9, "%u", &timeout) != 1)
+            return screen;
       } else {
          mode = DD_DETECT_HANGS;
 
@@ -392,6 +402,7 @@ ddebug_screen_create(struct pipe_screen *screen)
       fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
       break;
    case DD_DETECT_HANGS:
+   case DD_DETECT_HANGS_PIPELINED:
       fprintf(stderr, "Gallium debugger active. "
               "The hang detection timeout is %i ms.\n", timeout);
       break;