From 2c14a6d3b1c53d5814414ce9e91fd8d24c90b787 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 19 Aug 2015 11:53:25 +0200 Subject: radeonsi: add IB tracing support for debug contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds trace points to all IBs and the parser prints them and also prints which trace points were reached (executed) by the CP. This can help pinpoint a problematic packet, draw call, etc. Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_debug.c | 67 ++++++++++++++++++++++++++-- src/gallium/drivers/radeonsi/si_hw_context.c | 24 +++++++++- src/gallium/drivers/radeonsi/si_pipe.c | 2 + src/gallium/drivers/radeonsi/si_pipe.h | 9 +++- src/gallium/drivers/radeonsi/si_state_draw.c | 19 ++++---- 5 files changed, 105 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 72b7989e92c..cf09686c636 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, si_dump_reg(f, reg + i*4, ib[2+i], ~0); } -static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) +static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, + int trace_id) { unsigned count = PKT_COUNT_G(ib[0]); unsigned op = PKT3_IT_OPCODE_G(ib[0]); @@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) if (ib[0] == 0xffff1000) { count = -1; /* One dword NOP. */ break; + } else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) { + unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]); + + print_spaces(f, INDENT_PKT); + fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id); + + if (trace_id == -1) + break; /* tracing was disabled */ + + print_spaces(f, INDENT_PKT); + if (packet_id < trace_id) + fprintf(f, COLOR_RED + "This trace point was reached by the CP." + COLOR_RESET "\n"); + else if (packet_id == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the last trace point that " + "was reached by the CP !!!!!" + COLOR_RESET "\n"); + else if (packet_id+1 == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the first trace point that " + "was NOT been reached by the CP !!!!!" + COLOR_RESET "\n"); + else + fprintf(f, COLOR_RED + "!!!!! This trace point was NOT reached " + "by the CP !!!!!" + COLOR_RESET "\n"); + break; } /* fall through, print all dwords */ default: @@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) return ib; } -static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) +/** + * Parse and print an IB into a file. + * + * \param f file + * \param ib IB + * \param num_dw size of the IB + * \param chip_class chip class + * \param trace_id the last trace ID that is known to have been reached + * and executed by the CP, typically read from a buffer + */ +static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id) { fprintf(f, "------------------ IB begin ------------------\n"); @@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) switch (type) { case 3: - ib = si_parse_packet3(f, ib, &num_dw); + ib = si_parse_packet3(f, ib, &num_dw, trace_id); break; case 2: /* type-2 nop */ @@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->ps_shader, "Fragment", f); if (sctx->last_ib) { - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size); + int last_trace_id = -1; + + if (sctx->last_trace_buf) { + /* We are expecting that the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = + sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) + last_trace_id = *map; + } + + si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, + last_trace_id); free(sctx->last_ib); /* dump only once */ sctx->last_ib = NULL; + r600_resource_reference(&sctx->last_trace_buf, NULL); } fprintf(f, "Done.\n"); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index b726eb3cdd8..110e3163021 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, num_dw += ctx->atoms.s.cache_flush->num_dw; if (ctx->screen->b.trace_bo) - num_dw += SI_TRACE_CS_DWORDS; + num_dw += SI_TRACE_CS_DWORDS * 2; /* Flush if there's not enough space. */ if (num_dw > cs->max_dw) { @@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags, /* force to keep tiling flags */ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Save the IB for debug contexts. */ if (ctx->is_debug) { free(ctx->last_ib); ctx->last_ib_dw_size = cs->cdw; ctx->last_ib = malloc(cs->cdw * 4); memcpy(ctx->last_ib, cs->buf, cs->cdw * 4); + r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); + r600_resource_reference(&ctx->trace_buf, NULL); } /* Flush the CS. */ @@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags, void si_begin_new_cs(struct si_context *ctx) { + if (ctx->is_debug) { + uint32_t zero = 0; + + /* Create a buffer used for writing trace IDs and initialize it to 0. */ + assert(!ctx->trace_buf); + ctx->trace_buf = (struct r600_resource*) + pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, 4); + if (ctx->trace_buf) + pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, + 0, sizeof(zero), &zero); + ctx->trace_id = 0; + } + + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Flush read caches at the beginning of CS. */ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | SI_CONTEXT_INV_TC_L1 | diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e5900b74806..92c6ae3de2b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context) LLVMDisposeTargetMachine(sctx->tm); #endif + r600_resource_reference(&sctx->trace_buf, NULL); + r600_resource_reference(&sctx->last_trace_buf, NULL); free(sctx->last_ib); FREE(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09a21ceb618..52167f24a95 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -43,7 +43,7 @@ #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 -#define SI_TRACE_CS_DWORDS 6 +#define SI_TRACE_CS_DWORDS 7 #define SI_MAX_DRAW_CS_DWORDS \ (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \ @@ -81,6 +81,10 @@ SI_CONTEXT_FLUSH_AND_INV_DB | \ SI_CONTEXT_FLUSH_AND_INV_DB_META) +#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) +#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000) +#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) + struct si_compute; struct si_screen { @@ -247,6 +251,9 @@ struct si_context { bool is_debug; uint32_t *last_ib; unsigned last_ib_dw_size; + struct r600_resource *last_trace_buf; + struct r600_resource *trace_buf; + unsigned trace_id; }; /* cik_sdma.c */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e56c9e70eca..b1aba1290d6 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_emit_draw_registers(sctx, info); si_emit_draw_packets(sctx, info, &ib); - if (sctx->screen->b.trace_bo) + if (sctx->trace_buf) si_trace_emit(sctx); /* Workaround for a VGT hang when streamout is enabled. @@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) void si_trace_emit(struct si_context *sctx) { - struct si_screen *sscreen = sctx->screen; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va; - va = sscreen->b.trace_bo->gpu_address; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo, + sctx->trace_id++; + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | PKT3_WRITE_DATA_WR_CONFIRM | PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); - radeon_emit(cs, va & 0xFFFFFFFFUL); - radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL); - radeon_emit(cs, cs->cdw); - radeon_emit(cs, sscreen->b.cs_count); + radeon_emit(cs, sctx->trace_buf->gpu_address); + radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); + radeon_emit(cs, sctx->trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id)); } -- cgit v1.2.3