diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/r600_hw_context.c | 65 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_pipe.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_pipe.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_pm4.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/sid.h | 14 |
6 files changed, 136 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
index bd348f9fe14..0975a1fe978 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context.c
+++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
@@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	/* Save 16 dwords for the fence mechanism. */
 	num_dw += 16;
 
+#if R600_TRACE_CS
+	if (ctx->screen->trace_bo) {
+		num_dw += R600_TRACE_CS_DWORDS;
+	}
+#endif
+
 	/* Flush if there's not enough space. */
 	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
 		radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
@@ -206,9 +212,42 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
 	/* force to keep tiling flags */
 	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
+#if R600_TRACE_CS
+	if (ctx->screen->trace_bo) {
+		struct r600_screen *rscreen = ctx->screen;
+		unsigned i;
+
+		for (i = 0; i < cs->cdw; i++) {
+			fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
+		}
+		rscreen->cs_count++;
+	}
+#endif
+
 	/* Flush the CS. */
 	ctx->ws->cs_flush(ctx->cs, flags);
 
+#if R600_TRACE_CS
+	if (ctx->screen->trace_bo) {
+		struct r600_screen *rscreen = ctx->screen;
+		unsigned i;
+
+		/* Poll the trace buffer; usleep() takes microseconds, so 10 x 5us. */
+		for (i = 0; i < 10; i++) {
+			usleep(5);
+			if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
+				break;
+			}
+		}
+		if (i == 10) {
+			fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
+				rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
+		} else {
+			fprintf(stderr, "cs %d executed in %dus\n", rscreen->trace_ptr[1], i * 5);
+		}
+	}
+#endif
+
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->flags = 0;
 
@@ -665,3 +704,29 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar
 
 	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
 }
+
+#if R600_TRACE_CS
+/*
+ * Emit a WRITE_DATA packet that stores the current dword offset and the CS
+ * counter into the trace buffer, so a lockup can be located after a flush.
+ */
+void r600_trace_emit(struct r600_context *rctx)
+{
+	struct r600_screen *rscreen = rctx->screen;
+	struct radeon_winsys_cs *cs = rctx->cs;
+	uint64_t va;
+
+	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
+	r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
+	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
+	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
+				PKT3_WRITE_DATA_WR_CONFIRM |
+				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
+	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
+	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
+	/* Read cdw before bumping it: "buf[cdw++] = cdw" is unsequenced. */
+	cs->buf[cs->cdw] = cs->cdw;
+	cs->cdw++;
+	cs->buf[cs->cdw++] = rscreen->cs_count;
+}
+#endif
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 672017a4ec7..0d243097839 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -522,6 +522,14 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
 		rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
 		si_resource_reference(&rscreen->fences.bo, NULL);
 	}
+
+#if R600_TRACE_CS
+	if (rscreen->trace_bo) {
+		rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+		pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
+	}
+#endif
+
 	pipe_mutex_destroy(rscreen->fences.mutex);
 
 	rscreen->ws->destroy(rscreen->ws);
@@ -724,5 +732,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 	LIST_INITHEAD(&rscreen->fences.blocks);
 	pipe_mutex_init(rscreen->fences.mutex);
 
+#if R600_TRACE_CS
+	rscreen->cs_count = 0;
+	if (rscreen->info.drm_minor >= 28) {
+		rscreen->trace_bo = (struct si_resource*)pipe_buffer_create(&rscreen->screen,
+										PIPE_BIND_CUSTOM,
+										PIPE_USAGE_STAGING,
+										4096);
+		if (rscreen->trace_bo) {
+			rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
+									PIPE_TRANSFER_UNSYNCHRONIZED);
+			if (!rscreen->trace_ptr) {
+				/* Unmappable: drop the buffer so tracing stays disabled. */
+				pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
+			}
+		}
+	}
+#endif
+
 	return &rscreen->screen;
 }
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index d0f04f40562..40a5c8c3d90 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -47,6 +47,10 @@
 #define R600_BIG_ENDIAN 0
 #endif
 
+/* Set R600_TRACE_CS to 1 to build the CS tracing code; one trace packet is R600_TRACE_CS_DWORDS dwords. */
+#define R600_TRACE_CS 0
+#define R600_TRACE_CS_DWORDS 6
+
 struct r600_pipe_fences {
 	struct si_resource		*bo;
 	unsigned			*data;
@@ -67,6 +71,11 @@ struct r600_screen {
 	struct r600_tiling_info		tiling_info;
 	struct util_slab_mempool	pool_buffers;
 	struct r600_pipe_fences		fences;
+#if R600_TRACE_CS
+	struct si_resource		*trace_bo;
+	uint32_t			*trace_ptr;
+	unsigned			cs_count;
+#endif
 };
 
 struct si_pipe_sampler_view {
@@ -226,6 +235,10 @@ void r600_translate_index_buffer(struct r600_context *r600,
 				 struct pipe_index_buffer *ib,
 				 unsigned count);
 
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx);
+#endif
+
 /*
  * common helpers
  */
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
index 79a2521f339..8e01738253e 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
@@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
 			continue;
 
 		count += state->ndw;
+#if R600_TRACE_CS
+		/* reserve room for the trace packet emitted after each state */
+		if (rctx->screen->trace_bo) {
+			count += R600_TRACE_CS_DWORDS;
+		}
+#endif
 	}
 
 	return count;
@@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state)
 	}
 
 	cs->cdw += state->ndw;
+
+#if R600_TRACE_CS
+	if (rctx->screen->trace_bo) {
+		r600_trace_emit(rctx);
+	}
+#endif
 }
 
 void si_pm4_emit_dirty(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index a78751bf802..1e1d1cc6d6b 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	si_pm4_emit_dirty(rctx);
 	rctx->pm4_dirty_cdwords = 0;
 
+#if R600_TRACE_CS
+	if (rctx->screen->trace_bo) {
+		r600_trace_emit(rctx);
+	}
+#endif
+
 #if 0
 	/* Enable stream out if needed. */
 	if (rctx->streamout_start) {
@@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	}
 #endif
 
-
 	rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
 
 	/* Set the depth buffer as dirty. */
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 57553a69be2..8528981ab79 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -77,6 +77,20 @@
 #define PKT3_DRAW_INDEX_IMMD                   0x2E
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
+#define PKT3_WRITE_DATA                        0x37
+#define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
+#define     PKT3_WRITE_DATA_DST_SEL_REG            0
+#define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
+#define     PKT3_WRITE_DATA_DST_SEL_TC_OR_L2       2
+#define     PKT3_WRITE_DATA_DST_SEL_GDS            3
+#define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
+#define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5
+#define     PKT3_WR_ONE_ADDR                       (1 << 16)
+#define     PKT3_WRITE_DATA_WR_CONFIRM             (1 << 20)
+#define     PKT3_WRITE_DATA_ENGINE_SEL(x)          ((unsigned)(x) << 30)
+#define     PKT3_WRITE_DATA_ENGINE_SEL_ME          0
+#define     PKT3_WRITE_DATA_ENGINE_SEL_PFP         1
+#define     PKT3_WRITE_DATA_ENGINE_SEL_CE          2
 #define PKT3_MEM_SEMAPHORE                     0x39
 #define PKT3_MPEG_INDEX                        0x3A
 #define PKT3_WAIT_REG_MEM                      0x3C