diff options
author | Marek Olšák <[email protected]> | 2016-05-26 22:00:03 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-06-04 15:42:33 +0200 |
commit | 5ea5ed60500a8612166853975b42abd40a459216 (patch) | |
tree | 5b9bc8656166ed7b359b3ceca87c54591a3bdb6e /src/gallium/drivers/r600/r600_hw_context.c | |
parent | ade16e1f5d046f6407c4f0046efb8363520adcf0 (diff) |
r600g: fix CP DMA hazard with index buffer fetches (v3)
v3: use PFP_SYNC_ME on EG-CM only when supported by the kernel,
otherwise use MEM_WRITE + WAIT_REG_MEM to emulate that
Reviewed-by: Alex Deucher <[email protected]>
Tested-by: Grazvydas Ignotas <[email protected]>
Tested-by: Dieter Nützel <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/r600_hw_context.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 69 |
1 file changed, 68 insertions, 1 deletion
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 808bd27607f..3ba723d0541 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -364,6 +364,66 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw; } +void r600_emit_pfp_sync_me(struct r600_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; + + if (rctx->b.chip_class >= EVERGREEN && + rctx->b.screen->info.drm_minor >= 46) { + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); + } else { + /* Emulate PFP_SYNC_ME by writing a value to memory in ME and + * waiting for it in PFP. + */ + struct r600_resource *buf = NULL; + unsigned offset, reloc; + uint64_t va; + + /* 16-byte address alignment is required by WAIT_REG_MEM. */ + u_suballocator_alloc(rctx->b.allocator_zeroed_memory, 4, 16, + &offset, (struct pipe_resource**)&buf); + if (!buf) { + /* This is too heavyweight, but will work. */ + rctx->b.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); + return; + } + + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, buf, + RADEON_USAGE_READWRITE, + RADEON_PRIO_FENCE); + + va = buf->gpu_address + offset; + assert(va % 16 == 0); + + /* Write 1 to memory in ME. */ + radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); + radeon_emit(cs, va); + radeon_emit(cs, ((va >> 32) & 0xff) | MEM_WRITE_32_BITS); + radeon_emit(cs, 1); + radeon_emit(cs, 0); + + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, reloc); + + /* Wait in PFP (PFP can only do GEQUAL against memory). 
*/ + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_GEQUAL | + WAIT_REG_MEM_MEMORY | + WAIT_REG_MEM_PFP); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, 1); /* reference value */ + radeon_emit(cs, 0xffffffff); /* mask */ + radeon_emit(cs, 4); /* poll interval */ + + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, reloc); + + r600_resource_reference(&buf, NULL); + } +} + /* The max number of bytes to copy per packet. */ #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8) @@ -407,7 +467,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + - 3, FALSE); + 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { @@ -447,6 +507,13 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_CP_DMA_IDLE(1)); + /* CP DMA is executed in ME, but index buffers are read by PFP. + * This ensures that ME (CP DMA) is idle before PFP starts fetching + * indices. If we wanted to execute CP DMA in PFP, this packet + * should precede it. + */ + r600_emit_pfp_sync_me(rctx); + /* Invalidate the read caches. */ rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | R600_CONTEXT_INV_VERTEX_CACHE | |