summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2018-04-05 17:54:39 -0400
committer: Marek Olšák <[email protected]> 2018-04-13 14:07:20 -0400
commit: 918b798668c5465d85ca542423e4cf525dc79b31 (patch)
tree: 98826b6f07a0e1f6612a6eb1abc7876c53e5a471 /src/gallium
parent: b6ad7075b93bcea157eb74fc3129d61b1fb2a5ca (diff)
radeonsi: make sure CP DMA is idle at the end of IBs
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/radeonsi/si_cp_dma.c 12
-rw-r--r-- src/gallium/drivers/radeonsi/si_gfx_cs.c 5
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
3 files changed, 16 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 358b33c4eb1..b316637d94b 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -65,7 +65,6 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
struct radeon_winsys_cs *cs = sctx->gfx_cs;
uint32_t header = 0, command = 0;
- assert(size);
assert(size <= cp_dma_max_byte_count(sctx));
if (sctx->chip_class >= GFX9)
@@ -128,6 +127,17 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
}
}
+void si_cp_dma_wait_for_idle(struct si_context *sctx)
+{
+ /* Issue a dummy DMA that copies zero bytes.
+ *
+ * The DMA engine will see that there's no work to do and skip this
+ * DMA request, however, the CP will see the sync flag and still wait
+ * for all DMAs to complete.
+ */
+ si_emit_cp_dma(sctx, 0, 0, 0, CP_DMA_SYNC, SI_COHERENCY_NONE);
+}
+
static unsigned get_flush_flags(struct si_context *sctx, enum si_coherency coher)
{
switch (coher) {
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index f99bc324c98..2d5e510b19e 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -111,7 +111,10 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
ctx->flags |= SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_INV_VMEM_L1;
- si_emit_cache_flush(ctx);
+ /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
+ * because the kernel doesn't wait for it. */
+ if (ctx->chip_class >= CIK)
+ si_cp_dma_wait_for_idle(ctx);
if (ctx->current_saved_cs) {
si_trace_emit(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index c7ad5366a68..3a2f7ca11d1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -902,6 +902,7 @@ enum si_coherency {
SI_COHERENCY_CB_META,
};
+void si_cp_dma_wait_for_idle(struct si_context *sctx);
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
enum si_coherency coher);