diff options
author | Marek Olšák <[email protected]> | 2019-02-25 22:53:37 -0500 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-05-16 13:13:36 -0400 |
commit | 04122532e3c06260ae889a4f6a28d6f9849b00f5 (patch) | |
tree | 4efe14a9881ff4d9377eac6a99e5a42a4a6c3655 /src/gallium/drivers | |
parent | 9f505ce21d675b102cb2c89ac1ab2f03d6680b22 (diff) |
radeonsi: invalidate caches at the beginning of the prim discard compute IB
Acked-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 19 |
3 files changed, 23 insertions, 9 deletions
```diff
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 8261311f74a..362c63c2e44 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -1196,6 +1196,17 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
 	}
 
 	/* 2) IB initialization. */
+
+	/* This needs to be done at the beginning of IBs due to possible
+	 * TTM buffer moves in the kernel.
+	 */
+	si_emit_surface_sync(sctx, cs,
+			     S_0085F0_TC_ACTION_ENA(1) |
+			     S_0085F0_TCL1_ACTION_ENA(1) |
+			     S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8) |
+			     S_0085F0_SH_ICACHE_ACTION_ENA(1) |
+			     S_0085F0_SH_KCACHE_ACTION_ENA(1));
+
 	/* Restore the GDS prim restart counter if needed. */
 	if (sctx->preserve_prim_restart_gds_at_flush) {
 		si_cp_copy_data(sctx, cs,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 05e974d4c12..66a20241446 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -604,6 +604,8 @@ void si_shader_selector_key_vs(struct si_context *sctx,
 			       struct si_vs_prolog_bits *prolog_key);
 
 /* si_state_draw.c */
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+			  unsigned cp_coher_cntl);
 void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx);
 void si_emit_cache_flush(struct si_context *sctx);
 void si_trace_emit(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2c571016ada..d7de37b33ff 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -889,12 +889,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
 	}
 }
 
-static void si_emit_surface_sync(struct si_context *sctx,
-				 unsigned cp_coher_cntl)
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+			  unsigned cp_coher_cntl)
 {
-	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+	bool compute_ib = !sctx->has_graphics ||
+			  cs == sctx->prim_discard_compute_cs;
 
-	if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
+	if (sctx->chip_class >= GFX9 || compute_ib) {
 		/* Flush caches and wait for the caches to assert idle. */
 		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
 		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
@@ -914,7 +915,7 @@ static void si_emit_surface_sync(struct si_context *sctx,
 
 	/* ACQUIRE_MEM has an implicit context roll if the current context
 	 * is busy. */
-	if (sctx->has_graphics)
+	if (!compute_ib)
 		sctx->context_roll = true;
 }
 
@@ -1162,7 +1163,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 			/* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
 			 * WB must be set on GFX8+ when TC_ACTION is set.
 			 */
-			si_emit_surface_sync(sctx, cp_coher_cntl |
+			si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
 					     S_0085F0_TC_ACTION_ENA(1) |
 					     S_0085F0_TCL1_ACTION_ENA(1) |
 					     S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8));
@@ -1179,7 +1180,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 			 *
 			 * WB doesn't work without NC.
 			 */
-			si_emit_surface_sync(sctx, cp_coher_cntl |
+			si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
 					     S_0301F0_TC_WB_ACTION_ENA(1) |
 					     S_0301F0_TC_NC_ACTION_ENA(1));
 			cp_coher_cntl = 0;
@@ -1187,7 +1188,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 		}
 		if (flags & SI_CONTEXT_INV_VMEM_L1) {
 			/* Invalidate per-CU VMEM L1. */
-			si_emit_surface_sync(sctx, cp_coher_cntl |
+			si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
 					     S_0085F0_TCL1_ACTION_ENA(1));
 			cp_coher_cntl = 0;
 		}
@@ -1195,7 +1196,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 
 	/* If TC flushes haven't cleared this... */
 	if (cp_coher_cntl)
-		si_emit_surface_sync(sctx, cp_coher_cntl);
+		si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl);
 
 	if (is_barrier)
 		si_prim_discard_signal_next_compute_ib_start(sctx);
```