about | summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-02-25 22:53:37 -0500
committer: Marek Olšák <[email protected]> 2019-05-16 13:13:36 -0400
commit: 04122532e3c06260ae889a4f6a28d6f9849b00f5 (patch)
tree: 4efe14a9881ff4d9377eac6a99e5a42a4a6c3655 /src/gallium/drivers
parent: 9f505ce21d675b102cb2c89ac1ab2f03d6680b22 (diff)
radeonsi: invalidate caches at the beginning of the prim discard compute IB
Acked-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute_prim_discard.c 11
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 19
3 files changed, 23 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 8261311f74a..362c63c2e44 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -1196,6 +1196,17 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
}
/* 2) IB initialization. */
+
+ /* This needs to be done at the beginning of IBs due to possible
+ * TTM buffer moves in the kernel.
+ */
+ si_emit_surface_sync(sctx, cs,
+ S_0085F0_TC_ACTION_ENA(1) |
+ S_0085F0_TCL1_ACTION_ENA(1) |
+ S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8) |
+ S_0085F0_SH_ICACHE_ACTION_ENA(1) |
+ S_0085F0_SH_KCACHE_ACTION_ENA(1));
+
/* Restore the GDS prim restart counter if needed. */
if (sctx->preserve_prim_restart_gds_at_flush) {
si_cp_copy_data(sctx, cs,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 05e974d4c12..66a20241446 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -604,6 +604,8 @@ void si_shader_selector_key_vs(struct si_context *sctx,
struct si_vs_prolog_bits *prolog_key);
/* si_state_draw.c */
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+ unsigned cp_coher_cntl);
void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx);
void si_emit_cache_flush(struct si_context *sctx);
void si_trace_emit(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2c571016ada..d7de37b33ff 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -889,12 +889,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
}
}
-static void si_emit_surface_sync(struct si_context *sctx,
- unsigned cp_coher_cntl)
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+ unsigned cp_coher_cntl)
{
- struct radeon_cmdbuf *cs = sctx->gfx_cs;
+ bool compute_ib = !sctx->has_graphics ||
+ cs == sctx->prim_discard_compute_cs;
- if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
+ if (sctx->chip_class >= GFX9 || compute_ib) {
/* Flush caches and wait for the caches to assert idle. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
@@ -914,7 +915,7 @@ static void si_emit_surface_sync(struct si_context *sctx,
/* ACQUIRE_MEM has an implicit context roll if the current context
* is busy. */
- if (sctx->has_graphics)
+ if (!compute_ib)
sctx->context_roll = true;
}
@@ -1162,7 +1163,7 @@ void si_emit_cache_flush(struct si_context *sctx)
/* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
* WB must be set on GFX8+ when TC_ACTION is set.
*/
- si_emit_surface_sync(sctx, cp_coher_cntl |
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8));
@@ -1179,7 +1180,7 @@ void si_emit_cache_flush(struct si_context *sctx)
*
* WB doesn't work without NC.
*/
- si_emit_surface_sync(sctx, cp_coher_cntl |
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
@@ -1187,7 +1188,7 @@ void si_emit_cache_flush(struct si_context *sctx)
}
if (flags & SI_CONTEXT_INV_VMEM_L1) {
/* Invalidate per-CU VMEM L1. */
- si_emit_surface_sync(sctx, cp_coher_cntl |
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
}
@@ -1195,7 +1196,7 @@ void si_emit_cache_flush(struct si_context *sctx)
/* If TC flushes haven't cleared this... */
if (cp_coher_cntl)
- si_emit_surface_sync(sctx, cp_coher_cntl);
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl);
if (is_barrier)
si_prim_discard_signal_next_compute_ib_start(sctx);