-rw-r--r--   src/amd/vulkan/radv_private.h   18
-rw-r--r--   src/amd/vulkan/si_cmd_buffer.c  70
2 files changed, 54 insertions(+), 34 deletions(-)
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 0e629fcf836..30201a67e73 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -587,16 +587,18 @@ enum radv_cmd_flush_bits {
RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
+ /* Same as above, but only writes back and doesn't invalidate */
+ RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
/* Framebuffer caches */
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6,
- RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
/* Engine synchronization. */
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8,
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9,
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10,
- RADV_CMD_FLAG_VGT_FLUSH = 1 << 11,
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
+ RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 4709ef69a02..5d35287f8e3 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -689,6 +689,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
}
+static void
+si_emit_acquire_mem(struct radeon_winsys_cs *cs,
+ bool is_mec,
+ unsigned cp_coher_cntl)
+{
+ if (is_mec) {
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ } else {
+ /* ACQUIRE_MEM is only required on a compute ring. */
+ radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ }
+}
+
void
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
@@ -701,13 +725,6 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
- cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
- cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
- if (chip_class >= VI)
- cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
- }
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
@@ -778,28 +795,29 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
radeon_emit(cs, 0);
}
+ if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
+ (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+ cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+ if (chip_class >= VI)
+ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+ } else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
+ S_0301F0_TC_NC_ACTION_ENA(1);
+
+ /* L2 writeback doesn't combine with L1 invalidate */
+ si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
+
+ cp_coher_cntl = 0;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
+ cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
+
/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
* Therefore, it should be last. Done in PFP.
*/
- if (cp_coher_cntl) {
- if (is_mec) {
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- } else {
- /* ACQUIRE_MEM is only required on a compute ring. */
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- }
- }
+ if (cp_coher_cntl)
+ si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
}
void
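
For readers skimming the diff, the heart of the change is the handling of the new RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 bit: on VI and later it maps to a write-back-only ACQUIRE_MEM/SURFACE_SYNC (TC_WB_ACTION_ENA | TC_NC_ACTION_ENA) that is emitted separately from any L1 invalidate, while on CIK and older it falls back to the full L2 invalidate path. Below is a minimal standalone sketch of that decision logic; the FLAG_* and TC_*_ACTION_ENA values and the decide_tc_flags() helper are illustrative placeholders, not the driver's real S_0085F0_*/S_0301F0_* field macros, and the bit positions do not match the actual CP_COHER_CNTL layout.

/* Standalone sketch of the L2 write-back vs. invalidate decision added by
 * this patch. All names and bit positions are illustrative only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative flush flags, mirroring the enum layout after this patch. */
#define FLAG_INV_GLOBAL_L2        (1u << 3)
#define FLAG_WRITEBACK_GLOBAL_L2  (1u << 4)

/* Illustrative stand-ins for the CP_COHER_CNTL field setters. */
#define TC_ACTION_ENA     (1u << 0)  /* invalidate L2 */
#define TC_WB_ACTION_ENA  (1u << 1)  /* write back L2 (VI+) */
#define TC_NC_ACTION_ENA  (1u << 2)  /* also cover non-coherently cached lines (VI+) */

enum chip { CIK = 7, VI = 8 };

/* Returns the TC-related bits to put into one ACQUIRE_MEM/SURFACE_SYNC, and
 * reports whether they must go out in a separate packet first, because the
 * write-back-only request does not combine with an L1 invalidate. */
static uint32_t decide_tc_flags(enum chip chip_class, uint32_t flush_bits,
                                bool *needs_separate_packet)
{
   uint32_t cntl = 0;
   *needs_separate_packet = false;

   if ((flush_bits & FLAG_INV_GLOBAL_L2) ||
       (chip_class <= CIK && (flush_bits & FLAG_WRITEBACK_GLOBAL_L2))) {
      /* Invalidate requested, or a write-back request on CIK and older,
       * which has no separate write-back path. */
      cntl |= TC_ACTION_ENA;
      if (chip_class >= VI)
         cntl |= TC_WB_ACTION_ENA;
   } else if (flush_bits & FLAG_WRITEBACK_GLOBAL_L2) {
      /* VI+: write back only, leaving L2 contents valid; must be emitted
       * on its own, before any L1 invalidate bits are added. */
      cntl |= TC_WB_ACTION_ENA | TC_NC_ACTION_ENA;
      *needs_separate_packet = true;
   }
   return cntl;
}

int main(void)
{
   bool separate;
   uint32_t cntl = decide_tc_flags(VI, FLAG_WRITEBACK_GLOBAL_L2, &separate);
   printf("TC bits: 0x%x, separate packet: %s\n", cntl, separate ? "yes" : "no");
   return 0;
}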