diff options
-rw-r--r-- | src/amd/vulkan/radv_private.h | 18 | ||||
-rw-r--r-- | src/amd/vulkan/si_cmd_buffer.c | 70 |
2 files changed, 54 insertions, 34 deletions
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 0e629fcf836..30201a67e73 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -587,16 +587,18 @@ enum radv_cmd_flush_bits { RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2, /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3, + /* Same as above, but only writes back and doesn't invalidate */ + RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4, /* Framebuffer caches */ - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4, - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5, - RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6, - RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7, + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, + RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, + RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, /* Engine synchronization. */ - RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8, - RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9, - RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10, - RADV_CMD_FLAG_VGT_FLUSH = 1 << 11, + RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, + RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, + RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, + RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 4709ef69a02..5d35287f8e3 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -689,6 +689,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, } +static void +si_emit_acquire_mem(struct radeon_winsys_cs *cs, + bool is_mec, + unsigned cp_coher_cntl) +{ + if (is_mec) { + radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | + PKT3_SHADER_TYPE_S(1)); + radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ + radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ + radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */ + radeon_emit(cs, 0); /* CP_COHER_BASE */ + radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ + radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ + } else { + /* ACQUIRE_MEM is only required on a compute ring. */ + radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); + radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ + radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ + radeon_emit(cs, 0); /* CP_COHER_BASE */ + radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ + } +} + void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, enum chip_class chip_class, @@ -701,13 +725,6 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) - cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); - if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) { - cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); - if (chip_class >= VI) - cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1); - } if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | @@ -778,28 +795,29 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, radeon_emit(cs, 0); } + if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) || + (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) { + cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); + if (chip_class >= VI) + cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1); + } else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) { + cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) | + S_0301F0_TC_NC_ACTION_ENA(1); + + /* L2 writeback doesn't combine with L1 invalidate */ + si_emit_acquire_mem(cs, is_mec, cp_coher_cntl); + + cp_coher_cntl = 0; + } + + if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) + cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); + /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle. * Therefore, it should be last. Done in PFP. */ - if (cp_coher_cntl) { - if (is_mec) { - radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ - radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ - radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */ - radeon_emit(cs, 0); /* CP_COHER_BASE */ - radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ - radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ - } else { - /* ACQUIRE_MEM is only required on a compute ring. */ - radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); - radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ - radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ - radeon_emit(cs, 0); /* CP_COHER_BASE */ - radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ - } - } + if (cp_coher_cntl) + si_emit_acquire_mem(cs, is_mec, cp_coher_cntl); } void |