diff options
author | Bas Nieuwenhuizen <[email protected]> | 2016-12-12 08:38:00 +0100 |
---|---|---|
committer | Bas Nieuwenhuizen <[email protected]> | 2016-12-18 20:52:26 +0100 |
commit | b3499557a2b83269602c021c5c250e448c2aae93 (patch) | |
tree | bf05cac914efa4572aba4e42318ff98dc55265ef | |
parent | 72aaa83f4b5ba193cd4570da610893cd7b054332 (diff) |
radv: Implement cache flushing for the MEC.
Signed-off-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Dave Airlie <[email protected]>
-rw-r--r-- | src/amd/vulkan/si_cmd_buffer.c | 36 |
1 files changed, 29 insertions, 7 deletions
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 5ac2a148096..4b2624cb8e2 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -601,6 +601,16 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) { enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class; unsigned cp_coher_cntl = 0; + bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; + + if (is_compute) + cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | + RADV_CMD_FLAG_FLUSH_AND_INV_DB | + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | + RADV_CMD_FLAG_VS_PARTIAL_FLUSH | + RADV_CMD_FLAG_VGT_FLUSH); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); @@ -679,7 +689,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) /* Make sure ME is idle (it executes most packets) before continuing. * This prevents read-after-write hazards between PFP and ME. */ - if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { + if ((cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) && + !radv_cmd_buffer_uses_mec(cmd_buffer)) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cmd_buffer->cs, 0); } @@ -688,12 +699,23 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) * Therefore, it should be last. Done in PFP. */ if (cp_coher_cntl) { - /* ACQUIRE_MEM is only required on a compute ring. */ - radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); - radeon_emit(cmd_buffer->cs, cp_coher_cntl); /* CP_COHER_CNTL */ - radeon_emit(cmd_buffer->cs, 0xffffffff); /* CP_COHER_SIZE */ - radeon_emit(cmd_buffer->cs, 0); /* CP_COHER_BASE */ - radeon_emit(cmd_buffer->cs, 0x0000000A); /* POLL_INTERVAL */ + if (radv_cmd_buffer_uses_mec(cmd_buffer)) { + radeon_emit(cmd_buffer->cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | + PKT3_SHADER_TYPE_S(1)); + radeon_emit(cmd_buffer->cs, cp_coher_cntl); /* CP_COHER_CNTL */ + radeon_emit(cmd_buffer->cs, 0xffffffff); /* CP_COHER_SIZE */ + radeon_emit(cmd_buffer->cs, 0xff); /* CP_COHER_SIZE_HI */ + radeon_emit(cmd_buffer->cs, 0); /* CP_COHER_BASE */ + radeon_emit(cmd_buffer->cs, 0); /* CP_COHER_BASE_HI */ + radeon_emit(cmd_buffer->cs, 0x0000000A); /* POLL_INTERVAL */ + } else { + /* ACQUIRE_MEM is only required on a compute ring. */ + radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); + radeon_emit(cmd_buffer->cs, cp_coher_cntl); /* CP_COHER_CNTL */ + radeon_emit(cmd_buffer->cs, 0xffffffff); /* CP_COHER_SIZE */ + radeon_emit(cmd_buffer->cs, 0); /* CP_COHER_BASE */ + radeon_emit(cmd_buffer->cs, 0x0000000A); /* POLL_INTERVAL */ + } } cmd_buffer->state.flush_bits = 0; |