summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2018-06-28 12:21:18 +0200
committerSamuel Pitoiset <[email protected]>2018-07-05 11:31:06 +0200
commit8339ba827bf3f18463824206347665a45989b99e (patch)
tree1a8c258ae4a7ee1d4826bdf57b6a163b453fd3d2
parentf635109140d92139f981f5cd5409f5b1078fc079 (diff)
radv: optimize vkCmd{Set,Reset}Event() a little bit
Always emitting a bottom-of-pipe event is quite dumb. Instead, start to optimize these functions by syncing PFP for the top-of-pipe event and syncing ME for the post-index-fetch event. This can still be improved by emitting EOS events for syncing the PS and CS stages.

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
-rw-r--r-- src/amd/vulkan/radv_cmd_buffer.c | 46
1 files changed, 38 insertions, 8 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 074e9c4c7f1..17385aace1f 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4275,14 +4275,44 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
- /* TODO: this is overkill. Probably should figure something out from
- * the stage mask. */
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DATA_SEL_VALUE_32BIT, va, 2, value);
+ /* Flags that only require a top-of-pipe event. */
+ static const VkPipelineStageFlags top_of_pipe_flags =
+ VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
+ /* Flags that only require a post-index-fetch event. */
+ static const VkPipelineStageFlags post_index_fetch_flags =
+ top_of_pipe_flags |
+ VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+
+ /* TODO: Emit EOS events for syncing PS/CS stages. */
+
+ if (!(stageMask & ~top_of_pipe_flags)) {
+ /* Just need to sync the PFP engine. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ } else if (!(stageMask & ~post_index_fetch_flags)) {
+ /* Sync ME because PFP reads index and indirect buffers. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ } else {
+ /* Otherwise, sync all prior GPU work using an EOP event. */
+ si_cs_emit_write_event_eop(cs,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer),
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DATA_SEL_VALUE_32BIT, va, 2, value);
+ }
assert(cmd_buffer->cs->cdw <= cdw_max);
}