summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2017-05-03 07:13:21 +1000
committerDave Airlie <[email protected]>2017-05-03 07:21:31 +1000
commita52470402515c46cd9f33a5d83dc8d2bc9f7bae9 (patch)
treedf7b1255b9a62df5a6d7dbec2525c2e43b000ee1 /src
parente0e01895b054847e41b98cbf3939520747a0f32d (diff)
radv: flush more stages when semaphore are waiting.
This still doesn't give us complete pWaitDstStageMask support, but it should provide enough to be correct if not as efficent as possible. If we have wait semaphores we must flush between submits and flush the shaders as well. This fixes the remaining fails in: dEQP-VK.synchronization.op.single_queue.semaphore.*ssbo* Reviewed-by: Bas Nieuwenhuizen <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/amd/vulkan/radv_device.c24
-rw-r--r--src/amd/vulkan/radv_private.h2
2 files changed, 23 insertions, 3 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 1301801ef47..408c0347856 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1046,6 +1046,22 @@ VkResult radv_CreateDevice(
break;
}
device->ws->cs_finalize(device->flush_cs[family]);
+
+ device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ case RADV_QUEUE_COMPUTE:
+ si_cs_emit_cache_flush(device->flush_shader_cs[family],
+ device->physical_device->rad_info.chip_class,
+ family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
+ family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
+ RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_GLOBAL_L2);
+ break;
+ }
+ device->ws->cs_finalize(device->flush_shader_cs[family]);
}
if (getenv("RADV_TRACE_FILE")) {
@@ -1121,6 +1137,8 @@ void radv_DestroyDevice(
device->ws->cs_destroy(device->empty_cs[i]);
if (device->flush_cs[i])
device->ws->cs_destroy(device->flush_cs[i]);
+ if (device->flush_shader_cs[i])
+ device->ws->cs_destroy(device->flush_shader_cs[i]);
}
radv_device_finish_meta(device);
@@ -1822,7 +1840,7 @@ VkResult radv_QueueSubmit(
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
- bool do_flush = !i;
+ bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
bool can_patch = !do_flush;
uint32_t advance;
@@ -1849,7 +1867,9 @@ VkResult radv_QueueSubmit(
(pSubmits[i].commandBufferCount + do_flush));
if(do_flush)
- cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
+ cs_array[0] = pSubmits[i].waitSemaphoreCount ?
+ queue->device->flush_shader_cs[queue->queue_family_index] :
+ queue->device->flush_cs[queue->queue_family_index];
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 5028bf507b3..1fe54bb8a07 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -495,7 +495,7 @@ struct radv_device {
int queue_count[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
-
+ struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
uint64_t debug_flags;
bool llvm_supports_spill;