summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/vulkan/radv_device.c24
-rw-r--r--src/amd/vulkan/radv_private.h2
2 files changed, 23 insertions, 3 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 1301801ef47..408c0347856 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1046,6 +1046,22 @@ VkResult radv_CreateDevice(
break;
}
device->ws->cs_finalize(device->flush_cs[family]);
+
+ device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ case RADV_QUEUE_COMPUTE:
+ si_cs_emit_cache_flush(device->flush_shader_cs[family],
+ device->physical_device->rad_info.chip_class,
+ family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
+ family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
+ RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_GLOBAL_L2);
+ break;
+ }
+ device->ws->cs_finalize(device->flush_shader_cs[family]);
}
if (getenv("RADV_TRACE_FILE")) {
@@ -1121,6 +1137,8 @@ void radv_DestroyDevice(
device->ws->cs_destroy(device->empty_cs[i]);
if (device->flush_cs[i])
device->ws->cs_destroy(device->flush_cs[i]);
+ if (device->flush_shader_cs[i])
+ device->ws->cs_destroy(device->flush_shader_cs[i]);
}
radv_device_finish_meta(device);
@@ -1822,7 +1840,7 @@ VkResult radv_QueueSubmit(
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
- bool do_flush = !i;
+ bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
bool can_patch = !do_flush;
uint32_t advance;
@@ -1849,7 +1867,9 @@ VkResult radv_QueueSubmit(
(pSubmits[i].commandBufferCount + do_flush));
if(do_flush)
- cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
+ cs_array[0] = pSubmits[i].waitSemaphoreCount ?
+ queue->device->flush_shader_cs[queue->queue_family_index] :
+ queue->device->flush_cs[queue->queue_family_index];
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 5028bf507b3..1fe54bb8a07 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -495,7 +495,7 @@ struct radv_device {
int queue_count[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
-
+ struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
uint64_t debug_flags;
bool llvm_supports_spill;