authorKristian Høgsberg Kristensen <[email protected]>2016-01-29 12:10:12 -0800
committerKristian Høgsberg Kristensen <[email protected]>2016-01-29 14:56:41 -0800
commit0c4ef36360627686a0f3b56d64409ffb8bfbcb8c (patch)
tree3cd995aca976d404cbb5071dc7a56d612293b2af
parent31d3486bd2dda4b9dd65c8b24544b8f8cb54054b (diff)
anv: clflush is only ordered against mfence
We can't use the more fine-grained load and store fence commands (lfence and sfence), since clflush is only guaranteed to be ordered with respect to mfence.
-rw-r--r--src/vulkan/anv_batch_chain.c4
-rw-r--r--src/vulkan/anv_device.c17
-rw-r--r--src/vulkan/anv_private.h2
3 files changed, 12 insertions, 11 deletions
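For reference, a minimal standalone sketch of the write-side pattern the patch converges on: fence first so the preceding CPU stores are ordered before the flushes, then clflush each cache line. This is illustrative only; the helper name flush_range and the local CACHELINE_SIZE/CACHELINE_MASK definitions are assumptions for the sketch, not taken from the driver.

/* Illustrative sketch: mirrors the mfence-then-clflush ordering used on
 * non-LLC platforms.  clflush is only guaranteed to be ordered with
 * respect to mfence, so an sfence before the loop is not sufficient. */
#include <stdint.h>
#include <stddef.h>

#define CACHELINE_SIZE 64
#define CACHELINE_MASK (CACHELINE_SIZE - 1)

static void
flush_range(void *start, size_t size)
{
   char *end = (char *)start + size;
   char *p = (char *)((uintptr_t)start & ~(uintptr_t)CACHELINE_MASK);

   __builtin_ia32_mfence();        /* order the preceding stores before the flushes */
   while (p < end) {
      __builtin_ia32_clflush(p);   /* write the dirty line back to memory */
      p += CACHELINE_SIZE;
   }
}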
diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c
index e9bd67c9442..d74c5995168 100644
--- a/src/vulkan/anv_batch_chain.c
+++ b/src/vulkan/anv_batch_chain.c
@@ -755,7 +755,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
if (!primary->device->info.has_llc) {
void *inst = secondary->batch.next - inst_size;
void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
while (p < secondary->batch.next) {
__builtin_ia32_clflush(p);
p += CACHELINE_SIZE;
@@ -1047,7 +1047,7 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
if (!cmd_buffer->device->info.has_llc) {
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
__builtin_ia32_clflush((*bbo)->bo.map + i);
diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c
index c7a9fd15c1d..5bb9fec0085 100644
--- a/src/vulkan/anv_device.c
+++ b/src/vulkan/anv_device.c
@@ -1173,7 +1173,7 @@ VkResult anv_FlushMappedMemoryRanges(
return VK_SUCCESS;
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
@@ -1193,7 +1193,7 @@ VkResult anv_InvalidateMappedMemoryRanges(
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
/* Make sure no reads get moved up above the invalidate. */
- __builtin_ia32_lfence();
+ __builtin_ia32_mfence();
return VK_SUCCESS;
}
@@ -1342,7 +1342,7 @@ VkResult anv_CreateFence(
if (!device->info.has_llc) {
assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(fence->bo.map);
}
@@ -1510,7 +1510,7 @@ VkResult anv_CreateEvent(
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(event);
}
@@ -1538,9 +1538,10 @@ VkResult anv_GetEventStatus(
ANV_FROM_HANDLE(anv_event, event, _event);
if (!device->info.has_llc) {
- /* Make sure the writes we're flushing have landed. */
+ /* Invalidate read cache before reading event written by GPU. */
__builtin_ia32_clflush(event);
- __builtin_ia32_lfence();
+ __builtin_ia32_mfence();
+
}
return event->semaphore;
@@ -1557,7 +1558,7 @@ VkResult anv_SetEvent(
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(event);
}
@@ -1575,7 +1576,7 @@ VkResult anv_ResetEvent(
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(event);
}
diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h
index b1d4577f93e..c5ce1484bc2 100644
--- a/src/vulkan/anv_private.h
+++ b/src/vulkan/anv_private.h
@@ -433,7 +433,7 @@ anv_state_clflush(struct anv_state state)
void *end = state.map + state.alloc_size;
void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK);
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
while (p < end) {
__builtin_ia32_clflush(p);
p += CACHELINE_SIZE;
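On the read side (anv_InvalidateMappedMemoryRanges, anv_GetEventStatus) the order is reversed: flush the possibly stale line first, then mfence so the following load cannot be moved above the clflush; lfence would not do, for the same reason. A hedged sketch of that pattern follows; read_gpu_written_u64 is a made-up helper, not driver code.

/* Illustrative sketch: invalidate the cache line covering GPU-written
 * memory, then fence before reading it. */
#include <stdint.h>

static uint64_t
read_gpu_written_u64(const uint64_t *gpu_word)
{
   __builtin_ia32_clflush(gpu_word);   /* discard the possibly stale cached line */
   __builtin_ia32_mfence();            /* clflush is only ordered against mfence;
                                        * keep the load below the flush */
   return *gpu_word;
}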