summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
author Chris Wilson <[email protected]> 2018-05-26 00:33:56 +0100
committer Kenneth Graunke <[email protected]> 2018-05-25 21:35:50 -0700
commit f5e8b13f78a085bc95a1c0895e4a38ff6b87b375 (patch)
tree dade5ae70a4bec18f363488fc954d66cab568698 /src/mesa
parent 18c50498db078f8bfbf1f8135b4e72ed479e32d9 (diff)
i915: Fix streaming loads for intel_tiled_memcpy
We stream from a tiled and aligned source into an unaligned user buffer, so the stores must use _mm_storeu_si128 (the unaligned store) instead of _mm_store_si128.

Fixes: d21c086d819d78fb3f6abcbb14aa492970f442aa (i965/tiled_memcpy: inline movntdqa loads in tiled_to_linear)
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 10
1 file changed, 5 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index fac5427d2ed..6440dceac36 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -223,17 +223,17 @@ _memcpy_streaming_load(void *dest, const void *src, size_t count)
{
if (count == 16) {
__m128i val = _mm_stream_load_si128((__m128i *)src);
- _mm_store_si128((__m128i *)dest, val);
+ _mm_storeu_si128((__m128i *)dest, val);
return dest;
} else if (count == 64) {
__m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
__m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
__m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
__m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
- _mm_store_si128(((__m128i *)dest) + 0, val0);
- _mm_store_si128(((__m128i *)dest) + 1, val1);
- _mm_store_si128(((__m128i *)dest) + 2, val2);
- _mm_store_si128(((__m128i *)dest) + 3, val3);
+ _mm_storeu_si128(((__m128i *)dest) + 0, val0);
+ _mm_storeu_si128(((__m128i *)dest) + 1, val1);
+ _mm_storeu_si128(((__m128i *)dest) + 2, val2);
+ _mm_storeu_si128(((__m128i *)dest) + 3, val3);
return dest;
} else {
assert(count < 64); /* and (count < 16) for ytiled */