diff options
author | Tim Rowley <[email protected]> | 2017-07-20 10:51:30 -0500 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-07-21 15:13:14 -0500 |
commit | 3e03ecaaf698418714764da76fb7d29ea158e0dd (patch) | |
tree | f5cf18d32c80cc6ba6a9eae3493be1522634aa21 | |
parent | 2656a940c2d4c0e56cb84467a065b566cb93058c (diff) |
swr/rast: fix memory paths for avx512 optimized avx/sse
Source/destination will not be AVX512 aligned, use the
unaligned load/store intrinsics.
Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl | 10 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl | 10 |
2 files changed, 10 insertions, 10 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl index aaa74146ad9..012f3105e9f 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl @@ -294,12 +294,12 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8); //----------------------------------------------------------------------- static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory) { - return __conv(_mm512_maskz_load_ps(__mmask16(0xf), p)); + return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p)); } static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p { - return __conv(_mm512_maskz_load_epi32(__mmask16(0xf), p)); + return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p)); } static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem) @@ -353,17 +353,17 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) { __mmask16 m = 0xf; m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000)); - _mm512_mask_store_ps(p, m, __conv(src)); + _mm512_mask_storeu_ps(p, m, __conv(src)); } static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory) { - _mm512_mask_store_ps(p, __mmask16(0xf), __conv(a)); + _mm512_mask_storeu_ps(p, __mmask16(0xf), __conv(a)); } static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a { - _mm512_mask_store_epi32(p, __mmask16(0xf), __conv(a)); + _mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a)); } //======================================================================= diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl index 5103bdafa22..a8d2a4b8bfd 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl @@ -295,12 +295,12 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8); //----------------------------------------------------------------------- static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory) { - return __conv(_mm512_maskz_load_ps(__mmask16(0xff), p)); + return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p)); } static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p { - return __conv(_mm512_maskz_load_epi32(__mmask16(0xff), p)); + return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p)); } static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem) @@ -354,17 +354,17 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) { __mmask16 m = 0xff; m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000)); - _mm512_mask_store_ps(p, m, __conv(src)); + _mm512_mask_storeu_ps(p, m, __conv(src)); } static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory) { - _mm512_mask_store_ps(p, __mmask16(0xff), __conv(a)); + _mm512_mask_storeu_ps(p, __mmask16(0xff), __conv(a)); } static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a { - _mm512_mask_store_epi32(p, __mmask16(0xff), __conv(a)); + _mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a)); } //======================================================================= |