aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2017-05-16 10:25:21 -0500
committer Tim Rowley <[email protected]> 2017-05-30 17:21:22 -0500
commit 2438932b7e66c5cd496bdd19e6d96d168b3dd2c8 (patch)
tree ae17d0c1f720ec826a797d68a9a6c44abf893f0f
parent 7be26a2d35c2e91fd9f3195b012c163d562eb9df (diff)
swr/rast: make simd16 logicops avx512f safe
Express the simd16 logicops in terms of AVX512F instructions.

Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/common/simd16intrin.h 14
1 files changed, 10 insertions, 4 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 2fe18f28282..84585ffc8fc 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -941,10 +941,16 @@ INLINE simd16scalar SIMDAPI _simd16_cmp_ps_temp(simd16scalar a, simd16scalar b)
#define _simd16_castpd_ps _mm512_castpd_ps
#define _simd16_castps_pd _mm512_castps_pd
-#define _simd16_and_ps _mm512_and_ps
-#define _simd16_andnot_ps _mm512_andnot_ps
-#define _simd16_or_ps _mm512_or_ps
-#define _simd16_xor_ps _mm512_xor_ps
+// _mm512_and_ps (and the other float bitwise intrinsics) exist in AVX512DQ, while
+// the functionally equivalent _mm512_and_epi32 family is in AVX512F. For broader
+// support, _simd16_logicop_ps casts both operands with _simd16_castps_si, applies
+// the intrinsic named op##_epi32, and casts the result back with _simd16_castsi_ps.
+#define _simd16_logicop_ps(a, b, op) _simd16_castsi_ps(op##_epi32(_simd16_castps_si(a), _simd16_castps_si(b)))
+
+#define _simd16_and_ps(a, b) _simd16_logicop_ps(a, b, _mm512_and)
+#define _simd16_andnot_ps(a, b) _simd16_logicop_ps(a, b, _mm512_andnot) // NOTE(review): Intel documents andnot as (~a) & b; confirm call sites rely on that order
+#define _simd16_or_ps(a, b) _simd16_logicop_ps(a, b, _mm512_or)
+#define _simd16_xor_ps(a, b) _simd16_logicop_ps(a, b, _mm512_xor)
template <int mode>
INLINE simd16scalar SIMDAPI _simd16_round_ps_temp(simd16scalar a)