summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/swr/rasterizer/common
diff options
context:
space:
mode:
author: Tim Rowley <[email protected]> 2017-05-17 17:39:33 -0500
committer: Tim Rowley <[email protected]> 2017-05-30 17:21:36 -0500
commit: 5ea9a30f50dabe60fe95c9077457915dd5ed52c4 (patch)
tree: b06954a5a67e62cddbcebcc374747988a86bb6bf /src/gallium/drivers/swr/rasterizer/common
parent: fb9f7bd717dbaf94a744d352029603a273c7d332 (diff)
swr/rast: SIMD16 FE - fix/use SIMD16 calcDeterminantIntVertical()
Stop double pumping the SIMD8 version.

Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/common')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/common/simd16intrin.h | 22
-rw-r--r-- src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 24
2 files changed, 46 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 84585ffc8fc..e303ce59713 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -770,6 +770,26 @@ INLINE simd16scalari SIMDAPI _simd16_cvtepu16_epi32(simdscalari a)
return result;
}
+/**
+ * Emulated (two 256-bit halves) zero-extension of eight unsigned 16-bit
+ * integers to eight 64-bit integers.
+ *
+ * The native build maps this name to _mm512_cvtepu16_epi64, which widens the
+ * eight 16-bit elements held in the low 128 bits of its source.  To produce
+ * the same result here, both halves are taken from the low 128-bit lane of
+ * a: elements 0..3 fill result.lo and elements 4..7 fill result.hi.  The
+ * upper 128 bits of a are not read.
+ *
+ * @param a  source vector; only its low 128 bits (8 x uint16) are used
+ * @return   the eight zero-extended 64-bit values, split across lo/hi
+ */
+INLINE simd16scalari SIMDAPI _simd16_cvtepu16_epi64(simdscalari a)
+{
+    /* All eight inputs live in the low 128-bit lane. */
+    const __m128i src = _mm256_extractf128_si256(a, 0);
+
+    simd16scalari result;
+
+    result.lo = _simd_cvtepu16_epi64(src);
+    /* Shift right by 8 bytes so elements 4..7 become the lowest four. */
+    result.hi = _simd_cvtepu16_epi64(_mm_srli_si128(src, 8));
+
+    return result;
+}
+
+/**
+ * Emulated (two 256-bit halves) zero-extension of unsigned 32-bit integers
+ * to 64-bit integers.
+ *
+ * Each 128-bit lane of a is widened independently with
+ * _simd_cvtepu32_epi64: the low lane produces the lo half of the result and
+ * the high lane produces the hi half.
+ *
+ * @param a  source vector of unsigned 32-bit integers
+ * @return   the widened values, split across lo/hi
+ */
+INLINE simd16scalari SIMDAPI _simd16_cvtepu32_epi64(simdscalari a)
+{
+    const __m128i lowerLane = _mm256_extractf128_si256(a, 0);
+    const __m128i upperLane = _mm256_extractf128_si256(a, 1);
+
+    simd16scalari widened;
+    widened.lo = _simd_cvtepu32_epi64(lowerLane);
+    widened.hi = _simd_cvtepu32_epi64(upperLane);
+    return widened;
+}
+
SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packus_epi16, _simd_packus_epi16)
SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packs_epi16, _simd_packs_epi16)
SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packus_epi32, _simd_packus_epi32)
@@ -1097,6 +1117,8 @@ INLINE simd16scalari SIMDAPI _simd16_cmpgt_epi8(simd16scalari a, simd16scalari b
#define _simd16_cvtepu8_epi16 _mm512_cvtepu8_epi16
#define _simd16_cvtepu8_epi32 _mm512_cvtepu8_epi32
#define _simd16_cvtepu16_epi32 _mm512_cvtepu16_epi32
+#define _simd16_cvtepu16_epi64 _mm512_cvtepu16_epi64 /* direct alias of the 512-bit intrinsic */
+#define _simd16_cvtepu32_epi64 _mm512_cvtepu32_epi64 /* direct alias of the 512-bit intrinsic */
#define _simd16_packus_epi16 _mm512_packus_epi16
#define _simd16_packs_epi16 _mm512_packs_epi16
#define _simd16_packus_epi32 _mm512_packus_epi32
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index 61c0c5461a3..ed6e56b5e26 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -456,6 +456,28 @@ __m256i _simd_cvtepu16_epi32(__m128i a)
}
INLINE
+__m256i _simd_cvtepu16_epi64(__m128i a)
+{
+ __m128i resultlo = _mm_cvtepu16_epi64(a);
+ __m128i resulthi = _mm_cvtepu16_epi64(_mm_srli_si128(a, 4));
+
+ __m256i result = _mm256_castsi128_si256(resultlo);
+
+ return _mm256_insertf128_si256(result, resulthi, 1);
+}
+
+/**
+ * Zero-extends the four unsigned 32-bit integers of a to 64-bit integers,
+ * built from two 128-bit conversions.
+ *
+ * @param a  source vector of 4 x uint32
+ * @return   256-bit vector holding the four widened values
+ */
+INLINE
+__m256i _simd_cvtepu32_epi64(__m128i a)
+{
+    /* Elements 2..3 start 8 bytes in; shift them down to the bottom. */
+    const __m128i upperSrc = _mm_srli_si128(a, 8);
+
+    const __m128i lowPair  = _mm_cvtepu32_epi64(a);
+    const __m128i highPair = _mm_cvtepu32_epi64(upperSrc);
+
+    /* Seed the 256-bit value with the low lane, then insert the high lane. */
+    return _mm256_insertf128_si256(_mm256_castsi128_si256(lowPair), highPair, 1);
+}
+
+INLINE
__m256i _simd_packus_epi16(__m256i a, __m256i b)
{
__m128i alo = _mm256_extractf128_si256(a, 0);
@@ -582,6 +604,8 @@ __m256i _simd_packs_epi32(__m256i a, __m256i b)
#define _simd_cvtepu8_epi16 _mm256_cvtepu8_epi16
#define _simd_cvtepu8_epi32 _mm256_cvtepu8_epi32
#define _simd_cvtepu16_epi32 _mm256_cvtepu16_epi32
+#define _simd_cvtepu16_epi64 _mm256_cvtepu16_epi64 /* direct alias of the 256-bit intrinsic */
+#define _simd_cvtepu32_epi64 _mm256_cvtepu32_epi64 /* direct alias of the 256-bit intrinsic */
#define _simd_packus_epi16 _mm256_packus_epi16
#define _simd_packs_epi16 _mm256_packs_epi16
#define _simd_packus_epi32 _mm256_packus_epi32