diff options
author | Tim Rowley <[email protected]> | 2016-12-21 17:59:44 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-01-06 10:04:50 -0600 |
commit | 1a77e0c48d7762deba5227cf12f3ffda89ff1882 (patch) | |
tree | 38ca3560d97193ac11e5af4044c731688b7e63c3 /src/gallium/drivers/swr/rasterizer/common | |
parent | bd22c3d41151ce265e61d64f9034928f83d3c959 (diff) |
swr: [rasterizer core] fix SIMD16 PackTraits pack() and unpack()
Fix routines for 8-bit and 16-bit formats used by optimized tile store.
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/common')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simd16intrin.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 36 |
2 files changed, 42 insertions, 2 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 94da225c651..22a125b05ad 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -769,8 +769,10 @@ INLINE simd16scalari _simd16_cvtepu16_epi32(simdscalari a)
     return result;
 }
 
-SIMD16_EMU_AVX512_2(simd16scalari, _simd_packus_epi32, _mm256_packus_epi32)
-SIMD16_EMU_AVX512_2(simd16scalari, _simd_packs_epi32, _mm256_packs_epi32)
+SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packus_epi16, _simd_packus_epi16)
+SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packs_epi16, _simd_packs_epi16)
+SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packus_epi32, _simd_packus_epi32)
+SIMD16_EMU_AVX512_2(simd16scalari, _simd16_packs_epi32, _simd_packs_epi32)
 
 INLINE simd16mask _simd16_int2mask(int mask)
 {
@@ -1080,6 +1082,8 @@ INLINE simd16scalari _simd16_cmpgt_epi8(simd16scalari a, simd16scalari b)
 #define _simd16_cvtepu8_epi16 _mm512_cvtepu8_epi16
 #define _simd16_cvtepu8_epi32 _mm512_cvtepu8_epi32
 #define _simd16_cvtepu16_epi32 _mm512_cvtepu16_epi32
+#define _simd16_packus_epi16 _mm512_packus_epi16
+#define _simd16_packs_epi16 _mm512_packs_epi16
 #define _simd16_packus_epi32 _mm512_packus_epi32
 #define _simd16_packs_epi32 _mm512_packs_epi32
 
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index 671e3b82a34..8926e665173 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -456,6 +456,40 @@ __m256i _simd_cvtepu16_epi32(__m128i a)
 }
 
 INLINE
+__m256i _simd_packus_epi16(__m256i a, __m256i b)
+{
+    __m128i alo = _mm256_extractf128_si256(a, 0);
+    __m128i ahi = _mm256_extractf128_si256(a, 1);
+
+    __m128i blo = _mm256_extractf128_si256(b, 0);
+    __m128i bhi = _mm256_extractf128_si256(b, 1);
+
+    __m128i resultlo = _mm_packus_epi16(alo, blo);
+    __m128i resulthi = _mm_packus_epi16(ahi, bhi);
+
+    __m256i result = _mm256_castsi128_si256(resultlo);
+
+    return _mm256_insertf128_si256(result, resulthi, 1);
+}
+
+INLINE
+__m256i _simd_packs_epi16(__m256i a, __m256i b)
+{
+    __m128i alo = _mm256_extractf128_si256(a, 0);
+    __m128i ahi = _mm256_extractf128_si256(a, 1);
+
+    __m128i blo = _mm256_extractf128_si256(b, 0);
+    __m128i bhi = _mm256_extractf128_si256(b, 1);
+
+    __m128i resultlo = _mm_packs_epi16(alo, blo);
+    __m128i resulthi = _mm_packs_epi16(ahi, bhi);
+
+    __m256i result = _mm256_castsi128_si256(resultlo);
+
+    return _mm256_insertf128_si256(result, resulthi, 1);
+}
+
+INLINE
 __m256i _simd_packus_epi32(__m256i a, __m256i b)
 {
     __m128i alo = _mm256_extractf128_si256(a, 0);
@@ -548,6 +582,8 @@ __m256i _simd_packs_epi32(__m256i a, __m256i b)
 #define _simd_cvtepu8_epi16 _mm256_cvtepu8_epi16
 #define _simd_cvtepu8_epi32 _mm256_cvtepu8_epi32
 #define _simd_cvtepu16_epi32 _mm256_cvtepu16_epi32
+#define _simd_packus_epi16 _mm256_packus_epi16
+#define _simd_packs_epi16 _mm256_packs_epi16
 #define _simd_packus_epi32 _mm256_packus_epi32
 #define _simd_packs_epi32 _mm256_packs_epi32
 