diff options
author | Tim Rowley <[email protected]> | 2017-06-09 16:58:59 -0500 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-06-16 16:20:16 -0500 |
commit | 9b448da60ffb5aa807d9145bbac0fdbd580acea9 (patch) | |
tree | d8381f4928e61d8596d1f2a84a4239916f6eeef0 /src/gallium/drivers/swr/rasterizer/common | |
parent | 08a466aec0b1baf54a7ca7b0d7d43bb267e01841 (diff) |
swr/rast: Refactor includes to limit simdintrin.h usage
Reduces the files rebuilt after modifying simdintrin.h from
84 to 64.
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/common')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/intrin.h | 169 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simd16intrin.h | 52 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 87 |
3 files changed, 171 insertions, 137 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/intrin.h b/src/gallium/drivers/swr/rasterizer/common/intrin.h new file mode 100644 index 00000000000..f45b2e55880 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/intrin.h @@ -0,0 +1,169 @@ +/**************************************************************************** +* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ + +#ifndef __SWR_INTRIN_H__ +#define __SWR_INTRIN_H__ + +#include "os.h" + +#include <cassert> + +#include <emmintrin.h> +#include <immintrin.h> +#include <xmmintrin.h> + +#if KNOB_SIMD_WIDTH == 8 +typedef __m256 simdscalar; +typedef __m256i simdscalari; +typedef uint8_t simdmask; +#else +#error Unsupported vector width +#endif + +// simd vector +OSALIGNSIMD(union) simdvector +{ + simdscalar v[4]; + struct + { + simdscalar x, y, z, w; + }; + + simdscalar& operator[] (const int i) { return v[i]; } + const simdscalar& operator[] (const int i) const { return v[i]; } +}; + +#if ENABLE_AVX512_SIMD16 + +#if KNOB_SIMD16_WIDTH == 16 + +#if ENABLE_AVX512_EMULATION +struct simd16scalar +{ + __m256 lo; + __m256 hi; +}; +struct simd16scalard +{ + __m256d lo; + __m256d hi; +}; +struct simd16scalari +{ + __m256i lo; + __m256i hi; +}; +typedef uint16_t simd16mask; + +#else +typedef __m512 simd16scalar; +typedef __m512d simd16scalard; +typedef __m512i simd16scalari; +typedef __mmask16 simd16mask; +#endif//ENABLE_AVX512_EMULATION +#else +#error Unsupported vector width +#endif//KNOB_SIMD16_WIDTH == 16 + +#define _simd16_masklo(mask) ((mask) & 0xFF) +#define _simd16_maskhi(mask) (((mask) >> 8) & 0xFF) +#define _simd16_setmask(hi, lo) (((hi) << 8) | (lo)) + +#if defined(_WIN32) +#define SIMDAPI __vectorcall +#else +#define SIMDAPI +#endif + +OSALIGN(union, KNOB_SIMD16_BYTES) simd16vector +{ + simd16scalar v[4]; + struct + { + simd16scalar x, y, z, w; + }; + + simd16scalar& operator[] (const int i) { return v[i]; } + const simd16scalar& operator[] (const int i) const { return v[i]; } +}; + +#endif // ENABLE_AVX512_SIMD16 + +INLINE +UINT pdep_u32(UINT a, UINT mask) +{ +#if KNOB_ARCH >= KNOB_ARCH_AVX2 + return _pdep_u32(a, mask); +#else + UINT result = 0; + + // copied from http://wm.ite.pl/articles/pdep-soft-emu.html + // using bsf instead of funky loop + DWORD maskIndex; + while (_BitScanForward(&maskIndex, mask)) + { + // 1. isolate lowest set bit of mask + const UINT lowest = 1 << maskIndex; + + // 2. populate LSB from src + const UINT LSB = (UINT)((int)(a << 31) >> 31); + + // 3. copy bit from mask + result |= LSB & lowest; + + // 4. clear lowest bit + mask &= ~lowest; + + // 5. prepare for next iteration + a >>= 1; + } + + return result; +#endif +} + +INLINE +UINT pext_u32(UINT a, UINT mask) +{ +#if KNOB_ARCH >= KNOB_ARCH_AVX2 + return _pext_u32(a, mask); +#else + UINT result = 0; + DWORD maskIndex; + uint32_t currentBit = 0; + while (_BitScanForward(&maskIndex, mask)) + { + // 1. isolate lowest set bit of mask + const UINT lowest = 1 << maskIndex; + + // 2. copy bit from mask + result |= ((a & lowest) > 0) << currentBit++; + + // 3. clear lowest bit + mask &= ~lowest; + } + return result; +#endif +} + +#endif//__SWR_INTRIN_H__ diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h index e303ce59713..a822420ae37 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h @@ -26,58 +26,6 @@ #if ENABLE_AVX512_SIMD16 -#if KNOB_SIMD16_WIDTH == 16 - -#if ENABLE_AVX512_EMULATION -struct simd16scalar -{ - __m256 lo; - __m256 hi; -}; -struct simd16scalard -{ - __m256d lo; - __m256d hi; -}; -struct simd16scalari -{ - __m256i lo; - __m256i hi; -}; -typedef uint16_t simd16mask; - -#else -typedef __m512 simd16scalar; -typedef __m512d simd16scalard; -typedef __m512i simd16scalari; -typedef __mmask16 simd16mask; -#endif//ENABLE_AVX512_EMULATION -#else -#error Unsupported vector width -#endif//KNOB_SIMD16_WIDTH == 16 - -#define _simd16_masklo(mask) ((mask) & 0xFF) -#define _simd16_maskhi(mask) (((mask) >> 8) & 0xFF) -#define _simd16_setmask(hi, lo) (((hi) << 8) | (lo)) - -#if defined(_WIN32) -#define SIMDAPI __vectorcall -#else -#define SIMDAPI -#endif - -OSALIGN(union, KNOB_SIMD16_BYTES) simd16vector -{ - simd16scalar v[4]; - struct - { - simd16scalar x, y, z, w; - }; - - simd16scalar& operator[] (const int i) { return v[i]; } - const simd16scalar& operator[] (const int i) const { return v[i]; } -}; - #if ENABLE_AVX512_EMULATION #define SIMD16_EMU_AVX512_0(type, func, intrin) \ diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h index ed6e56b5e26..5ccb6c3ea95 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h @@ -24,34 +24,8 @@ #ifndef __SWR_SIMDINTRIN_H__ #define __SWR_SIMDINTRIN_H__ -#include "os.h" - -#include <cassert> - -#include <emmintrin.h> -#include <immintrin.h> -#include <xmmintrin.h> - -#if KNOB_SIMD_WIDTH == 8 -typedef __m256 simdscalar; -typedef __m256i simdscalari; -typedef uint8_t simdmask; -#else -#error Unsupported vector width -#endif - -// simd vector -OSALIGNSIMD(union) simdvector -{ - simdscalar v[4]; - struct - { - simdscalar x, y, z, w; - }; - - simdscalar& operator[] (const int i) { return v[i]; } - const simdscalar& operator[] (const int i) const { return v[i]; } -}; +#include "common/os.h" +#include "common/intrin.h" #if KNOB_SIMD_WIDTH == 8 #define _simd128_maskstore_ps _mm_maskstore_ps @@ -1210,63 +1184,6 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a) return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff))); } -INLINE -UINT pdep_u32(UINT a, UINT mask) -{ -#if KNOB_ARCH >= KNOB_ARCH_AVX2 - return _pdep_u32(a, mask); -#else - UINT result = 0; - - // copied from http://wm.ite.pl/articles/pdep-soft-emu.html - // using bsf instead of funky loop - DWORD maskIndex; - while (_BitScanForward(&maskIndex, mask)) - { - // 1. isolate lowest set bit of mask - const UINT lowest = 1 << maskIndex; - - // 2. populate LSB from src - const UINT LSB = (UINT)((int)(a << 31) >> 31); - - // 3. copy bit from mask - result |= LSB & lowest; - - // 4. clear lowest bit - mask &= ~lowest; - - // 5. prepare for next iteration - a >>= 1; - } - - return result; -#endif -} - -INLINE -UINT pext_u32(UINT a, UINT mask) -{ -#if KNOB_ARCH >= KNOB_ARCH_AVX2 - return _pext_u32(a, mask); -#else - UINT result = 0; - DWORD maskIndex; - uint32_t currentBit = 0; - while (_BitScanForward(&maskIndex, mask)) - { - // 1. isolate lowest set bit of mask - const UINT lowest = 1 << maskIndex; - - // 2. copy bit from mask - result |= ((a & lowest) > 0) << currentBit++; - - // 3. clear lowest bit - mask &= ~lowest; - } - return result; -#endif -} - #if ENABLE_AVX512_SIMD16 #include "simd16intrin.h" #endif//ENABLE_AVX512_SIMD16 |