summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr/rasterizer/common
diff options
context:
space:
mode:
authorGeorge Kyriazis <[email protected]>2018-03-14 13:38:18 -0500
committerGeorge Kyriazis <[email protected]>2018-04-18 10:51:38 -0500
commit9103119cb36fbcfd591df8c722fa9941aaa9a911 (patch)
tree1b13dbc16efd88520235c489f95937dd68d4246a /src/gallium/drivers/swr/rasterizer/common
parent4c69823d150805c6f1d1ea212efa4e20558768bd (diff)
swr/rast: Permute work for simd16
Fix slow permutes in PA tri lists under SIMD16 emulation on AVX Added missing permute (interlane, immediate) to SIMDLIB Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/common')
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simd16intrin.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdintrin.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl6
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl7
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl6
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl14
6 files changed, 33 insertions, 2 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 019b26d8cfb..98a8b9b2f9f 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -138,6 +138,7 @@ typedef SIMD512 SIMD16;
#define _simd16_cmpeq_epi8 SIMD16::cmpeq_epi8
#define _simd16_cmpgt_epi8 SIMD16::cmpgt_epi8
+#define _simd16_permute_ps_i(a, i) SIMD16::permute_ps<i>(a)
#define _simd16_permute_ps SIMD16::permute_ps
#define _simd16_permute_epi32 SIMD16::permute_epi32
#define _simd16_sllv_epi32 SIMD16::sllv_epi32
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index fce360df9a7..b1471a97250 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -106,6 +106,7 @@ typedef SIMD256 SIMD;
#define _simd_cmpgt_epi16 SIMD::cmpgt_epi16
#define _simd_cmpeq_epi16 SIMD::cmpeq_epi16
#define _simd_movemask_epi8 SIMD::movemask_epi8
+#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
#define _simd_permute_ps SIMD::permute_ps
#define _simd_permute_epi32 SIMD::permute_epi32
#define _simd_srlv_epi32 SIMD::srlv_epi32
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
index 42b45528731..00c094a425a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
@@ -479,6 +479,12 @@ SIMD_EMU_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32
SIMD_EMU_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
SIMD_EMU_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+template<int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+{
+ return _mm256_permute_ps(a, ImmT);
+}
+
static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
{
Integer result;
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
index 9cd0a640025..96c24fff9da 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
@@ -174,6 +174,13 @@ SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and
SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+
+template<int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+{
+ return _mm256_permute_ps(a, ImmT);
+}
+
SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index f3a58f9e1cb..dfe19d3c04a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -433,6 +433,12 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
// SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
// SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
+template<int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+{
+ return _mm512_permute_ps(a, ImmT);
+}
+
static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
return _mm512_permutexvar_epi32(swiz, a);
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
index 44eba0b126b..5d5120af36a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
@@ -519,6 +519,16 @@ SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 a
SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+template<int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+{
+ return Float
+ {
+ SIMD256T::template permute_ps<ImmT>(a.v8[0]),
+ SIMD256T::template permute_ps<ImmT>(a.v8[1]),
+ };
+}
+
static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
{
return castps_si(permute_ps(castsi_ps(a), swiz));
@@ -587,10 +597,10 @@ template <int shuf>
static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const &a, Integer const &b)
{
return Integer
- {
+ {
SIMD256T::template permute2f128_si<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
SIMD256T::template permute2f128_si<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
- };
+ };
}
SIMD_IWRAPPER_2I_1(shuffle_epi32);