aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr/rasterizer/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/common')
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simd16intrin.h14
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdintrin.h21
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl15
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl10
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl4
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl21
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl12
7 files changed, 35 insertions, 62 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index a160ca2c5ed..019b26d8cfb 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -159,20 +159,10 @@ typedef SIMD512 SIMD16;
#define _simd16_packus_epi32 SIMD16::packus_epi32
#define _simd16_packs_epi32 SIMD16::packs_epi32
#define _simd16_cmplt_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
+#define _simd16_cmpeq_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
#define _simd16_int2mask(mask) simd16mask(mask)
#define _simd16_mask2int(mask) int(mask)
-
-// convert bitmask to vector mask
-SIMDINLINE simd16scalar vMask16(int32_t mask)
-{
- simd16scalari temp = _simd16_set1_epi32(mask);
-
- simd16scalari bits = _simd16_set_epi32(0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001);
-
- simd16scalari result = _simd16_cmplt_epi32(_simd16_setzero_si(), _simd16_and_si(temp, bits));
-
- return _simd16_castsi_ps(result);
-}
+#define _simd16_vmask_ps SIMD16::vmask_ps
#endif//ENABLE_AVX512_SIMD16
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index f95c109e6fe..f4b9e1055ce 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -181,6 +181,7 @@ typedef SIMD256 SIMD;
#define _simd_storeu2_si SIMD::storeu2_si
#define _simd_blendv_epi32 SIMD::blendv_epi32
+#define _simd_vmask_ps SIMD::vmask_ps
template<int mask> SIMDINLINE
SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer a, SIMD128::Integer b)
@@ -188,26 +189,6 @@ SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer a, SIMD128::Integer b)
return SIMD128::castps_si(SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
}
-// convert bitmask to vector mask
-SIMDINLINE
-SIMD256::Float vMask(int32_t mask)
-{
- SIMD256::Integer vec = SIMD256::set1_epi32(mask);
- const SIMD256::Integer bit = SIMD256::set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
- vec = SIMD256::and_si(vec, bit);
- vec = SIMD256::cmplt_epi32(SIMD256::setzero_si(), vec);
- return SIMD256::castsi_ps(vec);
-}
-
-SIMDINLINE
-SIMD256::Integer vMaski(int32_t mask)
-{
- SIMD256::Integer vec = SIMD256::set1_epi32(mask);
- const SIMD256::Integer bit = SIMD256::set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
- vec = SIMD256::and_si(vec, bit);
- return SIMD256::cmplt_epi32(SIMD256::setzero_si(), vec);
-}
-
SIMDINLINE
void _simd_mov(simdscalar &r, unsigned int rlane, simdscalar& s, unsigned int slane)
{
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
index 5bcedf39713..72327918937 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
@@ -519,6 +519,11 @@ static SIMDINLINE Float SIMDCALL set_ps(float in3, float in2, float in1, float i
return _mm_set_ps(in3, in2, in1, in0);
}
+static SIMDINLINE Integer SIMDCALL set_epi32(int in3, int in2, int in1, int in0)
+{
+ return _mm_set_epi32(in3, in2, in1, in0);
+}
+
template <int ImmT>
static SIMDINLINE float SIMDCALL extract_ps(Float a)
{
@@ -526,6 +531,16 @@ static SIMDINLINE float SIMDCALL extract_ps(Float a)
return *reinterpret_cast<float*>(&tmp);
}
+static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
+{
+ Integer vec = set1_epi32(mask);
+ const Integer bit = set_epi32(
+ 0x08, 0x04, 0x02, 0x01);
+ vec = and_si(vec, bit);
+ vec = cmplt_epi32(setzero_si(), vec);
+ return castsi_ps(vec);
+}
+
#undef SIMD_WRAPPER_1
#undef SIMD_WRAPPER_2
#undef SIMD_DWRAPPER_2
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
index 16eb5217cba..77086119e2f 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
@@ -741,6 +741,16 @@ static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer *phi, SIMD128Imp
_mm256_storeu2_m128i(&phi->v, &plo->v, src);
}
+static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
+{
+ Integer vec = set1_epi32(mask);
+ const Integer bit = set_epi32(
+ 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
+ vec = and_si(vec, bit);
+ vec = cmplt_epi32(setzero_si(), vec);
+ return castsi_ps(vec);
+}
+
#undef SIMD_WRAPPER_1
#undef SIMD_WRAPPER_2
#undef SIMD_DWRAPPER_2
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index 1f93da7345f..1001417704d 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -554,12 +554,12 @@ static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a)
{
- __mmask8 m = _mm512_cmplt_pd_mask(a, setzero_pd());
+ __mmask8 m = _mm512_test_epi64_mask(castpd_si(a), set1_epi32(-1));
return static_cast<uint32_t>(m);
}
static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a)
{
- __mmask16 m = _mm512_cmplt_ps_mask(a, setzero_ps());
+ __mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(-1));
return static_cast<uint32_t>(m);
}
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
index 310f1540065..17001be0674 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
@@ -29,9 +29,6 @@
//
//============================================================================
-static const int TARGET_SIMD_WIDTH = 16;
-using SIMD256T = SIMD256Impl::AVX2Impl;
-
#define SIMD_WRAPPER_1_(op, intrin) \
static SIMDINLINE Float SIMDCALL op(Float a) \
{\
@@ -135,24 +132,6 @@ using SIMD256T = SIMD256Impl::AVX2Impl;
}
#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-private:
- static SIMDINLINE Integer vmask(__mmask8 m)
- {
- return _mm512_maskz_set1_epi64(m, -1LL);
- }
- static SIMDINLINE Integer vmask(__mmask16 m)
- {
- return _mm512_maskz_set1_epi32(m, -1);
- }
- static SIMDINLINE Integer vmask(__mmask32 m)
- {
- return _mm512_maskz_set1_epi16(m, -1);
- }
- static SIMDINLINE Integer vmask(__mmask64 m)
- {
- return _mm512_maskz_set1_epi8(m, -1);
- }
-
public:
SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int)
SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
index a45429f4b6b..c414d75d42e 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
@@ -821,13 +821,11 @@ static SIMDINLINE Float SIMDCALL set_ps(
static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
{
- Integer vec = set1_epi32(mask);
- const Integer bit = set_epi32(
- 0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100,
- 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
- vec = and_si(vec, bit);
- vec = cmplt_epi32(setzero_si(), vec);
- return castsi_ps(vec);
+ return Float
+ {
+ SIMD256T::vmask_ps(mask),
+ SIMD256T::vmask_ps(mask >> TARGET_SIMD_WIDTH)
+ };
}
#undef SIMD_WRAPPER_1