diff options
Diffstat (limited to 'src/gallium')
18 files changed, 339 insertions, 323 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h index f4b9e1055ce..fce360df9a7 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h @@ -184,7 +184,7 @@ typedef SIMD256 SIMD; #define _simd_vmask_ps SIMD::vmask_ps template<int mask> SIMDINLINE -SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer a, SIMD128::Integer b) +SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const &a, SIMD128::Integer const &b) { return SIMD128::castps_si(SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b))); } @@ -242,7 +242,7 @@ void _simdvec_mov(simdvector &r, unsigned int rlane, simdvector& s, unsigned int ////////////////////////////////////////////////////////////////////////// /// @brief Compute plane equation vA * vX + vB * vY + vC -SIMDINLINE simdscalar vplaneps(simdscalar vA, simdscalar vB, simdscalar vC, simdscalar &vX, simdscalar &vY) +SIMDINLINE simdscalar vplaneps(simdscalar const &vA, simdscalar const &vB, simdscalar const &vC, simdscalar const &vX, simdscalar const &vY) { simdscalar vOut = _simd_fmadd_ps(vA, vX, vC); vOut = _simd_fmadd_ps(vB, vY, vOut); @@ -251,7 +251,7 @@ SIMDINLINE simdscalar vplaneps(simdscalar vA, simdscalar vB, simdscalar vC, simd ////////////////////////////////////////////////////////////////////////// /// @brief Compute plane equation vA * vX + vB * vY + vC -SIMDINLINE simd4scalar vplaneps(simd4scalar vA, simd4scalar vB, simd4scalar vC, simd4scalar &vX, simd4scalar &vY) +SIMDINLINE simd4scalar vplaneps(simd4scalar const &vA, simd4scalar const &vB, simd4scalar const &vC, simd4scalar const &vX, simd4scalar const &vY) { simd4scalar vOut = _simd128_fmadd_ps(vA, vX, vC); vOut = _simd128_fmadd_ps(vB, vY, vOut); @@ -264,7 +264,7 @@ SIMDINLINE simd4scalar vplaneps(simd4scalar vA, simd4scalar vB, simd4scalar vC, /// @param vJ - barycentric J /// @param pInterpBuffer - pointer to attribute barycentric coeffs template<UINT Attrib, UINT Comp, UINT numComponents = 4> -static SIMDINLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, const float *pInterpBuffer) +static SIMDINLINE simdscalar InterpolateComponent(simdscalar const &vI, simdscalar const &vJ, const float *pInterpBuffer) { const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp]; const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp]; @@ -299,7 +299,7 @@ static SIMDINLINE simdscalar InterpolateComponentFlat(const float *pInterpBuffer /// @param vJ - barycentric J /// @param pInterpBuffer - pointer to attribute barycentric coeffs template<UINT Attrib, UINT Comp, UINT numComponents = 4> -static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar vI, simd4scalar vJ, const float *pInterpBuffer) +static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const &vI, simd4scalar const &vJ, const float *pInterpBuffer) { const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp]; const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp]; @@ -315,13 +315,13 @@ static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar vI, simd4scalar v return vplaneps(vA, vB, vC, vI, vJ); } -static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar a) +static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const &a) { simd4scalari ai = SIMD128::castps_si(a); return SIMD128::castsi_ps(SIMD128::and_si(ai, SIMD128::set1_epi32(0x7fffffff))); } -static SIMDINLINE simdscalar _simd_abs_ps(simdscalar a) +static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const &a) { simdscalari ai = _simd_castps_si(a); return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff))); diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp index a4b5854d004..22d7da42d0a 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp @@ -225,7 +225,7 @@ struct SIMDBase : Traits::IsaImpl } static SIMDINLINE - void vec4_set1_vps(Vec4& r, Float s) + void vec4_set1_vps(Vec4& r, Float const &s) { r[0] = s; r[1] = s; @@ -285,7 +285,7 @@ struct SIMDBase : Traits::IsaImpl } static SIMDINLINE - void vec4_mul_ps(Vec4& r, const Vec4& v, Float s) + void vec4_mul_ps(Vec4& r, const Vec4& v, Float const &s) { r[0] = SIMD::mul_ps(v[0], s); r[1] = SIMD::mul_ps(v[1], s); @@ -303,7 +303,7 @@ struct SIMDBase : Traits::IsaImpl } static SIMDINLINE - void vec4_add_ps(Vec4& r, const Vec4& v0, Float s) + void vec4_add_ps(Vec4& r, const Vec4& v0, Float const &s) { r[0] = SIMD::add_ps(v0[0], s); r[1] = SIMD::add_ps(v0[1], s); @@ -321,7 +321,7 @@ struct SIMDBase : Traits::IsaImpl } static SIMDINLINE - void vec4_min_ps(Vec4& r, const Vec4& v0, Float s) + void vec4_min_ps(Vec4& r, const Vec4& v0, Float const &s) { r[0] = SIMD::min_ps(v0[0], s); r[1] = SIMD::min_ps(v0[1], s); @@ -330,7 +330,7 @@ struct SIMDBase : Traits::IsaImpl } static SIMDINLINE - void vec4_max_ps(Vec4& r, const Vec4& v0, Float s) + void vec4_max_ps(Vec4& r, const Vec4& v0, Float const &s) { r[0] = SIMD::max_ps(v0[0], s); r[1] = SIMD::max_ps(v0[1], s); diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl index 77086119e2f..42b45528731 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl @@ -31,78 +31,78 @@ using SIMD128T = SIMD128Impl::AVXImpl; //============================================================================ #define SIMD_WRAPPER_1(op) \ - static SIMDINLINE Float SIMDCALL op(Float a) \ + static SIMDINLINE Float SIMDCALL op(Float const &a) \ {\ return _mm256_##op(a);\ } #define SIMD_WRAPPER_2(op) \ - static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \ {\ return _mm256_##op(a, b);\ } #define SIMD_DWRAPPER_2(op) \ - static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + static SIMDINLINE Double SIMDCALL op(Double const &a, Double const &b) \ {\ return _mm256_##op(a, b);\ } #define SIMD_WRAPPER_2I(op) \ template<int ImmT>\ - static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \ {\ return _mm256_##op(a, b, ImmT);\ } #define SIMD_DWRAPPER_2I(op) \ template<int ImmT>\ - static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + static SIMDINLINE Double SIMDCALL op(Double const &a, Double const &b) \ {\ return _mm256_##op(a, b, ImmT);\ } #define SIMD_WRAPPER_3(op) \ - static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b, Float const &c) \ {\ return _mm256_##op(a, b, c);\ } #define SIMD_IWRAPPER_1(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a) \ {\ return _mm256_##op(a);\ } #define SIMD_IWRAPPER_2(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return _mm256_##op(a, b);\ } #define SIMD_IFWRAPPER_2(op, intrin) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return castps_si( intrin(castsi_ps(a), castsi_ps(b)) );\ } #define SIMD_IFWRAPPER_2I(op, intrin) \ template<int ImmT> \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return castps_si( intrin(castsi_ps(a), castsi_ps(b), ImmT) );\ } #define SIMD_IWRAPPER_2I_(op, intrin) \ template<int ImmT>\ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return _mm256_##intrin(a, b, ImmT);\ } #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) #define SIMD_IWRAPPER_3(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b, Integer c) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b, Integer const &c) \ {\ return _mm256_##op(a, b, c);\ } @@ -110,7 +110,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; // emulated integer simd #define SIMD_EMU_IWRAPPER_1(op) \ static SIMDINLINE \ - Integer SIMDCALL op(Integer a)\ + Integer SIMDCALL op(Integer const &a)\ {\ return Integer\ {\ @@ -120,7 +120,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; } #define SIMD_EMU_IWRAPPER_1L(op, shift) \ static SIMDINLINE \ - Integer SIMDCALL op(Integer a)\ + Integer SIMDCALL op(Integer const &a)\ {\ return Integer \ {\ @@ -129,7 +129,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; };\ }\ static SIMDINLINE \ - Integer SIMDCALL op(SIMD128Impl::Integer a)\ + Integer SIMDCALL op(SIMD128Impl::Integer const &a)\ {\ return Integer \ {\ @@ -140,7 +140,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_EMU_IWRAPPER_1I(op) \ template <int ImmT> static SIMDINLINE \ - Integer SIMDCALL op(Integer a)\ + Integer SIMDCALL op(Integer const &a)\ {\ return Integer\ {\ @@ -151,7 +151,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_EMU_IWRAPPER_2(op) \ static SIMDINLINE \ - Integer SIMDCALL op(Integer a, Integer b)\ + Integer SIMDCALL op(Integer const &a, Integer const &b)\ {\ return Integer\ {\ @@ -162,7 +162,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_EMU_IWRAPPER_2I(op) \ template <int ImmT> static SIMDINLINE \ - Integer SIMDCALL op(Integer a, Integer b)\ + Integer SIMDCALL op(Integer const &a, Integer const &b)\ {\ return Integer\ {\ @@ -177,12 +177,12 @@ using SIMD128T = SIMD128Impl::AVXImpl; SIMD_WRAPPER_2(add_ps); // return a + b SIMD_WRAPPER_2(div_ps); // return a / b -static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c) // return (a * b) + c +static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float const &c) // return (a * b) + c { return add_ps(mul_ps(a, b), c); } -static SIMDINLINE Float SIMDCALL fmsub_ps(Float a, Float b, Float c) // return (a * b) - c +static SIMDINLINE Float SIMDCALL fmsub_ps(Float const &a, Float const &b, Float const &c) // return (a * b) - c { return sub_ps(mul_ps(a, b), c); } @@ -195,13 +195,13 @@ SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a) SIMD_WRAPPER_2(sub_ps); // return a - b template <RoundMode RMT> -static SIMDINLINE Float SIMDCALL round_ps(Float a) +static SIMDINLINE Float SIMDCALL round_ps(Float const &a) { return _mm256_round_ps(a, static_cast<int>(RMT)); } -static SIMDINLINE Float SIMDCALL ceil_ps(Float a) { return round_ps<RoundMode::CEIL_NOEXC>(a); } -static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); } +static SIMDINLINE Float SIMDCALL ceil_ps(Float const &a) { return round_ps<RoundMode::CEIL_NOEXC>(a); } +static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); } //----------------------------------------------------------------------- // Integer (various width) arithmetic operations @@ -243,7 +243,7 @@ SIMD_EMU_IWRAPPER_2(xor_si); // return a ^ b (int) //----------------------------------------------------------------------- SIMD_EMU_IWRAPPER_1I(slli_epi32); // return a << ImmT -static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vCount) // return a << b (uint32) +static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer const &vA, Integer const &vCount) // return a << b (uint32) { int32_t aHi, aLow, countHi, countLow; __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1)); @@ -302,12 +302,12 @@ SIMD_EMU_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32) SIMD_EMU_IWRAPPER_1I(srli_si); // return a >> (ImmT*8) (uint) template<int ImmT> // same as srli_si, but with Float cast to int -static SIMDINLINE Float SIMDCALL srlisi_ps(Float a) +static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a) { return castsi_ps(srli_si<ImmT>(castps_si(a))); } -static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vCount) // return a >> b (uint32) +static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer const &vA, Integer const &vCount) // return a >> b (uint32) { int32_t aHi, aLow, countHi, countLow; __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1)); @@ -366,37 +366,37 @@ static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vCount) // ret //----------------------------------------------------------------------- // Conversion operations //----------------------------------------------------------------------- -static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a) +static SIMDINLINE Float SIMDCALL castpd_ps(Double const &a) // return *(Float*)(&a) { return _mm256_castpd_ps(a); } -static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a) +static SIMDINLINE Integer SIMDCALL castps_si(Float const &a) // return *(Integer*)(&a) { return _mm256_castps_si256(a); } -static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a) +static SIMDINLINE Double SIMDCALL castsi_pd(Integer const &a) // return *(Double*)(&a) { return _mm256_castsi256_pd(a); } -static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a) +static SIMDINLINE Double SIMDCALL castps_pd(Float const &a) // return *(Double*)(&a) { return _mm256_castps_pd(a); } -static SIMDINLINE Integer SIMDCALL castpd_si(Double a) // return *(Integer*)(&a) +static SIMDINLINE Integer SIMDCALL castpd_si(Double const &a) // return *(Integer*)(&a) { return _mm256_castpd_si256(a); } -static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a) +static SIMDINLINE Float SIMDCALL castsi_ps(Integer const &a) // return *(Float*)(&a) { return _mm256_castsi256_ps(a); } -static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a (int32 --> float) +static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer const &a) // return (float)a (int32 --> float) { return _mm256_cvtepi32_ps(a); } @@ -407,12 +407,12 @@ SIMD_EMU_IWRAPPER_1L(cvtepu16_epi32, 8); // return (int32)a ( SIMD_EMU_IWRAPPER_1L(cvtepu16_epi64, 4); // return (int64)a (uint16 --> int64) SIMD_EMU_IWRAPPER_1L(cvtepu32_epi64, 8); // return (int64)a (uint32 --> int64) -static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32) +static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float const &a) // return (int32)a (float --> int32) { return _mm256_cvtps_epi32(a); } -static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32) +static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a) // return (int32)a (rnd_to_zero(float) --> int32) { return _mm256_cvttps_epi32(a); } @@ -421,16 +421,16 @@ static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a) // return (in // Comparison operations //----------------------------------------------------------------------- template<CompareType CmpTypeT> -static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b +static SIMDINLINE Float SIMDCALL cmp_ps(Float const &a, Float const &b) // return a (CmpTypeT) b { return _mm256_cmp_ps(a, b, static_cast<const int>(CmpTypeT)); } -static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b) { return cmp_ps<CompareType::LT_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b) { return cmp_ps<CompareType::GT_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<CompareType::EQ_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmplt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LT_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GT_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::EQ_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpge_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GE_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmple_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LE_OQ>(a, b); } SIMD_EMU_IWRAPPER_2(cmpeq_epi8); // return a == b (int8) SIMD_EMU_IWRAPPER_2(cmpeq_epi16); // return a == b (int16) @@ -442,12 +442,12 @@ SIMD_EMU_IWRAPPER_2(cmpgt_epi32); // return a > b (int32) SIMD_EMU_IWRAPPER_2(cmpgt_epi64); // return a > b (int64) SIMD_EMU_IWRAPPER_2(cmplt_epi32); // return a < b (int32) -static SIMDINLINE bool SIMDCALL testz_ps(Float a, Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float) +static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b) // return all_lanes_zero(a & b) ? 1 : 0 (float) { return 0 != _mm256_testz_ps(a, b); } -static SIMDINLINE bool SIMDCALL testz_si(Integer a, Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int) +static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int) { return 0 != _mm256_testz_si256(a, b); } @@ -459,12 +459,12 @@ SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float) SIMD_IFWRAPPER_2I(blend_epi32, _mm256_blend_ps); // return ImmT ? b : a (int32) SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float) -static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask) // return mask ? b : a (int) +static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Float const &mask) // return mask ? b : a (int) { return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask)); } -static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Integer mask) // return mask ? b : a (int) +static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Integer const &mask) // return mask ? b : a (int) { return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask))); } @@ -479,7 +479,7 @@ SIMD_EMU_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 SIMD_EMU_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 SIMD_EMU_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 -static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (int32) +static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32) { Integer result; @@ -496,7 +496,7 @@ static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // ret return result; } -static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float) +static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float) { Float result; @@ -521,7 +521,7 @@ SIMD_IWRAPPER_2I_(permute2f128_si, permute2f128_si256); SIMD_EMU_IWRAPPER_1I(shuffle_epi32); template<int ImmT> -static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b) +static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const &a, Integer const &b) { return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b))); } @@ -545,7 +545,7 @@ SIMD_WRAPPER_2(unpacklo_ps); // Load / store operations //----------------------------------------------------------------------- template<ScaleFactor ScaleT> -static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) +static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) { uint32_t *pOffsets = (uint32_t*)&idx; Float vResult; @@ -587,7 +587,7 @@ static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (s // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old template<ScaleFactor ScaleT> -static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask) +static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask) { uint32_t *pOffsets = (uint32_t*)&idx; Float vResult = old; @@ -605,22 +605,22 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In return vResult; } -static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) +static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer const &mask, Float const &src) { _mm256_maskstore_ps(p, mask, src); } -static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const &a) { return SIMD128T::movemask_epi8(a.v4[0]) | (SIMD128T::movemask_epi8(a.v4[1]) << 16); } -static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a) +static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const &a) { return static_cast<uint32_t>(_mm256_movemask_pd(a)); } -static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a) +static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const &a) { return static_cast<uint32_t>(_mm256_movemask_ps(a)); } @@ -650,17 +650,17 @@ static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer) return _mm256_setzero_si256(); } -static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory) +static SIMDINLINE void SIMDCALL store_ps(float *p, Float const &a) // *p = a (stores all elements contiguously in memory) { _mm256_store_ps(p, a); } -static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a +static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer const &a) // *p = a { _mm256_store_si256(&p->v, a); } -static SIMDINLINE void SIMDCALL stream_ps(float *p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache) +static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a) // *p = a (same as store_ps, but doesn't keep memory in cache) { _mm256_stream_ps(p, a); } @@ -675,37 +675,37 @@ static SIMDINLINE Float SIMDCALL broadcast_ps(SIMD128Impl::Float const *p) } template<int ImmT> -static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double a) +static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double const &a) { return _mm256_extractf128_pd(a, ImmT); } template<int ImmT> -static SIMDINLINE SIMD128Impl::Float SIMDCALL extractf128_ps(Float a) +static SIMDINLINE SIMD128Impl::Float SIMDCALL extractf128_ps(Float const &a) { return _mm256_extractf128_ps(a, ImmT); } template<int ImmT> -static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer a) +static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer const &a) { return _mm256_extractf128_si256(a, ImmT); } template<int ImmT> -static SIMDINLINE Double SIMDCALL insertf128_pd(Double a, SIMD128Impl::Double b) +static SIMDINLINE Double SIMDCALL insertf128_pd(Double const &a, SIMD128Impl::Double const &b) { return _mm256_insertf128_pd(a, b, ImmT); } template<int ImmT> -static SIMDINLINE Float SIMDCALL insertf128_ps(Float a, SIMD128Impl::Float b) +static SIMDINLINE Float SIMDCALL insertf128_ps(Float const &a, SIMD128Impl::Float const &b) { return _mm256_insertf128_ps(a, b, ImmT); } template<int ImmT> -static SIMDINLINE Integer SIMDCALL insertf128_si(Integer a, SIMD128Impl::Integer b) +static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const &a, SIMD128Impl::Integer const &b) { return _mm256_insertf128_si256(a, b, ImmT); } @@ -736,7 +736,7 @@ static SIMDINLINE Float SIMDCALL set_ps(float i7, float i6, float i5, float i4, return _mm256_set_ps(i7, i6, i5, i4, i3, i2, i1, i0); } -static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer *phi, SIMD128Impl::Integer *plo, Integer src) +static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer *phi, SIMD128Impl::Integer *plo, Integer const &src) { _mm256_storeu2_m128i(&phi->v, &plo->v, src); } diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl index 0a812039300..9cd0a640025 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl @@ -33,53 +33,53 @@ //============================================================================ #define SIMD_IWRAPPER_1(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a) \ {\ return _mm256_##op(a);\ } #define SIMD_IWRAPPER_1L(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a) \ {\ return _mm256_##op(_mm256_castsi256_si128(a));\ }\ #define SIMD_IWRAPPER_1I(op) \ template<int ImmT> \ - static SIMDINLINE Integer SIMDCALL op(Integer a) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a) \ {\ return _mm256_##op(a, ImmT);\ } #define SIMD_IWRAPPER_1I_(op, intrin) \ template<int ImmT> \ - static SIMDINLINE Integer SIMDCALL op(Integer a) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a) \ {\ return _mm256_##intrin(a, ImmT);\ } #define SIMD_IWRAPPER_2_(op, intrin) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return _mm256_##intrin(a, b);\ } #define SIMD_IWRAPPER_2(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return _mm256_##op(a, b);\ } #define SIMD_IWRAPPER_2I(op) \ template<int ImmT> \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return _mm256_##op(a, b, ImmT);\ } #define SIMD_IWRAPPER_2I(op) \ template<int ImmT>\ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return _mm256_##op(a, b, ImmT);\ } @@ -87,7 +87,7 @@ //----------------------------------------------------------------------- // Floating point arithmetic operations //----------------------------------------------------------------------- -static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c) // return (a * b) + c +static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float const &c) // return (a * b) + c { return _mm256_fmadd_ps(a, b, c); } @@ -134,7 +134,7 @@ SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32) SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint) template<int ImmT> // same as srli_si, but with Float cast to int -static SIMDINLINE Float SIMDCALL srlisi_ps(Float a) +static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a) { return castsi_ps(srli_si<ImmT>(castps_si(a))); } @@ -161,7 +161,7 @@ SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16) SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32) SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64) -static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32) +static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const &a, Integer const &b) // return a < b (int32) { return cmpgt_epi32(b, a); } @@ -176,14 +176,14 @@ SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32); -static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float) +static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float) { return _mm256_permutevar8x32_ps(a, swiz); } SIMD_IWRAPPER_1I(shuffle_epi32); template<int ImmT> -static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b) +static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const &a, Integer const &b) { return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b))); } @@ -201,21 +201,21 @@ SIMD_IWRAPPER_2(unpacklo_epi8); // Load / store operations //----------------------------------------------------------------------- template<ScaleFactor ScaleT> -static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) +static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) { return _mm256_i32gather_ps(p, idx, static_cast<int>(ScaleT)); } // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old template<ScaleFactor ScaleT> -static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask) +static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask) { // g++ in debug mode needs the explicit .v suffix instead of relying on operator __m256() // Only for this intrinsic - not sure why. :( return _mm256_mask_i32gather_ps(old.v, p, idx.v, mask.v, static_cast<int>(ScaleT)); } -static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const &a) { return static_cast<uint32_t>(_mm256_movemask_epi8(a)); } diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl index c414d75d42e..d6af7b1c641 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl @@ -32,7 +32,7 @@ static const int TARGET_SIMD_WIDTH = 8; using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_WRAPPER_1(op) \ - static SIMDINLINE Float SIMDCALL op(Float a) \ + static SIMDINLINE Float SIMDCALL op(Float const &a) \ {\ return Float\ {\ @@ -42,7 +42,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; } #define SIMD_WRAPPER_2(op) \ - static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \ {\ return Float\ {\ @@ -53,7 +53,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_WRAPPER_2I(op) \ template<int ImmT>\ - static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \ {\ return Float\ {\ @@ -64,7 +64,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_WRAPPER_2I_1(op) \ template<int ImmT>\ - static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \ {\ return Float\ {\ @@ -74,7 +74,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; } #define SIMD_WRAPPER_3(op) \ - static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b, Float const &c) \ {\ return Float\ {\ @@ -84,7 +84,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; } #define SIMD_IWRAPPER_1(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a) \ {\ return Integer\ {\ @@ -94,7 +94,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; } #define SIMD_IWRAPPER_2(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return Integer\ {\ @@ -105,7 +105,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_IWRAPPER_2I(op) \ template<int ImmT>\ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return Integer\ {\ @@ -116,7 +116,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_IWRAPPER_2I_1(op) \ template<int ImmT>\ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return Integer\ {\ @@ -127,7 +127,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; #define SIMD_IWRAPPER_2I_2(op) \ template<int ImmT>\ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \ {\ return Integer\ {\ @@ -137,7 +137,7 @@ using SIMD128T = SIMD128Impl::AVXImpl; } #define SIMD_IWRAPPER_3(op) \ - static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b, Integer c) \ + static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b, Integer const &c) \ {\ return Integer\ {\ @@ -161,7 +161,7 @@ SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a) SIMD_WRAPPER_2(sub_ps); // return a - b template <RoundMode RMT> -static SIMDINLINE Float SIMDCALL round_ps(Float a) +static SIMDINLINE Float SIMDCALL round_ps(Float const &a) { return Float { @@ -170,8 +170,8 @@ static SIMDINLINE Float SIMDCALL round_ps(Float a) }; } -static SIMDINLINE Float SIMDCALL ceil_ps(Float a) { return round_ps<RoundMode::CEIL_NOEXC>(a); } -static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); } +static SIMDINLINE Float SIMDCALL ceil_ps(Float const &a) { return round_ps<RoundMode::CEIL_NOEXC>(a); } +static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); } //----------------------------------------------------------------------- // Integer (various width) arithmetic operations @@ -212,7 +212,7 @@ SIMD_IWRAPPER_2(xor_si); // return a ^ b (int) // Shift operations //----------------------------------------------------------------------- template<int ImmT> -static SIMDINLINE Integer SIMDCALL slli_epi32(Integer a) // return a << ImmT +static SIMDINLINE Integer SIMDCALL slli_epi32(Integer const &a) // return a << ImmT { return Integer { @@ -224,7 +224,7 @@ static SIMDINLINE Integer SIMDCALL slli_epi32(Integer a) // return a << Imm SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32) template<int ImmT> -static SIMDINLINE Integer SIMDCALL srai_epi32(Integer a) // return a >> ImmT (int32) +static SIMDINLINE Integer SIMDCALL srai_epi32(Integer const &a) // return a >> ImmT (int32) { return Integer { @@ -234,7 +234,7 @@ static SIMDINLINE Integer SIMDCALL srai_epi32(Integer a) // return a >> Imm } template<int ImmT> -static SIMDINLINE Integer SIMDCALL srli_epi32(Integer a) // return a >> ImmT (uint32) +static SIMDINLINE Integer SIMDCALL srli_epi32(Integer const &a) // return a >> ImmT (uint32) { return Integer { @@ -244,7 +244,7 @@ static SIMDINLINE Integer SIMDCALL srli_epi32(Integer a) // return a >> Imm } template<int ImmT> // for each 128-bit lane: -static SIMDINLINE Integer SIMDCALL srli_si(Integer a) // return a >> (ImmT*8) (uint) +static SIMDINLINE Integer SIMDCALL srli_si(Integer const &a) // return a >> (ImmT*8) (uint) { return Integer { @@ -253,7 +253,7 @@ static SIMDINLINE Integer SIMDCALL srli_si(Integer a) // return a >> (I }; } template<int ImmT> -static SIMDINLINE Float SIMDCALL srlisi_ps(Float a) // same as srli_si, but with Float cast to int +static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a) // same as srli_si, but with Float cast to int { return Float { @@ -267,7 +267,7 @@ SIMD_IWRAPPER_2(srlv_epi32); // return a >> b //----------------------------------------------------------------------- // Conversion operations //----------------------------------------------------------------------- -static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a) +static SIMDINLINE Float SIMDCALL castpd_ps(Double const &a) // return *(Float*)(&a) { return Float { @@ -276,7 +276,7 @@ static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Fl }; } -static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a) +static SIMDINLINE Integer SIMDCALL castps_si(Float const &a) // return *(Integer*)(&a) { return Integer { @@ -285,7 +285,7 @@ static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(I }; } -static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a) +static SIMDINLINE Double SIMDCALL castsi_pd(Integer const &a) // return *(Double*)(&a) { return Double { @@ -294,7 +294,7 @@ static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *( }; } -static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a) +static SIMDINLINE Double SIMDCALL castps_pd(Float const &a) // return *(Double*)(&a) { return Double { @@ -303,7 +303,7 @@ static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a) }; } -static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a) +static SIMDINLINE Float SIMDCALL castsi_ps(Integer const &a) // return *(Float*)(&a) { return Float { @@ -312,7 +312,7 @@ static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(F }; } -static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a (int32 --> float) +static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer const &a) // return (float)a (int32 --> float) { return Float { @@ -321,7 +321,7 @@ static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (fl }; } -static SIMDINLINE Integer SIMDCALL cvtepu8_epi16(SIMD256Impl::Integer a) // return (int16)a (uint8 --> int16) +static SIMDINLINE Integer SIMDCALL cvtepu8_epi16(SIMD256Impl::Integer const &a) // return (int16)a (uint8 --> int16) { return Integer { @@ -330,7 +330,7 @@ static SIMDINLINE Integer SIMDCALL cvtepu8_epi16(SIMD256Impl::Integer a) }; } -static SIMDINLINE Integer SIMDCALL cvtepu8_epi32(SIMD256Impl::Integer a) // return (int32)a (uint8 --> int32) +static SIMDINLINE Integer SIMDCALL cvtepu8_epi32(SIMD256Impl::Integer const &a) // return (int32)a (uint8 --> int32) { return Integer { @@ -339,7 +339,7 @@ static SIMDINLINE Integer SIMDCALL cvtepu8_epi32(SIMD256Impl::Integer a) }; } -static SIMDINLINE Integer SIMDCALL cvtepu16_epi32(SIMD256Impl::Integer a) // return (int32)a (uint16 --> int32) +static SIMDINLINE Integer SIMDCALL cvtepu16_epi32(SIMD256Impl::Integer const &a) // return (int32)a (uint16 --> int32) { return Integer { @@ -348,7 +348,7 @@ static SIMDINLINE Integer SIMDCALL cvtepu16_epi32(SIMD256Impl::Integer a) }; } -static SIMDINLINE Integer SIMDCALL cvtepu16_epi64(SIMD256Impl::Integer a) // return (int64)a (uint16 --> int64) +static SIMDINLINE Integer SIMDCALL cvtepu16_epi64(SIMD256Impl::Integer const &a) // return (int64)a (uint16 --> int64) { return Integer { @@ -357,7 +357,7 @@ static SIMDINLINE Integer SIMDCALL cvtepu16_epi64(SIMD256Impl::Integer a) }; } -static SIMDINLINE Integer SIMDCALL cvtepu32_epi64(SIMD256Impl::Integer a) // return (int64)a (uint32 --> int64) +static SIMDINLINE Integer SIMDCALL cvtepu32_epi64(SIMD256Impl::Integer const &a) // return (int64)a (uint32 --> int64) { return Integer { @@ -366,7 +366,7 @@ static SIMDINLINE Integer SIMDCALL cvtepu32_epi64(SIMD256Impl::Integer a) }; } -static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32) +static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float const &a) // return (int32)a (float --> int32) { return Integer { @@ -375,7 +375,7 @@ static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (in }; } -static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32) +static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a) // return (int32)a (rnd_to_zero(float) --> int32) { return Integer { @@ -388,7 +388,7 @@ static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a) // return (in // Comparison operations //----------------------------------------------------------------------- template<CompareType CmpTypeT> -static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b +static SIMDINLINE Float SIMDCALL cmp_ps(Float const &a, Float const &b) // return a (CmpTypeT) b { return Float { @@ -396,15 +396,15 @@ static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) SIMD256T::template cmp_ps<CmpTypeT>(a.v8[1], b.v8[1]), }; } -static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b) { return cmp_ps<CompareType::LT_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b) { return cmp_ps<CompareType::GT_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<CompareType::EQ_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); } -static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmplt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LT_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GT_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::EQ_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmpge_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GE_OQ>(a, b); } +static SIMDINLINE Float SIMDCALL cmple_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LE_OQ>(a, b); } template<CompareType CmpTypeT> -static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float a, Float b) +static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float const &a, Float const &b) { return static_cast<Mask>(movemask_ps(cmp_ps<CmpTypeT>(a, b))); } @@ -420,13 +420,13 @@ SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32) SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64) SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32) -static SIMDINLINE bool SIMDCALL testz_ps(Float a, Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float) +static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b) // return all_lanes_zero(a & b) ? 1 : 0 (float) { return 0 != (SIMD256T::testz_ps(a.v8[0], b.v8[0]) & SIMD256T::testz_ps(a.v8[1], b.v8[1])); } -static SIMDINLINE int SIMDCALL testz_si(Integer a, Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int) +static SIMDINLINE int SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int) { return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) & SIMD256T::testz_si(a.v8[1], b.v8[1])); @@ -438,7 +438,7 @@ static SIMDINLINE int SIMDCALL testz_si(Integer a, Integer b) // return all_lan SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float) SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32) SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float) -static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask) // return mask ? b : a (int) +static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Float const &mask) // return mask ? b : a (int) { return Integer { @@ -447,7 +447,7 @@ static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask }; } -static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Integer mask) // return mask ? b : a (int) +static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Integer const &mask) // return mask ? b : a (int) { return Integer { @@ -467,48 +467,51 @@ static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p) // return } template<int imm> -static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float a) +static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float const &a) { SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm); return a.v8[imm]; } template<int imm> -static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double a) +static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double const &a) { SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm); return a.v8[imm]; } template<int imm> -static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer a) +static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer const &a) { SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm); return a.v8[imm]; } template<int imm> -static SIMDINLINE Float SIMDCALL insert_ps(Float a, SIMD256Impl::Float b) +static SIMDINLINE Float SIMDCALL insert_ps(Float const &a, SIMD256Impl::Float const &b) { SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm); - a.v8[imm] = b; - return a; + Float r = a; + r.v8[imm] = b; + return r; } template<int imm> -static SIMDINLINE Double SIMDCALL insert_pd(Double a, SIMD256Impl::Double b) +static SIMDINLINE Double SIMDCALL insert_pd(Double const &a, SIMD256Impl::Double const &b) { SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm); - a.v8[imm] = b; - return a; + Double r = a; + r.v8[imm] = b; + return r; } template<int imm> -static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b) +static SIMDINLINE Integer SIMDCALL insert_si(Integer const &a, SIMD256Impl::Integer const &b) { SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm); - a.v8[imm] = b; - return a; + Integer r = a; + r.v8[imm] = b; + return r; } SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 @@ -516,7 +519,7 @@ SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 a SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 -static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (int32) +static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32) { Integer result; @@ -533,7 +536,7 @@ static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // ret return result; } -static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float) +static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float) { Float result; @@ -573,7 +576,7 @@ static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return // AVX instructions for emulation. // template <int shuf> -static SIMDINLINE Float SIMDCALL permute2f128_ps(Float a, Float b) +static SIMDINLINE Float SIMDCALL permute2f128_ps(Float const &a, Float const &b) { return Float { @@ -583,7 +586,7 @@ static SIMDINLINE Float SIMDCALL permute2f128_ps(Float a, Float b) } template <int shuf> -static SIMDINLINE Double SIMDCALL permute2f128_pd(Double a, Double b) +static SIMDINLINE Double SIMDCALL permute2f128_pd(Double const &a, Double const &b) { return Double { @@ -593,7 +596,7 @@ static SIMDINLINE Double SIMDCALL permute2f128_pd(Double a, Double b) } template <int shuf> -static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer a, Integer b) +static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const &a, Integer const &b) { return Integer { @@ -624,7 +627,7 @@ SIMD_WRAPPER_2(unpacklo_ps); // Load / store operations //----------------------------------------------------------------------- template<ScaleFactor ScaleT> -static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) +static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) { return Float { @@ -676,7 +679,7 @@ static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (s // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old template<ScaleFactor ScaleT> -static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask) +static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask) { return Float { @@ -685,13 +688,13 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In }; } -static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) +static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer const &mask, Float const &src) { SIMD256T::maskstore_ps(p, mask.v8[0], src.v8[0]); SIMD256T::maskstore_ps(p + TARGET_SIMD_WIDTH, mask.v8[1], src.v8[1]); } -static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a) +static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer const &a) { uint64_t mask = static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[0])); mask |= static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[1])) << (TARGET_SIMD_WIDTH * 4); @@ -699,14 +702,14 @@ static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a) return mask; } -static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a) +static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const &a) { uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[0])); mask |= static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[1])) << (TARGET_SIMD_WIDTH / 2); return mask; } -static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a) +static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const &a) { uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[0])); mask |= static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[1])) << TARGET_SIMD_WIDTH; @@ -759,19 +762,19 @@ static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer) }; } -static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory) +static SIMDINLINE void SIMDCALL store_ps(float *p, Float const &a) // *p = a (stores all elements contiguously in memory) { SIMD256T::store_ps(p, a.v8[0]); SIMD256T::store_ps(p + TARGET_SIMD_WIDTH, a.v8[1]); } -static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a +static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer const &a) // *p = a { SIMD256T::store_si(&p->v8[0], a.v8[0]); SIMD256T::store_si(&p->v8[1], a.v8[1]); } -static SIMDINLINE void SIMDCALL stream_ps(float *p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache) +static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a) // *p = a (same as store_ps, but doesn't keep memory in cache) { SIMD256T::stream_ps(p, a.v8[0]); SIMD256T::stream_ps(p + TARGET_SIMD_WIDTH, a.v8[1]); diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp index 236257fed84..0fad0e1fd8c 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp @@ -153,9 +153,9 @@ namespace SIMDImpl Float z; Float w; }; - SIMDINLINE Float& operator[] (const int i) { return v[i]; } - SIMDINLINE Float const & operator[] (const int i) const { return v[i]; } - SIMDINLINE Vec4& operator=(Vec4 const & in) + SIMDINLINE Float& SIMDCALL operator[] (const int i) { return v[i]; } + SIMDINLINE Float const & SIMDCALL operator[] (const int i) const { return v[i]; } + SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const & in) { v[0] = in.v[0]; v[1] = in.v[1]; @@ -171,9 +171,9 @@ namespace SIMDImpl { SIMDINLINE Float() = default; SIMDINLINE Float(__m128 in) : v(in) {} - SIMDINLINE Float& operator=(__m128 in) { v = in; return *this; } - SIMDINLINE Float& operator=(Float const & in) { v = in.v; return *this; } - SIMDINLINE operator __m128() const { return v; } + SIMDINLINE Float& SIMDCALL operator=(__m128 in) { v = in; return *this; } + SIMDINLINE Float& SIMDCALL operator=(Float const & in) { v = in.v; return *this; } + SIMDINLINE SIMDCALL operator __m128() const { return v; } SIMDALIGN(__m128, 16) v; }; @@ -182,9 +182,10 @@ namespace SIMDImpl { SIMDINLINE Integer() = default; SIMDINLINE Integer(__m128i in) : v(in) {} - SIMDINLINE Integer& operator=(__m128i in) { v = in; return *this; } - SIMDINLINE Integer& operator=(Integer const & in) { v = in.v; return *this; } - SIMDINLINE operator __m128i() const { return v; } + SIMDINLINE Integer& SIMDCALL operator=(__m128i in) { v = in; return *this; } + SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { v = in.v; return *this; } + SIMDINLINE SIMDCALL operator __m128i() const { return v; } + SIMDALIGN(__m128i, 16) v; }; @@ -192,9 +193,10 @@ namespace SIMDImpl { SIMDINLINE Double() = default; SIMDINLINE Double(__m128d in) : v(in) {} - SIMDINLINE Double& operator=(__m128d in) { v = in; return *this; } - SIMDINLINE Double& operator=(Double const & in) { v = in.v; return *this; } - SIMDINLINE operator __m128d() const { return v; } + SIMDINLINE Double& SIMDCALL operator=(__m128d in) { v = in; return *this; } + SIMDINLINE Double& SIMDCALL operator=(Double const & in) { v = in.v; return *this; } + SIMDINLINE SIMDCALL operator __m128d() const { return v; } + SIMDALIGN(__m128d, 16) v; }; @@ -210,13 +212,13 @@ namespace SIMDImpl { SIMDINLINE Float() = default; SIMDINLINE Float(__m256 in) : v(in) {} - SIMDINLINE Float(SIMD128Impl::Float in_lo, SIMD128Impl::Float in_hi = _mm_setzero_ps()) + SIMDINLINE Float(SIMD128Impl::Float const &in_lo, SIMD128Impl::Float const &in_hi = _mm_setzero_ps()) { v = _mm256_insertf128_ps(_mm256_castps128_ps256(in_lo), in_hi, 0x1); } - SIMDINLINE Float& operator=(__m256 in) { v = in; return *this; } - SIMDINLINE Float& operator=(Float const & in) { v = in.v; return *this; } - SIMDINLINE operator __m256() const { return v; } + SIMDINLINE Float& SIMDCALL operator=(__m256 in) { v = in; return *this; } + SIMDINLINE Float& SIMDCALL operator=(Float const & in) { v = in.v; return *this; } + SIMDINLINE SIMDCALL operator __m256() const { return v; } SIMDALIGN(__m256, 32) v; SIMD128Impl::Float v4[2]; @@ -226,13 +228,13 @@ namespace SIMDImpl { SIMDINLINE Integer() = default; SIMDINLINE Integer(__m256i in) : v(in) {} - SIMDINLINE Integer(SIMD128Impl::Integer in_lo, SIMD128Impl::Integer in_hi = _mm_setzero_si128()) + SIMDINLINE Integer(SIMD128Impl::Integer const &in_lo, SIMD128Impl::Integer const &in_hi = _mm_setzero_si128()) { v = _mm256_insertf128_si256(_mm256_castsi128_si256(in_lo), in_hi, 0x1); } - SIMDINLINE Integer& operator=(__m256i in) { v = in; return *this; } - SIMDINLINE Integer& operator=(Integer const & in) { v = in.v; return *this; } - SIMDINLINE operator __m256i() const { return v; } + SIMDINLINE Integer& SIMDCALL operator=(__m256i in) { v = in; return *this; } + SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { v = in.v; return *this; } + SIMDINLINE SIMDCALL operator __m256i() const { return v; } SIMDALIGN(__m256i, 32) v; SIMD128Impl::Integer v4[2]; @@ -241,14 +243,14 @@ namespace SIMDImpl union Double { SIMDINLINE Double() = default; - SIMDINLINE Double(__m256d in) : v(in) {} - SIMDINLINE Double(SIMD128Impl::Double in_lo, SIMD128Impl::Double in_hi = _mm_setzero_pd()) + SIMDINLINE Double(__m256d const &in) : v(in) {} + SIMDINLINE Double(SIMD128Impl::Double const &in_lo, SIMD128Impl::Double const &in_hi = _mm_setzero_pd()) { v = _mm256_insertf128_pd(_mm256_castpd128_pd256(in_lo), in_hi, 0x1); } - SIMDINLINE Double& operator=(__m256d in) { v = in; return *this; } - SIMDINLINE Double& operator=(Double const & in) { v = in.v; return *this; } - SIMDINLINE operator __m256d() const { return v; } + SIMDINLINE Double& SIMDCALL operator=(__m256d in) { v = in; return *this; } + SIMDINLINE Double& SIMDCALL operator=(Double const & in) { v = in.v; return *this; } + SIMDINLINE SIMDCALL operator __m256d() const { return v; } SIMDALIGN(__m256d, 32) v; SIMD128Impl::Double v4[2]; @@ -303,9 +305,9 @@ namespace SIMDImpl { SIMDINLINE Float() = default; SIMDINLINE Float(__m512 in) : v(in) {} - SIMDINLINE Float(SIMD256Impl::Float in_lo, SIMD256Impl::Float in_hi = _mm256_setzero_ps()) { v8[0] = in_lo; v8[1] = in_hi; } - SIMDINLINE Float& operator=(__m512 in) { v = in; return *this; } - SIMDINLINE Float& operator=(Float const & in) + SIMDINLINE Float(SIMD256Impl::Float const &in_lo, SIMD256Impl::Float const &in_hi = _mm256_setzero_ps()) { v8[0] = in_lo; v8[1] = in_hi; } + SIMDINLINE Float& SIMDCALL operator=(__m512 in) { v = in; return *this; } + SIMDINLINE Float& SIMDCALL operator=(Float const & in) { #if SIMD_ARCH >= SIMD_ARCH_AVX512 v = in.v; @@ -315,7 +317,7 @@ namespace SIMDImpl #endif return *this; } - SIMDINLINE operator __m512() const { return v; } + SIMDINLINE SIMDCALL operator __m512() const { return v; } SIMDALIGN(__m512, SIMD_ALIGNMENT_BYTES) v; SIMD256Impl::Float v8[2]; @@ -325,9 +327,9 @@ namespace SIMDImpl { SIMDINLINE Integer() = default; SIMDINLINE Integer(__m512i in) : v(in) {} - SIMDINLINE Integer(SIMD256Impl::Integer in_lo, SIMD256Impl::Integer in_hi = _mm256_setzero_si256()) { v8[0] = in_lo; v8[1] = in_hi; } - SIMDINLINE Integer& operator=(__m512i in) { v = in; return *this; } - SIMDINLINE Integer& operator=(Integer const & in) + SIMDINLINE Integer(SIMD256Impl::Integer const &in_lo, SIMD256Impl::Integer const &in_hi = _mm256_setzero_si256()) { v8[0] = in_lo; v8[1] = in_hi; } + SIMDINLINE Integer& SIMDCALL operator=(__m512i in) { v = in; return *this; } + SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { #if SIMD_ARCH >= SIMD_ARCH_AVX512 v = in.v; @@ -338,7 +340,7 @@ namespace SIMDImpl return *this; } - SIMDINLINE operator __m512i() const { return v; } + SIMDINLINE SIMDCALL operator __m512i() const { return v; } SIMDALIGN(__m512i, SIMD_ALIGNMENT_BYTES) v; SIMD256Impl::Integer v8[2]; @@ -348,9 +350,9 @@ namespace SIMDImpl { SIMDINLINE Double() = default; SIMDINLINE Double(__m512d in) : v(in) {} - SIMDINLINE Double(SIMD256Impl::Double in_lo, SIMD256Impl::Double in_hi = _mm256_setzero_pd()) { v8[0] = in_lo; v8[1] = in_hi; } - SIMDINLINE Double& operator=(__m512d in) { v = in; return *this; } - SIMDINLINE Double& operator=(Double const & in) + SIMDINLINE Double(SIMD256Impl::Double const &in_lo, SIMD256Impl::Double const &in_hi = _mm256_setzero_pd()) { v8[0] = in_lo; v8[1] = in_hi; } + SIMDINLINE Double& SIMDCALL operator=(__m512d in) { v = in; return *this; } + SIMDINLINE Double& SIMDCALL operator=(Double const & in) { #if SIMD_ARCH >= SIMD_ARCH_AVX512 v = in.v; @@ -361,7 +363,7 @@ namespace SIMDImpl return *this; } - SIMDINLINE operator __m512d() const { return v; } + SIMDINLINE SIMDCALL operator __m512d() const { return v; } SIMDALIGN(__m512d, SIMD_ALIGNMENT_BYTES) v; SIMD256Impl::Double v8[2]; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 97ca0ef1ef9..0f430ef3abc 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -50,7 +50,7 @@ static const __m256 vULOffsetsY = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0} #define MASK 0xff #endif -static INLINE simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar vI, simdscalar vJ) +static INLINE simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar const &vI, simdscalar const &vJ) { simdscalar vClipMask = _simd_setzero_ps(); uint32_t numClipDistance = _mm_popcnt_u32(clipMask); @@ -338,7 +338,7 @@ struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> template<typename T> INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos, const uint64_t *const coverageMask, const uint32_t sampleMask, - const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL) + simdscalar const &vXSamplePosUL, simdscalar const &vYSamplePosUL) { uint32_t inputMask[KNOB_SIMD_WIDTH]; generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask); @@ -412,7 +412,7 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS } INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext, - const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL) + const simdscalar &vXSamplePosUL, const simdscalar &vYSamplePosUL) { // evaluate I,J psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid); @@ -424,7 +424,7 @@ INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CON psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid); } -INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar z, float minz, float maxz) +INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const &z, float minz, float maxz) { const simdscalar minzMask = _simd_cmpge_ps(z, _simd_set1_ps(minz)); const simdscalar maxzMask = _simd_cmple_ps(z, _simd_set1_ps(maxz)); @@ -711,7 +711,7 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P // Merge Output to 4x2 SIMD Tile Format INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, uint32_t renderTargetMask) + const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -777,7 +777,7 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW #if USE_8x2_TILE_BACKEND // Merge Output to 8x2 SIMD16 Tile Format INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset) + const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index c1f0f07804f..9fe1b017117 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -36,12 +36,12 @@ #include "tilemgr.h" // Function Prototype -void BinPostSetupLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], simdscalar vRecipW[2], uint32_t primMask, simdscalari primID, simdscalari viewportIdx); -void BinPostSetupPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primID, simdscalari viewportIdx); +void BinPostSetupLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], simdscalar vRecipW[2], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); +void BinPostSetupPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); #if USE_SIMD16_FRONTEND -void BinPostSetupLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], simd16scalar vRecipW[2], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx); -void BinPostSetupPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx); +void BinPostSetupLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], simd16scalar vRecipW[2], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); +void BinPostSetupPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); #endif ////////////////////////////////////////////////////////////////////////// @@ -433,7 +433,7 @@ void BinTriangles( uint32_t workerId, simdvector tri[3], uint32_t triMask, - simdscalari primID) + simdscalari const &primID) { SWR_CONTEXT *pContext = pDC->pContext; @@ -878,7 +878,7 @@ void SIMDCALL BinTriangles_simd16( uint32_t workerId, simd16vector tri[3], uint32_t triMask, - simd16scalari primID) + simd16scalari const &primID) { SWR_CONTEXT *pContext = pDC->pContext; @@ -1386,8 +1386,8 @@ void BinPostSetupPoints( uint32_t workerId, simdvector prim[], uint32_t primMask, - simdscalari primID, - simdscalari viewportIdx) + simdscalari const &primID, + simdscalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -1703,7 +1703,7 @@ void BinPoints( uint32_t workerId, simdvector prim[3], uint32_t primMask, - simdscalari primID) + simdscalari const &primID) { simdvector& primVerts = prim[0]; @@ -1767,8 +1767,8 @@ void BinPostSetupPoints_simd16( uint32_t workerId, simd16vector prim[], uint32_t primMask, - simd16scalari primID, - simd16scalari viewportIdx) + simd16scalari const &primID, + simd16scalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -2086,7 +2086,7 @@ void SIMDCALL BinPoints_simd16( uint32_t workerId, simd16vector prim[3], uint32_t primMask, - simd16scalari primID) + simd16scalari const &primID) { simd16vector& primVerts = prim[0]; @@ -2160,8 +2160,8 @@ void BinPostSetupLines( simdvector prim[], simdscalar recipW[], uint32_t primMask, - simdscalari primID, - simdscalari viewportIdx) + simdscalari const &primID, + simdscalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -2365,8 +2365,8 @@ void BinPostSetupLines_simd16( simd16vector prim[], simd16scalar recipW[], uint32_t primMask, - simd16scalari primID, - simd16scalari viewportIdx) + simd16scalari const &primID, + simd16scalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -2596,7 +2596,7 @@ void BinLines( uint32_t workerId, simdvector prim[], uint32_t primMask, - simdscalari primID) + simdscalari const &primID) { const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -2670,7 +2670,7 @@ void SIMDCALL BinLines_simd16( uint32_t workerId, simd16vector prim[3], uint32_t primMask, - simd16scalari primID) + simd16scalari const &primID) { const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.h b/src/gallium/drivers/swr/rasterizer/core/binner.h index 875e0b70356..16161431d14 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.h +++ b/src/gallium/drivers/swr/rasterizer/core/binner.h @@ -49,7 +49,7 @@ static const simd16scalar g_pixelOffsets_simd16[SWR_PIXEL_LOCATION_UL + 1] = /// @brief Convert the X,Y coords of a triangle to the requested Fixed /// Point precision from FP32. template <typename PT = FixedPointTraits<Fixed_16_8>> -INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn) +INLINE simdscalari fpToFixedPointVertical(const simdscalar &vIn) { simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::ScaleT::value)); return _simd_cvtps_epi32(vFixed); @@ -57,7 +57,7 @@ INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn) #if USE_SIMD16_FRONTEND template <typename PT = FixedPointTraits<Fixed_16_8>> -INLINE simd16scalari fpToFixedPointVertical(const simd16scalar vIn) +INLINE simd16scalari fpToFixedPointVertical(const simd16scalar &vIn) { simd16scalar vFixed = _simd16_mul_ps(vIn, _simd16_set1_ps(PT::ScaleT::value)); return _simd16_cvtps_epi32(vFixed); diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index bf542f18c18..4b5512ccc92 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -160,7 +160,7 @@ int ClipTriToPlane( const float *pInPts, int numInPts, return i; } -void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId) +void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipTriangles, pDC->drawId); @@ -169,7 +169,7 @@ void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvecto AR_END(FEClipTriangles, 1); } -void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId) +void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipLines, pDC->drawId); @@ -178,7 +178,7 @@ void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector pr AR_END(FEClipLines, 1); } -void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId) +void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipPoints, pDC->drawId); @@ -188,7 +188,7 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector p } #if USE_SIMD16_FRONTEND -void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId) +void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipTriangles, pDC->drawId); @@ -203,7 +203,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor AR_END(FEClipTriangles, 1); } -void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId) +void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipLines, pDC->drawId); @@ -218,7 +218,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI AR_END(FEClipLines, 1); } -void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId) +void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipPoints, pDC->drawId); diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index ca6596eafbf..ffc69c42298 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -62,7 +62,7 @@ enum SWR_CLIPCODES #define GUARDBAND_CLIP_MASK (FRUSTUM_NEAR|FRUSTUM_FAR|GUARDBAND_LEFT|GUARDBAND_TOP|GUARDBAND_RIGHT|GUARDBAND_BOTTOM|NEGW) INLINE -void ComputeClipCodes(const API_STATE& state, const simdvector& vertex, simdscalar& clipCodes, simdscalari viewportIndexes) +void ComputeClipCodes(const API_STATE& state, const simdvector& vertex, simdscalar& clipCodes, simdscalari const &viewportIndexes) { clipCodes = _simd_setzero_ps(); @@ -131,7 +131,7 @@ void ComputeClipCodes(const API_STATE& state, const simdvector& vertex, simdscal #if USE_SIMD16_FRONTEND INLINE -void ComputeClipCodes(const API_STATE& state, const simd16vector& vertex, simd16scalar& clipCodes, simd16scalari viewportIndexes) +void ComputeClipCodes(const API_STATE& state, const simd16vector& vertex, simd16scalar& clipCodes, simd16scalari const &viewportIndexes) { clipCodes = _simd16_setzero_ps(); @@ -203,13 +203,13 @@ template<uint32_t NumVertsPerPrim> class Clipper { public: - Clipper(uint32_t in_workerId, DRAW_CONTEXT* in_pDC) : + INLINE Clipper(uint32_t in_workerId, DRAW_CONTEXT* in_pDC) : workerId(in_workerId), pDC(in_pDC), state(GetApiState(in_pDC)) { static_assert(NumVertsPerPrim >= 1 && NumVertsPerPrim <= 3, "Invalid NumVertsPerPrim"); } - void ComputeClipCodes(simdvector vertex[], simdscalari viewportIndexes) + INLINE void ComputeClipCodes(simdvector vertex[], simdscalari const &viewportIndexes) { for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { @@ -218,7 +218,7 @@ public: } #if USE_SIMD16_FRONTEND - void ComputeClipCodes(simd16vector vertex[], simd16scalari viewportIndexes) + INLINE void ComputeClipCodes(simd16vector vertex[], simd16scalari const &viewportIndexes) { for (uint32_t i = 0; i < NumVertsPerPrim; ++i) { @@ -227,7 +227,7 @@ public: } #endif - simdscalar ComputeClipCodeIntersection() + INLINE simdscalar ComputeClipCodeIntersection() { simdscalar result = this->clipCodes[0]; for (uint32_t i = 1; i < NumVertsPerPrim; ++i) @@ -238,7 +238,7 @@ public: } #if USE_SIMD16_FRONTEND - simd16scalar ComputeClipCodeIntersection_simd16() + INLINE simd16scalar ComputeClipCodeIntersection_simd16() { simd16scalar result = this->clipCodes_simd16[0]; for (uint32_t i = 1; i < NumVertsPerPrim; ++i) @@ -249,7 +249,7 @@ public: } #endif - simdscalar ComputeClipCodeUnion() + INLINE simdscalar ComputeClipCodeUnion() { simdscalar result = this->clipCodes[0]; for (uint32_t i = 1; i < NumVertsPerPrim; ++i) @@ -260,7 +260,7 @@ public: } #if USE_SIMD16_FRONTEND - simd16scalar ComputeClipCodeUnion_simd16() + INLINE simd16scalar ComputeClipCodeUnion_simd16() { simd16scalar result = this->clipCodes_simd16[0]; for (uint32_t i = 1; i < NumVertsPerPrim; ++i) @@ -271,14 +271,14 @@ public: } #endif - int ComputeNegWMask() + INLINE int ComputeNegWMask() { simdscalar clipCodeUnion = ComputeClipCodeUnion(); clipCodeUnion = _simd_and_ps(clipCodeUnion, _simd_castsi_ps(_simd_set1_epi32(NEGW))); return _simd_movemask_ps(_simd_cmpneq_ps(clipCodeUnion, _simd_setzero_ps())); } - int ComputeClipMask() + INLINE int ComputeClipMask() { simdscalar clipUnion = ComputeClipCodeUnion(); clipUnion = _simd_and_ps(clipUnion, _simd_castsi_ps(_simd_set1_epi32(GUARDBAND_CLIP_MASK))); @@ -286,7 +286,7 @@ public: } #if USE_SIMD16_FRONTEND - int ComputeClipMask_simd16() + INLINE int ComputeClipMask_simd16() { simd16scalar clipUnion = ComputeClipCodeUnion_simd16(); clipUnion = _simd16_and_ps(clipUnion, _simd16_castsi_ps(_simd16_set1_epi32(GUARDBAND_CLIP_MASK))); @@ -295,7 +295,7 @@ public: #endif // clipper is responsible for culling any prims with NAN coordinates - int ComputeNaNMask(simdvector prim[]) + INLINE int ComputeNaNMask(simdvector prim[]) { simdscalar vNanMask = _simd_setzero_ps(); for (uint32_t e = 0; e < NumVertsPerPrim; ++e) @@ -310,7 +310,7 @@ public: } #if USE_SIMD16_FRONTEND - int ComputeNaNMask(simd16vector prim[]) + INLINE int ComputeNaNMask(simd16vector prim[]) { simd16scalar vNanMask = _simd16_setzero_ps(); for (uint32_t e = 0; e < NumVertsPerPrim; ++e) @@ -325,7 +325,7 @@ public: } #endif - int ComputeUserClipCullMask(PA_STATE& pa, simdvector prim[]) + INLINE int ComputeUserClipCullMask(PA_STATE& pa, simdvector prim[]) { uint8_t cullMask = this->state.rastState.cullDistanceMask; simdscalar vClipCullMask = _simd_setzero_ps(); @@ -391,7 +391,7 @@ public: } #if USE_SIMD16_FRONTEND - int ComputeUserClipCullMask(PA_STATE& pa, simd16vector prim[]) + INLINE int ComputeUserClipCullMask(PA_STATE& pa, simd16vector prim[]) { uint8_t cullMask = this->state.rastState.cullDistanceMask; simd16scalar vClipCullMask = _simd16_setzero_ps(); @@ -459,7 +459,7 @@ public: #endif // clip SIMD primitives - void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId) + INLINE void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId) { // input/output vertex store for clipper simdvertex vertices[7]; // maximum 7 verts generated per triangle @@ -943,7 +943,7 @@ public: #endif // execute the clipper stage - void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId) + void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari const &primId) { SWR_ASSERT(this->pDC != nullptr); SWR_CONTEXT* pContext = this->pDC->pContext; @@ -1027,7 +1027,7 @@ public: } #if USE_SIMD16_FRONTEND - void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId) + void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari const &primId) { SWR_ASSERT(pa.pDC != nullptr); SWR_CONTEXT* pContext = pa.pDC->pContext; @@ -1110,19 +1110,19 @@ public: #endif private: - inline simdscalar ComputeInterpFactor(simdscalar boundaryCoord0, simdscalar boundaryCoord1) + inline simdscalar ComputeInterpFactor(simdscalar const &boundaryCoord0, simdscalar const &boundaryCoord1) { return _simd_div_ps(boundaryCoord0, _simd_sub_ps(boundaryCoord0, boundaryCoord1)); } #if USE_SIMD16_FRONTEND - inline simd16scalar ComputeInterpFactor(simd16scalar boundaryCoord0, simd16scalar boundaryCoord1) + inline simd16scalar ComputeInterpFactor(simd16scalar const &boundaryCoord0, simd16scalar const &boundaryCoord1) { return _simd16_div_ps(boundaryCoord0, _simd16_sub_ps(boundaryCoord0, boundaryCoord1)); } #endif - inline simdscalari ComputeOffsets(uint32_t attrib, simdscalari vIndices, uint32_t component) + inline simdscalari ComputeOffsets(uint32_t attrib, simdscalari const &vIndices, uint32_t component) { const uint32_t simdVertexStride = sizeof(simdvertex); const uint32_t componentStride = sizeof(simdscalar); @@ -1143,7 +1143,7 @@ private: } #if USE_SIMD16_FRONTEND - inline simd16scalari ComputeOffsets(uint32_t attrib, simd16scalari vIndices, uint32_t component) + inline simd16scalari ComputeOffsets(uint32_t attrib, simd16scalari const &vIndices, uint32_t component) { const uint32_t simdVertexStride = sizeof(simd16vertex); const uint32_t componentStride = sizeof(simd16scalar); @@ -1168,7 +1168,7 @@ private: #endif // gathers a single component for a given attribute for each SIMD lane - inline simdscalar GatherComponent(const float* pBuffer, uint32_t attrib, simdscalar vMask, simdscalari vIndices, uint32_t component) + inline simdscalar GatherComponent(const float* pBuffer, uint32_t attrib, simdscalar const &vMask, simdscalari const &vIndices, uint32_t component) { simdscalari vOffsets = ComputeOffsets(attrib, vIndices, component); simdscalar vSrc = _mm256_undefined_ps(); @@ -1176,7 +1176,7 @@ private: } #if USE_SIMD16_FRONTEND - inline simd16scalar GatherComponent(const float* pBuffer, uint32_t attrib, simd16scalar vMask, simd16scalari vIndices, uint32_t component) + inline simd16scalar GatherComponent(const float* pBuffer, uint32_t attrib, simd16scalar const &vMask, simd16scalari const &vIndices, uint32_t component) { simd16scalari vOffsets = ComputeOffsets(attrib, vIndices, component); simd16scalar vSrc = _simd16_setzero_ps(); @@ -1184,7 +1184,7 @@ private: } #endif - inline void ScatterComponent(const float* pBuffer, uint32_t attrib, simdscalar vMask, simdscalari vIndices, uint32_t component, simdscalar vSrc) + inline void ScatterComponent(const float* pBuffer, uint32_t attrib, simdscalar const &vMask, simdscalari const &vIndices, uint32_t component, simdscalar const &vSrc) { simdscalari vOffsets = ComputeOffsets(attrib, vIndices, component); @@ -1201,7 +1201,7 @@ private: } #if USE_SIMD16_FRONTEND - inline void ScatterComponent(const float* pBuffer, uint32_t attrib, simd16scalar vMask, simd16scalari vIndices, uint32_t component, simd16scalar vSrc) + inline void ScatterComponent(const float* pBuffer, uint32_t attrib, simd16scalar const &vMask, simd16scalari const &vIndices, uint32_t component, simd16scalar const &vSrc) { simd16scalari vOffsets = ComputeOffsets(attrib, vIndices, component); @@ -1891,12 +1891,12 @@ private: // pipeline stage functions -void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId); -void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId); -void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId); +void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId); +void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId); +void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId); #if USE_SIMD16_FRONTEND -void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId); -void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId); -void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId); +void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId); +void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId); +void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId); #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index a694f2d4112..131b3cbbb03 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -214,12 +214,12 @@ struct PA_STATE; // function signature for pipeline stages that execute after primitive assembly typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], - uint32_t primMask, simdscalari primID); + uint32_t primMask, simdscalari const &primID); #if ENABLE_AVX512_SIMD16 // function signature for pipeline stages that execute after primitive assembly typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], - uint32_t primMask, simd16scalari primID); + uint32_t primMask, simd16scalari const &primID); #endif OSALIGNLINE(struct) API_STATE @@ -343,11 +343,11 @@ struct BarycentricCoeffs // pipeline function pointer types typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&); typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*, - const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar); + const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar const &); typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &); typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&); typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t, - const simdscalar, const simdscalar); + simdscalar const &, simdscalar const &); struct BACKEND_FUNCS { diff --git a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h index 590c569030a..fafc36d1de6 100644 --- a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h +++ b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h @@ -30,7 +30,7 @@ #include "format_conversion.h" INLINE -void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simdscalar &stencilps) +void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps) { simdscalari stencil = _simd_castps_si(stencilps); @@ -81,7 +81,7 @@ void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simds template<SWR_FORMAT depthFormatT> -simdscalar QuantizeDepth(simdscalar depth) +simdscalar QuantizeDepth(simdscalar const &depth) { SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0); uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0); @@ -117,7 +117,7 @@ simdscalar QuantizeDepth(simdscalar depth) INLINE simdscalar DepthStencilTest(const API_STATE* pState, - bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, + bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask, uint8_t *pStencilBase, simdscalar* pStencilMask) { static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format"); @@ -132,7 +132,7 @@ simdscalar DepthStencilTest(const API_STATE* pState, // clamp Z to viewport [minZ..maxZ] simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); - interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ)); + simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ)); if (pDSState->depthTestEnable) { @@ -215,7 +215,7 @@ simdscalar DepthStencilTest(const API_STATE* pState, INLINE void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState, - bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask, + bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask, uint8_t *pStencilBase, const simdscalar& stencilMask) { if (pDSState->depthWriteEnable) @@ -223,7 +223,7 @@ void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_ST // clamp Z to viewport [minZ..maxZ] simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); - interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ)); + simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ)); simdscalar vMask = _simd_and_ps(depthMask, coverageMask); _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ); diff --git a/src/gallium/drivers/swr/rasterizer/core/format_conversion.h b/src/gallium/drivers/swr/rasterizer/core/format_conversion.h index 4e642f8c2e4..72843f59062 100644 --- a/src/gallium/drivers/swr/rasterizer/core/format_conversion.h +++ b/src/gallium/drivers/swr/rasterizer/core/format_conversion.h @@ -79,8 +79,9 @@ INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst) /// @param vComp - SIMD vector of floats /// @param Component - component template<SWR_FORMAT Format> -INLINE simdscalar Clamp(simdscalar vComp, uint32_t Component) +INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component) { + simdscalar vComp = vC; if (FormatTraits<Format>::isNormalized(Component)) { if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM) @@ -125,8 +126,9 @@ INLINE simdscalar Clamp(simdscalar vComp, uint32_t Component) /// @param vComp - SIMD vector of floats /// @param Component - component template<SWR_FORMAT Format> -INLINE simdscalar Normalize(simdscalar vComp, uint32_t Component) +INLINE simdscalar Normalize(simdscalar const &vC, uint32_t Component) { + simdscalar vComp = vC; if (FormatTraits<Format>::isNormalized(Component)) { vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<Format>::fromFloat(Component))); @@ -247,8 +249,9 @@ INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst) /// @param vComp - SIMD vector of floats /// @param Component - component template<SWR_FORMAT Format> -INLINE simd16scalar SIMDCALL Clamp(simd16scalar vComp, uint32_t Component) +INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component) { + simd16scalar vComp = v; if (FormatTraits<Format>::isNormalized(Component)) { if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM) @@ -293,14 +296,15 @@ INLINE simd16scalar SIMDCALL Clamp(simd16scalar vComp, uint32_t Component) /// @param vComp - SIMD vector of floats /// @param Component - component template<SWR_FORMAT Format> -INLINE simd16scalar SIMDCALL Normalize(simd16scalar vComp, uint32_t Component) +INLINE simd16scalar SIMDCALL Normalize(simd16scalar const &vComp, uint32_t Component) { + simd16scalar r = vComp; if (FormatTraits<Format>::isNormalized(Component)) { - vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<Format>::fromFloat(Component))); - vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp)); + r = _simd16_mul_ps(r, _simd16_set1_ps(FormatTraits<Format>::fromFloat(Component))); + r = _simd16_castsi_ps(_simd16_cvtps_epi32(r)); } - return vComp; + return r; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h b/src/gallium/drivers/swr/rasterizer/core/format_types.h index 43053b646f6..c3327c1d40b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/format_types.h +++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h @@ -38,12 +38,12 @@ struct PackTraits { static const uint32_t MyNumBits = NumBits; static simdscalar loadSOA(const uint8_t *pSrc) = delete; - static void storeSOA(uint8_t *pDst, simdscalar src) = delete; + static void storeSOA(uint8_t *pDst, simdscalar const &src) = delete; static simdscalar unpack(simdscalar &in) = delete; static simdscalar pack(simdscalar &in) = delete; #if ENABLE_AVX512_SIMD16 static simd16scalar loadSOA_16(const uint8_t *pSrc) = delete; - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) = delete; + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) = delete; static simd16scalar unpack(simd16scalar &in) = delete; static simd16scalar pack(simd16scalar &in) = delete; #endif @@ -58,12 +58,12 @@ struct PackTraits<0, false> static const uint32_t MyNumBits = 0; static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_setzero_ps(); } - static void storeSOA(uint8_t *pDst, simdscalar src) { return; } + static void storeSOA(uint8_t *pDst, simdscalar const &src) { return; } static simdscalar unpack(simdscalar &in) { return _simd_setzero_ps(); } static simdscalar pack(simdscalar &in) { return _simd_setzero_ps(); } #if ENABLE_AVX512_SIMD16 static simd16scalar loadSOA_16(const uint8_t *pSrc) { return _simd16_setzero_ps(); } - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) { return; } + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { return; } static simd16scalar unpack(simd16scalar &in) { return _simd16_setzero_ps(); } static simd16scalar pack(simd16scalar &in) { return _simd16_setzero_ps(); } #endif @@ -88,7 +88,7 @@ struct PackTraits<8, false> #endif } - static void storeSOA(uint8_t *pDst, simdscalar src) + static void storeSOA(uint8_t *pDst, simdscalar const &src) { // store simd bytes #if KNOB_SIMD_WIDTH == 8 @@ -144,7 +144,7 @@ struct PackTraits<8, false> return result; } - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { // store simd16 bytes _mm_store_ps(reinterpret_cast<float *>(pDst), _mm256_castps256_ps128(_simd16_extract_ps(src, 0))); @@ -203,7 +203,7 @@ struct PackTraits<8, true> #endif } - static void storeSOA(uint8_t *pDst, simdscalar src) + static void storeSOA(uint8_t *pDst, simdscalar const &src) { // store simd bytes #if KNOB_SIMD_WIDTH == 8 @@ -260,7 +260,7 @@ struct PackTraits<8, true> return result; } - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { // store simd16 bytes _mm_store_ps(reinterpret_cast<float *>(pDst), _mm256_castps256_ps128(_simd16_extract_ps(src, 0))); @@ -319,7 +319,7 @@ struct PackTraits<16, false> #endif } - static void storeSOA(uint8_t *pDst, simdscalar src) + static void storeSOA(uint8_t *pDst, simdscalar const &src) { #if KNOB_SIMD_WIDTH == 8 // store 16B (2B * 8) @@ -372,7 +372,7 @@ struct PackTraits<16, false> return result; } - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { _simd_store_ps(reinterpret_cast<float *>(pDst), _simd16_extract_ps(src, 0)); } @@ -417,7 +417,7 @@ struct PackTraits<16, true> #endif } - static void storeSOA(uint8_t *pDst, simdscalar src) + static void storeSOA(uint8_t *pDst, simdscalar const &src) { #if KNOB_SIMD_WIDTH == 8 // store 16B (2B * 8) @@ -471,7 +471,7 @@ struct PackTraits<16, true> return result; } - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { _simd_store_ps(reinterpret_cast<float *>(pDst), _simd16_extract_ps(src, 0)); } @@ -506,7 +506,7 @@ struct PackTraits<32, false> static const uint32_t MyNumBits = 32; static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_load_ps((const float*)pSrc); } - static void storeSOA(uint8_t *pDst, simdscalar src) { _simd_store_ps((float*)pDst, src); } + static void storeSOA(uint8_t *pDst, simdscalar const &src) { _simd_store_ps((float*)pDst, src); } static simdscalar unpack(simdscalar &in) { return in; } static simdscalar pack(simdscalar &in) { return in; } #if ENABLE_AVX512_SIMD16 @@ -516,7 +516,7 @@ struct PackTraits<32, false> return _simd16_load_ps(reinterpret_cast<const float *>(pSrc)); } - static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar src) + static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { _simd16_store_ps(reinterpret_cast<float *>(pDst), src); } @@ -814,7 +814,7 @@ static inline __m128 ConvertFloatToSRGB2(__m128& Src) #if ENABLE_AVX512_SIMD16 template< unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden > -inline static simd16scalar SIMDCALL fastpow(simd16scalar value) +inline static simd16scalar SIMDCALL fastpow(simd16scalar const &value) { static const float factor1 = exp2(127.0f * expden / expnum - 127.0f) * powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum); @@ -836,7 +836,7 @@ inline static simd16scalar SIMDCALL fastpow(simd16scalar value) return result; } -inline static simd16scalar SIMDCALL pow512_4(simd16scalar arg) +inline static simd16scalar SIMDCALL pow512_4(simd16scalar const &arg) { // 5/12 is too small, so compute the 4th root of 20/12 instead. // 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow. @@ -857,7 +857,7 @@ inline static simd16scalar SIMDCALL pow512_4(simd16scalar arg) return xavg; } -inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar base, float exp) +inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar &base, float exp) { const float *f = reinterpret_cast<const float *>(&base); @@ -1322,7 +1322,7 @@ struct ComponentTraits return TypeTraits<X, NumBitsX>::loadSOA(pSrc); } - INLINE static void storeSOA(uint32_t comp, uint8_t *pDst, simdscalar src) + INLINE static void storeSOA(uint32_t comp, uint8_t *pDst, simdscalar const &src) { switch (comp) { @@ -1340,41 +1340,48 @@ struct ComponentTraits return; } SWR_INVALID("Invalid component: %d", comp); - TypeTraits<X, NumBitsX>::storeSOA(pDst, src); } INLINE static simdscalar unpack(uint32_t comp, simdscalar &in) { + simdscalar out; switch (comp) { case 0: - return TypeTraits<X, NumBitsX>::unpack(in); + out = TypeTraits<X, NumBitsX>::unpack(in); break; case 1: - return TypeTraits<Y, NumBitsY>::unpack(in); + out = TypeTraits<Y, NumBitsY>::unpack(in); break; case 2: - return TypeTraits<Z, NumBitsZ>::unpack(in); + out = TypeTraits<Z, NumBitsZ>::unpack(in); break; case 3: - return TypeTraits<W, NumBitsW>::unpack(in); + out = TypeTraits<W, NumBitsW>::unpack(in); break; + default: + SWR_INVALID("Invalid component: %d", comp); + out = in; + break; } - SWR_INVALID("Invalid component: %d", comp); - return TypeTraits<X, NumBitsX>::unpack(in); + return out; } INLINE static simdscalar pack(uint32_t comp, simdscalar &in) { + simdscalar out; switch (comp) { case 0: - return TypeTraits<X, NumBitsX>::pack(in); + out = TypeTraits<X, NumBitsX>::pack(in); break; case 1: - return TypeTraits<Y, NumBitsY>::pack(in); + out = TypeTraits<Y, NumBitsY>::pack(in); break; case 2: - return TypeTraits<Z, NumBitsZ>::pack(in); + out = TypeTraits<Z, NumBitsZ>::pack(in); break; case 3: - return TypeTraits<W, NumBitsW>::pack(in); + out = TypeTraits<W, NumBitsW>::pack(in); break; + default: + SWR_INVALID("Invalid component: %d", comp); + out = in; + break; } - SWR_INVALID("Invalid component: %d", comp); - return TypeTraits<X, NumBitsX>::pack(in); + return out; } INLINE static simdscalar convertSrgb(uint32_t comp, simdscalar &in) @@ -1412,7 +1419,7 @@ struct ComponentTraits return TypeTraits<X, NumBitsX>::loadSOA_16(pSrc); } - INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t *pDst, simd16scalar src) + INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t *pDst, simd16scalar const &src) { switch (comp) { diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index daea0889237..406a0e0becd 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -770,7 +770,7 @@ static void GeometryShaderStage( #if USE_SIMD16_FRONTEND uint32_t numPrims_simd8, #endif - simdscalari primID) + simdscalari const &primID) { SWR_CONTEXT *pContext = pDC->pContext; @@ -1069,7 +1069,7 @@ static void TessellationStages( #if USE_SIMD16_FRONTEND uint32_t numPrims_simd8, #endif - simdscalari primID) + simdscalari const &primID) { SWR_CONTEXT *pContext = pDC->pContext; const API_STATE& state = GetApiState(pDC); diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index 3d7b26dd0f1..5cb2f87c158 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -275,7 +275,7 @@ void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices #endif template<uint32_t NumVerts> INLINE -void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari vViewportIdx) +void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari const &vViewportIdx) { // perform a gather of each matrix element based on the viewport array indexes simdscalar m00 = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4); @@ -296,7 +296,7 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, #if USE_SIMD16_FRONTEND template<uint32_t NumVerts> INLINE -void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simd16scalari vViewportIdx) +void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simd16scalari const &vViewportIdx) { // perform a gather of each matrix element based on the viewport array indexes const simd16scalar m00 = _simd16_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4); @@ -388,10 +388,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative); #endif struct PA_STATE_BASE; // forward decl -void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID); -void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID); +void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID); +void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID); #if USE_SIMD16_FRONTEND -void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID); -void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID); +void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID); +void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID); #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 9e639554a1b..2e52698a078 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -852,7 +852,7 @@ typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pC typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample, uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask); -typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar); +typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &); |