diff options
author | Chris Robinson <[email protected]> | 2019-10-02 22:38:19 -0700 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2019-10-02 22:38:19 -0700 |
commit | b350ae3766f0f85183c410b4c77ac9a0eb388511 (patch) | |
tree | 49b70ba082ac4daba2b8404638386b07d3d87354 /alc/mixer | |
parent | bce6889173245031cf1440d70858180cc2716adf (diff) |
Remove the Offset parameter from ApplyCoeffs
Diffstat (limited to 'alc/mixer')
-rw-r--r-- | alc/mixer/hrtfbase.h | 18 | ||||
-rw-r--r-- | alc/mixer/mixer_c.cpp | 24 | ||||
-rw-r--r-- | alc/mixer/mixer_neon.cpp | 50 | ||||
-rw-r--r-- | alc/mixer/mixer_sse.cpp | 93 |
4 files changed, 97 insertions, 88 deletions
diff --git a/alc/mixer/hrtfbase.h b/alc/mixer/hrtfbase.h index 58d168ed..cbc885c5 100644 --- a/alc/mixer/hrtfbase.h +++ b/alc/mixer/hrtfbase.h @@ -9,10 +9,10 @@ #include "voice.h" -using ApplyCoeffsT = void(size_t Offset, float2 *RESTRICT Values, const ALuint irSize, - const HrirArray &Coeffs, const float left, const float right); +using ApplyCoeffsT = void(&)(float2 *RESTRICT Values, const ALuint irSize, const HrirArray &Coeffs, + const float left, const float right); -template<ApplyCoeffsT &ApplyCoeffs> +template<ApplyCoeffsT ApplyCoeffs> inline void MixHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float *InSamples, float2 *RESTRICT AccumSamples, const size_t OutPos, const ALuint IrSize, MixHrtfFilter *hrtfparams, const size_t BufferSize) @@ -33,7 +33,7 @@ inline void MixHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float g{gain + gainstep*stepcount}; const float left{InSamples[Delay[0]++] * g}; const float right{InSamples[Delay[1]++] * g}; - ApplyCoeffs(i, AccumSamples+i, IrSize, Coeffs, left, right); + ApplyCoeffs(AccumSamples+i, IrSize, Coeffs, left, right); stepcount += 1.0f; } @@ -46,7 +46,7 @@ inline void MixHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, hrtfparams->Gain = gain + gainstep*stepcount; } -template<ApplyCoeffsT &ApplyCoeffs> +template<ApplyCoeffsT ApplyCoeffs> inline void MixHrtfBlendBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float *InSamples, float2 *RESTRICT AccumSamples, const size_t OutPos, const ALuint IrSize, const HrtfFilter *oldparams, MixHrtfFilter *newparams, @@ -70,7 +70,7 @@ inline void MixHrtfBlendBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut const float g{oldGain + oldGainStep*stepcount}; const float left{InSamples[Delay[0]++] * g}; const float right{InSamples[Delay[1]++] * g}; - ApplyCoeffs(i, AccumSamples+i, IrSize, OldCoeffs, left, right); + ApplyCoeffs(AccumSamples+i, IrSize, OldCoeffs, left, right); stepcount += 1.0f; } @@ -84,7 +84,7 @@ inline void MixHrtfBlendBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut const float g{newGainStep*stepcount}; const float left{InSamples[Delay[0]++] * g}; const float right{InSamples[Delay[1]++] * g}; - ApplyCoeffs(i, AccumSamples+i, IrSize, NewCoeffs, left, right); + ApplyCoeffs(AccumSamples+i, IrSize, NewCoeffs, left, right); stepcount += 1.0f; } @@ -97,7 +97,7 @@ inline void MixHrtfBlendBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut newparams->Gain = newGainStep*stepcount; } -template<ApplyCoeffsT &ApplyCoeffs> +template<ApplyCoeffsT ApplyCoeffs> inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const al::span<const FloatBufferLine> InSamples, float2 *RESTRICT AccumSamples, DirectHrtfState *State, const size_t BufferSize) @@ -116,7 +116,7 @@ inline void MixDirectHrtfBase(FloatBufferLine &LeftOut, FloatBufferLine &RightOu for(size_t i{0u};i < BufferSize;++i) { const float insample{input[i]}; - ApplyCoeffs(i, AccumSamples+i, IrSize, Coeffs, insample, insample); + ApplyCoeffs(AccumSamples+i, IrSize, Coeffs, insample, insample); } } for(size_t i{0u};i < BufferSize;++i) diff --git a/alc/mixer/mixer_c.cpp b/alc/mixer/mixer_c.cpp index 8aa32e6b..ffc07acd 100644 --- a/alc/mixer/mixer_c.cpp +++ b/alc/mixer/mixer_c.cpp @@ -68,7 +68,7 @@ const float *DoResample(const InterpState *state, const float *RESTRICT src, ALu ALuint increment, const al::span<float> dst) { const InterpState istate{*state}; - auto proc_sample = [&src,&frac,istate,increment]() -> ALfloat + auto proc_sample = [&src,&frac,istate,increment]() -> float { const float ret{Sampler(istate, src, frac)}; @@ -83,6 +83,17 @@ const float *DoResample(const InterpState *state, const float *RESTRICT src, ALu return dst.begin(); } +inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs, + const float left, const float right) +{ + ASSUME(IrSize >= 4); + for(ALuint c{0};c < IrSize;++c) + { + Values[c][0] += Coeffs[c][0] * left; + Values[c][1] += Coeffs[c][1] * right; + } +} + } // namespace template<> @@ -124,17 +135,6 @@ const ALfloat *Resample_<FastBSincTag,CTag>(const InterpState *state, const ALfl { return DoResample<do_fastbsinc>(state, src-state->bsinc.l, frac, increment, dst); } -static inline void ApplyCoeffs(size_t /*Offset*/, float2 *RESTRICT Values, const ALuint IrSize, - const HrirArray &Coeffs, const float left, const float right) -{ - ASSUME(IrSize >= 4); - for(ALuint c{0};c < IrSize;++c) - { - Values[c][0] += Coeffs[c][0] * left; - Values[c][1] += Coeffs[c][1] * right; - } -} - template<> void MixHrtf_<CTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float *InSamples, float2 *AccumSamples, const size_t OutPos, const ALuint IrSize, diff --git a/alc/mixer/mixer_neon.cpp b/alc/mixer/mixer_neon.cpp index c0fc1651..ae782897 100644 --- a/alc/mixer/mixer_neon.cpp +++ b/alc/mixer/mixer_neon.cpp @@ -13,6 +13,32 @@ #include "hrtfbase.h" +namespace { + +inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs, + const float left, const float right) +{ + float32x4_t leftright4; + { + float32x2_t leftright2 = vdup_n_f32(0.0); + leftright2 = vset_lane_f32(left, leftright2, 0); + leftright2 = vset_lane_f32(right, leftright2, 1); + leftright4 = vcombine_f32(leftright2, leftright2); + } + + ASSUME(IrSize >= 4); + for(ALuint c{0};c < IrSize;c += 2) + { + float32x4_t vals = vld1q_f32(&Values[c][0]); + float32x4_t coefs = vld1q_f32(&Coeffs[c][0]); + + vals = vmlaq_f32(vals, coefs, leftright4); + + vst1q_f32(&Values[c][0], vals); + } +} + +} // namespace template<> const ALfloat *Resample_<LerpTag,NEONTag>(const InterpState*, const ALfloat *RESTRICT src, @@ -163,30 +189,6 @@ const ALfloat *Resample_<FastBSincTag,NEONTag>(const InterpState *state, } -static inline void ApplyCoeffs(size_t /*Offset*/, float2 *RESTRICT Values, const ALuint IrSize, - const HrirArray &Coeffs, const float left, const float right) -{ - ASSUME(IrSize >= 4); - - float32x4_t leftright4; - { - float32x2_t leftright2 = vdup_n_f32(0.0); - leftright2 = vset_lane_f32(left, leftright2, 0); - leftright2 = vset_lane_f32(right, leftright2, 1); - leftright4 = vcombine_f32(leftright2, leftright2); - } - - for(ALuint c{0};c < IrSize;c += 2) - { - float32x4_t vals = vld1q_f32(&Values[c][0]); - float32x4_t coefs = vld1q_f32(&Coeffs[c][0]); - - vals = vmlaq_f32(vals, coefs, leftright4); - - vst1q_f32(&Values[c][0], vals); - } -} - template<> void MixHrtf_<NEONTag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float *InSamples, float2 *AccumSamples, const size_t OutPos, const ALuint IrSize, diff --git a/alc/mixer/mixer_sse.cpp b/alc/mixer/mixer_sse.cpp index 0fb954aa..62ce5eab 100644 --- a/alc/mixer/mixer_sse.cpp +++ b/alc/mixer/mixer_sse.cpp @@ -13,6 +13,56 @@ #include "hrtfbase.h" +namespace { + +inline void ApplyCoeffs(float2 *RESTRICT Values, const ALuint IrSize, const HrirArray &Coeffs, + const float left, const float right) +{ + const __m128 lrlr{_mm_setr_ps(left, right, left, right)}; + + ASSUME(IrSize >= 4); + /* This isn't technically correct to test alignment, but it's true for + * systems that support SSE, which is the only one that needs to know the + * alignment of Values (which alternates between 8- and 16-byte aligned). + */ + if(reinterpret_cast<intptr_t>(Values)&0x8) + { + __m128 imp0, imp1; + __m128 coeffs{_mm_load_ps(&Coeffs[0][0])}; + __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[0][0]))}; + imp0 = _mm_mul_ps(lrlr, coeffs); + vals = _mm_add_ps(imp0, vals); + _mm_storel_pi(reinterpret_cast<__m64*>(&Values[0][0]), vals); + ALuint i{1}; + for(;i < IrSize-1;i += 2) + { + coeffs = _mm_load_ps(&Coeffs[i+1][0]); + vals = _mm_load_ps(&Values[i][0]); + imp1 = _mm_mul_ps(lrlr, coeffs); + imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); + vals = _mm_add_ps(imp0, vals); + _mm_store_ps(&Values[i][0], vals); + imp0 = imp1; + } + vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[i][0])); + imp0 = _mm_movehl_ps(imp0, imp0); + vals = _mm_add_ps(imp0, vals); + _mm_storel_pi(reinterpret_cast<__m64*>(&Values[i][0]), vals); + } + else + { + for(ALuint i{0};i < IrSize;i += 2) + { + __m128 coeffs{_mm_load_ps(&Coeffs[i][0])}; + __m128 vals{_mm_load_ps(&Values[i][0])}; + vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); + _mm_store_ps(&Values[i][0], vals); + } + } +} + +} // namespace + template<> const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloat *RESTRICT src, ALuint frac, ALuint increment, const al::span<float> dst) @@ -114,49 +164,6 @@ const ALfloat *Resample_<FastBSincTag,SSETag>(const InterpState *state, } -static inline void ApplyCoeffs(size_t Offset, float2 *RESTRICT Values, const ALuint IrSize, - const HrirArray &Coeffs, const float left, const float right) -{ - const __m128 lrlr{_mm_setr_ps(left, right, left, right)}; - - ASSUME(IrSize >= 4); - - if((Offset&1)) - { - __m128 imp0, imp1; - __m128 coeffs{_mm_load_ps(&Coeffs[0][0])}; - __m128 vals{_mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64*>(&Values[0][0]))}; - imp0 = _mm_mul_ps(lrlr, coeffs); - vals = _mm_add_ps(imp0, vals); - _mm_storel_pi(reinterpret_cast<__m64*>(&Values[0][0]), vals); - ALuint i{1}; - for(;i < IrSize-1;i += 2) - { - coeffs = _mm_load_ps(&Coeffs[i+1][0]); - vals = _mm_load_ps(&Values[i][0]); - imp1 = _mm_mul_ps(lrlr, coeffs); - imp0 = _mm_shuffle_ps(imp0, imp1, _MM_SHUFFLE(1, 0, 3, 2)); - vals = _mm_add_ps(imp0, vals); - _mm_store_ps(&Values[i][0], vals); - imp0 = imp1; - } - vals = _mm_loadl_pi(vals, reinterpret_cast<__m64*>(&Values[i][0])); - imp0 = _mm_movehl_ps(imp0, imp0); - vals = _mm_add_ps(imp0, vals); - _mm_storel_pi(reinterpret_cast<__m64*>(&Values[i][0]), vals); - } - else - { - for(ALuint i{0};i < IrSize;i += 2) - { - __m128 coeffs{_mm_load_ps(&Coeffs[i][0])}; - __m128 vals{_mm_load_ps(&Values[i][0])}; - vals = _mm_add_ps(vals, _mm_mul_ps(lrlr, coeffs)); - _mm_store_ps(&Values[i][0], vals); - } - } -} - template<> void MixHrtf_<SSETag>(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const float *InSamples, float2 *AccumSamples, const size_t OutPos, const ALuint IrSize, |