diff options
author | Jack Lloyd <[email protected]> | 2017-08-13 08:38:26 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-08-13 08:38:26 -0400 |
commit | 8804eeece5af90b728e337c7e0877549ea74eec9 (patch) | |
tree | 558c692afe04770efc3077bba0f3247e99188231 /src/lib/utils/simd | |
parent | 61a7a5757bfe19ff1a686055ab75cae1fc9f485e (diff) |
Add SHACAL2 in generic SIMD
Bit over 2x faster on my desktop
Diffstat (limited to 'src/lib/utils/simd')
-rw-r--r-- | src/lib/utils/simd/simd_32.h | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h index 01b6a7ef0..def933f4a 100644 --- a/src/lib/utils/simd/simd_32.h +++ b/src/lib/utils/simd/simd_32.h @@ -282,6 +282,64 @@ class SIMD_4x32 final #endif } + + /* + Return rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3), + evaluated separately for each of the four 32-bit elements of this register; + the object itself is not modified and the combined value is returned in a new SIMD_4x32. + + The SSE2 and NEON branches form every right-rotation as (x << (32 - rot)) | (x >> rot); + the AltiVec branch rotates left by (32 - rot) with vec_rl; the fallback branch + calls Botan::rotate_right on each scalar element. + + NOTE(review): the shift-based branches assume each rot is in 1..31 (rot == 0 would + request a shift by 32), and the NEON vshlq_n_u32 / vshrq_n_u32 forms are documented + as taking immediate shift counts - confirm callers only pass suitable constants. + */ + SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const + { + SIMD_4x32 res; + +#if defined(BOTAN_SIMD_USE_SSE2) + + res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)), + _mm_srli_epi32(m_sse, static_cast<int>(rot1))); + res.m_sse = _mm_xor_si128( + res.m_sse, + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)), + _mm_srli_epi32(m_sse, static_cast<int>(rot2)))); + res.m_sse = _mm_xor_si128( + res.m_sse, + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)), + _mm_srli_epi32(m_sse, static_cast<int>(rot3)))); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + + const unsigned int r1 = static_cast<unsigned int>(32-rot1); + const unsigned int r2 = static_cast<unsigned int>(32-rot2); + const unsigned int r3 = static_cast<unsigned int>(32-rot3); + res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1}); + res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2})); + res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3})); + +#elif defined(BOTAN_SIMD_USE_NEON) + res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot1)), + vshrq_n_u32(m_neon, static_cast<int>(rot1))); + + res.m_neon = veorq_u32( + res.m_neon, + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot2)), + vshrq_n_u32(m_neon, static_cast<int>(rot2)))); + + res.m_neon = veorq_u32( + res.m_neon, + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot3)), + vshrq_n_u32(m_neon, static_cast<int>(rot3)))); + +#else + + for(size_t i = 0; i != 4; ++i) + { + res.m_scalar[i] = + Botan::rotate_right(m_scalar[i], rot1) ^ + Botan::rotate_right(m_scalar[i], rot2) ^ + 
Botan::rotate_right(m_scalar[i], rot3); + } +#endif + + return res; + } + /** * Rotate each element of SIMD register n bits left */ |