aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/utils/simd
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-08-13 08:38:26 -0400
committerJack Lloyd <[email protected]>2017-08-13 08:38:26 -0400
commit8804eeece5af90b728e337c7e0877549ea74eec9 (patch)
tree558c692afe04770efc3077bba0f3247e99188231 /src/lib/utils/simd
parent61a7a5757bfe19ff1a686055ab75cae1fc9f485e (diff)
Add SHACAL2 in generic SIMD
Bit over 2x faster on my desktop
Diffstat (limited to 'src/lib/utils/simd')
-rw-r--r--src/lib/utils/simd/simd_32.h58
1 files changed, 58 insertions, 0 deletions
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h
index 01b6a7ef0..def933f4a 100644
--- a/src/lib/utils/simd/simd_32.h
+++ b/src/lib/utils/simd/simd_32.h
@@ -282,6 +282,64 @@ class SIMD_4x32 final
#endif
}
+
+ /**
+ * Compute rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3)
+ * independently for each 32-bit element x of this register.
+ *
+ * NOTE(review): the rotation counts are presumably expected to lie strictly
+ * between 0 and 32 -- confirm against callers and Botan::rotate_right.
+ *
+ * @param rot1 first right-rotation amount, in bits
+ * @param rot2 second right-rotation amount, in bits
+ * @param rot3 third right-rotation amount, in bits
+ * @return a new SIMD_4x32 holding the XOR of the three rotated values;
+ *         this object is not modified
+ */
+ SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const
+ {
+    SIMD_4x32 res;
+
+#if defined(BOTAN_SIMD_USE_SSE2)
+
+    // No rotate instruction is used here: each rotate right by r is
+    // assembled as (x << (32-r)) | (x >> r).
+    res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)),
+                             _mm_srli_epi32(m_sse, static_cast<int>(rot1)));
+    res.m_sse = _mm_xor_si128(
+       res.m_sse,
+       _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)),
+                    _mm_srli_epi32(m_sse, static_cast<int>(rot2))));
+    res.m_sse = _mm_xor_si128(
+       res.m_sse,
+       _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)),
+                    _mm_srli_epi32(m_sse, static_cast<int>(rot3))));
+
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+
+    // vec_rl rotates left, so a rotate right by r is written as a rotate
+    // left by 32-r.
+    const unsigned int r1 = static_cast<unsigned int>(32-rot1);
+    const unsigned int r2 = static_cast<unsigned int>(32-rot2);
+    const unsigned int r3 = static_cast<unsigned int>(32-rot3);
+    res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1});
+    res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2}));
+    res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3}));
+
+#elif defined(BOTAN_SIMD_USE_NEON)
+
+    // The immediate-form shifts (vshlq_n_u32 / vshrq_n_u32) require
+    // compile-time constant counts, which these function parameters are
+    // not. vshlq_u32 instead takes a signed count per lane, where a
+    // negative count shifts right, so it accepts runtime values.
+    const auto rotr = [this](size_t rot)
+       {
+       const int32x4_t left = vdupq_n_s32(static_cast<int>(32-rot));
+       const int32x4_t right = vdupq_n_s32(-static_cast<int>(rot));
+       return vorrq_u32(vshlq_u32(m_neon, left), vshlq_u32(m_neon, right));
+       };
+
+    res.m_neon = veorq_u32(veorq_u32(rotr(rot1), rotr(rot2)), rotr(rot3));
+
+#else
+
+    // Portable fallback: process the four words one at a time.
+    for(size_t i = 0; i != 4; ++i)
+       {
+       res.m_scalar[i] =
+          Botan::rotate_right(m_scalar[i], rot1) ^
+          Botan::rotate_right(m_scalar[i], rot2) ^
+          Botan::rotate_right(m_scalar[i], rot3);
+       }
+#endif
+
+    return res;
+ }
+
+
/**
* Rotate each element of SIMD register n bits left
*/