diff options
author | Jack Lloyd <[email protected]> | 2018-08-26 01:45:51 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2019-11-14 12:26:12 -0500 |
commit | 1fb135fe711f081a72ace510c8dd8f8439e9950e (patch) | |
tree | 2cc1087bbc74ee4b3cdc9801bbbd8bfb5fc230d7 /src/lib/utils/simd/simd_avx2 | |
parent | 292330e493547a49484b173bb14a674fee88ad2d (diff) |
Add SHACAL2 AVX2
About 2x faster on Skylake
Diffstat (limited to 'src/lib/utils/simd/simd_avx2')
-rw-r--r-- | src/lib/utils/simd/simd_avx2/simd_avx2.h | 38 |
1 file changed, 37 insertions, 1 deletion
diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h index 91fce86d6..3606bed8b 100644 --- a/src/lib/utils/simd/simd_avx2/simd_avx2.h +++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h @@ -105,6 +105,18 @@ class SIMD_8x32 final return this->rotl<32-ROT>(); } + template<size_t ROT1, size_t ROT2, size_t ROT3> + SIMD_8x32 rho() const + { + SIMD_8x32 res; + + const SIMD_8x32 rot1 = this->rotr<ROT1>(); + const SIMD_8x32 rot2 = this->rotr<ROT2>(); + const SIMD_8x32 rot3 = this->rotr<ROT3>(); + + return rot1 ^ rot2 ^ rot3; + } + SIMD_8x32 operator+(const SIMD_8x32& other) const { SIMD_8x32 retval(*this); @@ -228,6 +240,21 @@ class SIMD_8x32 final } BOTAN_FUNC_ISA("avx2") + static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, + SIMD_8x32& B2, SIMD_8x32& B3, + SIMD_8x32& B4, SIMD_8x32& B5, + SIMD_8x32& B6, SIMD_8x32& B7) + { + transpose(B0, B1, B2, B3); + transpose(B4, B5, B6, B7); + + swap_tops(B0, B4); + swap_tops(B1, B5); + swap_tops(B2, B6); + swap_tops(B3, B7); + } + + BOTAN_FUNC_ISA("avx2") static void reset_registers() { _mm256_zeroupper(); @@ -241,10 +268,19 @@ class SIMD_8x32 final __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; } + BOTAN_FUNC_ISA("avx2") + SIMD_8x32(__m256i x) : m_avx2(x) {} + private: BOTAN_FUNC_ISA("avx2") - SIMD_8x32(__m256i x) : m_avx2(x) {} + static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) + { + SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4)); + SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4)); + A = T0; + B = T1; + } __m256i m_avx2; }; |