aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/utils/simd/simd_avx2
diff options
context:
space:
mode:
author: Jack Lloyd <[email protected]> 2018-08-26 01:45:51 -0400
committer: Jack Lloyd <[email protected]> 2019-11-14 12:26:12 -0500
commit: 1fb135fe711f081a72ace510c8dd8f8439e9950e (patch)
tree: 2cc1087bbc74ee4b3cdc9801bbbd8bfb5fc230d7 /src/lib/utils/simd/simd_avx2
parent: 292330e493547a49484b173bb14a674fee88ad2d (diff)
Add SHACAL2 AVX2
About 2x faster on Skylake
Diffstat (limited to 'src/lib/utils/simd/simd_avx2')
-rw-r--r-- src/lib/utils/simd/simd_avx2/simd_avx2.h 38
1 file changed, 37 insertions, 1 deletion
diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h
index 91fce86d6..3606bed8b 100644
--- a/src/lib/utils/simd/simd_avx2/simd_avx2.h
+++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h
@@ -105,6 +105,18 @@ class SIMD_8x32 final
return this->rotl<32-ROT>();
}
+ // Returns rotr<ROT1>() ^ rotr<ROT2>() ^ rotr<ROT3>() of this vector.
+ // The three rotations are independent and all read the same unmodified input.
+ template<size_t ROT1, size_t ROT2, size_t ROT3>
+ SIMD_8x32 rho() const
+ {
+ const SIMD_8x32 rot1 = this->rotr<ROT1>();
+ const SIMD_8x32 rot2 = this->rotr<ROT2>();
+ const SIMD_8x32 rot3 = this->rotr<ROT3>();
+
+ return rot1 ^ rot2 ^ rot3;
+ }
+
SIMD_8x32 operator+(const SIMD_8x32& other) const
{
SIMD_8x32 retval(*this);
@@ -228,6 +240,21 @@ class SIMD_8x32 final
}
BOTAN_FUNC_ISA("avx2")
+ static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, // eight-register overload; all arguments are updated in place
+ SIMD_8x32& B2, SIMD_8x32& B3,
+ SIMD_8x32& B4, SIMD_8x32& B5,
+ SIMD_8x32& B6, SIMD_8x32& B7)
+ {
+ transpose(B0, B1, B2, B3); // four-argument overload applied to the first group
+ transpose(B4, B5, B6, B7); // ... and independently to the second group
+ // Then pair register i of the first group with register i of the second and exchange 128-bit halves.
+ swap_tops(B0, B4);
+ swap_tops(B1, B5);
+ swap_tops(B2, B6);
+ swap_tops(B3, B7);
+ }
+
+ BOTAN_FUNC_ISA("avx2")
static void reset_registers()
{
_mm256_zeroupper();
@@ -241,10 +268,19 @@ class SIMD_8x32 final
__m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; } // returns the wrapped __m256i by value
+ BOTAN_FUNC_ISA("avx2")
+ SIMD_8x32(__m256i x) : m_avx2(x) {} // wraps a raw __m256i; non-explicit, so swap_tops can copy-initialise from an intrinsic result
+
private:
BOTAN_FUNC_ISA("avx2")
- SIMD_8x32(__m256i x) : m_avx2(x) {}
+ static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) // on return: A = (old A low 128, old B low 128), B = (old A high 128, old B high 128)
+ {
+ SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4)); // imm 0x20: low half <- A low, high half <- B low
+ SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4)); // imm 0x31: low half <- A high, high half <- B high
+ A = T0; // both permutes were computed into temporaries first, so they read the original A and B
+ B = T1;
+ }
__m256i m_avx2;
};