author | Jack Lloyd <[email protected]> | 2018-08-24 16:36:46 -0400
---|---|---
committer | Jack Lloyd <[email protected]> | 2018-08-24 17:08:39 -0400
commit | 10e6b25691312c97219261a6888cd9d880520a64 (patch) |
tree | bdfd6b817a4d3c3a07f11d6852e425ab40e6982f /src |
parent | 9b1fb9ddf8a538517e5fc6f972e82d596813055b (diff) |
Move AVX2 wrapper to utils
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/block/serpent/serpent_avx2/info.txt | 9
-rw-r--r-- | src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp | 254
-rw-r--r-- | src/lib/utils/simd/simd_avx2/info.txt | 16
-rw-r--r-- | src/lib/utils/simd/simd_avx2/simd_avx2.h | 198
4 files changed, 218 insertions, 259 deletions
diff --git a/src/lib/block/serpent/serpent_avx2/info.txt b/src/lib/block/serpent/serpent_avx2/info.txt
index 8c1ac19ef..8225e63a3 100644
--- a/src/lib/block/serpent/serpent_avx2/info.txt
+++ b/src/lib/block/serpent/serpent_avx2/info.txt
@@ -4,9 +4,6 @@ SERPENT_AVX2 -> 20180824
 
 need_isa avx2
 
-<cc>
-gcc
-clang
-msvc
-icc
-</cc>
+<requires>
+simd_avx2
+</requires>
diff --git a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
index 846dd7fae..4e4420d58 100644
--- a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
+++ b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
@@ -6,262 +6,10 @@
 
 #include <botan/serpent.h>
 #include <botan/internal/serpent_sbox.h>
-#include <immintrin.h>
+#include <botan/internal/simd_avx2.h>
 
 namespace Botan {
 
-namespace {
-
-class SIMD_8x32 final
-   {
-   public:
-
-      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
-      SIMD_8x32(const SIMD_8x32& other) = default;
-
-#if !defined(BOTAN_BUILD_COMPILER_IS_MSVC_2013)
-      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
-      SIMD_8x32(SIMD_8x32&& other) = default;
-#endif
-
-      SIMD_8x32()
-         {
-         m_avx2 = _mm256_setzero_si256();
-         }
-
-      /**
-      * Load SIMD register with 8 32-bit elements
-      */
-      explicit SIMD_8x32(const uint32_t B[8])
-         {
-         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
-         }
-
-      /**
-      * Load SIMD register with one 32-bit element repeated
-      */
-      static SIMD_8x32 splat(uint32_t B)
-         {
-         return SIMD_8x32(_mm256_set1_epi32(B));
-         }
-
-      /**
-      * Load a SIMD register with little-endian convention
-      */
-      static SIMD_8x32 load_le(const uint8_t* in)
-         {
-         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
-         }
-
-      /**
-      * Load a SIMD register with big-endian convention
-      */
-      static SIMD_8x32 load_be(const uint8_t* in)
-         {
-         return load_le(in).bswap();
-         }
-
-      /**
-      * Load a SIMD register with little-endian convention
-      */
-      void store_le(uint8_t out[]) const
-         {
-         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
-         }
-
-      /**
-      * Load a SIMD register with big-endian convention
-      */
-      void store_be(uint8_t out[]) const
-         {
-         bswap().store_le(out);
-         }
-
-      /**
-      * Left rotation by a compile time constant
-      */
-      template<size_t ROT>
-      SIMD_8x32 rotl() const
-         {
-         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
-
-         return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
-                                          _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
-         }
-
-      /**
-      * Right rotation by a compile time constant
-      */
-      template<size_t ROT>
-      SIMD_8x32 rotr() const
-         {
-         return this->rotl<32-ROT>();
-         }
-
-      /**
-      * Add elements of a SIMD vector
-      */
-      SIMD_8x32 operator+(const SIMD_8x32& other) const
-         {
-         SIMD_8x32 retval(*this);
-         retval += other;
-         return retval;
-         }
-
-      /**
-      * Subtract elements of a SIMD vector
-      */
-      SIMD_8x32 operator-(const SIMD_8x32& other) const
-         {
-         SIMD_8x32 retval(*this);
-         retval -= other;
-         return retval;
-         }
-
-      /**
-      * XOR elements of a SIMD vector
-      */
-      SIMD_8x32 operator^(const SIMD_8x32& other) const
-         {
-         SIMD_8x32 retval(*this);
-         retval ^= other;
-         return retval;
-         }
-
-      /**
-      * Binary OR elements of a SIMD vector
-      */
-      SIMD_8x32 operator|(const SIMD_8x32& other) const
-         {
-         SIMD_8x32 retval(*this);
-         retval |= other;
-         return retval;
-         }
-
-      /**
-      * Binary AND elements of a SIMD vector
-      */
-      SIMD_8x32 operator&(const SIMD_8x32& other) const
-         {
-         SIMD_8x32 retval(*this);
-         retval &= other;
-         return retval;
-         }
-
-      void operator+=(const SIMD_8x32& other)
-         {
-         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
-         }
-
-      void operator-=(const SIMD_8x32& other)
-         {
-         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
-         }
-
-      void operator^=(const SIMD_8x32& other)
-         {
-         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
-         }
-
-      void operator|=(const SIMD_8x32& other)
-         {
-         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
-         }
-
-      void operator&=(const SIMD_8x32& other)
-         {
-         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
-         }
-
-      template<int SHIFT> SIMD_8x32 shl() const
-         {
-         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
-         }
-
-      template<int SHIFT> SIMD_8x32 shr() const
-         {
-         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
-         }
-
-      SIMD_8x32 operator~() const
-         {
-         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
-         }
-
-      // (~reg) & other
-      SIMD_8x32 andc(const SIMD_8x32& other) const
-         {
-         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
-         }
-
-      /**
-      * Return copy *this with each word byte swapped
-      */
-      SIMD_8x32 bswap() const
-         {
-         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
-                                          7, 6, 5, 4,
-                                          11, 10, 9, 8,
-                                          15, 14, 13, 12,
-                                          19, 18, 17, 16,
-                                          23, 22, 21, 20,
-                                          27, 26, 25, 24,
-                                          31, 30, 29, 28 };
-
-         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
-
-         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
-
-         return SIMD_8x32(output);
-         }
-
-      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
-                            SIMD_8x32& B2, SIMD_8x32& B3)
-         {
-         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
-         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
-         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
-         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
-
-         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
-         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
-         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
-         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
-         }
-
-      static void transpose_out(SIMD_8x32& B0, SIMD_8x32& B1,
-                                SIMD_8x32& B2, SIMD_8x32& B3)
-         {
-         /*
-         SIMD_4x32::transpose(B0.m_lo, B1.m_lo, B2.m_lo, B3.m_lo);
-         SIMD_4x32::transpose(B0.m_hi, B1.m_hi, B2.m_hi, B3.m_hi);
-         SIMD_8x32 T0 = SIMD_8x32(B0.m_lo, B1.m_lo);
-         SIMD_8x32 T1 = SIMD_8x32(B2.m_lo, B3.m_lo);
-         SIMD_8x32 T2 = SIMD_8x32(B0.m_hi, B1.m_hi);
-         SIMD_8x32 T3 = SIMD_8x32(B2.m_hi, B3.m_hi);
-
-         B0 = T0;
-         B1 = T1;
-         B2 = T2;
-         B3 = T3;
-         */
-         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
-         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
-         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
-         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
-
-         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
-         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
-         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
-         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
-         }
-   private:
-      SIMD_8x32(__m256i x) : m_avx2(x) {}
-
-      __m256i m_avx2;
-   };
-
-}
 
 #define key_xor(round, B0, B1, B2, B3) \
    do { \
diff --git a/src/lib/utils/simd/simd_avx2/info.txt b/src/lib/utils/simd/simd_avx2/info.txt
new file mode 100644
index 000000000..e3d043a12
--- /dev/null
+++ b/src/lib/utils/simd/simd_avx2/info.txt
@@ -0,0 +1,16 @@
+<defines>
+SIMD_AVX2 -> 20180824
+</defines>
+
+need_isa avx2
+
+<header:internal>
+simd_avx2.h
+</header:internal>
+
+<cc>
+gcc
+clang
+msvc
+icc
+</cc>
diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h
new file mode 100644
index 000000000..19f930854
--- /dev/null
+++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h
@@ -0,0 +1,198 @@
+/*
+* (C) 2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SIMD_AVX2_H_
+#define BOTAN_SIMD_AVX2_H_
+
+#include <botan/types.h>
+#include <immintrin.h>
+
+namespace Botan {
+
+class SIMD_8x32 final
+   {
+   public:
+
+      SIMD_8x32& operator=(const SIMD_8x32& other) = default;
+      SIMD_8x32(const SIMD_8x32& other) = default;
+
+#if !defined(BOTAN_BUILD_COMPILER_IS_MSVC_2013)
+      SIMD_8x32& operator=(SIMD_8x32&& other) = default;
+      SIMD_8x32(SIMD_8x32&& other) = default;
+#endif
+
+      SIMD_8x32()
+         {
+         m_avx2 = _mm256_setzero_si256();
+         }
+
+      explicit SIMD_8x32(const uint32_t B[8])
+         {
+         m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
+         }
+
+      static SIMD_8x32 splat(uint32_t B)
+         {
+         return SIMD_8x32(_mm256_set1_epi32(B));
+         }
+
+      static SIMD_8x32 load_le(const uint8_t* in)
+         {
+         return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
+         }
+
+      static SIMD_8x32 load_be(const uint8_t* in)
+         {
+         return load_le(in).bswap();
+         }
+
+      void store_le(uint8_t out[]) const
+         {
+         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
+         }
+
+      void store_be(uint8_t out[]) const
+         {
+         bswap().store_le(out);
+         }
+
+      template<size_t ROT>
+      SIMD_8x32 rotl() const
+         {
+         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
+
+         return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
+                                          _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
+         }
+
+      template<size_t ROT>
+      SIMD_8x32 rotr() const
+         {
+         return this->rotl<32-ROT>();
+         }
+
+      SIMD_8x32 operator+(const SIMD_8x32& other) const
+         {
+         SIMD_8x32 retval(*this);
+         retval += other;
+         return retval;
+         }
+
+      SIMD_8x32 operator-(const SIMD_8x32& other) const
+         {
+         SIMD_8x32 retval(*this);
+         retval -= other;
+         return retval;
+         }
+
+      SIMD_8x32 operator^(const SIMD_8x32& other) const
+         {
+         SIMD_8x32 retval(*this);
+         retval ^= other;
+         return retval;
+         }
+
+      SIMD_8x32 operator|(const SIMD_8x32& other) const
+         {
+         SIMD_8x32 retval(*this);
+         retval |= other;
+         return retval;
+         }
+
+      SIMD_8x32 operator&(const SIMD_8x32& other) const
+         {
+         SIMD_8x32 retval(*this);
+         retval &= other;
+         return retval;
+         }
+
+      void operator+=(const SIMD_8x32& other)
+         {
+         m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
+         }
+
+      void operator-=(const SIMD_8x32& other)
+         {
+         m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
+         }
+
+      void operator^=(const SIMD_8x32& other)
+         {
+         m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
+         }
+
+      void operator|=(const SIMD_8x32& other)
+         {
+         m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
+         }
+
+      void operator&=(const SIMD_8x32& other)
+         {
+         m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
+         }
+
+      template<int SHIFT> SIMD_8x32 shl() const
+         {
+         return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
+         }
+
+      template<int SHIFT> SIMD_8x32 shr() const
+         {
+         return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
+         }
+
+      SIMD_8x32 operator~() const
+         {
+         return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
+         }
+
+      // (~reg) & other
+      SIMD_8x32 andc(const SIMD_8x32& other) const
+         {
+         return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
+         }
+
+      SIMD_8x32 bswap() const
+         {
+         const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
+                                          7, 6, 5, 4,
+                                          11, 10, 9, 8,
+                                          15, 14, 13, 12,
+                                          19, 18, 17, 16,
+                                          23, 22, 21, 20,
+                                          27, 26, 25, 24,
+                                          31, 30, 29, 28 };
+
+         const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
+
+         const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
+
+         return SIMD_8x32(output);
+         }
+
+      static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
+                            SIMD_8x32& B2, SIMD_8x32& B3)
+         {
+         const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
+         const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
+         const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
+         const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
+
+         B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
+         B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
+         B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
+         B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
+         }
+
+   private:
+      SIMD_8x32(__m256i x) : m_avx2(x) {}
+
+      __m256i m_avx2;
+   };
+
+}
+
+#endif
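As a quick illustration of what this relocation enables, here is a minimal sketch of how some other in-tree AVX2 routine could now pick up the wrapper from botan/internal/simd_avx2.h rather than redefining it locally. It only uses operations visible in the header above (load_le, splat, rotl/rotr, the compound operators, transpose, store_le); the function name blockwise_mix and the 128-byte buffer size are illustrative assumptions, not part of this commit, and a real caller would also be compiled with AVX2 enabled (per the module's need_isa avx2) and selected behind a runtime CPU feature check.

```cpp
// Hypothetical in-tree caller of the relocated SIMD_8x32 wrapper.
// Assumes this translation unit is built with AVX2 enabled (e.g. -mavx2),
// matching the need_isa avx2 declaration of the new simd_avx2 module.
#include <botan/internal/simd_avx2.h>
#include <cstdint>

// Illustrative 8-way mixing routine over 128 bytes (eight 16-byte blocks),
// following the load/transpose/work/transpose/store pattern used by the
// Serpent AVX2 code this wrapper was extracted from.
void blockwise_mix(const uint8_t in[128], uint8_t out[128])
   {
   Botan::SIMD_8x32 B0 = Botan::SIMD_8x32::load_le(in);
   Botan::SIMD_8x32 B1 = Botan::SIMD_8x32::load_le(in + 32);
   Botan::SIMD_8x32 B2 = Botan::SIMD_8x32::load_le(in + 64);
   Botan::SIMD_8x32 B3 = Botan::SIMD_8x32::load_le(in + 96);

   // After the transpose each register holds the same 32-bit word position
   // from all eight blocks, so one SIMD operation touches every block at once.
   Botan::SIMD_8x32::transpose(B0, B1, B2, B3);

   // Arbitrary mixing steps, just to exercise the rotate/XOR/add operators
   B0 ^= B1.rotl<7>();
   B2 += B3.rotr<3>();
   B1 ^= Botan::SIMD_8x32::splat(0x9E3779B9);

   // The 4x4 per-lane word transpose is its own inverse, so applying it
   // again restores the original block layout before storing.
   Botan::SIMD_8x32::transpose(B0, B1, B2, B3);

   B0.store_le(out);
   B1.store_le(out + 32);
   B2.store_le(out + 64);
   B3.store_le(out + 96);
   }
```

Note that simd_avx2.h is listed under <header:internal>, so it is only visible to other library modules during the build; code outside the library tree cannot include it.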