Fix SHACAL2 AVX2 inlining

If compiled without -mavx2 (eg in amalgamation) things would not be inlined correctly
author: Jack Lloyd <[email protected]> 2020-03-03 08:30:34 -0500
committer: Jack Lloyd <[email protected]> 2020-03-03 08:30:34 -0500
commit: 06fecb0da49bbddc188136278442cda3db7b7269 (patch)
tree: 151c70f5384078472172abed39398081845aac74
parent: 71cd69c8ecb4b031f20662a3451d4e70e43aaa1b (diff)
2 files changed, 22 insertions, 13 deletions
diff --git a/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp b/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp
index ae8cdc327..a465a3828 100644
--- a/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp
+++ b/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp
@@ -11,20 +11,20 @@ namespace Botan {
 
 namespace {
 
-inline
-void SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
-                 const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
-                 uint32_t RK)
+void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2")
+   SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
+               const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
+               uint32_t RK)
    {
    H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK);
    D += H;
    H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
    }
 
-inline
-void SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
-                 const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
-                 uint32_t RK)
+void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2")
+   SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
+               const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
+               uint32_t RK)
    {
    H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
    D -= H;
@@ -33,7 +33,7 @@ void SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIM
 
 }
 
-void SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const
+void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const
    {
    SIMD_8x32::reset_registers();
 
@@ -76,7 +76,7 @@ void SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const
    SIMD_8x32::zero_registers();
    }
 
-void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const
+BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const
    {
    SIMD_8x32::reset_registers();
 
diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h
index 3606bed8b..3498c2ad0 100644
--- a/src/lib/utils/simd/simd_avx2/simd_avx2.h
+++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h
@@ -23,7 +23,7 @@ class SIMD_8x32 final
       SIMD_8x32(SIMD_8x32&& other) = default;
 
       BOTAN_FUNC_ISA("avx2")
-      SIMD_8x32()
+      BOTAN_FORCE_INLINE SIMD_8x32()
          {
          m_avx2 = _mm256_setzero_si256();
          }
@@ -77,6 +77,9 @@ class SIMD_8x32 final
          {
          static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
 
+#if defined(__AVX512VL__)
+         return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
+#else
          BOTAN_IF_CONSTEXPR(ROT == 8)
             {
             const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
@@ -96,6 +99,7 @@ class SIMD_8x32 final
             return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
                                              _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
             }
+#endif
          }
 
       template<size_t ROT>
@@ -106,7 +110,7 @@ class SIMD_8x32 final
          }
 
       template<size_t ROT1, size_t ROT2, size_t ROT3>
-      SIMD_8x32 rho() const
+      SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
          {
          SIMD_8x32 res;
 
@@ -117,6 +121,7 @@ class SIMD_8x32 final
          return rot1 ^ rot2 ^ rot3;
          }
 
+      BOTAN_FUNC_ISA("avx2")
       SIMD_8x32 operator+(const SIMD_8x32& other) const
          {
          SIMD_8x32 retval(*this);
@@ -124,6 +129,7 @@ class SIMD_8x32 final
          return retval;
          }
 
+      BOTAN_FUNC_ISA("avx2")
       SIMD_8x32 operator-(const SIMD_8x32& other) const
          {
          SIMD_8x32 retval(*this);
@@ -131,6 +137,7 @@ class SIMD_8x32 final
          return retval;
          }
 
+      BOTAN_FUNC_ISA("avx2")
       SIMD_8x32 operator^(const SIMD_8x32& other) const
          {
          SIMD_8x32 retval(*this);
@@ -138,6 +145,7 @@ class SIMD_8x32 final
          return retval;
          }
 
+      BOTAN_FUNC_ISA("avx2")
       SIMD_8x32 operator|(const SIMD_8x32& other) const
          {
          SIMD_8x32 retval(*this);
@@ -145,6 +153,7 @@ class SIMD_8x32 final
          return retval;
          }
 
+      BOTAN_FUNC_ISA("avx2")
       SIMD_8x32 operator&(const SIMD_8x32& other) const
          {
          SIMD_8x32 retval(*this);
@@ -187,7 +196,7 @@ class SIMD_8x32 final
          return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
          }
 
-      template<int SHIFT> BOTAN_FUNC_ISA("avx2")SIMD_8x32 shr() const
+      template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
          {
          return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
          }
author	Jack Lloyd <[email protected]>	2020-03-03 08:30:34 -0500
committer	Jack Lloyd <[email protected]>	2020-03-03 08:30:34 -0500
commit	06fecb0da49bbddc188136278442cda3db7b7269 (patch)
tree	151c70f5384078472172abed39398081845aac74
parent	71cd69c8ecb4b031f20662a3451d4e70e43aaa1b (diff)