aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2022-02-12 09:55:39 -0500
committerJack Lloyd <[email protected]>2022-02-12 09:55:39 -0500
commit04daa2acae41c4f3922aa35ccb83e7bdf13baaea (patch)
tree9e2c82a6e30f49ec8966fca2d21b6aac683bb4ff
parent96d5b36605f42ff110add2f75cb14e918022168f (diff)
Remove macro usage from Serpent
Also consolidate the implementation of the linear operations. Interestingly, this change allows GCC 11 to auto-vectorize the baseline version on its own.
-rw-r--r--src/lib/block/serpent/serpent.cpp53
-rw-r--r--src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp57
-rw-r--r--src/lib/block/serpent/serpent_sbox.h69
-rw-r--r--src/lib/block/serpent/serpent_simd/serpent_simd.cpp53
-rw-r--r--src/lib/utils/simd/simd_32.h24
-rw-r--r--src/lib/utils/simd/simd_avx2/simd_avx2.h25
6 files changed, 146 insertions, 135 deletions
diff --git a/src/lib/block/serpent/serpent.cpp b/src/lib/block/serpent/serpent.cpp
index 7c4384d3d..bfdfea7c7 100644
--- a/src/lib/block/serpent/serpent.cpp
+++ b/src/lib/block/serpent/serpent.cpp
@@ -16,48 +16,13 @@
namespace Botan {
-namespace {
-
-/*
-* Serpent's Linear Transform
-*/
-inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
- {
- B0 = rotl<13>(B0); B2 = rotl<3>(B2);
- B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3);
- B1 = rotl<1>(B1); B3 = rotl<7>(B3);
- B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7);
- B0 = rotl<5>(B0); B2 = rotl<22>(B2);
- }
-
-/*
-* Serpent's Inverse Linear Transform
-*/
-inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
- {
- B2 = rotr<22>(B2); B0 = rotr<5>(B0);
- B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3;
- B3 = rotr<7>(B3); B1 = rotr<1>(B1);
- B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2;
- B2 = rotr<3>(B2); B0 = rotr<13>(B0);
- }
-
-}
-
-/*
-* XOR a key block with a data block
-*/
-#define key_xor(round, B0, B1, B2, B3) \
- B0 ^= m_round_key[4*round ]; \
- B1 ^= m_round_key[4*round+1]; \
- B2 ^= m_round_key[4*round+2]; \
- B3 ^= m_round_key[4*round+3];
-
/*
* Serpent Encryption
*/
void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
+ using namespace Botan::Serpent_F;
+
verify_key_set(m_round_key.empty() == false);
#if defined(BOTAN_HAS_SERPENT_AVX2)
@@ -86,6 +51,8 @@ void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+ const Key_Inserter key_xor(m_round_key.data());
+
BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i)
{
uint32_t B0, B1, B2, B3;
@@ -133,6 +100,8 @@ void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
*/
void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
+ using namespace Botan::Serpent_F;
+
verify_key_set(m_round_key.empty() == false);
#if defined(BOTAN_HAS_SERPENT_AVX2)
@@ -161,6 +130,8 @@ void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+ const Key_Inserter key_xor(m_round_key.data());
+
BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i)
{
uint32_t B0, B1, B2, B3;
@@ -203,15 +174,13 @@ void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
}
-#undef key_xor
-#undef transform
-#undef i_transform
-
/*
* Serpent Key Schedule
*/
void Serpent::key_schedule(const uint8_t key[], size_t length)
{
+ using namespace Botan::Serpent_F;
+
const uint32_t PHI = 0x9E3779B9;
secure_vector<uint32_t> W(140);
@@ -294,6 +263,4 @@ std::string Serpent::provider() const
return "base";
}
-#undef key_xor
-
}
diff --git a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
index 028ee77e5..03629a890 100644
--- a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
+++ b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
@@ -5,54 +5,16 @@
*/
#include <botan/internal/serpent.h>
-#include <botan/internal/serpent_sbox.h>
#include <botan/internal/simd_avx2.h>
+#include <botan/internal/serpent_sbox.h>
namespace Botan {
-
-#define key_xor(round, B0, B1, B2, B3) \
- do { \
- B0 ^= SIMD_8x32::splat(m_round_key[4*round ]); \
- B1 ^= SIMD_8x32::splat(m_round_key[4*round+1]); \
- B2 ^= SIMD_8x32::splat(m_round_key[4*round+2]); \
- B3 ^= SIMD_8x32::splat(m_round_key[4*round+3]); \
- } while(0)
-
-/*
-* Serpent's linear transformations
-*/
-#define transform(B0, B1, B2, B3) \
- do { \
- B0 = B0.rotl<13>(); \
- B2 = B2.rotl<3>(); \
- B1 ^= B0 ^ B2; \
- B3 ^= B2 ^ B0.shl<3>(); \
- B1 = B1.rotl<1>(); \
- B3 = B3.rotl<7>(); \
- B0 ^= B1 ^ B3; \
- B2 ^= B3 ^ B1.shl<7>(); \
- B0 = B0.rotl<5>(); \
- B2 = B2.rotl<22>(); \
- } while(0)
-
-#define i_transform(B0, B1, B2, B3) \
- do { \
- B2 = B2.rotr<22>(); \
- B0 = B0.rotr<5>(); \
- B2 ^= B3 ^ B1.shl<7>(); \
- B0 ^= B1 ^ B3; \
- B3 = B3.rotr<7>(); \
- B1 = B1.rotr<1>(); \
- B3 ^= B2 ^ B0.shl<3>(); \
- B1 ^= B0 ^ B2; \
- B2 = B2.rotr<3>(); \
- B0 = B0.rotr<13>(); \
- } while(0)
-
BOTAN_FUNC_ISA("avx2")
void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
{
+ using namespace Botan::Serpent_F;
+
SIMD_8x32::reset_registers();
SIMD_8x32 B0 = SIMD_8x32::load_le(in);
@@ -62,6 +24,8 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
SIMD_8x32::transpose(B0, B1, B2, B3);
+ const Key_Inserter key_xor(m_round_key.data());
+
key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -70,6 +34,7 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -78,6 +43,7 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -86,6 +52,7 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -107,6 +74,8 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
BOTAN_FUNC_ISA("avx2")
void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const
{
+ using namespace Botan::Serpent_F;
+
SIMD_8x32::reset_registers();
SIMD_8x32 B0 = SIMD_8x32::load_le(in);
@@ -116,6 +85,8 @@ void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const
SIMD_8x32::transpose(B0, B1, B2, B3);
+ const Key_Inserter key_xor(m_round_key.data());
+
key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
@@ -162,8 +133,4 @@ void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const
SIMD_8x32::zero_registers();
}
-#undef key_xor
-#undef transform
-#undef i_transform
-
}
diff --git a/src/lib/block/serpent/serpent_sbox.h b/src/lib/block/serpent/serpent_sbox.h
index 31471e724..f37cbb4dc 100644
--- a/src/lib/block/serpent/serpent_sbox.h
+++ b/src/lib/block/serpent/serpent_sbox.h
@@ -1,5 +1,4 @@
/*
-* Serpent SBox Expressions
* (C) 1999-2007,2013 Jack Lloyd
*
* The sbox expressions used here were discovered by Dag Arne Osvik and
@@ -8,11 +7,13 @@
* Botan is released under the Simplified BSD License (see license.txt)
*/
-#ifndef BOTAN_SERPENT_SBOX_H_
-#define BOTAN_SERPENT_SBOX_H_
+#ifndef BOTAN_SERPENT_FUNCS_H_
+#define BOTAN_SERPENT_FUNCS_H_
#include <botan/build.h>
+namespace Botan::Serpent_F {
+
template<typename T>
BOTAN_FORCE_INLINE void SBoxE0(T& a, T& b, T& c, T& d)
{
@@ -443,4 +444,66 @@ BOTAN_FORCE_INLINE void SBoxD7(T& a, T& b, T& c, T& d)
d = t0;
}
+template<size_t S>
+BOTAN_FORCE_INLINE uint32_t shl(uint32_t v)
+ {
+ return v << S;
+ }
+
+/*
+* Serpent's Linear Transform
+*/
+template<typename T>
+BOTAN_FORCE_INLINE void transform(T& B0, T& B1, T& B2, T& B3)
+ {
+ B0 = rotl<13>(B0);
+ B2 = rotl<3>(B2);
+ B1 ^= B0 ^ B2;
+ B3 ^= B2 ^ shl<3>(B0);
+ B1 = rotl<1>(B1);
+ B3 = rotl<7>(B3);
+ B0 ^= B1 ^ B3;
+ B2 ^= B3 ^ shl<7>(B1);
+ B0 = rotl<5>(B0);
+ B2 = rotl<22>(B2);
+ }
+
+/*
+* Serpent's Inverse Linear Transform
+*/
+template<typename T>
+BOTAN_FORCE_INLINE void i_transform(T& B0, T& B1, T& B2, T& B3)
+ {
+ B2 = rotr<22>(B2);
+ B0 = rotr<5>(B0);
+ B2 ^= B3 ^ shl<7>(B1);
+ B0 ^= B1 ^ B3;
+ B3 = rotr<7>(B3);
+ B1 = rotr<1>(B1);
+ B3 ^= B2 ^ shl<3>(B0);
+ B1 ^= B0 ^ B2;
+ B2 = rotr<3>(B2);
+ B0 = rotr<13>(B0);
+ }
+
+class Key_Inserter
+ {
+ public:
+ Key_Inserter(const uint32_t* RK) : m_RK(RK) {}
+
+ template<typename T>
+ inline void operator()(size_t R, T& B0, T& B1, T& B2, T& B3) const
+ {
+ B0 ^= m_RK[4*R ];
+ B1 ^= m_RK[4*R+1];
+ B2 ^= m_RK[4*R+2];
+ B3 ^= m_RK[4*R+3];
+ }
+
+ private:
+ const uint32_t* m_RK;
+ };
+
+}
+
#endif
diff --git a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
index 5d1bc28f2..1a3c85568 100644
--- a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
+++ b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
@@ -6,55 +6,18 @@
*/
#include <botan/internal/serpent.h>
-#include <botan/internal/serpent_sbox.h>
#include <botan/internal/simd_32.h>
+#include <botan/internal/serpent_sbox.h>
namespace Botan {
-#define key_xor(round, B0, B1, B2, B3) \
- do { \
- B0 ^= SIMD_4x32::splat(m_round_key[4*round ]); \
- B1 ^= SIMD_4x32::splat(m_round_key[4*round+1]); \
- B2 ^= SIMD_4x32::splat(m_round_key[4*round+2]); \
- B3 ^= SIMD_4x32::splat(m_round_key[4*round+3]); \
- } while(0)
-
-/*
-* Serpent's linear transformations
-*/
-#define transform(B0, B1, B2, B3) \
- do { \
- B0 = B0.rotl<13>(); \
- B2 = B2.rotl<3>(); \
- B1 ^= B0 ^ B2; \
- B3 ^= B2 ^ B0.shl<3>(); \
- B1 = B1.rotl<1>(); \
- B3 = B3.rotl<7>(); \
- B0 ^= B1 ^ B3; \
- B2 ^= B3 ^ B1.shl<7>(); \
- B0 = B0.rotl<5>(); \
- B2 = B2.rotl<22>(); \
- } while(0)
-
-#define i_transform(B0, B1, B2, B3) \
- do { \
- B2 = B2.rotr<22>(); \
- B0 = B0.rotr<5>(); \
- B2 ^= B3 ^ B1.shl<7>(); \
- B0 ^= B1 ^ B3; \
- B3 = B3.rotr<7>(); \
- B1 = B1.rotr<1>(); \
- B3 ^= B2 ^ B0.shl<3>(); \
- B1 ^= B0 ^ B2; \
- B2 = B2.rotr<3>(); \
- B0 = B0.rotr<13>(); \
- } while(0)
-
/*
* SIMD Serpent Encryption of 4 blocks in parallel
*/
void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const
{
+ using namespace Botan::Serpent_F;
+
SIMD_4x32 B0 = SIMD_4x32::load_le(in);
SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16);
SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32);
@@ -62,6 +25,8 @@ void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const
SIMD_4x32::transpose(B0, B1, B2, B3);
+ const Key_Inserter key_xor(m_round_key.data());
+
key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -111,6 +76,8 @@ void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const
*/
void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const
{
+ using namespace Botan::Serpent_F;
+
SIMD_4x32 B0 = SIMD_4x32::load_le(in);
SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16);
SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32);
@@ -118,6 +85,8 @@ void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const
SIMD_4x32::transpose(B0, B1, B2, B3);
+ const Key_Inserter key_xor(m_round_key.data());
+
key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
@@ -162,8 +131,4 @@ void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const
B3.store_le(out + 48);
}
-#undef key_xor
-#undef transform
-#undef i_transform
-
}
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h
index 78018d864..8704bd631 100644
--- a/src/lib/utils/simd/simd_32.h
+++ b/src/lib/utils/simd/simd_32.h
@@ -421,6 +421,11 @@ class SIMD_4x32 final
#endif
}
+ void operator^=(uint32_t other)
+ {
+ *this ^= SIMD_4x32::splat(other);
+ }
+
void operator|=(const SIMD_4x32& other)
{
#if defined(BOTAN_SIMD_USE_SSE2)
@@ -648,6 +653,25 @@ class SIMD_4x32 final
native_simd_type m_simd;
};
+template<size_t R>
+inline SIMD_4x32 rotl(SIMD_4x32 input)
+ {
+ return input.rotl<R>();
+ }
+
+template<size_t R>
+inline SIMD_4x32 rotr(SIMD_4x32 input)
+ {
+ return input.rotr<R>();
+ }
+
+// For Serpent:
+template<size_t S>
+inline SIMD_4x32 shl(SIMD_4x32 input)
+ {
+ return input.shl<S>();
+ }
+
}
#endif
diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h
index 566dfd0a8..f90b6618b 100644
--- a/src/lib/utils/simd/simd_avx2/simd_avx2.h
+++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h
@@ -184,6 +184,12 @@ class SIMD_8x32 final
}
BOTAN_FUNC_ISA("avx2")
+ void operator^=(uint32_t other)
+ {
+ *this ^= SIMD_8x32::splat(other);
+ }
+
+ BOTAN_FUNC_ISA("avx2")
void operator|=(const SIMD_8x32& other)
{
m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
@@ -318,6 +324,25 @@ class SIMD_8x32 final
__m256i m_avx2;
};
+template<size_t R>
+inline SIMD_8x32 rotl(SIMD_8x32 input)
+ {
+ return input.rotl<R>();
+ }
+
+template<size_t R>
+inline SIMD_8x32 rotr(SIMD_8x32 input)
+ {
+ return input.rotr<R>();
+ }
+
+// For Serpent:
+template<size_t S>
+inline SIMD_8x32 shl(SIMD_8x32 input)
+ {
+ return input.shl<S>();
+ }
+
}
#endif