diff options
author | Jack Lloyd <[email protected]> | 2022-02-12 09:55:39 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2022-02-12 09:55:39 -0500 |
commit | 04daa2acae41c4f3922aa35ccb83e7bdf13baaea (patch) | |
tree | 9e2c82a6e30f49ec8966fca2d21b6aac683bb4ff | |
parent | 96d5b36605f42ff110add2f75cb14e918022168f (diff) |
Remove macro usage from Serpent
Also consolidate the implementation of the linear operations.
Interestingly, this change allows GCC 11 to auto-vectorize the
baseline version on its own.
-rw-r--r-- | src/lib/block/serpent/serpent.cpp | 53 | ||||
-rw-r--r-- | src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp | 57 | ||||
-rw-r--r-- | src/lib/block/serpent/serpent_sbox.h | 69 | ||||
-rw-r--r-- | src/lib/block/serpent/serpent_simd/serpent_simd.cpp | 53 | ||||
-rw-r--r-- | src/lib/utils/simd/simd_32.h | 24 | ||||
-rw-r--r-- | src/lib/utils/simd/simd_avx2/simd_avx2.h | 25 |
6 files changed, 146 insertions, 135 deletions
diff --git a/src/lib/block/serpent/serpent.cpp b/src/lib/block/serpent/serpent.cpp index 7c4384d3d..bfdfea7c7 100644 --- a/src/lib/block/serpent/serpent.cpp +++ b/src/lib/block/serpent/serpent.cpp @@ -16,48 +16,13 @@ namespace Botan { -namespace { - -/* -* Serpent's Linear Transform -*/ -inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) - { - B0 = rotl<13>(B0); B2 = rotl<3>(B2); - B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); - B1 = rotl<1>(B1); B3 = rotl<7>(B3); - B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); - B0 = rotl<5>(B0); B2 = rotl<22>(B2); - } - -/* -* Serpent's Inverse Linear Transform -*/ -inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) - { - B2 = rotr<22>(B2); B0 = rotr<5>(B0); - B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; - B3 = rotr<7>(B3); B1 = rotr<1>(B1); - B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2; - B2 = rotr<3>(B2); B0 = rotr<13>(B0); - } - -} - -/* -* XOR a key block with a data block -*/ -#define key_xor(round, B0, B1, B2, B3) \ - B0 ^= m_round_key[4*round ]; \ - B1 ^= m_round_key[4*round+1]; \ - B2 ^= m_round_key[4*round+2]; \ - B3 ^= m_round_key[4*round+3]; - /* * Serpent Encryption */ void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { + using namespace Botan::Serpent_F; + verify_key_set(m_round_key.empty() == false); #if defined(BOTAN_HAS_SERPENT_AVX2) @@ -86,6 +51,8 @@ void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif + const Key_Inserter key_xor(m_round_key.data()); + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) { uint32_t B0, B1, B2, B3; @@ -133,6 +100,8 @@ void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const */ void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { + using namespace Botan::Serpent_F; + verify_key_set(m_round_key.empty() == false); #if defined(BOTAN_HAS_SERPENT_AVX2) @@ -161,6 +130,8 @@ void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif + const Key_Inserter key_xor(m_round_key.data()); + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) { uint32_t B0, B1, B2, B3; @@ -203,15 +174,13 @@ void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } } -#undef key_xor -#undef transform -#undef i_transform - /* * Serpent Key Schedule */ void Serpent::key_schedule(const uint8_t key[], size_t length) { + using namespace Botan::Serpent_F; + const uint32_t PHI = 0x9E3779B9; secure_vector<uint32_t> W(140); @@ -294,6 +263,4 @@ std::string Serpent::provider() const return "base"; } -#undef key_xor - } diff --git a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp index 028ee77e5..03629a890 100644 --- a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp +++ b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp @@ -5,54 +5,16 @@ */ #include <botan/internal/serpent.h> -#include <botan/internal/serpent_sbox.h> #include <botan/internal/simd_avx2.h> +#include <botan/internal/serpent_sbox.h> namespace Botan { - -#define key_xor(round, B0, B1, B2, B3) \ - do { \ - B0 ^= SIMD_8x32::splat(m_round_key[4*round ]); \ - B1 ^= SIMD_8x32::splat(m_round_key[4*round+1]); \ - B2 ^= SIMD_8x32::splat(m_round_key[4*round+2]); \ - B3 ^= SIMD_8x32::splat(m_round_key[4*round+3]); \ - } while(0) - -/* -* Serpent's linear transformations -*/ -#define transform(B0, B1, B2, B3) \ - do { \ - B0 = B0.rotl<13>(); \ - B2 = B2.rotl<3>(); \ - B1 ^= B0 ^ B2; \ - B3 ^= B2 ^ B0.shl<3>(); \ - B1 = B1.rotl<1>(); \ - B3 = B3.rotl<7>(); \ - B0 ^= B1 ^ B3; \ - B2 ^= B3 ^ B1.shl<7>(); \ - B0 = B0.rotl<5>(); \ - B2 = B2.rotl<22>(); \ - } while(0) - -#define i_transform(B0, B1, B2, B3) \ - do { \ - B2 = B2.rotr<22>(); \ - B0 = B0.rotr<5>(); \ - B2 ^= B3 ^ B1.shl<7>(); \ - B0 ^= B1 ^ B3; \ - B3 = B3.rotr<7>(); \ - B1 = B1.rotr<1>(); \ - B3 ^= B2 ^ B0.shl<3>(); \ - B1 ^= B0 ^ B2; \ - B2 = B2.rotr<3>(); \ - B0 = B0.rotr<13>(); \ - } while(0) - BOTAN_FUNC_ISA("avx2") void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const { + using namespace Botan::Serpent_F; + SIMD_8x32::reset_registers(); SIMD_8x32 B0 = SIMD_8x32::load_le(in); @@ -62,6 +24,8 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const SIMD_8x32::transpose(B0, B1, B2, B3); + const Key_Inserter key_xor(m_round_key.data()); + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -70,6 +34,7 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -78,6 +43,7 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -86,6 +52,7 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -107,6 +74,8 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const BOTAN_FUNC_ISA("avx2") void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const { + using namespace Botan::Serpent_F; + SIMD_8x32::reset_registers(); SIMD_8x32 B0 = SIMD_8x32::load_le(in); @@ -116,6 +85,8 @@ void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const SIMD_8x32::transpose(B0, B1, B2, B3); + const Key_Inserter key_xor(m_round_key.data()); + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); @@ -162,8 +133,4 @@ void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const SIMD_8x32::zero_registers(); } -#undef key_xor -#undef transform -#undef i_transform - } diff --git a/src/lib/block/serpent/serpent_sbox.h b/src/lib/block/serpent/serpent_sbox.h index 31471e724..f37cbb4dc 100644 --- a/src/lib/block/serpent/serpent_sbox.h +++ b/src/lib/block/serpent/serpent_sbox.h @@ -1,5 +1,4 @@ /* -* Serpent SBox Expressions * (C) 1999-2007,2013 Jack Lloyd * * The sbox expressions used here were discovered by Dag Arne Osvik and @@ -8,11 +7,13 @@ * Botan is released under the Simplified BSD License (see license.txt) */ -#ifndef BOTAN_SERPENT_SBOX_H_ -#define BOTAN_SERPENT_SBOX_H_ +#ifndef BOTAN_SERPENT_FUNCS_H_ +#define BOTAN_SERPENT_FUNCS_H_ #include <botan/build.h> +namespace Botan::Serpent_F { + template<typename T> BOTAN_FORCE_INLINE void SBoxE0(T& a, T& b, T& c, T& d) { @@ -443,4 +444,66 @@ BOTAN_FORCE_INLINE void SBoxD7(T& a, T& b, T& c, T& d) d = t0; } +template<size_t S> +BOTAN_FORCE_INLINE uint32_t shl(uint32_t v) + { + return v << S; + } + +/* +* Serpent's Linear Transform +*/ +template<typename T> +BOTAN_FORCE_INLINE void transform(T& B0, T& B1, T& B2, T& B3) + { + B0 = rotl<13>(B0); + B2 = rotl<3>(B2); + B1 ^= B0 ^ B2; + B3 ^= B2 ^ shl<3>(B0); + B1 = rotl<1>(B1); + B3 = rotl<7>(B3); + B0 ^= B1 ^ B3; + B2 ^= B3 ^ shl<7>(B1); + B0 = rotl<5>(B0); + B2 = rotl<22>(B2); + } + +/* +* Serpent's Inverse Linear Transform +*/ +template<typename T> +BOTAN_FORCE_INLINE void i_transform(T& B0, T& B1, T& B2, T& B3) + { + B2 = rotr<22>(B2); + B0 = rotr<5>(B0); + B2 ^= B3 ^ shl<7>(B1); + B0 ^= B1 ^ B3; + B3 = rotr<7>(B3); + B1 = rotr<1>(B1); + B3 ^= B2 ^ shl<3>(B0); + B1 ^= B0 ^ B2; + B2 = rotr<3>(B2); + B0 = rotr<13>(B0); + } + +class Key_Inserter + { + public: + Key_Inserter(const uint32_t* RK) : m_RK(RK) {} + + template<typename T> + inline void operator()(size_t R, T& B0, T& B1, T& B2, T& B3) const + { + B0 ^= m_RK[4*R ]; + B1 ^= m_RK[4*R+1]; + B2 ^= m_RK[4*R+2]; + B3 ^= m_RK[4*R+3]; + } + + private: + const uint32_t* m_RK; + }; + +} + #endif diff --git a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp index 5d1bc28f2..1a3c85568 100644 --- a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp +++ b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp @@ -6,55 +6,18 @@ */ #include <botan/internal/serpent.h> -#include <botan/internal/serpent_sbox.h> #include <botan/internal/simd_32.h> +#include <botan/internal/serpent_sbox.h> namespace Botan { -#define key_xor(round, B0, B1, B2, B3) \ - do { \ - B0 ^= SIMD_4x32::splat(m_round_key[4*round ]); \ - B1 ^= SIMD_4x32::splat(m_round_key[4*round+1]); \ - B2 ^= SIMD_4x32::splat(m_round_key[4*round+2]); \ - B3 ^= SIMD_4x32::splat(m_round_key[4*round+3]); \ - } while(0) - -/* -* Serpent's linear transformations -*/ -#define transform(B0, B1, B2, B3) \ - do { \ - B0 = B0.rotl<13>(); \ - B2 = B2.rotl<3>(); \ - B1 ^= B0 ^ B2; \ - B3 ^= B2 ^ B0.shl<3>(); \ - B1 = B1.rotl<1>(); \ - B3 = B3.rotl<7>(); \ - B0 ^= B1 ^ B3; \ - B2 ^= B3 ^ B1.shl<7>(); \ - B0 = B0.rotl<5>(); \ - B2 = B2.rotl<22>(); \ - } while(0) - -#define i_transform(B0, B1, B2, B3) \ - do { \ - B2 = B2.rotr<22>(); \ - B0 = B0.rotr<5>(); \ - B2 ^= B3 ^ B1.shl<7>(); \ - B0 ^= B1 ^ B3; \ - B3 = B3.rotr<7>(); \ - B1 = B1.rotr<1>(); \ - B3 ^= B2 ^ B0.shl<3>(); \ - B1 ^= B0 ^ B2; \ - B2 = B2.rotr<3>(); \ - B0 = B0.rotr<13>(); \ - } while(0) - /* * SIMD Serpent Encryption of 4 blocks in parallel */ void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const { + using namespace Botan::Serpent_F; + SIMD_4x32 B0 = SIMD_4x32::load_le(in); SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); @@ -62,6 +25,8 @@ void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const SIMD_4x32::transpose(B0, B1, B2, B3); + const Key_Inserter key_xor(m_round_key.data()); + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -111,6 +76,8 @@ void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const */ void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const { + using namespace Botan::Serpent_F; + SIMD_4x32 B0 = SIMD_4x32::load_le(in); SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); @@ -118,6 +85,8 @@ void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const SIMD_4x32::transpose(B0, B1, B2, B3); + const Key_Inserter key_xor(m_round_key.data()); + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); @@ -162,8 +131,4 @@ void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const B3.store_le(out + 48); } -#undef key_xor -#undef transform -#undef i_transform - } diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h index 78018d864..8704bd631 100644 --- a/src/lib/utils/simd/simd_32.h +++ b/src/lib/utils/simd/simd_32.h @@ -421,6 +421,11 @@ class SIMD_4x32 final #endif } + void operator^=(uint32_t other) + { + *this ^= SIMD_4x32::splat(other); + } + void operator|=(const SIMD_4x32& other) { #if defined(BOTAN_SIMD_USE_SSE2) @@ -648,6 +653,25 @@ class SIMD_4x32 final native_simd_type m_simd; }; +template<size_t R> +inline SIMD_4x32 rotl(SIMD_4x32 input) + { + return input.rotl<R>(); + } + +template<size_t R> +inline SIMD_4x32 rotr(SIMD_4x32 input) + { + return input.rotr<R>(); + } + +// For Serpent: +template<size_t S> +inline SIMD_4x32 shl(SIMD_4x32 input) + { + return input.shl<S>(); + } + } #endif diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h index 566dfd0a8..f90b6618b 100644 --- a/src/lib/utils/simd/simd_avx2/simd_avx2.h +++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h @@ -184,6 +184,12 @@ class SIMD_8x32 final } BOTAN_FUNC_ISA("avx2") + void operator^=(uint32_t other) + { + *this ^= SIMD_8x32::splat(other); + } + + BOTAN_FUNC_ISA("avx2") void operator|=(const SIMD_8x32& other) { m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2); @@ -318,6 +324,25 @@ class SIMD_8x32 final __m256i m_avx2; }; +template<size_t R> +inline SIMD_8x32 rotl(SIMD_8x32 input) + { + return input.rotl<R>(); + } + +template<size_t R> +inline SIMD_8x32 rotr(SIMD_8x32 input) + { + return input.rotr<R>(); + } + +// For Serpent: +template<size_t S> +inline SIMD_8x32 shl(SIMD_8x32 input) + { + return input.shl<S>(); + } + } #endif |