diff options
author | Jack Lloyd <[email protected]> | 2017-08-15 14:40:52 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-08-15 14:40:52 -0400 |
commit | 0f530047200524d23e8107dc16dd5a5e84014071 (patch) | |
tree | a60da7cf73cf8972eeb58a027a7ea6bbec1bb9d1 /src/lib | |
parent | 2266362024009f0364a07dd1bcff5115180f40a7 (diff) | |
parent | 9ab8ec3de32cad721b6b52401be67c5219c9f77b (diff) |
Merge GH #1151 Add SHACAL2
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/block/aes/aes.cpp | 16 | ||||
-rw-r--r-- | src/lib/block/aes/aes.h | 6 | ||||
-rw-r--r-- | src/lib/block/block_cipher.cpp | 11 | ||||
-rw-r--r-- | src/lib/block/idea/idea.cpp | 12 | ||||
-rw-r--r-- | src/lib/block/idea/idea.h | 2 | ||||
-rw-r--r-- | src/lib/block/noekeon/noekeon.cpp | 12 | ||||
-rw-r--r-- | src/lib/block/noekeon/noekeon.h | 2 | ||||
-rw-r--r-- | src/lib/block/shacal2/info.txt | 5 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2.cpp | 235 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2.h | 47 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_simd/info.txt | 8 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp | 119 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_x86/info.txt | 16 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp | 137 | ||||
-rw-r--r-- | src/lib/block/threefish/threefish.cpp | 12 | ||||
-rw-r--r-- | src/lib/block/threefish/threefish.h | 2 | ||||
-rw-r--r-- | src/lib/utils/simd/simd_32.h | 58 |
17 files changed, 700 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp index 21228e0c1..75591bfd2 100644 --- a/src/lib/block/aes/aes.cpp +++ b/src/lib/block/aes/aes.cpp @@ -414,6 +414,18 @@ void aes_key_schedule(const uint8_t key[], size_t length, copy_mem(DK.data(), XDK.data(), DK.size()); } +size_t aes_parallelism() + { +#if defined(BOTAN_HAS_AES_NI) + if(CPUID::has_aes_ni()) + { + return 4; + } +#endif + + return 1; + } + const char* aes_provider() { #if defined(BOTAN_HAS_AES_NI) @@ -439,6 +451,10 @@ std::string AES_128::provider() const { return aes_provider(); } std::string AES_192::provider() const { return aes_provider(); } std::string AES_256::provider() const { return aes_provider(); } +size_t AES_128::parallelism() const { return aes_parallelism(); } +size_t AES_192::parallelism() const { return aes_parallelism(); } +size_t AES_256::parallelism() const { return aes_parallelism(); } + void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { #if defined(BOTAN_HAS_AES_NI) diff --git a/src/lib/block/aes/aes.h b/src/lib/block/aes/aes.h index 52f877e36..a74280947 100644 --- a/src/lib/block/aes/aes.h +++ b/src/lib/block/aes/aes.h @@ -26,6 +26,8 @@ class BOTAN_DLL AES_128 final : public Block_Cipher_Fixed_Params<16, 16> std::string provider() const override; std::string name() const override { return "AES-128"; } BlockCipher* clone() const override { return new AES_128; } + size_t parallelism() const override; + private: void key_schedule(const uint8_t key[], size_t length) override; @@ -59,6 +61,8 @@ class BOTAN_DLL AES_192 final : public Block_Cipher_Fixed_Params<16, 24> std::string provider() const override; std::string name() const override { return "AES-192"; } BlockCipher* clone() const override { return new AES_192; } + size_t parallelism() const override; + private: #if defined(BOTAN_HAS_AES_SSSE3) void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; @@ -93,6 +97,8 @@ class BOTAN_DLL AES_256 final : public Block_Cipher_Fixed_Params<16, 32> std::string name() const override { return "AES-256"; } BlockCipher* clone() const override { return new AES_256; } + size_t parallelism() const override; + private: #if defined(BOTAN_HAS_AES_SSSE3) void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; diff --git a/src/lib/block/block_cipher.cpp b/src/lib/block/block_cipher.cpp index b56f1c571..a0eafbd11 100644 --- a/src/lib/block/block_cipher.cpp +++ b/src/lib/block/block_cipher.cpp @@ -66,6 +66,10 @@ #include <botan/serpent.h> #endif +#if defined(BOTAN_HAS_SHACAL2) + #include <botan/shacal2.h> +#endif + #if defined(BOTAN_HAS_SM4) #include <botan/sm4.h> #endif @@ -135,6 +139,13 @@ BlockCipher::create(const std::string& algo, } #endif +#if defined(BOTAN_HAS_SHACAL2) + if(algo == "SHACAL2") + { + return std::unique_ptr<BlockCipher>(new SHACAL2); + } +#endif + #if defined(BOTAN_HAS_TWOFISH) if(algo == "Twofish") { diff --git a/src/lib/block/idea/idea.cpp b/src/lib/block/idea/idea.cpp index 4eab6a4f3..2be15be2e 100644 --- a/src/lib/block/idea/idea.cpp +++ b/src/lib/block/idea/idea.cpp @@ -107,6 +107,18 @@ void idea_op(const uint8_t in[], uint8_t out[], size_t blocks, const uint16_t K[ } +size_t IDEA::parallelism() const + { +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + return 8; + } +#endif + + return 1; + } + std::string IDEA::provider() const { #if defined(BOTAN_HAS_IDEA_SSE2) diff --git a/src/lib/block/idea/idea.h b/src/lib/block/idea/idea.h index 5a718867b..eaef5deee 100644 --- a/src/lib/block/idea/idea.h +++ b/src/lib/block/idea/idea.h @@ -26,6 +26,8 @@ class BOTAN_DLL IDEA final : public Block_Cipher_Fixed_Params<8, 16> std::string provider() const override; std::string name() const override { return "IDEA"; } BlockCipher* clone() const override { return new IDEA; } + size_t parallelism() const override; + private: #if defined(BOTAN_HAS_IDEA_SSE2) void sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const; diff --git a/src/lib/block/noekeon/noekeon.cpp b/src/lib/block/noekeon/noekeon.cpp index e8bd7b308..c82badd4c 100644 --- a/src/lib/block/noekeon/noekeon.cpp +++ b/src/lib/block/noekeon/noekeon.cpp @@ -73,6 +73,18 @@ inline void gamma(uint32_t& A0, uint32_t& A1, uint32_t& A2, uint32_t& A3) } +size_t Noekeon::parallelism() const + { +#if defined(BOTAN_HAS_NOEKEON_SIMD) + if(CPUID::has_simd_32()) + { + return 4; + } +#endif + + return 1; + } + std::string Noekeon::provider() const { #if defined(BOTAN_HAS_NOEKEON_SIMD) diff --git a/src/lib/block/noekeon/noekeon.h b/src/lib/block/noekeon/noekeon.h index 83af6d8d7..de49d658f 100644 --- a/src/lib/block/noekeon/noekeon.h +++ b/src/lib/block/noekeon/noekeon.h @@ -25,6 +25,8 @@ class BOTAN_DLL Noekeon final : public Block_Cipher_Fixed_Params<16, 16> void clear() override; std::string name() const override { return "Noekeon"; } BlockCipher* clone() const override { return new Noekeon; } + size_t parallelism() const override; + private: #if defined(BOTAN_HAS_NOEKEON_SIMD) void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const; diff --git a/src/lib/block/shacal2/info.txt b/src/lib/block/shacal2/info.txt new file mode 100644 index 000000000..62e00503f --- /dev/null +++ b/src/lib/block/shacal2/info.txt @@ -0,0 +1,5 @@ +<defines> +SHACAL2 -> 20170813 +</defines> + + diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp new file mode 100644 index 000000000..30ad711db --- /dev/null +++ b/src/lib/block/shacal2/shacal2.cpp @@ -0,0 +1,235 @@ +/* +* SHACAL-2 +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/loadstor.h> +#include <botan/cpuid.h> + +namespace Botan { + +namespace { + +inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, + uint32_t E, uint32_t F, uint32_t G, uint32_t& H, + uint32_t RK) + { + const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22); + const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25); + + H += E_rho + ((E & F) ^ (~E & G)) + RK; + D += H; + H += A_rho + ((A & B) | ((A | B) & C)); + } + +inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, + uint32_t E, uint32_t F, uint32_t G, uint32_t& H, + uint32_t RK) + { + const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22); + const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25); + + H -= A_rho + ((A & B) | ((A | B) & C)); + D -= H; + H -= E_rho + ((E & F) ^ (~E & G)) + RK; + } + +} + +/* +* SHACAL2 Encryption +*/ +void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return x86_encrypt_blocks(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_encrypt_4(in, out); + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + uint32_t E = load_be<uint32_t>(in, 4); + uint32_t F = load_be<uint32_t>(in, 5); + uint32_t G = load_be<uint32_t>(in, 6); + uint32_t H = load_be<uint32_t>(in, 7); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + store_be(out, A, B, C, D, E, F, G, H); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SHACAL2 Encryption +*/ +void SHACAL2::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_decrypt_4(in, out); + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + uint32_t E = load_be<uint32_t>(in, 4); + uint32_t F = load_be<uint32_t>(in, 5); + uint32_t G = load_be<uint32_t>(in, 6); + uint32_t H = load_be<uint32_t>(in, 7); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + store_be(out, A, B, C, D, E, F, G, H); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SHACAL2 Key Schedule +*/ +void SHACAL2::key_schedule(const uint8_t key[], size_t len) + { + const uint32_t RC[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 + }; + + if(m_RK.empty()) + m_RK.resize(64); + else + clear_mem(m_RK.data(), m_RK.size()); + + load_be(m_RK.data(), key, len/4); + + for(size_t i = 16; i != 64; ++i) + { + const uint32_t sigma0_15 = rotate_right(m_RK[i-15], 7) ^ rotate_right(m_RK[i-15], 18) ^ (m_RK[i-15] >> 3); + const uint32_t sigma1_2 = rotate_right(m_RK[i-2], 17) ^ rotate_right(m_RK[i-2], 19) ^ (m_RK[i-2] >> 10); + m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2; + } + + for(size_t i = 0; i != 64; ++i) + { + m_RK[i] += RC[i]; + } + } + +size_t SHACAL2::parallelism() const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return 4; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + return 4; + } +#endif + + return 1; + } + +std::string SHACAL2::provider() const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return "intel_sha"; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + return "simd"; + } +#endif + + return "base"; + } + +/* +* Clear memory of sensitive data +*/ +void SHACAL2::clear() + { + zap(m_RK); + } + +} diff --git a/src/lib/block/shacal2/shacal2.h b/src/lib/block/shacal2/shacal2.h new file mode 100644 index 000000000..d63c1453d --- /dev/null +++ b/src/lib/block/shacal2/shacal2.h @@ -0,0 +1,47 @@ +/* +* SHACAL-2 +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SHACAL2_H__ +#define BOTAN_SHACAL2_H__ + +#include <botan/block_cipher.h> + +namespace Botan { + +/** +* SHACAL2 +*/ +class BOTAN_DLL SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + std::string provider() const override; + void clear() override; + std::string name() const override { return "SHACAL2"; } + BlockCipher* clone() const override { return new SHACAL2; } + size_t parallelism() const override; + + private: + void key_schedule(const uint8_t[], size_t) override; + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const; + void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const; +#endif + +#if defined(BOTAN_HAS_SHACAL2_X86) + void x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + secure_vector<uint32_t> m_RK; + }; + +} + +#endif diff --git a/src/lib/block/shacal2/shacal2_simd/info.txt b/src/lib/block/shacal2/shacal2_simd/info.txt new file mode 100644 index 000000000..8d715c668 --- /dev/null +++ b/src/lib/block/shacal2/shacal2_simd/info.txt @@ -0,0 +1,8 @@ +<defines> +SHACAL2_SIMD -> 20170813 +</defines> + +<requires> +shacal2 +simd +</requires> diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp new file mode 100644 index 000000000..a4324c8fb --- /dev/null +++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp @@ -0,0 +1,119 @@ +/* +* SHACAL-2 using SIMD +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +namespace { + +inline +void SHACAL2_Fwd(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32& D, + const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H, + uint32_t RK) + { + H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK); + D += H; + H += A.rho(2,13,22) + ((A & B) | ((A | B) & C)); + } + +inline +void SHACAL2_Rev(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32& D, + const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H, + uint32_t RK) + { + H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C)); + D -= H; + H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK); + } + +} + +void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const + { + SIMD_4x32 A = SIMD_4x32::load_be(in); + SIMD_4x32 E = SIMD_4x32::load_be(in+16); + SIMD_4x32 B = SIMD_4x32::load_be(in+32); + SIMD_4x32 F = SIMD_4x32::load_be(in+48); + + SIMD_4x32 C = SIMD_4x32::load_be(in+64); + SIMD_4x32 G = SIMD_4x32::load_be(in+80); + SIMD_4x32 D = SIMD_4x32::load_be(in+96); + SIMD_4x32 H = SIMD_4x32::load_be(in+112); + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + A.store_be(out); + E.store_be(out+16); + B.store_be(out+32); + F.store_be(out+48); + + C.store_be(out+64); + G.store_be(out+80); + D.store_be(out+96); + H.store_be(out+112); + } + +void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const + { + SIMD_4x32 A = SIMD_4x32::load_be(in); + SIMD_4x32 E = SIMD_4x32::load_be(in+16); + SIMD_4x32 B = SIMD_4x32::load_be(in+32); + SIMD_4x32 F = SIMD_4x32::load_be(in+48); + + SIMD_4x32 C = SIMD_4x32::load_be(in+64); + SIMD_4x32 G = SIMD_4x32::load_be(in+80); + SIMD_4x32 D = SIMD_4x32::load_be(in+96); + SIMD_4x32 H = SIMD_4x32::load_be(in+112); + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + A.store_be(out); + E.store_be(out+16); + B.store_be(out+32); + F.store_be(out+48); + + C.store_be(out+64); + G.store_be(out+80); + D.store_be(out+96); + H.store_be(out+112); + } + +} diff --git a/src/lib/block/shacal2/shacal2_x86/info.txt b/src/lib/block/shacal2/shacal2_x86/info.txt new file mode 100644 index 000000000..b8d6a50b7 --- /dev/null +++ b/src/lib/block/shacal2/shacal2_x86/info.txt @@ -0,0 +1,16 @@ +<defines> +SHACAL2_X86 -> 20170814 +</defines> + +<requires> +shacal2 +</requires> + +need_isa sha,sse4.1 + +<cc> +gcc:5.0 +clang:3.9 +msvc:19.0 # MSVS 2015 +</cc> + diff --git a/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp new file mode 100644 index 000000000..a917955e3 --- /dev/null +++ b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp @@ -0,0 +1,137 @@ +/* +* SHACAL-2 using x86 SHA extensions +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <immintrin.h> + +namespace Botan { + +/* +Only encryption is supported since the inverse round function would +require a different instruction +*/ + +void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i BSWAP_MASK = _mm_set_epi64x(0x0C0D0E0F08090A0B, 0x0405060700010203); + + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + while(blocks >= 2) + { + __m128i B0_0 = _mm_loadu_si128(in_mm); + __m128i B0_1 = _mm_loadu_si128(in_mm+1); + __m128i B1_0 = _mm_loadu_si128(in_mm+2); + __m128i B1_1 = _mm_loadu_si128(in_mm+3); + + B0_0 = _mm_shuffle_epi8(B0_0, BSWAP_MASK); + B0_1 = _mm_shuffle_epi8(B0_1, BSWAP_MASK); + B1_0 = _mm_shuffle_epi8(B1_0, BSWAP_MASK); + B1_1 = _mm_shuffle_epi8(B1_1, BSWAP_MASK); + + B0_0 = _mm_shuffle_epi32(B0_0, 0xB1); // CDAB + B0_1 = _mm_shuffle_epi32(B0_1, 0x1B); // EFGH + B1_0 = _mm_shuffle_epi32(B1_0, 0xB1); // CDAB + B1_1 = _mm_shuffle_epi32(B1_1, 0x1B); // EFGH + + __m128i TMP = _mm_alignr_epi8(B0_0, B0_1, 8); // ABEF + B0_1 = _mm_blend_epi16(B0_1, B0_0, 0xF0); // CDGH + B0_0 = TMP; + + TMP = _mm_alignr_epi8(B1_0, B1_1, 8); // ABEF + B1_1 = _mm_blend_epi16(B1_1, B1_0, 0xF0); // CDGH + B1_0 = TMP; + + for(size_t i = 0; i != 8; ++i) + { + const __m128i RK0 = _mm_set_epi32(0,0,m_RK[8*i+1],m_RK[8*i+0]); + const __m128i RK1 = _mm_set_epi32(0,0,m_RK[8*i+3],m_RK[8*i+2]); + const __m128i RK2 = _mm_set_epi32(0,0,m_RK[8*i+5],m_RK[8*i+4]); + const __m128i RK3 = _mm_set_epi32(0,0,m_RK[8*i+7],m_RK[8*i+6]); + + B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK0); + B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK0); + + B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK1); + B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK1); + + B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK2); + B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK2); + + B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK3); + B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK3); + } + + TMP = _mm_shuffle_epi32(B0_0, 0x1B); // FEBA + B0_1 = _mm_shuffle_epi32(B0_1, 0xB1); // DCHG + B0_0 = _mm_blend_epi16(TMP, B0_1, 0xF0); // DCBA + B0_1 = _mm_alignr_epi8(B0_1, TMP, 8); // ABEF + + TMP = _mm_shuffle_epi32(B1_0, 0x1B); // FEBA + B1_1 = _mm_shuffle_epi32(B1_1, 0xB1); // DCHG + B1_0 = _mm_blend_epi16(TMP, B1_1, 0xF0); // DCBA + B1_1 = _mm_alignr_epi8(B1_1, TMP, 8); // ABEF + + B0_0 = _mm_shuffle_epi8(B0_0, BSWAP_MASK); + B0_1 = _mm_shuffle_epi8(B0_1, BSWAP_MASK); + B1_0 = _mm_shuffle_epi8(B1_0, BSWAP_MASK); + B1_1 = _mm_shuffle_epi8(B1_1, BSWAP_MASK); + + // Save state + _mm_storeu_si128(out_mm + 0, B0_0); + _mm_storeu_si128(out_mm + 1, B0_1); + _mm_storeu_si128(out_mm + 2, B1_0); + _mm_storeu_si128(out_mm + 3, B1_1); + + blocks -= 2; + in_mm += 4; + out_mm += 4; + } + + while(blocks) + { + __m128i B0 = _mm_loadu_si128(in_mm); + __m128i B1 = _mm_loadu_si128(in_mm+1); + + B0 = _mm_shuffle_epi8(B0, BSWAP_MASK); + B1 = _mm_shuffle_epi8(B1, BSWAP_MASK); + + B0 = _mm_shuffle_epi32(B0, 0xB1); // CDAB + B1 = _mm_shuffle_epi32(B1, 0x1B); // EFGH + + __m128i TMP = _mm_alignr_epi8(B0, B1, 8); // ABEF + B1 = _mm_blend_epi16(B1, B0, 0xF0); // CDGH + B0 = TMP; + + for(size_t i = 0; i != 8; ++i) + { + B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+1],m_RK[8*i+0])); + B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+3],m_RK[8*i+2])); + B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+5],m_RK[8*i+4])); + B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+7],m_RK[8*i+6])); + } + + TMP = _mm_shuffle_epi32(B0, 0x1B); // FEBA + B1 = _mm_shuffle_epi32(B1, 0xB1); // DCHG + B0 = _mm_blend_epi16(TMP, B1, 0xF0); // DCBA + B1 = _mm_alignr_epi8(B1, TMP, 8); // ABEF + + B0 = _mm_shuffle_epi8(B0, BSWAP_MASK); + B1 = _mm_shuffle_epi8(B1, BSWAP_MASK); + + // Save state + _mm_storeu_si128(out_mm, B0); + _mm_storeu_si128(out_mm + 1, B1); + + blocks--; + in_mm += 2; + out_mm += 2; + } + } + +} diff --git a/src/lib/block/threefish/threefish.cpp b/src/lib/block/threefish/threefish.cpp index 28a144fb6..99ce135d5 100644 --- a/src/lib/block/threefish/threefish.cpp +++ b/src/lib/block/threefish/threefish.cpp @@ -98,6 +98,18 @@ void Threefish_512::skein_feedfwd(const secure_vector<uint64_t>& M, m_K[4] ^ m_K[5] ^ m_K[6] ^ m_K[7] ^ 0x1BD11BDAA9FC1A22; } +size_t Threefish_512::parallelism() const + { +#if defined(BOTAN_HAS_THREEFISH_512_AVX2) + if(CPUID::has_avx2()) + { + return 2; + } +#endif + + return 1; + } + std::string Threefish_512::provider() const { #if defined(BOTAN_HAS_THREEFISH_512_AVX2) diff --git a/src/lib/block/threefish/threefish.h b/src/lib/block/threefish/threefish.h index 8fe690f52..cdd27cb11 100644 --- a/src/lib/block/threefish/threefish.h +++ b/src/lib/block/threefish/threefish.h @@ -27,6 +27,8 @@ class BOTAN_DLL Threefish_512 final : public Block_Cipher_Fixed_Params<64, 64> std::string provider() const override; std::string name() const override { return "Threefish-512"; } BlockCipher* clone() const override { return new Threefish_512; } + size_t parallelism() const override; + protected: const secure_vector<uint64_t>& get_T() const { return m_T; } const secure_vector<uint64_t>& get_K() const { return m_K; } diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h index 01b6a7ef0..def933f4a 100644 --- a/src/lib/utils/simd/simd_32.h +++ b/src/lib/utils/simd/simd_32.h @@ -282,6 +282,64 @@ class SIMD_4x32 final #endif } + + /* + Return rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3) + */ + SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const + { + SIMD_4x32 res; + +#if defined(BOTAN_SIMD_USE_SSE2) + + res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)), + _mm_srli_epi32(m_sse, static_cast<int>(rot1))); + res.m_sse = _mm_xor_si128( + res.m_sse, + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)), + _mm_srli_epi32(m_sse, static_cast<int>(rot2)))); + res.m_sse = _mm_xor_si128( + res.m_sse, + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)), + _mm_srli_epi32(m_sse, static_cast<int>(rot3)))); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + + const unsigned int r1 = static_cast<unsigned int>(32-rot1); + const unsigned int r2 = static_cast<unsigned int>(32-rot2); + const unsigned int r3 = static_cast<unsigned int>(32-rot3); + res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1}); + res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2})); + res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3})); + +#elif defined(BOTAN_SIMD_USE_NEON) + res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot1)), + vshrq_n_u32(m_neon, static_cast<int>(rot1))); + + res.m_neon = veorq_u32( + res.m_neon, + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot2)), + vshrq_n_u32(m_neon, static_cast<int>(rot2)))); + + res.m_neon = veorq_u32( + res.m_neon, + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot3)), + vshrq_n_u32(m_neon, static_cast<int>(rot3)))); + +#else + + for(size_t i = 0; i != 4; ++i) + { + res.m_scalar[i] = + Botan::rotate_right(m_scalar[i], rot1) ^ + Botan::rotate_right(m_scalar[i], rot2) ^ + Botan::rotate_right(m_scalar[i], rot3); + } +#endif + + return res; + } + /** * Rotate each element of SIMD register n bits left */ |