diff options
author | Jack Lloyd <[email protected]> | 2017-08-13 08:38:26 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-08-13 08:38:26 -0400 |
commit | 8804eeece5af90b728e337c7e0877549ea74eec9 (patch) | |
tree | 558c692afe04770efc3077bba0f3247e99188231 /src/lib/block/shacal2 | |
parent | 61a7a5757bfe19ff1a686055ab75cae1fc9f485e (diff) |
Add SHACAL2 in generic SIMD
Bit over 2x faster on my desktop
Diffstat (limited to 'src/lib/block/shacal2')
-rw-r--r-- | src/lib/block/shacal2/shacal2.cpp | 27 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2.h | 5 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_simd/info.txt | 8 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp | 117 |
4 files changed, 157 insertions, 0 deletions
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp
index 1b24d0cef..ea3cd9319 100644
--- a/src/lib/block/shacal2/shacal2.cpp
+++ b/src/lib/block/shacal2/shacal2.cpp
@@ -7,6 +7,7 @@

 #include <botan/shacal2.h>
 #include <botan/loadstor.h>
+#include <botan/cpuid.h>

 namespace Botan {

@@ -43,6 +44,19 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
 */
 void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
    {
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+   if(CPUID::has_simd_32())
+      {
+      while(blocks >= 4)
+         {
+         simd_encrypt_4(in, out);
+         in += 4*BLOCK_SIZE;
+         out += 4*BLOCK_SIZE;
+         blocks -= 4;
+         }
+      }
+#endif
+
    for(size_t i = 0; i != blocks; ++i)
       {
       uint32_t A = load_be<uint32_t>(in, 0);
@@ -78,6 +92,19 @@ void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
 */
 void SHACAL2::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
    {
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+   if(CPUID::has_simd_32())
+      {
+      while(blocks >= 4)
+         {
+         simd_decrypt_4(in, out);
+         in += 4*BLOCK_SIZE;
+         out += 4*BLOCK_SIZE;
+         blocks -= 4;
+         }
+      }
+#endif
+
    for(size_t i = 0; i != blocks; ++i)
       {
       uint32_t A = load_be<uint32_t>(in, 0);
diff --git a/src/lib/block/shacal2/shacal2.h b/src/lib/block/shacal2/shacal2.h
index 14fde2fbc..f0ade5e50 100644
--- a/src/lib/block/shacal2/shacal2.h
+++ b/src/lib/block/shacal2/shacal2.h
@@ -28,6 +28,11 @@ class BOTAN_DLL SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4>
    private:
       void key_schedule(const uint8_t[], size_t) override;

+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+      void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const;
+      void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const;
+#endif
+
       secure_vector<uint32_t> m_RK;
    };

diff --git a/src/lib/block/shacal2/shacal2_simd/info.txt b/src/lib/block/shacal2/shacal2_simd/info.txt
new file mode 100644
index 000000000..8d715c668
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_simd/info.txt
@@ -0,0 +1,8 @@
+<defines>
+SHACAL2_SIMD -> 20170813
+</defines>
+
+<requires>
+shacal2
+simd
+</requires>
diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
new file mode 100644
index 000000000..349fe5722
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
@@ -0,0 +1,117 @@
+/*
+* SHACAL-2 using SIMD
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/shacal2.h>
+#include <botan/internal/simd_32.h>
+
+namespace Botan {
+
+namespace {
+
+inline void SHACAL2_Fwd(SIMD_32 A, SIMD_32 B, SIMD_32 C, SIMD_32& D,
+                        SIMD_32 E, SIMD_32 F, SIMD_32 G, SIMD_32& H,
+                        uint32_t RK)
+   {
+   H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+   D += H;
+   H += A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+   }
+
+inline void SHACAL2_Rev(SIMD_32 A, SIMD_32 B, SIMD_32 C, SIMD_32& D,
+                        SIMD_32 E, SIMD_32 F, SIMD_32 G, SIMD_32& H,
+                        uint32_t RK)
+   {
+   H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+   D -= H;
+   H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+   }
+
+}
+
+void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const
+   {
+   SIMD_4x32 A = SIMD_4x32::load_be(in);
+   SIMD_4x32 E = SIMD_4x32::load_be(in+16);
+   SIMD_4x32 B = SIMD_4x32::load_be(in+32);
+   SIMD_4x32 F = SIMD_4x32::load_be(in+48);
+
+   SIMD_4x32 C = SIMD_4x32::load_be(in+64);
+   SIMD_4x32 G = SIMD_4x32::load_be(in+80);
+   SIMD_4x32 D = SIMD_4x32::load_be(in+96);
+   SIMD_4x32 H = SIMD_4x32::load_be(in+112);
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   for(size_t r = 0; r != 64; r += 8)
+      {
+      SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]);
+      SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]);
+      SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]);
+      SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]);
+      SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]);
+      SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]);
+      SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]);
+      SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]);
+      }
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   A.store_be(out);
+   E.store_be(out+16);
+   B.store_be(out+32);
+   F.store_be(out+48);
+
+   C.store_be(out+64);
+   G.store_be(out+80);
+   D.store_be(out+96);
+   H.store_be(out+112);
+   }
+
+void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const
+   {
+   SIMD_4x32 A = SIMD_4x32::load_be(in);
+   SIMD_4x32 E = SIMD_4x32::load_be(in+16);
+   SIMD_4x32 B = SIMD_4x32::load_be(in+32);
+   SIMD_4x32 F = SIMD_4x32::load_be(in+48);
+
+   SIMD_4x32 C = SIMD_4x32::load_be(in+64);
+   SIMD_4x32 G = SIMD_4x32::load_be(in+80);
+   SIMD_4x32 D = SIMD_4x32::load_be(in+96);
+   SIMD_4x32 H = SIMD_4x32::load_be(in+112);
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   for(size_t r = 0; r != 64; r += 8)
+      {
+      SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]);
+      SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]);
+      SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]);
+      SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]);
+      SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]);
+      SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]);
+      SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]);
+      SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]);
+      }
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   A.store_be(out);
+   E.store_be(out+16);
+   B.store_be(out+32);
+   F.store_be(out+48);
+
+   C.store_be(out+64);
+   G.store_be(out+80);
+   D.store_be(out+96);
+   H.store_be(out+112);
+   }
+
+}