diff options
author | Jack Lloyd <[email protected]> | 2017-08-14 07:40:49 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-08-14 07:40:49 -0400 |
commit | b8a691fb0d44eb67886179810768ac9453963cea (patch) | |
tree | ae597cbd061951be669e8feae5001ca94c42b13e /src/lib | |
parent | 7e2c92527209d47098c0b7b9712fafcd2455590e (diff) |
Add support for SHACAL2 using x86 SHA extensions
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/block/shacal2/shacal2.cpp | 21 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2.h | 4 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_x86/info.txt | 16 | ||||
-rw-r--r-- | src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp | 68 |
4 files changed, 109 insertions, 0 deletions
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp index faf0a2d81..30ad711db 100644 --- a/src/lib/block/shacal2/shacal2.cpp +++ b/src/lib/block/shacal2/shacal2.cpp @@ -44,6 +44,13 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, */ void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return x86_encrypt_blocks(in, out, blocks); + } +#endif + #if defined(BOTAN_HAS_SHACAL2_SIMD) if(CPUID::has_simd_32()) { @@ -181,6 +188,13 @@ void SHACAL2::key_schedule(const uint8_t key[], size_t len) size_t SHACAL2::parallelism() const { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return 4; + } +#endif + #if defined(BOTAN_HAS_SHACAL2_SIMD) if(CPUID::has_simd_32()) { @@ -193,6 +207,13 @@ size_t SHACAL2::parallelism() const std::string SHACAL2::provider() const { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return "intel_sha"; + } +#endif + #if defined(BOTAN_HAS_SHACAL2_SIMD) if(CPUID::has_simd_32()) { diff --git a/src/lib/block/shacal2/shacal2.h b/src/lib/block/shacal2/shacal2.h index 7e1fa4ac0..d63c1453d 100644 --- a/src/lib/block/shacal2/shacal2.h +++ b/src/lib/block/shacal2/shacal2.h @@ -35,6 +35,10 @@ class BOTAN_DLL SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4> void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const; #endif +#if defined(BOTAN_HAS_SHACAL2_X86) + void x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + secure_vector<uint32_t> m_RK; }; diff --git a/src/lib/block/shacal2/shacal2_x86/info.txt b/src/lib/block/shacal2/shacal2_x86/info.txt new file mode 100644 index 000000000..b8d6a50b7 --- /dev/null +++ b/src/lib/block/shacal2/shacal2_x86/info.txt @@ -0,0 +1,16 @@ +<defines> +SHACAL2_X86 -> 20170814 +</defines> + +<requires> +shacal2 +</requires> + +need_isa sha,sse4.1 + +<cc> +gcc:5.0 +clang:3.9 +msvc:19.0 # MSVS 2015 +</cc> + diff --git a/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp new file mode 100644 index 000000000..5a346698a --- /dev/null +++ b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp @@ -0,0 +1,68 @@ +/* +* SHACAL-2 using x86 SHA extensions +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <immintrin.h> + +namespace Botan { + +/* +Only encryption is supported since the inverse round function would +require a different instruction +*/ + +void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const + { + // TODO x4 unrolling + + const __m128i BSWAP_MASK = _mm_set_epi64x(0x0C0D0E0F08090A0B, 0x0405060700010203); + + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + while(blocks) + { + __m128i B0 = _mm_loadu_si128(in_mm); + __m128i B1 = _mm_loadu_si128(in_mm+1); + + B0 = _mm_shuffle_epi8(B0, BSWAP_MASK); + B1 = _mm_shuffle_epi8(B1, BSWAP_MASK); + + B0 = _mm_shuffle_epi32(B0, 0xB1); // CDAB + B1 = _mm_shuffle_epi32(B1, 0x1B); // EFGH + + __m128i TMP = _mm_alignr_epi8(B0, B1, 8); // ABEF + B1 = _mm_blend_epi16(B1, B0, 0xF0); // CDGH + B0 = TMP; + + for(size_t i = 0; i != 8; ++i) + { + B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+1],m_RK[8*i+0])); + B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+3],m_RK[8*i+2])); + B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+5],m_RK[8*i+4])); + B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+7],m_RK[8*i+6])); + } + + TMP = _mm_shuffle_epi32(B0, 0x1B); // FEBA + B1 = _mm_shuffle_epi32(B1, 0xB1); // DCHG + B0 = _mm_blend_epi16(TMP, B1, 0xF0); // DCBA + B1 = _mm_alignr_epi8(B1, TMP, 8); // ABEF + + B0 = _mm_shuffle_epi8(B0, BSWAP_MASK); + B1 = _mm_shuffle_epi8(B1, BSWAP_MASK); + + // Save state + _mm_storeu_si128(out_mm, B0); + _mm_storeu_si128(out_mm + 1, B1); + + blocks--; + in_mm += 2; + out_mm += 2; + } + } + +} |