From adad731c0e01c779002551adfb7d74c9da44f6ef Mon Sep 17 00:00:00 2001 From: lloyd Date: Fri, 27 Dec 2013 23:37:14 +0000 Subject: Move clmul to its own file (for abi flags) --- src/modes/aead/gcm/clmul/clmul.cpp | 76 ++++++++++++++++++++++++++++++++++++++ src/modes/aead/gcm/clmul/clmul.h | 14 +++++++ src/modes/aead/gcm/clmul/info.txt | 8 ++++ src/modes/aead/gcm/gcm.cpp | 75 ++----------------------------------- src/modes/aead/gcm/info.txt | 2 - 5 files changed, 101 insertions(+), 74 deletions(-) create mode 100644 src/modes/aead/gcm/clmul/clmul.cpp create mode 100644 src/modes/aead/gcm/clmul/clmul.h create mode 100644 src/modes/aead/gcm/clmul/info.txt (limited to 'src/modes') diff --git a/src/modes/aead/gcm/clmul/clmul.cpp b/src/modes/aead/gcm/clmul/clmul.cpp new file mode 100644 index 000000000..cc6d581e3 --- /dev/null +++ b/src/modes/aead/gcm/clmul/clmul.cpp @@ -0,0 +1,76 @@ +/* +* CLMUL hook +* (C) 2013 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include + +namespace Botan { + +void gcm_multiply_clmul(byte x[16], const byte H[16]) + { + /* + * Algorithms 1 and 5 from Intel's CLMUL guide + */ + const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + __m128i a = _mm_loadu_si128(reinterpret_cast(&x[0])); + __m128i b = _mm_loadu_si128(reinterpret_cast(&H[0])); + + a = _mm_shuffle_epi8(a, BSWAP_MASK); + b = _mm_shuffle_epi8(b, BSWAP_MASK); + + __m128i T0, T1, T2, T3, T4, T5; + + T0 = _mm_clmulepi64_si128(a, b, 0x00); + T1 = _mm_clmulepi64_si128(a, b, 0x01); + T2 = _mm_clmulepi64_si128(a, b, 0x10); + T3 = _mm_clmulepi64_si128(a, b, 0x11); + + T1 = _mm_xor_si128(T1, T2); + T2 = _mm_slli_si128(T1, 8); + T1 = _mm_srli_si128(T1, 8); + T0 = _mm_xor_si128(T0, T2); + T3 = _mm_xor_si128(T3, T1); + + T4 = _mm_srli_epi32(T0, 31); + T0 = _mm_slli_epi32(T0, 1); + + T5 = _mm_srli_epi32(T3, 31); + T3 = _mm_slli_epi32(T3, 1); + + T2 = _mm_srli_si128(T4, 12); + T5 = _mm_slli_si128(T5, 4); + T4 = _mm_slli_si128(T4, 4); + T0 = _mm_or_si128(T0, T4); + T3 = _mm_or_si128(T3, T5); + T3 = _mm_or_si128(T3, T2); + + T4 = _mm_slli_epi32(T0, 31); + T5 = _mm_slli_epi32(T0, 30); + T2 = _mm_slli_epi32(T0, 25); + + T4 = _mm_xor_si128(T4, T5); + T4 = _mm_xor_si128(T4, T2); + T5 = _mm_srli_si128(T4, 4); + T3 = _mm_xor_si128(T3, T5); + T4 = _mm_slli_si128(T4, 12); + T0 = _mm_xor_si128(T0, T4); + T3 = _mm_xor_si128(T3, T0); + + T4 = _mm_srli_epi32(T0, 1); + T1 = _mm_srli_epi32(T0, 2); + T2 = _mm_srli_epi32(T0, 7); + T3 = _mm_xor_si128(T3, T1); + T3 = _mm_xor_si128(T3, T2); + T3 = _mm_xor_si128(T3, T4); + + T3 = _mm_shuffle_epi8(T3, BSWAP_MASK); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&x[0]), T3); + } + +} diff --git a/src/modes/aead/gcm/clmul/clmul.h b/src/modes/aead/gcm/clmul/clmul.h new file mode 100644 index 000000000..ba197f2f7 --- /dev/null +++ b/src/modes/aead/gcm/clmul/clmul.h @@ -0,0 +1,14 @@ +/* +* CLMUL hook +* (C) 2013 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include + +namespace Botan { + +void gcm_multiply_clmul(byte x[16], const byte H[16]); + +} diff --git a/src/modes/aead/gcm/clmul/info.txt b/src/modes/aead/gcm/clmul/info.txt new file mode 100644 index 000000000..8a21b6ac2 --- /dev/null +++ b/src/modes/aead/gcm/clmul/info.txt @@ -0,0 +1,8 @@ + +define GCM_CLMUL 20131227 + +need_isa clmul,ssse3 + + +clmul.h + diff --git a/src/modes/aead/gcm/gcm.cpp b/src/modes/aead/gcm/gcm.cpp index 004b25d80..b39e6ac92 100644 --- a/src/modes/aead/gcm/gcm.cpp +++ b/src/modes/aead/gcm/gcm.cpp @@ -10,85 +10,16 @@ #include #include -#if defined(BOTAN_TARGET_SUPPORTS_CLMUL) - #include +#if defined(BOTAN_HAS_GCM_CLMUL) + #include #include #endif namespace Botan { -namespace { - -#if defined(BOTAN_TARGET_SUPPORTS_CLMUL) -void gcm_multiply_clmul(byte x[16], const byte H[16]) - { - /* - * Algorithms 1 and 5 from Intel's CLMUL guide - */ - const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - - __m128i a = _mm_loadu_si128(reinterpret_cast(&x[0])); - __m128i b = _mm_loadu_si128(reinterpret_cast(&H[0])); - - a = _mm_shuffle_epi8(a, BSWAP_MASK); - b = _mm_shuffle_epi8(b, BSWAP_MASK); - - __m128i T0, T1, T2, T3, T4, T5; - - T0 = _mm_clmulepi64_si128(a, b, 0x00); - T1 = _mm_clmulepi64_si128(a, b, 0x01); - T2 = _mm_clmulepi64_si128(a, b, 0x10); - T3 = _mm_clmulepi64_si128(a, b, 0x11); - - T1 = _mm_xor_si128(T1, T2); - T2 = _mm_slli_si128(T1, 8); - T1 = _mm_srli_si128(T1, 8); - T0 = _mm_xor_si128(T0, T2); - T3 = _mm_xor_si128(T3, T1); - - T4 = _mm_srli_epi32(T0, 31); - T0 = _mm_slli_epi32(T0, 1); - - T5 = _mm_srli_epi32(T3, 31); - T3 = _mm_slli_epi32(T3, 1); - - T2 = _mm_srli_si128(T4, 12); - T5 = _mm_slli_si128(T5, 4); - T4 = _mm_slli_si128(T4, 4); - T0 = _mm_or_si128(T0, T4); - T3 = _mm_or_si128(T3, T5); - T3 = _mm_or_si128(T3, T2); - - T4 = _mm_slli_epi32(T0, 31); - T5 = _mm_slli_epi32(T0, 30); - T2 = _mm_slli_epi32(T0, 25); - - T4 = _mm_xor_si128(T4, T5); - T4 = _mm_xor_si128(T4, T2); - T5 = _mm_srli_si128(T4, 4); - T3 = _mm_xor_si128(T3, T5); - T4 = _mm_slli_si128(T4, 12); - T0 = _mm_xor_si128(T0, T4); - T3 = _mm_xor_si128(T3, T0); - - T4 = _mm_srli_epi32(T0, 1); - T1 = _mm_srli_epi32(T0, 2); - T2 = _mm_srli_epi32(T0, 7); - T3 = _mm_xor_si128(T3, T1); - T3 = _mm_xor_si128(T3, T2); - T3 = _mm_xor_si128(T3, T4); - - T3 = _mm_shuffle_epi8(T3, BSWAP_MASK); - - _mm_storeu_si128(reinterpret_cast<__m128i*>(&x[0]), T3); - } -#endif - -} - void GHASH::gcm_multiply(secure_vector& x) const { -#if defined(BOTAN_TARGET_SUPPORTS_CLMUL) +#if defined(BOTAN_HAS_GCM_CLMUL) if(CPUID::has_clmul()) return gcm_multiply_clmul(&x[0], &m_H[0]); #endif diff --git a/src/modes/aead/gcm/info.txt b/src/modes/aead/gcm/info.txt index be3ad285b..cf5f68bb5 100644 --- a/src/modes/aead/gcm/info.txt +++ b/src/modes/aead/gcm/info.txt @@ -1,7 +1,5 @@ define AEAD_GCM 20131128 -need_isa clmul,ssse3 - block ctr -- cgit v1.2.3