From 238eec08e89f2afed12c953d717b7e9e8c61b976 Mon Sep 17 00:00:00 2001 From: lloyd Date: Fri, 27 Dec 2013 22:46:51 +0000 Subject: Clean up GCM. Add CLMUL support. --- src/modes/aead/gcm/gcm.cpp | 216 ++++++++++++++++++++++++++++++++++---------- src/modes/aead/gcm/gcm.h | 46 ++++++++++ src/modes/aead/gcm/info.txt | 2 + src/utils/cpuid.h | 8 +- 4 files changed, 220 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/modes/aead/gcm/gcm.cpp b/src/modes/aead/gcm/gcm.cpp index 7b8e0aa36..ab931478a 100644 --- a/src/modes/aead/gcm/gcm.cpp +++ b/src/modes/aead/gcm/gcm.cpp @@ -10,13 +10,91 @@ #include #include +#if defined(BOTAN_TARGET_SUPPORTS_CLMUL) + #include + #include +#endif + namespace Botan { namespace { +#if defined(BOTAN_TARGET_SUPPORTS_CLMUL) +__m128i gcm_multiply_clmul(__m128i a, __m128i b) + { + /* + * Algorithms 1 and 5 from Intel's CLMUL guide + */ + __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + a = _mm_shuffle_epi8(a, BSWAP_MASK); + b = _mm_shuffle_epi8(b, BSWAP_MASK); + + __m128i T0, T1, T2, T3, T4, T5; + + T0 = _mm_clmulepi64_si128(a, b, 0x00); + T1 = _mm_clmulepi64_si128(a, b, 0x01); + T2 = _mm_clmulepi64_si128(a, b, 0x10); + T3 = _mm_clmulepi64_si128(a, b, 0x11); + + T1 = _mm_xor_si128(T1, T2); + T2 = _mm_slli_si128(T1, 8); + T1 = _mm_srli_si128(T1, 8); + T0 = _mm_xor_si128(T0, T2); + T3 = _mm_xor_si128(T3, T1); + + T4 = _mm_srli_epi32(T0, 31); + T0 = _mm_slli_epi32(T0, 1); + + T5 = _mm_srli_epi32(T3, 31); + T3 = _mm_slli_epi32(T3, 1); + + T2 = _mm_srli_si128(T4, 12); + T5 = _mm_slli_si128(T5, 4); + T4 = _mm_slli_si128(T4, 4); + T0 = _mm_or_si128(T0, T4); + T3 = _mm_or_si128(T3, T5); + T3 = _mm_or_si128(T3, T2); + + T4 = _mm_slli_epi32(T0, 31); + T5 = _mm_slli_epi32(T0, 30); + T2 = _mm_slli_epi32(T0, 25); + + T4 = _mm_xor_si128(T4, T5); + T4 = _mm_xor_si128(T4, T2); + T5 = _mm_srli_si128(T4, 4); + T3 = _mm_xor_si128(T3, T5); + T4 = _mm_slli_si128(T4, 12); + T0 = _mm_xor_si128(T0, T4); + T3 = _mm_xor_si128(T3, T0); + + T4 = _mm_srli_epi32(T0, 1); + T1 = _mm_srli_epi32(T0, 2); + T2 = _mm_srli_epi32(T0, 7); + T3 = _mm_xor_si128(T3, T1); + T3 = _mm_xor_si128(T3, T2); + T3 = _mm_xor_si128(T3, T4); + + return _mm_shuffle_epi8(T3, BSWAP_MASK); + } +#endif + void gcm_multiply(secure_vector& x, const secure_vector& h) { +#if defined(BOTAN_TARGET_SUPPORTS_CLMUL) + if(CPUID::has_clmul()) + { + __m128i xmm = _mm_loadu_si128(reinterpret_cast(&x[0])); + __m128i hmm = _mm_loadu_si128(reinterpret_cast(&h[0])); + + xmm = gcm_multiply_clmul(xmm, hmm); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&x[0]), xmm); + return; + } +#endif + static const u64bit R = 0xE100000000000000; u64bit H[2] = { @@ -26,7 +104,7 @@ void gcm_multiply(secure_vector& x, u64bit Z[2] = { 0, 0 }; - // Both CLMUL and SSE2 versions would be useful + // SSE2 might be useful here for(size_t i = 0; i != 2; ++i) { @@ -50,9 +128,10 @@ void gcm_multiply(secure_vector& x, store_be(&x[0], Z[0], Z[1]); } -void ghash_update(const secure_vector& H, - secure_vector& ghash, - const byte input[], size_t length) +} + +void GHASH::ghash_update(secure_vector& ghash, + const byte input[], size_t length) { const size_t BS = 16; @@ -66,52 +145,106 @@ void ghash_update(const secure_vector& H, xor_buf(&ghash[0], &input[0], to_proc); - gcm_multiply(ghash, H); + gcm_multiply(ghash, m_H); input += to_proc; length -= to_proc; } } -void ghash_finalize(const secure_vector& H, - secure_vector& ghash, - size_t ad_len, size_t text_len) +void GHASH::key_schedule(const byte key[], size_t length) + { + m_H.assign(key, key+length); + m_H_ad.resize(16); + m_ad_len = 0; + m_text_len = 0; + } + +void GHASH::start(const byte nonce[], size_t len) + { + m_nonce.assign(nonce, nonce + len); + m_ghash = m_H_ad; + } + +void GHASH::set_associated_data(const byte input[], size_t length) + { + zeroise(m_H_ad); + + ghash_update(m_H_ad, input, length); + m_ad_len = length; + } + +void GHASH::update(const byte input[], size_t length) + { + BOTAN_ASSERT(m_ghash.size() == 16, "Key was set"); + + m_text_len += length; + + ghash_update(m_ghash, input, length); + } + +void GHASH::add_final_block(secure_vector& hash, + size_t ad_len, size_t text_len) { secure_vector final_block(16); store_be(&final_block[0], 8*ad_len, 8*text_len); - ghash_update(H, ghash, &final_block[0], final_block.size()); + ghash_update(hash, &final_block[0], final_block.size()); } -} +secure_vector GHASH::final() + { + add_final_block(m_ghash, m_ad_len, m_text_len); + + secure_vector mac; + mac.swap(m_ghash); + + mac ^= m_nonce; + m_text_len = 0; + return mac; + } + +secure_vector GHASH::nonce_hash(const byte nonce[], size_t nonce_len) + { + BOTAN_ASSERT(m_ghash.size() == 0, "nonce_hash called during wrong time"); + secure_vector y0(16); + + ghash_update(y0, nonce, nonce_len); + add_final_block(y0, 0, nonce_len); + + return y0; + } + +void GHASH::clear() + { + zeroise(m_H); + zeroise(m_H_ad); + m_ghash.clear(); + m_text_len = m_ad_len = 0; + } /* * GCM_Mode Constructor */ GCM_Mode::GCM_Mode(BlockCipher* cipher, size_t tag_size) : m_tag_size(tag_size), - m_cipher_name(cipher->name()), - m_H(BS), m_H_ad(BS), m_mac(BS), m_enc_y0(BS), - m_ad_len(0), m_text_len(0) + m_cipher_name(cipher->name()) { if(cipher->block_size() != BS) throw std::invalid_argument("GCM requires a 128 bit cipher so cannot be used with " + cipher->name()); + m_ghash.reset(new GHASH); + m_ctr.reset(new CTR_BE(cipher)); // CTR_BE takes ownership of cipher - if(m_tag_size < 8 || m_tag_size > 16) + if(m_tag_size != 8 && m_tag_size != 16) throw Invalid_Argument(name() + ": Bad tag size " + std::to_string(m_tag_size)); } void GCM_Mode::clear() { - zeroise(m_H); - zeroise(m_H_ad); - zeroise(m_mac); - zeroise(m_enc_y0); - m_ad_len = 0; - m_text_len = 0; - m_ctr.reset(); + m_ctr->clear(); + m_ghash->clear(); } std::string GCM_Mode::name() const @@ -136,16 +269,14 @@ void GCM_Mode::key_schedule(const byte key[], size_t keylen) const std::vector zeros(BS); m_ctr->set_iv(&zeros[0], zeros.size()); - zeroise(m_H); - m_ctr->cipher(&m_H[0], &m_H[0], m_H.size()); + secure_vector H(BS); + m_ctr->encipher(H); + m_ghash->set_key(H); } void GCM_Mode::set_associated_data(const byte ad[], size_t ad_len) { - zeroise(m_H_ad); - - ghash_update(m_H, m_H_ad, ad, ad_len); - m_ad_len = ad_len; + m_ghash->set_associated_data(ad, ad_len); } secure_vector GCM_Mode::start(const byte nonce[], size_t nonce_len) @@ -162,17 +293,15 @@ secure_vector GCM_Mode::start(const byte nonce[], size_t nonce_len) } else { - ghash_update(m_H, y0, nonce, nonce_len); - ghash_finalize(m_H, y0, 0, nonce_len); + y0 = m_ghash->nonce_hash(nonce, nonce_len); } m_ctr->set_iv(&y0[0], y0.size()); - zeroise(m_enc_y0); + secure_vector m_enc_y0(BS); m_ctr->encipher(m_enc_y0); - m_text_len = 0; - m_mac = m_H_ad; + m_ghash->start(&m_enc_y0[0], m_enc_y0.size()); return secure_vector(); } @@ -184,19 +313,14 @@ void GCM_Encryption::update(secure_vector& buffer, size_t offset) byte* buf = &buffer[offset]; m_ctr->cipher(buf, buf, sz); - ghash_update(m_H, m_mac, buf, sz); - m_text_len += sz; + m_ghash->update(buf, sz); } void GCM_Encryption::finish(secure_vector& buffer, size_t offset) { update(buffer, offset); - - ghash_finalize(m_H, m_mac, m_ad_len, m_text_len); - - m_mac ^= m_enc_y0; - - buffer += std::make_pair(&m_mac[0], tag_size()); + auto mac = m_ghash->final(); + buffer += std::make_pair(&mac[0], tag_size()); } void GCM_Decryption::update(secure_vector& buffer, size_t offset) @@ -205,9 +329,8 @@ void GCM_Decryption::update(secure_vector& buffer, size_t offset) const size_t sz = buffer.size() - offset; byte* buf = &buffer[offset]; - ghash_update(m_H, m_mac, buf, sz); + m_ghash->update(buf, sz); m_ctr->cipher(buf, buf, sz); - m_text_len += sz; } void GCM_Decryption::finish(secure_vector& buffer, size_t offset) @@ -223,18 +346,15 @@ void GCM_Decryption::finish(secure_vector& buffer, size_t offset) // handle any final input before the tag if(remaining) { - ghash_update(m_H, m_mac, buf, remaining); + m_ghash->update(buf, remaining); m_ctr->cipher(buf, buf, remaining); - m_text_len += remaining; } - ghash_finalize(m_H, m_mac, m_ad_len, m_text_len); - - m_mac ^= m_enc_y0; + auto mac = m_ghash->final(); const byte* included_tag = &buffer[remaining]; - if(!same_mem(&m_mac[0], included_tag, tag_size())) + if(!same_mem(&mac[0], included_tag, tag_size())) throw Integrity_Failure("GCM tag check failed"); buffer.resize(offset + remaining); diff --git a/src/modes/aead/gcm/gcm.h b/src/modes/aead/gcm/gcm.h index f828a3ff2..e6e974a60 100644 --- a/src/modes/aead/gcm/gcm.h +++ b/src/modes/aead/gcm/gcm.h @@ -15,6 +15,8 @@ namespace Botan { +class GHASH; + /** * GCM Mode */ @@ -48,11 +50,14 @@ class BOTAN_DLL GCM_Mode : public AEAD_Mode const std::string m_cipher_name; std::unique_ptr m_ctr; + std::unique_ptr m_ghash; +#if 0 secure_vector m_H; secure_vector m_H_ad; secure_vector m_mac; secure_vector m_enc_y0; size_t m_ad_len, m_text_len; +#endif }; /** @@ -104,6 +109,47 @@ class BOTAN_DLL GCM_Decryption : public GCM_Mode void finish(secure_vector& final_block, size_t offset) override; }; +/** +* GCM's GHASH +* Maybe a Transform? +*/ +class BOTAN_DLL GHASH : public SymmetricAlgorithm + { + public: + void set_associated_data(const byte ad[], size_t ad_len); + + secure_vector nonce_hash(const byte nonce[], size_t len); + + void start(const byte nonce[], size_t len); + + /* + * Assumes input len is multiple of 16 + */ + void update(const byte in[], size_t len); + + secure_vector final(); + + Key_Length_Specification key_spec() const { return Key_Length_Specification(16); } + + void clear(); + + std::string name() const { return "GHASH"; } + private: + void key_schedule(const byte key[], size_t key_len) override; + + void ghash_update(secure_vector& x, + const byte input[], size_t input_len); + + void add_final_block(secure_vector& x, + size_t ad_len, size_t pt_len); + + secure_vector m_H; + secure_vector m_H_ad; + secure_vector m_nonce; + secure_vector m_ghash; + size_t m_ad_len = 0, m_text_len = 0; + }; + } #endif diff --git a/src/modes/aead/gcm/info.txt b/src/modes/aead/gcm/info.txt index cf5f68bb5..be3ad285b 100644 --- a/src/modes/aead/gcm/info.txt +++ b/src/modes/aead/gcm/info.txt @@ -1,5 +1,7 @@ define AEAD_GCM 20131128 +need_isa clmul,ssse3 + block ctr diff --git a/src/utils/cpuid.h b/src/utils/cpuid.h index 008605e0c..67657d2ee 100644 --- a/src/utils/cpuid.h +++ b/src/utils/cpuid.h @@ -83,10 +83,10 @@ class BOTAN_DLL CPUID { return x86_processor_flags_has(CPUID_AESNI_BIT); } /** - * Check if the processor supports PCMULUDQ + * Check if the processor supports CLMUL */ - static bool has_pcmuludq() - { return x86_processor_flags_has(CPUID_PCMUL_BIT); } + static bool has_clmul() + { return x86_processor_flags_has(CPUID_CLMUL_BIT); } /** * Check if the processor supports Intel SHA extension @@ -120,7 +120,7 @@ class BOTAN_DLL CPUID enum CPUID_bits { CPUID_RDTSC_BIT = 4, CPUID_SSE2_BIT = 26, - CPUID_PCMUL_BIT = 33, + CPUID_CLMUL_BIT = 33, CPUID_SSSE3_BIT = 41, CPUID_SSE41_BIT = 51, CPUID_SSE42_BIT = 52, -- cgit v1.2.3