diff options
author | Jack Lloyd <[email protected]> | 2017-09-16 14:45:17 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-09-16 14:45:17 -0400 |
commit | f39aa7bb4e43e27b10d3f890da7ba1acba9f14ca (patch) | |
tree | 44eb974c45b4c644c2932502962a6c403b00f8b4 | |
parent | 4c8b0316defbe9e6bfb3b3ce68c506b437760af0 (diff) |
De-inline xor_buf, add SIMD and unrolling
Improves CBC and OCB performance with AES-NI quite noticeably
-rw-r--r-- | src/lib/hash/streebog/streebog.cpp | 35 | ||||
-rw-r--r-- | src/lib/utils/mem_ops.cpp | 86 | ||||
-rw-r--r-- | src/lib/utils/mem_ops.h | 46 |
3 files changed, 133 insertions, 34 deletions
diff --git a/src/lib/hash/streebog/streebog.cpp b/src/lib/hash/streebog/streebog.cpp index ae2fe1fef..c0e60f10a 100644 --- a/src/lib/hash/streebog/streebog.cpp +++ b/src/lib/hash/streebog/streebog.cpp @@ -53,20 +53,28 @@ inline void lps(uint64_t* block) } } -inline void e(uint64_t* K, const uint8_t* m) +inline void e(uint64_t* K, const uint64_t* m) { - uint64_t tmp[8]; + uint64_t A[8]; uint64_t C[8]; - std::memcpy(tmp, K, 64); - xor_buf(K, reinterpret_cast<const uint64_t*>(m), 8); + copy_mem(A, K, 8); + + for(size_t i = 0; i != 8; ++i) + { + K[i] ^= m[i]; + } + for(int i = 0; i < 12; ++i) { lps(K); load_le(C, reinterpret_cast<const uint8_t*>(&STREEBOG_C[i][0]), 8); - xor_buf(tmp, C, 8); - lps(tmp); - xor_buf(K, tmp, 8); + + for(size_t i = 0; i != 8; ++i) + A[i] ^= C[i]; + lps(A); + for(size_t i = 0; i != 8; ++i) + K[i] ^= A[i]; } } @@ -77,12 +85,17 @@ inline void g(uint64_t* h, const uint8_t* m, uint64_t N) // force N to little-endian store_le(N, reinterpret_cast<uint8_t*>(&N)); - std::memcpy(hN, h, 64); + copy_mem(hN, h, 8); hN[0] ^= N; lps(hN); - e(hN, m); - xor_buf(h, hN, 8); - xor_buf(h, reinterpret_cast<const uint64_t*>(m), 8); + const uint64_t* m64 = reinterpret_cast<const uint64_t*>(m); + + e(hN, m64); + + for(size_t i = 0; i != 8; ++i) + { + h[i] ^= hN[i] ^ m64[i]; + } } } //namespace diff --git a/src/lib/utils/mem_ops.cpp b/src/lib/utils/mem_ops.cpp new file mode 100644 index 000000000..41a1bc547 --- /dev/null +++ b/src/lib/utils/mem_ops.cpp @@ -0,0 +1,86 @@ +/* +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/mem_ops.h> + +#if defined(BOTAN_HAS_SIMD_32) + #include <botan/internal/simd_32.h> + #include <botan/cpuid.h> +#endif + +namespace Botan { + +bool constant_time_compare(const uint8_t x[], + const uint8_t y[], + size_t len) + { + volatile uint8_t difference = 0; + + for(size_t i = 0; i != len; ++i) + difference |= (x[i] ^ y[i]); + + return difference == 0; + } + +void 
xor_buf(uint8_t x[], + const uint8_t y[], + size_t len) + { +#if defined(BOTAN_HAS_SIMD_32) + if(CPUID::has_simd_32()) + { + while(len >= 16) + { + SIMD_32 x16 = SIMD_32::load_le(x); + SIMD_32 y16 = SIMD_32::load_le(y); + x16 ^= y16; + x16.store_le(x); + + len -= 16; + x += 16; + y += 16; + } + } +#endif + + while(len >= 8) + { + x[0] ^= y[0]; x[1] ^= y[1]; + x[2] ^= y[2]; x[3] ^= y[3]; + x[4] ^= y[4]; x[5] ^= y[5]; + x[6] ^= y[6]; x[7] ^= y[7]; + x += 8; y += 8; len -= 8; + } + + for(size_t i = 0; i != len; ++i) + { + x[i] ^= y[i]; + } + } + +void xor_buf(uint8_t out[], + const uint8_t in[], + const uint8_t in2[], + size_t length) + { + while(length >= 8) + { + out[0] = in[0] ^ in2[0]; + out[1] = in[1] ^ in2[1]; + out[2] = in[2] ^ in2[2]; + out[3] = in[3] ^ in2[3]; + out[4] = in[4] ^ in2[4]; + out[5] = in[5] ^ in2[5]; + out[6] = in[6] ^ in2[6]; + out[7] = in[7] ^ in2[7]; + in += 8; in2 += 8; out += 8; length -= 8; + } + + for(size_t i = 0; i != length; ++i) + out[i] = in[i] ^ in2[i]; + } + +} diff --git a/src/lib/utils/mem_ops.h b/src/lib/utils/mem_ops.h index 13c987526..8f6aff12e 100644 --- a/src/lib/utils/mem_ops.h +++ b/src/lib/utils/mem_ops.h @@ -32,6 +32,17 @@ namespace Botan { BOTAN_DLL void secure_scrub_memory(void* ptr, size_t n); /** +* Memory comparison, input insensitive +* @param x a pointer to an array +* @param y a pointer to another array +* @param n the number of Ts in x and y +* @return true iff x[i] == y[i] forall i in [0...n) +*/ +BOTAN_DLL bool constant_time_compare(const uint8_t x[], + const uint8_t y[], + size_t len); + +/** * Zero out some bytes * @param ptr a pointer to memory to zero * @param bytes the number of bytes to zero in ptr @@ -106,19 +117,14 @@ template<typename T> inline bool same_mem(const T* p1, const T* p2, size_t n) } /** -* XOR_ arrays. Postcondition out[i] = in[i] ^ out[i] forall i = 0...length +* XOR arrays. 
Postcondition out[i] = in[i] ^ out[i] forall i = 0...length * @param out the input/output buffer * @param in the read-only input buffer * @param length the length of the buffers */ -template<typename T> -void xor_buf(T out[], const T in[], size_t length) - { - for(size_t i = 0; i != length; ++i) - { - out[i] ^= in[i]; - } - } +BOTAN_DLL void xor_buf(uint8_t x[], + const uint8_t y[], + size_t len); /** * XOR arrays. Postcondition out[i] = in[i] ^ in2[i] forall i = 0...length @@ -127,16 +133,10 @@ void xor_buf(T out[], const T in[], size_t length) * @param in2 the second output buffer * @param length the length of the three buffers */ -template<typename T> void xor_buf(T out[], - const T in[], - const T in2[], - size_t length) - { - for(size_t i = 0; i != length; ++i) - { - out[i] = in[i] ^ in2[i]; - } - } +BOTAN_DLL void xor_buf(uint8_t out[], + const uint8_t in[], + const uint8_t in2[], + size_t length); template<typename Alloc, typename Alloc2> void xor_buf(std::vector<uint8_t, Alloc>& out, @@ -163,10 +163,10 @@ void xor_buf(std::vector<uint8_t, Alloc>& out, xor_buf(out.data(), in, in2.data(), n); } -template<typename T, typename Alloc, typename Alloc2> -std::vector<T, Alloc>& -operator^=(std::vector<T, Alloc>& out, - const std::vector<T, Alloc2>& in) +template<typename Alloc, typename Alloc2> +std::vector<uint8_t, Alloc>& +operator^=(std::vector<uint8_t, Alloc>& out, + const std::vector<uint8_t, Alloc2>& in) { if(out.size() < in.size()) out.resize(in.size()); |