aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-09-16 14:45:17 -0400
committerJack Lloyd <[email protected]>2017-09-16 14:45:17 -0400
commitf39aa7bb4e43e27b10d3f890da7ba1acba9f14ca (patch)
tree44eb974c45b4c644c2932502962a6c403b00f8b4
parent4c8b0316defbe9e6bfb3b3ce68c506b437760af0 (diff)
De-inline xor_buf, add SIMD and unrolling
Improves CBC and OCB performance with AES-NI quite noticeably
-rw-r--r--src/lib/hash/streebog/streebog.cpp35
-rw-r--r--src/lib/utils/mem_ops.cpp86
-rw-r--r--src/lib/utils/mem_ops.h46
3 files changed, 133 insertions, 34 deletions
diff --git a/src/lib/hash/streebog/streebog.cpp b/src/lib/hash/streebog/streebog.cpp
index ae2fe1fef..c0e60f10a 100644
--- a/src/lib/hash/streebog/streebog.cpp
+++ b/src/lib/hash/streebog/streebog.cpp
@@ -53,20 +53,28 @@ inline void lps(uint64_t* block)
}
}
-inline void e(uint64_t* K, const uint8_t* m)
+inline void e(uint64_t* K, const uint64_t* m)
{
- uint64_t tmp[8];
+ uint64_t A[8];
uint64_t C[8];
- std::memcpy(tmp, K, 64);
- xor_buf(K, reinterpret_cast<const uint64_t*>(m), 8);
+ copy_mem(A, K, 8); // takes over from the removed 64-byte memcpy: A starts as a copy of K
+
+ for(size_t i = 0; i != 8; ++i)
+ {
+ K[i] ^= m[i]; // open-coded replacement for the removed 64-bit xor_buf(K, m, 8)
+ }
+
for(int i = 0; i < 12; ++i)
{
lps(K);
load_le(C, reinterpret_cast<const uint8_t*>(&STREEBOG_C[i][0]), 8);
- xor_buf(tmp, C, 8);
- lps(tmp);
- xor_buf(K, tmp, 8);
+
+ for(size_t j = 0; j != 8; ++j) // word index is j so it does not shadow the round counter i
+ A[j] ^= C[j];
+ lps(A);
+ for(size_t j = 0; j != 8; ++j)
+ K[j] ^= A[j];
}
}
@@ -77,12 +85,17 @@ inline void g(uint64_t* h, const uint8_t* m, uint64_t N)
// force N to little-endian
store_le(N, reinterpret_cast<uint8_t*>(&N));
- std::memcpy(hN, h, 64);
+ copy_mem(hN, h, 8);
hN[0] ^= N;
lps(hN);
- e(hN, m);
- xor_buf(h, hN, 8);
- xor_buf(h, reinterpret_cast<const uint64_t*>(m), 8);
+ const uint64_t* m64 = reinterpret_cast<const uint64_t*>(m);
+
+ e(hN, m64);
+
+ for(size_t i = 0; i != 8; ++i)
+ {
+ h[i] ^= hN[i] ^ m64[i];
+ }
}
} //namespace
diff --git a/src/lib/utils/mem_ops.cpp b/src/lib/utils/mem_ops.cpp
new file mode 100644
index 000000000..41a1bc547
--- /dev/null
+++ b/src/lib/utils/mem_ops.cpp
@@ -0,0 +1,86 @@
+/*
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/mem_ops.h>
+
+#if defined(BOTAN_HAS_SIMD_32)
+ #include <botan/internal/simd_32.h>
+ #include <botan/cpuid.h>
+#endif
+
+namespace Botan {
+
+bool constant_time_compare(const uint8_t x[],
+ const uint8_t y[],
+ size_t len)
+ {
+ volatile uint8_t difference = 0; // accumulates the OR of every per-byte XOR
+
+ for(size_t i = 0; i != len; ++i) // always visits all len bytes; no early exit on a mismatch
+ difference |= (x[i] ^ y[i]);
+
+ return difference == 0; // zero only if x[i] == y[i] for every i in [0, len)
+ }
+
+void xor_buf(uint8_t x[],
+ const uint8_t y[],
+ size_t len)
+ {
+#if defined(BOTAN_HAS_SIMD_32)
+ if(CPUID::has_simd_32()) // checked at run time, in addition to the #if guard above
+ {
+ while(len >= 16) // vector path: consumes 16 bytes of x and y per pass
+ {
+ SIMD_32 x16 = SIMD_32::load_le(x);
+ SIMD_32 y16 = SIMD_32::load_le(y);
+ x16 ^= y16;
+ x16.store_le(x); // result is written back over x (in-place XOR)
+
+ len -= 16;
+ x += 16;
+ y += 16;
+ }
+ }
+#endif
+
+ while(len >= 8) // unrolled scalar path: 8 bytes per pass; also covers whatever the 16-byte loop left
+ {
+ x[0] ^= y[0]; x[1] ^= y[1];
+ x[2] ^= y[2]; x[3] ^= y[3];
+ x[4] ^= y[4]; x[5] ^= y[5];
+ x[6] ^= y[6]; x[7] ^= y[7];
+ x += 8; y += 8; len -= 8;
+ }
+
+ for(size_t i = 0; i != len; ++i) // tail: fewer than 8 bytes remain at this point
+ {
+ x[i] ^= y[i];
+ }
+ }
+
+void xor_buf(uint8_t out[],
+ const uint8_t in[],
+ const uint8_t in2[],
+ size_t length)
+ {
+ while(length >= 8) // unrolled: produces 8 output bytes per pass
+ {
+ out[0] = in[0] ^ in2[0];
+ out[1] = in[1] ^ in2[1];
+ out[2] = in[2] ^ in2[2];
+ out[3] = in[3] ^ in2[3];
+ out[4] = in[4] ^ in2[4];
+ out[5] = in[5] ^ in2[5];
+ out[6] = in[6] ^ in2[6];
+ out[7] = in[7] ^ in2[7];
+ in += 8; in2 += 8; out += 8; length -= 8; // advance all three buffers together
+ }
+
+ for(size_t i = 0; i != length; ++i) // tail: fewer than 8 bytes remain at this point
+ out[i] = in[i] ^ in2[i];
+ }
+
+}
diff --git a/src/lib/utils/mem_ops.h b/src/lib/utils/mem_ops.h
index 13c987526..8f6aff12e 100644
--- a/src/lib/utils/mem_ops.h
+++ b/src/lib/utils/mem_ops.h
@@ -32,6 +32,17 @@ namespace Botan {
BOTAN_DLL void secure_scrub_memory(void* ptr, size_t n);
/**
+* Memory comparison, input insensitive
+* @param x a pointer to an array
+* @param y a pointer to another array
+* @param len the number of bytes in x and y
+* @return true iff x[i] == y[i] forall i in [0...n)
+*/
+BOTAN_DLL bool constant_time_compare(const uint8_t x[],
+ const uint8_t y[],
+ size_t len);
+
+/**
* Zero out some bytes
* @param ptr a pointer to memory to zero
* @param bytes the number of bytes to zero in ptr
@@ -106,19 +117,14 @@ template<typename T> inline bool same_mem(const T* p1, const T* p2, size_t n)
}
/**
-* XOR_ arrays. Postcondition out[i] = in[i] ^ out[i] forall i = 0...length
+* XOR arrays. Postcondition out[i] = in[i] ^ out[i] forall i = 0...length
* @param out the input/output buffer
* @param in the read-only input buffer
* @param length the length of the buffers
*/
-template<typename T>
-void xor_buf(T out[], const T in[], size_t length)
- {
- for(size_t i = 0; i != length; ++i)
- {
- out[i] ^= in[i];
- }
- }
+BOTAN_DLL void xor_buf(uint8_t x[],
+ const uint8_t y[],
+ size_t len);
/**
* XOR arrays. Postcondition out[i] = in[i] ^ in2[i] forall i = 0...length
@@ -127,16 +133,10 @@ void xor_buf(T out[], const T in[], size_t length)
* @param in2 the second output buffer
* @param length the length of the three buffers
*/
-template<typename T> void xor_buf(T out[],
- const T in[],
- const T in2[],
- size_t length)
- {
- for(size_t i = 0; i != length; ++i)
- {
- out[i] = in[i] ^ in2[i];
- }
- }
+BOTAN_DLL void xor_buf(uint8_t out[],
+ const uint8_t in[],
+ const uint8_t in2[],
+ size_t length);
template<typename Alloc, typename Alloc2>
void xor_buf(std::vector<uint8_t, Alloc>& out,
@@ -163,10 +163,10 @@ void xor_buf(std::vector<uint8_t, Alloc>& out,
xor_buf(out.data(), in, in2.data(), n);
}
-template<typename T, typename Alloc, typename Alloc2>
-std::vector<T, Alloc>&
-operator^=(std::vector<T, Alloc>& out,
- const std::vector<T, Alloc2>& in)
+template<typename Alloc, typename Alloc2>
+std::vector<uint8_t, Alloc>&
+operator^=(std::vector<uint8_t, Alloc>& out,
+ const std::vector<uint8_t, Alloc2>& in)
{
if(out.size() < in.size())
out.resize(in.size());