aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/utils
diff options
context:
space:
mode:
author: Jack Lloyd <[email protected]> 2017-10-12 19:43:35 -0400
committer: Jack Lloyd <[email protected]> 2017-10-12 19:43:35 -0400
commit: ebf147bcf6b84249cf289009ba81c3f3611ea2de (patch)
tree: 8c7baf608daa54477ea8c1ffadba9819708051c8 /src/lib/utils
parent: fa8b83578dcffc394b8449207f60662d7e562728 (diff)
OCB optimizations
From ~5 cpb to ~2.5 cpb (cycles per byte) on Skylake
Diffstat (limited to 'src/lib/utils')
-rw-r--r-- src/lib/utils/bit_ops.h 11
-rw-r--r-- src/lib/utils/mem_ops.cpp 61
-rw-r--r-- src/lib/utils/mem_ops.h 66
3 files changed, 70 insertions, 68 deletions
diff --git a/src/lib/utils/bit_ops.h b/src/lib/utils/bit_ops.h
index a59404c75..2da0e55fb 100644
--- a/src/lib/utils/bit_ops.h
+++ b/src/lib/utils/bit_ops.h
@@ -102,6 +102,17 @@ inline size_t ctz(T n)
return 8*sizeof(T);
}
+#if defined(BOTAN_BUILD_COMPILER_IS_GCC)
+
+template<> // explicit specialization: count trailing zero bits of a 32-bit value using the compiler builtin
+inline size_t ctz(uint32_t n) // returns the index of the lowest set bit of n, or 32 when n has no set bit
+ {
+ return (n == 0) ? 32 : __builtin_ctz(n); // __builtin_ctz(0) is undefined, so zero is answered explicitly; 32 == 8*sizeof(uint32_t), the value the generic ctz falls through to
+ }
+
+#endif
+
+
template<typename T>
size_t ceil_log2(T x)
{
diff --git a/src/lib/utils/mem_ops.cpp b/src/lib/utils/mem_ops.cpp
index 29c93eb15..3fd463195 100644
--- a/src/lib/utils/mem_ops.cpp
+++ b/src/lib/utils/mem_ops.cpp
@@ -53,65 +53,4 @@ bool constant_time_compare(const uint8_t x[],
return difference == 0;
}
-void xor_buf(uint8_t x[],
- const uint8_t y[],
- size_t len)
- {
- while(len >= 16)
- {
- x[0] ^= y[0];
- x[1] ^= y[1];
- x[2] ^= y[2];
- x[3] ^= y[3];
- x[4] ^= y[4];
- x[5] ^= y[5];
- x[6] ^= y[6];
- x[7] ^= y[7];
- x[8] ^= y[8];
- x[9] ^= y[9];
- x[10] ^= y[10];
- x[11] ^= y[11];
- x[12] ^= y[12];
- x[13] ^= y[13];
- x[14] ^= y[14];
- x[15] ^= y[15];
- x += 16; y += 16; len -= 16;
- }
-
- for(size_t i = 0; i != len; ++i)
- {
- x[i] ^= y[i];
- }
- }
-
-void xor_buf(uint8_t out[],
- const uint8_t in[],
- const uint8_t in2[],
- size_t length)
- {
- while(length >= 16)
- {
- out[0] = in[0] ^ in2[0];
- out[1] = in[1] ^ in2[1];
- out[2] = in[2] ^ in2[2];
- out[3] = in[3] ^ in2[3];
- out[4] = in[4] ^ in2[4];
- out[5] = in[5] ^ in2[5];
- out[6] = in[6] ^ in2[6];
- out[7] = in[7] ^ in2[7];
- out[8] = in[8] ^ in2[8];
- out[9] = in[9] ^ in2[9];
- out[10] = in[10] ^ in2[10];
- out[11] = in[11] ^ in2[11];
- out[12] = in[12] ^ in2[12];
- out[13] = in[13] ^ in2[13];
- out[14] = in[14] ^ in2[14];
- out[15] = in[15] ^ in2[15];
- in += 16; in2 += 16; out += 16; length -= 16;
- }
-
- for(size_t i = 0; i != length; ++i)
- out[i] = in[i] ^ in2[i];
- }
-
}
diff --git a/src/lib/utils/mem_ops.h b/src/lib/utils/mem_ops.h
index ed4d6cb27..175f38e2f 100644
--- a/src/lib/utils/mem_ops.h
+++ b/src/lib/utils/mem_ops.h
@@ -160,9 +160,36 @@ template<typename T> inline bool same_mem(const T* p1, const T* p2, size_t n)
* @param in the read-only input buffer
* @param length the length of the buffers
*/
-BOTAN_PUBLIC_API(2,3) void xor_buf(uint8_t out[],
- const uint8_t in[],
- size_t length);
+inline void xor_buf(uint8_t out[], // in-place XOR: out[i] ^= in[i] for each of the first `length` bytes
+ const uint8_t in[], // read-only source bytes
+ size_t length) // number of bytes to process in both buffers
+ {
+ while(length >= 16) // bulk path: handle 16 bytes per pass, written out one statement per byte
+ {
+ out[0] ^= in[0];
+ out[1] ^= in[1];
+ out[2] ^= in[2];
+ out[3] ^= in[3];
+ out[4] ^= in[4];
+ out[5] ^= in[5];
+ out[6] ^= in[6];
+ out[7] ^= in[7];
+ out[8] ^= in[8];
+ out[9] ^= in[9];
+ out[10] ^= in[10];
+ out[11] ^= in[11];
+ out[12] ^= in[12];
+ out[13] ^= in[13];
+ out[14] ^= in[14];
+ out[15] ^= in[15];
+ out += 16; in += 16; length -= 16; // step both pointers past the finished chunk and shrink the remaining count
+ }
+
+ for(size_t i = 0; i != length; ++i) // tail: fewer than 16 bytes remain once the loop above exits
+ {
+ out[i] ^= in[i];
+ }
+ }
/**
* XOR arrays. Postcondition out[i] = in[i] ^ in2[i] forall i = 0...length
@@ -171,10 +198,35 @@ BOTAN_PUBLIC_API(2,3) void xor_buf(uint8_t out[],
* @param in2 the second input buffer
* @param length the length of the three buffers
*/
-BOTAN_PUBLIC_API(2,3) void xor_buf(uint8_t out[],
- const uint8_t in[],
- const uint8_t in2[],
- size_t length);
+inline void xor_buf(uint8_t out[], // three-buffer XOR: out[i] = in[i] ^ in2[i] for each of the first `length` bytes
+ const uint8_t in[], // first read-only operand
+ const uint8_t in2[], // second read-only operand
+ size_t length) // number of bytes to process in all three buffers
+ {
+ while(length >= 16) // bulk path: handle 16 bytes per pass, written out one statement per byte
+ {
+ out[0] = in[0] ^ in2[0];
+ out[1] = in[1] ^ in2[1];
+ out[2] = in[2] ^ in2[2];
+ out[3] = in[3] ^ in2[3];
+ out[4] = in[4] ^ in2[4];
+ out[5] = in[5] ^ in2[5];
+ out[6] = in[6] ^ in2[6];
+ out[7] = in[7] ^ in2[7];
+ out[8] = in[8] ^ in2[8];
+ out[9] = in[9] ^ in2[9];
+ out[10] = in[10] ^ in2[10];
+ out[11] = in[11] ^ in2[11];
+ out[12] = in[12] ^ in2[12];
+ out[13] = in[13] ^ in2[13];
+ out[14] = in[14] ^ in2[14];
+ out[15] = in[15] ^ in2[15];
+ in += 16; in2 += 16; out += 16; length -= 16; // step all three pointers past the finished chunk and shrink the remaining count
+ }
+
+ for(size_t i = 0; i != length; ++i) // tail: fewer than 16 bytes remain once the loop above exits
+ out[i] = in[i] ^ in2[i];
+ }
template<typename Alloc, typename Alloc2>
void xor_buf(std::vector<uint8_t, Alloc>& out,