diff options
author | Jack Lloyd <[email protected]> | 2017-10-12 19:43:35 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-10-12 19:43:35 -0400 |
commit | ebf147bcf6b84249cf289009ba81c3f3611ea2de (patch) | |
tree | 8c7baf608daa54477ea8c1ffadba9819708051c8 /src/lib/utils | |
parent | fa8b83578dcffc394b8449207f60662d7e562728 (diff) |
OCB optimizations
From ~5 cpb (cycles per byte) to ~2.5 cpb on Skylake
Diffstat (limited to 'src/lib/utils')
-rw-r--r-- | src/lib/utils/bit_ops.h | 11 | ||||
-rw-r--r-- | src/lib/utils/mem_ops.cpp | 61 | ||||
-rw-r--r-- | src/lib/utils/mem_ops.h | 66 |
3 files changed, 70 insertions, 68 deletions
diff --git a/src/lib/utils/bit_ops.h b/src/lib/utils/bit_ops.h index a59404c75..2da0e55fb 100644 --- a/src/lib/utils/bit_ops.h +++ b/src/lib/utils/bit_ops.h @@ -102,6 +102,17 @@ inline size_t ctz(T n) return 8*sizeof(T); } +#if defined(BOTAN_BUILD_COMPILER_IS_GCC) + +template<> +inline size_t ctz(uint32_t n) + { + return __builtin_ctz(n); + } + +#endif + + template<typename T> size_t ceil_log2(T x) { diff --git a/src/lib/utils/mem_ops.cpp b/src/lib/utils/mem_ops.cpp index 29c93eb15..3fd463195 100644 --- a/src/lib/utils/mem_ops.cpp +++ b/src/lib/utils/mem_ops.cpp @@ -53,65 +53,4 @@ bool constant_time_compare(const uint8_t x[], return difference == 0; } -void xor_buf(uint8_t x[], - const uint8_t y[], - size_t len) - { - while(len >= 16) - { - x[0] ^= y[0]; - x[1] ^= y[1]; - x[2] ^= y[2]; - x[3] ^= y[3]; - x[4] ^= y[4]; - x[5] ^= y[5]; - x[6] ^= y[6]; - x[7] ^= y[7]; - x[8] ^= y[8]; - x[9] ^= y[9]; - x[10] ^= y[10]; - x[11] ^= y[11]; - x[12] ^= y[12]; - x[13] ^= y[13]; - x[14] ^= y[14]; - x[15] ^= y[15]; - x += 16; y += 16; len -= 16; - } - - for(size_t i = 0; i != len; ++i) - { - x[i] ^= y[i]; - } - } - -void xor_buf(uint8_t out[], - const uint8_t in[], - const uint8_t in2[], - size_t length) - { - while(length >= 16) - { - out[0] = in[0] ^ in2[0]; - out[1] = in[1] ^ in2[1]; - out[2] = in[2] ^ in2[2]; - out[3] = in[3] ^ in2[3]; - out[4] = in[4] ^ in2[4]; - out[5] = in[5] ^ in2[5]; - out[6] = in[6] ^ in2[6]; - out[7] = in[7] ^ in2[7]; - out[8] = in[8] ^ in2[8]; - out[9] = in[9] ^ in2[9]; - out[10] = in[10] ^ in2[10]; - out[11] = in[11] ^ in2[11]; - out[12] = in[12] ^ in2[12]; - out[13] = in[13] ^ in2[13]; - out[14] = in[14] ^ in2[14]; - out[15] = in[15] ^ in2[15]; - in += 16; in2 += 16; out += 16; length -= 16; - } - - for(size_t i = 0; i != length; ++i) - out[i] = in[i] ^ in2[i]; - } - } diff --git a/src/lib/utils/mem_ops.h b/src/lib/utils/mem_ops.h index ed4d6cb27..175f38e2f 100644 --- a/src/lib/utils/mem_ops.h +++ b/src/lib/utils/mem_ops.h @@ -160,9 +160,36 @@ 
template<typename T> inline bool same_mem(const T* p1, const T* p2, size_t n) * @param in the read-only input buffer * @param length the length of the buffers */ -BOTAN_PUBLIC_API(2,3) void xor_buf(uint8_t out[], - const uint8_t in[], - size_t length); +inline void xor_buf(uint8_t out[], + const uint8_t in[], + size_t length) + { + while(length >= 16) + { + out[0] ^= in[0]; + out[1] ^= in[1]; + out[2] ^= in[2]; + out[3] ^= in[3]; + out[4] ^= in[4]; + out[5] ^= in[5]; + out[6] ^= in[6]; + out[7] ^= in[7]; + out[8] ^= in[8]; + out[9] ^= in[9]; + out[10] ^= in[10]; + out[11] ^= in[11]; + out[12] ^= in[12]; + out[13] ^= in[13]; + out[14] ^= in[14]; + out[15] ^= in[15]; + out += 16; in += 16; length -= 16; + } + + for(size_t i = 0; i != length; ++i) + { + out[i] ^= in[i]; + } + } /** * XOR arrays. Postcondition out[i] = in[i] ^ in2[i] forall i = 0...length @@ -171,10 +198,35 @@ BOTAN_PUBLIC_API(2,3) void xor_buf(uint8_t out[], * @param in2 the second output buffer * @param length the length of the three buffers */ -BOTAN_PUBLIC_API(2,3) void xor_buf(uint8_t out[], - const uint8_t in[], - const uint8_t in2[], - size_t length); +inline void xor_buf(uint8_t out[], + const uint8_t in[], + const uint8_t in2[], + size_t length) + { + while(length >= 16) + { + out[0] = in[0] ^ in2[0]; + out[1] = in[1] ^ in2[1]; + out[2] = in[2] ^ in2[2]; + out[3] = in[3] ^ in2[3]; + out[4] = in[4] ^ in2[4]; + out[5] = in[5] ^ in2[5]; + out[6] = in[6] ^ in2[6]; + out[7] = in[7] ^ in2[7]; + out[8] = in[8] ^ in2[8]; + out[9] = in[9] ^ in2[9]; + out[10] = in[10] ^ in2[10]; + out[11] = in[11] ^ in2[11]; + out[12] = in[12] ^ in2[12]; + out[13] = in[13] ^ in2[13]; + out[14] = in[14] ^ in2[14]; + out[15] = in[15] ^ in2[15]; + in += 16; in2 += 16; out += 16; length -= 16; + } + + for(size_t i = 0; i != length; ++i) + out[i] = in[i] ^ in2[i]; + } template<typename Alloc, typename Alloc2> void xor_buf(std::vector<uint8_t, Alloc>& out, |