diff options
author | Jack Lloyd <[email protected]> | 2015-12-31 11:59:27 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2015-12-31 11:59:27 -0500 |
commit | b269c8d1ce0cb5412712a8df963b2205364e17ae (patch) | |
tree | 8245489665f1f31c35626d4cf5275afbf0914276 | |
parent | 18d81936b55874ba76eca59b1d01e8028cc2bf9a (diff) |
Use memcpy instead of misaligned pointer casts for reading words.
It works on x86, but C says it is undefined and it makes UBSan
unhappy. Happily, this memcpy approach probably also works fine under
processors which previously used the byte-at-a-time approach such as
ARM. But for right now using memcpy here is still gated under the
processor alignment flags.
In my tests recent GCC and Clang seemed to produce basically identical
code for either approach when using -O3; I imagine most compilers
these days are very good at analyzing/inlining/unrolling memcpys.
Also remove the manually unrolled versions of xor_buf, which caused
problems with GCC and -O3 due to it vectorizing the loads into
(aligned) SSE2 loads, which would fail when a misaligned pointer was
passed. Which always seemed kind of bogus to me, but I guess that's
what undefined behavior is for. Enable -O3 for GCC.
With this change the test suite is clean under GCC ASan+UBSan and
Clang ASan+UBSan, with the exception of one failure due to a bug in
libstdc++ (GCC bug 60734) when compiled by Clang.
-rw-r--r-- | doc/news.rst | 9 | ||||
-rw-r--r-- | src/build-data/cc/gcc.txt | 8 | ||||
-rw-r--r-- | src/lib/utils/loadstor.h | 58 | ||||
-rw-r--r-- | src/lib/utils/mem_ops.h | 66 |
4 files changed, 59 insertions, 82 deletions
diff --git a/doc/news.rst b/doc/news.rst index 9ddca58bd..8a52f288d 100644 --- a/doc/news.rst +++ b/doc/news.rst @@ -38,6 +38,15 @@ Version 1.11.26, Not Yet Released infinite loop or the discarding of an incorrect number of bytes. Reported on mailing list by Falko Strenzke. +* Previously if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK was defined, + the code doing low level loads/stores would use pointer casts to + access larger words out of a (potentially misaligned) byte array, + rather than using byte-at-a-time accesses. However even on platforms + such as x86 where this works, it triggers UBSan errors under Clang. + Instead use memcpy, which the C standard says is usable for such + purposes even with misaligned values. With recent GCC and Clang, the + same code seems to be emitted for either approach. + * Avoid calling memcpy, memset, or memmove with a length of zero to avoid undefined behavior, as calling these functions with an invalid or null pointer, even with a length of zero, is invalid. Often there diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt index 2e6c0c364..79c9ba5f4 100644 --- a/src/build-data/cc/gcc.txt +++ b/src/build-data/cc/gcc.txt @@ -13,13 +13,17 @@ warning_flags "-Wall -Wextra -Wstrict-aliasing -Wstrict-overflow=5 -Wcast-align compile_flags "-c" debug_info_flags "-g" -optimization_flags "-O2" +optimization_flags "-O3" shared_flags "-fPIC" coverage_flags "--coverage" -#sanitizer_flags "-D_GLIBCXX_DEBUG -fsanitize=address -fsanitize=undefined" + +# GCC 4.8 sanitizer_flags "-D_GLIBCXX_DEBUG -fsanitize=address" +# GCC 4.9 and later +#sanitizer_flags "-D_GLIBCXX_DEBUG -fsanitize=address,undefined -fno-sanitize-recover=undefined" + visibility_build_flags "-fvisibility=hidden" visibility_attribute '__attribute__((visibility("default")))' diff --git a/src/lib/utils/loadstor.h b/src/lib/utils/loadstor.h index 8d33c2eef..a6c2b7969 100644 --- a/src/lib/utils/loadstor.h +++ b/src/lib/utils/loadstor.h @@ -1,6 +1,6 @@ /* * Load/Store Operators -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2007,2015 Jack Lloyd * 2007 Yves Jerschow * * Botan is released under the Simplified BSD License (see license.txt) @@ -144,10 +144,13 @@ inline T load_le(const byte in[], size_t off) template<> inline u16bit load_be<u16bit>(const byte in[], size_t off) { + in += off * sizeof(u16bit); + #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u16bit*>(in) + off)); + u16bit x; + std::memcpy(&x, in, sizeof(x)); + return BOTAN_ENDIAN_N2B(x); #else - in += off * sizeof(u16bit); return make_u16bit(in[0], in[1]); #endif } @@ -161,10 +164,13 @@ inline u16bit load_be<u16bit>(const byte in[], size_t off) template<> inline u16bit load_le<u16bit>(const byte in[], size_t off) { + in += off * sizeof(u16bit); + #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u16bit*>(in) + off)); + u16bit x; + std::memcpy(&x, in, sizeof(x)); + return BOTAN_ENDIAN_N2L(x); #else - in += off * sizeof(u16bit); return make_u16bit(in[1], in[0]); #endif } @@ -178,10 +184,12 @@ inline u16bit load_le<u16bit>(const byte in[], size_t off) template<> inline u32bit load_be<u32bit>(const byte in[], size_t off) { + in += off * sizeof(u32bit); #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u32bit*>(in) + off)); + u32bit x; + std::memcpy(&x, in, sizeof(x)); + return BOTAN_ENDIAN_N2B(x); #else - in += off * sizeof(u32bit); return make_u32bit(in[0], in[1], in[2], in[3]); #endif } @@ -195,10 +203,12 @@ inline u32bit load_be<u32bit>(const byte in[], size_t off) template<> inline u32bit load_le<u32bit>(const byte in[], size_t off) { + in += off * sizeof(u32bit); #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u32bit*>(in) + off)); + u32bit x; + std::memcpy(&x, in, sizeof(x)); + return BOTAN_ENDIAN_N2L(x); #else - in += off * sizeof(u32bit); return make_u32bit(in[3], in[2], in[1], in[0]); #endif } @@ -212,10 +222,12 @@ inline u32bit load_le<u32bit>(const byte in[], size_t off) template<> inline u64bit load_be<u64bit>(const byte in[], size_t off) { + in += off * sizeof(u64bit); #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u64bit*>(in) + off)); + u64bit x; + std::memcpy(&x, in, sizeof(x)); + return BOTAN_ENDIAN_N2B(x); #else - in += off * sizeof(u64bit); return make_u64bit(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); #endif @@ -230,10 +242,12 @@ inline u64bit load_be<u64bit>(const byte in[], size_t off) template<> inline u64bit load_le<u64bit>(const byte in[], size_t off) { + in += off * sizeof(u64bit); #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u64bit*>(in) + off)); + u64bit x; + std::memcpy(&x, in, sizeof(x)); + return BOTAN_ENDIAN_N2L(x); #else - in += off * sizeof(u64bit); return make_u64bit(in[7], in[6], in[5], in[4], in[3], in[2], in[1], in[0]); #endif @@ -431,7 +445,8 @@ inline void load_be(T out[], inline void store_be(u16bit in, byte out[2]) { #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - *reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_B2N(in); + u16bit o = BOTAN_ENDIAN_N2B(in); + std::memcpy(out, &o, sizeof(o)); #else out[0] = get_byte(0, in); out[1] = get_byte(1, in); @@ -446,7 +461,8 @@ inline void store_be(u16bit in, byte out[2]) inline void store_le(u16bit in, byte out[2]) { #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - *reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_L2N(in); + u16bit o = BOTAN_ENDIAN_N2L(in); + std::memcpy(out, &o, sizeof(o)); #else out[0] = get_byte(1, in); out[1] = get_byte(0, in); @@ -461,7 +477,8 @@ inline void store_le(u16bit in, byte out[2]) inline void store_be(u32bit in, byte out[4]) { #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - *reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_B2N(in); + u32bit o = BOTAN_ENDIAN_B2N(in); + std::memcpy(out, &o, sizeof(o)); #else out[0] = get_byte(0, in); out[1] = get_byte(1, in); @@ -478,7 +495,8 @@ inline void store_be(u32bit in, byte out[4]) inline void store_le(u32bit in, byte out[4]) { #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - *reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_L2N(in); + u32bit o = BOTAN_ENDIAN_L2N(in); + std::memcpy(out, &o, sizeof(o)); #else out[0] = get_byte(3, in); out[1] = get_byte(2, in); @@ -495,7 +513,8 @@ inline void store_le(u32bit in, byte out[4]) inline void store_be(u64bit in, byte out[8]) { #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - *reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_B2N(in); + u64bit o = BOTAN_ENDIAN_B2N(in); + std::memcpy(out, &o, sizeof(o)); #else out[0] = get_byte(0, in); out[1] = get_byte(1, in); @@ -516,7 +535,8 @@ inline void store_be(u64bit in, byte out[8]) inline void store_le(u64bit in, byte out[8]) { #if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - *reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_L2N(in); + u64bit o = BOTAN_ENDIAN_L2N(in); + std::memcpy(out, &o, sizeof(o)); #else out[0] = get_byte(7, in); out[1] = get_byte(6, in); diff --git a/src/lib/utils/mem_ops.h b/src/lib/utils/mem_ops.h index d11e3368c..0d2d0dab0 100644 --- a/src/lib/utils/mem_ops.h +++ b/src/lib/utils/mem_ops.h @@ -1,6 +1,6 @@ /* * Memory Operations -* (C) 1999-2009,2012 Jack Lloyd +* (C) 1999-2009,2012,2015 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -81,7 +81,7 @@ template<typename T> inline bool same_mem(const T* p1, const T* p2, size_t n) } /** -* XOR arrays. Postcondition out[i] = in[i] ^ out[i] forall i = 0...length +* XOR_ arrays. Postcondition out[i] = in[i] ^ out[i] forall i = 0...length * @param out the input/output buffer * @param in the read-only input buffer * @param length the length of the buffers @@ -89,18 +89,10 @@ template<typename T> inline bool same_mem(const T* p1, const T* p2, size_t n) template<typename T> void xor_buf(T out[], const T in[], size_t length) { - while(length >= 8) - { - out[0] ^= in[0]; out[1] ^= in[1]; - out[2] ^= in[2]; out[3] ^= in[3]; - out[4] ^= in[4]; out[5] ^= in[5]; - out[6] ^= in[6]; out[7] ^= in[7]; - - out += 8; in += 8; length -= 8; - } - for(size_t i = 0; i != length; ++i) + { out[i] ^= in[i]; + } } /** @@ -115,60 +107,12 @@ template<typename T> void xor_buf(T out[], const T in2[], size_t length) { - while(length >= 8) - { - out[0] = in[0] ^ in2[0]; - out[1] = in[1] ^ in2[1]; - out[2] = in[2] ^ in2[2]; - out[3] = in[3] ^ in2[3]; - out[4] = in[4] ^ in2[4]; - out[5] = in[5] ^ in2[5]; - out[6] = in[6] ^ in2[6]; - out[7] = in[7] ^ in2[7]; - - in += 8; in2 += 8; out += 8; length -= 8; - } - for(size_t i = 0; i != length; ++i) - out[i] = in[i] ^ in2[i]; - } - -#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK - -template<> -inline void xor_buf<byte>(byte out[], const byte in[], size_t length) - { - while(length >= 8) { - *reinterpret_cast<u64bit*>(out) ^= *reinterpret_cast<const u64bit*>(in); - out += 8; in += 8; length -= 8; - } - - for(size_t i = 0; i != length; ++i) - out[i] ^= in[i]; - } - -template<> -inline void xor_buf<byte>(byte out[], - const byte in[], - const byte in2[], - size_t length) - { - while(length >= 8) - { - *reinterpret_cast<u64bit*>(out) = - *reinterpret_cast<const u64bit*>(in) ^ - *reinterpret_cast<const u64bit*>(in2); - - in += 8; in2 += 8; out += 8; length -= 8; - } - - for(size_t i = 0; i != length; ++i) out[i] = in[i] ^ in2[i]; + } } -#endif - template<typename Alloc, typename Alloc2> void xor_buf(std::vector<byte, Alloc>& out, const std::vector<byte, Alloc2>& in, |