From 2ed99de0d0ae5d424f95e9cf71d790c2b07a137e Mon Sep 17 00:00:00 2001
From: lloyd
Date: Sun, 9 Mar 2008 07:42:07 +0000
Subject: If the macro BOTAN_TARGET_UNALIGNED_LOADSTOR_OK (from build.h) is
 on, the word read/write functions will be faster through the use of
 (slightly unsafe) pointer manipulations. On some CPUs (like SPARC), these
 antics can cause crashes (usually visible by SIGBUS) if what you are
 attempting to read or write as an integer is not aligned on a word
 boundary. However they are safe on x86 and x86-64. Performance increases
 across the board on a Core2. In most algorithms the improvement seems to
 be about 3%, except a few standouts like RC6 (15%), MD4 (20%), and
 RIPEMD-128 (8%). Will be better with faster xor_buf and byte swapping.
---
 include/loadstor.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/include/loadstor.h b/include/loadstor.h
index 47c3126cd..432aff4dc 100644
--- a/include/loadstor.h
+++ b/include/loadstor.h
@@ -8,6 +8,30 @@
 
 #include <botan/types.h>
 
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+
+#include <botan/bswap.h>
+
+#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
+
+#define BOTAN_ENDIAN_N2B(x) (x)
+#define BOTAN_ENDIAN_B2N(x) (x)
+
+#define BOTAN_ENDIAN_N2L(x) reverse_bytes(x)
+#define BOTAN_ENDIAN_L2N(x) reverse_bytes(x)
+
+#elif defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
+
+#define BOTAN_ENDIAN_N2L(x) (x)
+#define BOTAN_ENDIAN_L2N(x) (x)
+
+#define BOTAN_ENDIAN_N2B(x) reverse_bytes(x)
+#define BOTAN_ENDIAN_B2N(x) reverse_bytes(x)
+
+#endif
+
+#endif
+
 namespace Botan {
 
 /*************************************************
@@ -70,34 +94,72 @@ inline T load_le(const byte in[], u32bit off)
    return out;
    }
 
+template<>
+inline u16bit load_be<u16bit>(const byte in[], u32bit off)
+   {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u16bit*>(in) + off));
+#else
+   in += off * sizeof(u16bit);
+   return make_u16bit(in[0], in[1]);
+#endif
+   }
+
+template<>
+inline u16bit load_le<u16bit>(const byte in[], u32bit off)
+   {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u16bit*>(in) + off));
+#else
+   in += off * sizeof(u16bit);
+   return make_u16bit(in[1], in[0]);
+#endif
+   }
+
 template<>
 inline u32bit load_be<u32bit>(const byte in[], u32bit off)
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u32bit*>(in) + off));
+#else
    in += off * sizeof(u32bit);
    return make_u32bit(in[0], in[1], in[2], in[3]);
+#endif
    }
 
 template<>
 inline u32bit load_le<u32bit>(const byte in[], u32bit off)
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u32bit*>(in) + off));
+#else
    in += off * sizeof(u32bit);
    return make_u32bit(in[3], in[2], in[1], in[0]);
+#endif
    }
 
 template<>
 inline u64bit load_be<u64bit>(const byte in[], u32bit off)
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u64bit*>(in) + off));
+#else
    in += off * sizeof(u64bit);
    return make_u64bit(in[0], in[1], in[2], in[3],
                       in[4], in[5], in[6], in[7]);
+#endif
    }
 
 template<>
 inline u64bit load_le<u64bit>(const byte in[], u32bit off)
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u64bit*>(in) + off));
+#else
    in += off * sizeof(u64bit);
    return make_u64bit(in[7], in[6], in[5], in[4],
                       in[3], in[2], in[1], in[0]);
+#endif
    }
 
 /*************************************************
@@ -105,34 +167,53 @@ inline u64bit load_le(const byte in[], u32bit off)
 *************************************************/
 inline void store_be(u16bit in, byte out[2])
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   *reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_B2N(in);
+#else
    out[0] = get_byte(0, in);
    out[1] = get_byte(1, in);
+#endif
    }
 
 inline void store_le(u16bit in, byte out[2])
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   *reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_L2N(in);
+#else
    out[0] = get_byte(1, in);
    out[1] = get_byte(0, in);
+#endif
    }
 
 inline void store_be(u32bit in, byte out[4])
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   *reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_B2N(in);
+#else
    out[0] = get_byte(0, in);
    out[1] = get_byte(1, in);
    out[2] = get_byte(2, in);
    out[3] = get_byte(3, in);
+#endif
    }
 
 inline void store_le(u32bit in, byte out[4])
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   *reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_L2N(in);
+#else
    out[0] = get_byte(3, in);
    out[1] = get_byte(2, in);
    out[2] = get_byte(1, in);
    out[3] = get_byte(0, in);
+#endif
    }
 
 inline void store_be(u64bit in, byte out[8])
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   *reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_B2N(in);
+#else
    out[0] = get_byte(0, in);
    out[1] = get_byte(1, in);
    out[2] = get_byte(2, in);
@@ -141,10 +222,14 @@ inline void store_be(u64bit in, byte out[8])
    out[5] = get_byte(5, in);
    out[6] = get_byte(6, in);
    out[7] = get_byte(7, in);
+#endif
    }
 
 inline void store_le(u64bit in, byte out[8])
    {
+#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+   *reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_L2N(in);
+#else
    out[0] = get_byte(7, in);
    out[1] = get_byte(6, in);
    out[2] = get_byte(5, in);
@@ -153,6 +238,7 @@ inline void store_le(u64bit in, byte out[8])
    out[5] = get_byte(2, in);
    out[6] = get_byte(1, in);
    out[7] = get_byte(0, in);
+#endif
    }
 
 template<typename T>
--
cgit v1.2.3
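
For reference, the following standalone sketch (not part of the patch) contrasts the two
32-bit big-endian load strategies that BOTAN_TARGET_UNALIGNED_LOADSTOR_OK selects between:
the portable byte-at-a-time path and the pointer-cast path. It assumes a little-endian host
such as x86, and uses a local byteswap32() helper as a stand-in for Botan's reverse_bytes();
on a strict-alignment CPU such as SPARC the misaligned read in load_be32_unaligned() would
fault with SIGBUS, which is exactly why the fast path is guarded by a build macro.

#include <cstdio>
#include <stdint.h>

// Portable path: assemble the word one byte at a time. Safe for any
// alignment and any host endianness.
static uint32_t load_be32_bytewise(const unsigned char in[])
   {
   return (static_cast<uint32_t>(in[0]) << 24) |
          (static_cast<uint32_t>(in[1]) << 16) |
          (static_cast<uint32_t>(in[2]) <<  8) |
          (static_cast<uint32_t>(in[3]));
   }

// Stand-in for a byte-swap primitive (assumption: not Botan's reverse_bytes).
static uint32_t byteswap32(uint32_t x)
   {
   return ((x << 24) & 0xFF000000) | ((x << 8) & 0x00FF0000) |
          ((x >> 8) & 0x0000FF00) | ((x >> 24) & 0x000000FF);
   }

// Fast path: read the word directly through a pointer cast, then fix up
// endianness. x86/x86-64 tolerate the misaligned read; SPARC would trap.
static uint32_t load_be32_unaligned(const unsigned char in[])
   {
   uint32_t x = *reinterpret_cast<const uint32_t*>(in); // possibly misaligned read
   return byteswap32(x); // little-endian host assumed; a big-endian host would return x
   }

int main()
   {
   unsigned char buf[5] = { 0x00, 0x12, 0x34, 0x56, 0x78 };
   // Offset by one byte so the word starts at a misaligned address.
   std::printf("bytewise:  %08lx\n", (unsigned long)load_be32_bytewise(buf + 1));
   std::printf("unaligned: %08lx\n", (unsigned long)load_be32_unaligned(buf + 1));
   return 0;
   }

On an x86 host both calls should print 12345678; the only difference is how the bytes are
gathered, which is where the measured 3-20% speedups in the commit message come from.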