diff options
author | Jack Lloyd <[email protected]> | 2016-11-27 14:54:30 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2016-11-27 14:54:30 -0500 |
commit | bd693b4746d5d8ccd65f07e67995ecf7cedacaa2 (patch) | |
tree | 94babf2a8306dd0bf78a0da48f28ceca8a10080e | |
parent | 9f28af95363cffa33e1745cc94814f86e937edce (diff) | |
parent | a17e3bfa187adb5c6d7d80c78d5df8c1f146c647 (diff) |
Merge GH #590 Initial Cilk/OpenMP support
-rwxr-xr-x | configure.py | 20 | ||||
-rw-r--r-- | src/build-data/buildh.in | 170 | ||||
-rw-r--r-- | src/build-data/cc/clang.txt | 2 | ||||
-rw-r--r-- | src/build-data/cc/gcc.txt | 3 | ||||
-rw-r--r-- | src/lib/block/aes/aes.cpp | 48 | ||||
-rw-r--r-- | src/lib/block/aes/aes_ssse3/aes_ssse3.cpp | 4 | ||||
-rw-r--r-- | src/lib/block/blowfish/blowfish.cpp | 22 | ||||
-rw-r--r-- | src/lib/block/camellia/camellia.cpp | 22 | ||||
-rw-r--r-- | src/lib/block/cast/cast128.cpp | 22 | ||||
-rw-r--r-- | src/lib/block/des/des.cpp | 30 | ||||
-rw-r--r-- | src/lib/block/idea/idea.cpp | 8 | ||||
-rw-r--r-- | src/lib/block/serpent/serpent.cpp | 81 | ||||
-rw-r--r-- | src/lib/block/threefish/threefish.cpp | 34 | ||||
-rw-r--r-- | src/lib/block/twofish/twofish.cpp | 42 | ||||
-rw-r--r-- | src/lib/block/xtea/xtea.cpp | 142 | ||||
-rw-r--r-- | src/lib/utils/compiler.h | 169 | ||||
-rw-r--r-- | src/lib/utils/info.txt | 1 | ||||
-rw-r--r-- | src/lib/utils/loadstor.h | 16 |
18 files changed, 445 insertions, 391 deletions
diff --git a/configure.py b/configure.py index 0fabeed3e..789377129 100755 --- a/configure.py +++ b/configure.py @@ -323,6 +323,11 @@ def process_command_line(args): build_group.add_option('--with-external-includedir', metavar='DIR', default='', help='use DIR for external includes') + build_group.add_option('--with-openmp', default=False, action='store_true', + help='enable use of OpenMP') + build_group.add_option('--with-cilkplus', default=False, action='store_true', + help='enable use of Cilk Plus') + link_methods = ['symlink', 'hardlink', 'copy'] build_group.add_option('--link-method', default=None, metavar='METHOD', choices=link_methods, @@ -850,6 +855,11 @@ class ArchInfo(object): if options.with_valgrind: macros.append('HAS_VALGRIND') + if options.with_openmp: + macros.append('TARGET_HAS_OPENMP') + if options.with_cilkplus: + macros.append('TARGET_HAS_CILKPLUS') + return macros class CompilerInfo(object): @@ -953,6 +963,16 @@ class CompilerInfo(object): raise Exception('No sanitizer handling for %s' % (self.basename)) abi_link.append(self.sanitizer_flags) + if options.with_openmp: + if 'openmp' not in self.mach_abi_linking: + raise Exception('No support for OpenMP for %s' % (self.basename)) + abi_link.append(self.mach_abi_linking['openmp']) + + if options.with_cilkplus: + if 'cilkplus' not in self.mach_abi_linking: + raise Exception('No support for Cilk Plus for %s' % (self.basename)) + abi_link.append(self.mach_abi_linking['cilkplus']) + abi_flags = ' '.join(sorted(abi_link)) if options.cc_abi_flags != '': diff --git a/src/build-data/buildh.in b/src/build-data/buildh.in index 56b70e060..ba6eee97d 100644 --- a/src/build-data/buildh.in +++ b/src/build-data/buildh.in @@ -21,8 +21,10 @@ #define BOTAN_VERSION_VC_REVISION "%{version_vc_rev}" #define BOTAN_DISTRIBUTION_INFO "%{distribution_info}" -%{unsafe_fuzzer_mode_define} +/* How many bits per limb in a BigInt */ +#define BOTAN_MP_WORD_BITS %{mp_bits} +%{unsafe_fuzzer_mode_define} #define BOTAN_INSTALL_PREFIX 
R"(%{prefix})" #define BOTAN_INSTALL_HEADER_DIR "%{includedir}/botan-%{version_major}.%{version_minor}" @@ -33,6 +35,28 @@ #define BOTAN_DLL %{visibility_attribute} #endif +/* Target identification and feature test macros */ +%{target_os_defines} + +%{target_cpu_defines} + +%{target_compiler_defines} + +/* +* Module availability definitions +*/ +%{module_defines} + +/* +* Local/misc configuration options (if any) follow +*/ +%{local_config} +%{misc_config} + +/* +* Things you can edit (but probably shouldn't) +*/ + /* How much to allocate for a buffer of no particular size */ #define BOTAN_DEFAULT_BUFFER_SIZE 1024 @@ -49,12 +73,6 @@ */ #define BOTAN_MLOCK_ALLOCATOR_MAX_LOCKED_KB 512 -/* Multiplier on a block cipher's native parallelism */ -#define BOTAN_BLOCK_CIPHER_PAR_MULT 4 - -/* How many bits per limb in a BigInt */ -#define BOTAN_MP_WORD_BITS %{mp_bits} - /* * If enabled uses memset via volatile function pointer to zero memory, * otherwise does a byte at a time write via a volatile pointer. @@ -114,6 +132,10 @@ { "timestamp", "rdseed", "rdrand", "proc_info", \ "darwin_secrandom", "dev_random", "win32_cryptoapi", "proc_walk", "system_stats" } + +/* Multiplier on a block cipher's native parallelism */ +#define BOTAN_BLOCK_CIPHER_PAR_MULT 4 + /* * These control the RNG used by the system RNG interface */ @@ -156,32 +178,6 @@ Each read generates 32 bits of output #define BOTAN_ENTROPY_RDSEED_RETRIES 20 /* -* Compiler and target specific flags -*/ - -/* Should we use GCC-style inline assembler? 
*/ -#if !defined(BOTAN_USE_GCC_INLINE_ASM) && defined(__GNUG__) - #define BOTAN_USE_GCC_INLINE_ASM 1 -#endif - -#ifdef __GNUC__ - #define BOTAN_GCC_VERSION \ - (__GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__) -#else - #define BOTAN_GCC_VERSION 0 -#endif - -/* Target identification and feature test macros */ -%{target_os_defines} - -%{target_cpu_defines} - -#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) || \ - defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) - #define BOTAN_TARGET_CPU_HAS_KNOWN_ENDIANNESS -#endif - -/* * If no way of dynamically determining the cache line size for the * system exists, this value is used as the default. Used by the side * channel countermeasures rather than for alignment purposes, so it is @@ -192,93 +188,6 @@ Each read generates 32 bits of output #define BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE 32 #endif -%{target_compiler_defines} - - -#if defined(__GNUG__) || defined(__clang__) - #define BOTAN_FUNC_ISA(isa) __attribute__ ((target(isa))) -#else - #define BOTAN_FUNC_ISA(isa) -#endif - -#if defined(__GNUG__) || defined(__clang__) - #define BOTAN_WARN_UNUSED_RESULT __attribute__ ((warn_unused_result)) -#else - #define BOTAN_WARN_UNUSED_RESULT -#endif - -/* -* Compile-time deprecation warnings -*/ -#if !defined(BOTAN_NO_DEPRECATED_WARNINGS) - - #if defined(__clang__) - #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated)) - - #elif defined(_MSC_VER) - #define BOTAN_DEPRECATED(msg) __declspec(deprecated(msg)) - - #elif defined(__GNUG__) - - #if BOTAN_GCC_VERSION >= 450 - #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated(msg))) - #else - #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated)) - #endif - - #endif - -#endif - -#if !defined(BOTAN_NORETURN) - - #if defined (__clang__) || defined (__GNUG__) - #define BOTAN_NORETURN __attribute__ ((__noreturn__)) - - #elif defined (_MSC_VER) - #define BOTAN_NORETURN __declspec(noreturn) - - #else - #define BOTAN_NORETURN - - #endif - -#endif - - -#if defined(_MSC_VER) - 
#define BOTAN_CURRENT_FUNCTION __FUNCTION__ -#else - #define BOTAN_CURRENT_FUNCTION __func__ -#endif - -#if !defined(BOTAN_DEPRECATED) - #define BOTAN_DEPRECATED(msg) -#endif - -#if defined(_MSC_VER) && (_MSC_VER < 1900) - // noexcept is not supported in VS 2013 - #include <yvals.h> - #define BOTAN_NOEXCEPT _NOEXCEPT -#else - #define BOTAN_NOEXCEPT noexcept -#endif - -/* -* Module availability definitions -*/ -%{module_defines} - -/* -* Local configuration options (if any) follow -*/ -%{local_config} - -/* -* Miscellaneous configuration options (if any) follow -*/ -%{misc_config} - /** * Controls how AutoSeeded_RNG is instantiated */ @@ -301,20 +210,21 @@ Each read generates 32 bits of output // The struct is only declared to force the semicolon, it is never defined. #define BOTAN_FORCE_SEMICOLON struct BOTAN_DUMMY_STRUCT -#if defined(BOTAN_TARGET_ARCH_IS_X86_64) && (\ - (defined(_MSC_VER) && !defined(_WIN64)) || \ - (defined(__clang__) && !defined(__x86_64__)) || \ - (defined(__GNUG__) && !defined(__x86_64__)) \ -) +// Check for a common build problem: + +#if defined(BOTAN_TARGET_ARCH_IS_X86_64) && ((defined(_MSC_VER) && !defined(_WIN64)) || \ + (defined(__clang__) && !defined(__x86_64__)) || \ + (defined(__GNUG__) && !defined(__x86_64__))) #error "Trying to compile Botan configured as x86_64 with non-x86_64 compiler." #endif -#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (\ - (defined(_MSC_VER) && defined(_WIN64)) || \ - (defined(__clang__) && !defined(__i386__)) || \ - (defined(__GNUG__) && !defined(__i386__)) \ -) +#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && ((defined(_MSC_VER) && defined(_WIN64)) || \ + (defined(__clang__) && !defined(__i386__)) || \ + (defined(__GNUG__) && !defined(__i386__))) + #error "Trying to compile Botan configured as x86_32 with non-x86_32 compiler." 
#endif +#include <botan/compiler.h> + #endif diff --git a/src/build-data/cc/clang.txt b/src/build-data/cc/clang.txt index c4a85658f..055315c3b 100644 --- a/src/build-data/cc/clang.txt +++ b/src/build-data/cc/clang.txt @@ -73,6 +73,8 @@ ivybridge -> "-march=core-avx-i" <mach_abi_linking> all -> "-pthread" +openmp -> "-fopenmp" + x86_32 -> "-m32" x86_64 -> "-m64" ppc64 -> "-m64" diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt index b88454ce6..0a53e15c1 100644 --- a/src/build-data/cc/gcc.txt +++ b/src/build-data/cc/gcc.txt @@ -120,6 +120,9 @@ all_x86_64 -> "-momit-leaf-frame-pointer" <mach_abi_linking> all -> "-pthread -fstack-protector" +cilkplus -> "-fcilkplus" +openmp -> "-fopenmp" + mips64 -> "-mabi=64" s390 -> "-m31" s390x -> "-m64" diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp index 39f5bd0db..8c7000135 100644 --- a/src/lib/block/aes/aes.cpp +++ b/src/lib/block/aes/aes.cpp @@ -168,12 +168,15 @@ void aes_encrypt_n(const byte in[], byte out[], } Z &= TE[82]; // this is zero, which hopefully the compiler cannot deduce - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit T0 = load_be<u32bit>(in, 0) ^ EK[0]; - u32bit T1 = load_be<u32bit>(in, 1) ^ EK[1]; - u32bit T2 = load_be<u32bit>(in, 2) ^ EK[2]; - u32bit T3 = load_be<u32bit>(in, 3) ^ EK[3]; + u32bit T0, T1, T2, T3; + load_be(in + 16*i, T0, T1, T2, T3); + + T0 ^= EK[0]; + T1 ^= EK[1]; + T2 ^= EK[2]; + T3 ^= EK[3]; T0 ^= Z; @@ -226,25 +229,22 @@ void aes_encrypt_n(const byte in[], byte out[], TE[get_byte(2, T1) + 512] ^ TE[get_byte(3, T2) + 768]; } - out[ 0] = SE[get_byte(0, B0)] ^ ME[0]; - out[ 1] = SE[get_byte(1, B1)] ^ ME[1]; - out[ 2] = SE[get_byte(2, B2)] ^ ME[2]; - out[ 3] = SE[get_byte(3, B3)] ^ ME[3]; - out[ 4] = SE[get_byte(0, B1)] ^ ME[4]; - out[ 5] = SE[get_byte(1, B2)] ^ ME[5]; - out[ 6] = SE[get_byte(2, B3)] ^ ME[6]; - out[ 7] = SE[get_byte(3, B0)] ^ ME[7]; - out[ 8] = SE[get_byte(0, B2)] ^ ME[8]; - out[ 9] = 
SE[get_byte(1, B3)] ^ ME[9]; - out[10] = SE[get_byte(2, B0)] ^ ME[10]; - out[11] = SE[get_byte(3, B1)] ^ ME[11]; - out[12] = SE[get_byte(0, B3)] ^ ME[12]; - out[13] = SE[get_byte(1, B0)] ^ ME[13]; - out[14] = SE[get_byte(2, B1)] ^ ME[14]; - out[15] = SE[get_byte(3, B2)] ^ ME[15]; - - in += 16; - out += 16; + out[16*i+ 0] = SE[get_byte(0, B0)] ^ ME[0]; + out[16*i+ 1] = SE[get_byte(1, B1)] ^ ME[1]; + out[16*i+ 2] = SE[get_byte(2, B2)] ^ ME[2]; + out[16*i+ 3] = SE[get_byte(3, B3)] ^ ME[3]; + out[16*i+ 4] = SE[get_byte(0, B1)] ^ ME[4]; + out[16*i+ 5] = SE[get_byte(1, B2)] ^ ME[5]; + out[16*i+ 6] = SE[get_byte(2, B3)] ^ ME[6]; + out[16*i+ 7] = SE[get_byte(3, B0)] ^ ME[7]; + out[16*i+ 8] = SE[get_byte(0, B2)] ^ ME[8]; + out[16*i+ 9] = SE[get_byte(1, B3)] ^ ME[9]; + out[16*i+10] = SE[get_byte(2, B0)] ^ ME[10]; + out[16*i+11] = SE[get_byte(3, B1)] ^ ME[11]; + out[16*i+12] = SE[get_byte(0, B3)] ^ ME[12]; + out[16*i+13] = SE[get_byte(1, B0)] ^ ME[13]; + out[16*i+14] = SE[get_byte(2, B1)] ^ ME[14]; + out[16*i+15] = SE[get_byte(3, B2)] ^ ME[15]; } } diff --git a/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp b/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp index ef24795bb..d8c7e7314 100644 --- a/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp +++ b/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp @@ -355,7 +355,7 @@ void AES_128::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const CT::poison(in, blocks * block_size()); - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { __m128i B = _mm_loadu_si128(in_mm + i); _mm_storeu_si128(out_mm + i, aes_ssse3_encrypt(B, keys, 10)); @@ -378,7 +378,7 @@ void AES_128::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const CT::poison(in, blocks * block_size()); - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { __m128i B = _mm_loadu_si128(in_mm + i); _mm_storeu_si128(out_mm + i, aes_ssse3_decrypt(B, keys, 10)); diff --git a/src/lib/block/blowfish/blowfish.cpp 
b/src/lib/block/blowfish/blowfish.cpp index 7a06cf797..69d345baa 100644 --- a/src/lib/block/blowfish/blowfish.cpp +++ b/src/lib/block/blowfish/blowfish.cpp @@ -202,10 +202,10 @@ void Blowfish::encrypt_n(const byte in[], byte out[], size_t blocks) const const u32bit* S3 = &m_S[512]; const u32bit* S4 = &m_S[768]; - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit L = load_be<u32bit>(in, 0); - u32bit R = load_be<u32bit>(in, 1); + u32bit L, R; + load_be(in + BLOCK_SIZE*i, L, R); for(size_t j = 0; j != 16; j += 2) { @@ -220,10 +220,7 @@ void Blowfish::encrypt_n(const byte in[], byte out[], size_t blocks) const L ^= m_P[16]; R ^= m_P[17]; - store_be(out, R, L); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(out + BLOCK_SIZE*i, R, L); } } @@ -237,10 +234,10 @@ void Blowfish::decrypt_n(const byte in[], byte out[], size_t blocks) const const u32bit* S3 = &m_S[512]; const u32bit* S4 = &m_S[768]; - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit L = load_be<u32bit>(in, 0); - u32bit R = load_be<u32bit>(in, 1); + u32bit L, R; + load_be(in + BLOCK_SIZE*i, L, R); for(size_t j = 17; j != 1; j -= 2) { @@ -255,10 +252,7 @@ void Blowfish::decrypt_n(const byte in[], byte out[], size_t blocks) const L ^= m_P[1]; R ^= m_P[0]; - store_be(out, R, L); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(out + BLOCK_SIZE*i, R, L); } } diff --git a/src/lib/block/camellia/camellia.cpp b/src/lib/block/camellia/camellia.cpp index ac5d57d4e..5ac13b9ab 100644 --- a/src/lib/block/camellia/camellia.cpp +++ b/src/lib/block/camellia/camellia.cpp @@ -645,10 +645,10 @@ inline u64bit FLINV(u64bit v, u64bit K) void encrypt(const byte in[], byte out[], size_t blocks, const secure_vector<u64bit>& SK, const size_t rounds) { - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u64bit D1 = load_be<u64bit>(in, 0); - u64bit D2 = load_be<u64bit>(in, 1); + u64bit 
D1, D2; + load_be(in + 16*i, D1, D2); const u64bit* K = SK.data(); @@ -676,10 +676,7 @@ void encrypt(const byte in[], byte out[], size_t blocks, D2 ^= *K++; D1 ^= *K++; - store_be(out, D2, D1); - - in += 16; - out += 16; + store_be(out + 16*i, D2, D1); } } @@ -689,10 +686,10 @@ void encrypt(const byte in[], byte out[], size_t blocks, void decrypt(const byte in[], byte out[], size_t blocks, const secure_vector<u64bit>& SK, const size_t rounds) { - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u64bit D1 = load_be<u64bit>(in, 0); - u64bit D2 = load_be<u64bit>(in, 1); + u64bit D1, D2; + load_be(in + 16*i, D1, D2); const u64bit* K = &SK[SK.size()-1]; @@ -720,10 +717,7 @@ void decrypt(const byte in[], byte out[], size_t blocks, D1 ^= *K--; D2 ^= *K; - store_be(out, D2, D1); - - in += 16; - out += 16; + store_be(out + 16*i, D2, D1); } } diff --git a/src/lib/block/cast/cast128.cpp b/src/lib/block/cast/cast128.cpp index 53f7d4611..96c4f45a7 100644 --- a/src/lib/block/cast/cast128.cpp +++ b/src/lib/block/cast/cast128.cpp @@ -50,10 +50,10 @@ inline void R3(u32bit& L, u32bit R, u32bit MK, byte RK) */ void CAST_128::encrypt_n(const byte in[], byte out[], size_t blocks) const { - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit L = load_be<u32bit>(in, 0); - u32bit R = load_be<u32bit>(in, 1); + u32bit L, R; + load_be(in + BLOCK_SIZE*i, L, R); R1(L, R, m_MK[ 0], m_RK[ 0]); R2(R, L, m_MK[ 1], m_RK[ 1]); @@ -72,10 +72,7 @@ void CAST_128::encrypt_n(const byte in[], byte out[], size_t blocks) const R3(L, R, m_MK[14], m_RK[14]); R1(R, L, m_MK[15], m_RK[15]); - store_be(out, R, L); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(out + BLOCK_SIZE*i, R, L); } } @@ -84,10 +81,10 @@ void CAST_128::encrypt_n(const byte in[], byte out[], size_t blocks) const */ void CAST_128::decrypt_n(const byte in[], byte out[], size_t blocks) const { - for(size_t i = 0; i != blocks; ++i) + 
BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit L = load_be<u32bit>(in, 0); - u32bit R = load_be<u32bit>(in, 1); + u32bit L, R; + load_be(in + BLOCK_SIZE*i, L, R); R1(L, R, m_MK[15], m_RK[15]); R3(R, L, m_MK[14], m_RK[14]); @@ -106,10 +103,7 @@ void CAST_128::decrypt_n(const byte in[], byte out[], size_t blocks) const R2(L, R, m_MK[ 1], m_RK[ 1]); R1(R, L, m_MK[ 0], m_RK[ 0]); - store_be(out, R, L); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(out + BLOCK_SIZE*i, R, L); } } diff --git a/src/lib/block/des/des.cpp b/src/lib/block/des/des.cpp index 88671df8d..a55c43ec7 100644 --- a/src/lib/block/des/des.cpp +++ b/src/lib/block/des/des.cpp @@ -144,12 +144,12 @@ void des_decrypt(u32bit& L, u32bit& R, */ void DES::encrypt_n(const byte in[], byte out[], size_t blocks) const { - for(size_t i = 0; i != blocks; ++i) + for(size_t i = 0; i < blocks; ++i) { - u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | - (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | - (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | - (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + u64bit T = (DES_IPTAB1[in[8*i+0]] ) | (DES_IPTAB1[in[8*i+1]] << 1) | + (DES_IPTAB1[in[8*i+2]] << 2) | (DES_IPTAB1[in[8*i+3]] << 3) | + (DES_IPTAB1[in[8*i+4]] << 4) | (DES_IPTAB1[in[8*i+5]] << 5) | + (DES_IPTAB1[in[8*i+6]] << 6) | (DES_IPTAB2[in[8*i+7]] ); u32bit L = static_cast<u32bit>(T >> 32); u32bit R = static_cast<u32bit>(T); @@ -162,10 +162,7 @@ void DES::encrypt_n(const byte in[], byte out[], size_t blocks) const (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); T = rotate_left(T, 32); - store_be(T, out); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(T, out + 8*i); } } @@ -174,12 +171,12 @@ void DES::encrypt_n(const byte in[], byte out[], size_t blocks) const */ void DES::decrypt_n(const byte in[], byte out[], size_t blocks) const { - for(size_t i = 0; i != blocks; ++i) + for(size_t i = 0; i < blocks; ++i) { - u64bit T = (DES_IPTAB1[in[0]] ) | 
(DES_IPTAB1[in[1]] << 1) | - (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | - (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | - (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + u64bit T = (DES_IPTAB1[in[BLOCK_SIZE*i+0]] ) | (DES_IPTAB1[in[BLOCK_SIZE*i+1]] << 1) | + (DES_IPTAB1[in[BLOCK_SIZE*i+2]] << 2) | (DES_IPTAB1[in[BLOCK_SIZE*i+3]] << 3) | + (DES_IPTAB1[in[BLOCK_SIZE*i+4]] << 4) | (DES_IPTAB1[in[BLOCK_SIZE*i+5]] << 5) | + (DES_IPTAB1[in[BLOCK_SIZE*i+6]] << 6) | (DES_IPTAB2[in[BLOCK_SIZE*i+7]] ); u32bit L = static_cast<u32bit>(T >> 32); u32bit R = static_cast<u32bit>(T); @@ -193,10 +190,7 @@ void DES::decrypt_n(const byte in[], byte out[], size_t blocks) const T = rotate_left(T, 32); - store_be(T, out); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(T, out + BLOCK_SIZE*i); } } diff --git a/src/lib/block/idea/idea.cpp b/src/lib/block/idea/idea.cpp index 85cc5e757..1fe25d599 100644 --- a/src/lib/block/idea/idea.cpp +++ b/src/lib/block/idea/idea.cpp @@ -67,12 +67,10 @@ void idea_op(const byte in[], byte out[], size_t blocks, const u16bit K[52]) CT::poison(out, blocks * 8); CT::poison(K, 52); - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u16bit X1 = load_be<u16bit>(in + BLOCK_SIZE*i, 0); - u16bit X2 = load_be<u16bit>(in + BLOCK_SIZE*i, 1); - u16bit X3 = load_be<u16bit>(in + BLOCK_SIZE*i, 2); - u16bit X4 = load_be<u16bit>(in + BLOCK_SIZE*i, 3); + u16bit X1, X2, X3, X4; + load_be(in + BLOCK_SIZE*i, X1, X2, X3, X4); for(size_t j = 0; j != 8; ++j) { diff --git a/src/lib/block/serpent/serpent.cpp b/src/lib/block/serpent/serpent.cpp index 07088211d..a1326b888 100644 --- a/src/lib/block/serpent/serpent.cpp +++ b/src/lib/block/serpent/serpent.cpp @@ -70,12 +70,10 @@ void Serpent::encrypt_n(const byte in[], byte out[], size_t blocks) const } #endif - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) { - u32bit B0 = load_le<u32bit>(in, 0); - u32bit B1 = 
load_le<u32bit>(in, 1); - u32bit B2 = load_le<u32bit>(in, 2); - u32bit B3 = load_le<u32bit>(in, 3); + u32bit B0, B1, B2, B3; + load_le(in + 16*i, B0, B1, B2, B3); key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -110,10 +108,7 @@ void Serpent::encrypt_n(const byte in[], byte out[], size_t blocks) const key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); - store_le(out, B0, B1, B2, B3); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_le(out + 16*i, B0, B1, B2, B3); } } @@ -135,12 +130,10 @@ void Serpent::decrypt_n(const byte in[], byte out[], size_t blocks) const } #endif - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) { - u32bit B0 = load_le<u32bit>(in, 0); - u32bit B1 = load_le<u32bit>(in, 1); - u32bit B2 = load_le<u32bit>(in, 2); - u32bit B3 = load_le<u32bit>(in, 3); + u32bit B0, B1, B2, B3; + load_le(in + 16*i, B0, B1, B2, B3); key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); @@ -175,10 +168,7 @@ void Serpent::decrypt_n(const byte in[], byte out[], size_t blocks) const i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); - store_le(out, B0, B1, B2, B3); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_le(out + 16*i, B0, B1, B2, B3); } } @@ -205,24 +195,47 @@ void Serpent::key_schedule(const byte key[], size_t length) W[i] = rotate_left(wi, 11); } - SBoxE4(W[ 8],W[ 9],W[ 10],W[ 11]); SBoxE3(W[ 12],W[ 13],W[ 14],W[ 15]); - SBoxE2(W[ 16],W[ 17],W[ 18],W[ 19]); SBoxE1(W[ 20],W[ 21],W[ 22],W[ 23]); - SBoxE8(W[ 24],W[ 25],W[ 26],W[ 27]); SBoxE7(W[ 28],W[ 29],W[ 30],W[ 31]); - SBoxE6(W[ 32],W[ 33],W[ 34],W[ 35]); SBoxE5(W[ 36],W[ 37],W[ 38],W[ 39]); - 
SBoxE4(W[ 40],W[ 41],W[ 42],W[ 43]); SBoxE3(W[ 44],W[ 45],W[ 46],W[ 47]); - SBoxE2(W[ 48],W[ 49],W[ 50],W[ 51]); SBoxE1(W[ 52],W[ 53],W[ 54],W[ 55]); - SBoxE8(W[ 56],W[ 57],W[ 58],W[ 59]); SBoxE7(W[ 60],W[ 61],W[ 62],W[ 63]); - SBoxE6(W[ 64],W[ 65],W[ 66],W[ 67]); SBoxE5(W[ 68],W[ 69],W[ 70],W[ 71]); - SBoxE4(W[ 72],W[ 73],W[ 74],W[ 75]); SBoxE3(W[ 76],W[ 77],W[ 78],W[ 79]); - SBoxE2(W[ 80],W[ 81],W[ 82],W[ 83]); SBoxE1(W[ 84],W[ 85],W[ 86],W[ 87]); - SBoxE8(W[ 88],W[ 89],W[ 90],W[ 91]); SBoxE7(W[ 92],W[ 93],W[ 94],W[ 95]); - SBoxE6(W[ 96],W[ 97],W[ 98],W[ 99]); SBoxE5(W[100],W[101],W[102],W[103]); - SBoxE4(W[104],W[105],W[106],W[107]); SBoxE3(W[108],W[109],W[110],W[111]); - SBoxE2(W[112],W[113],W[114],W[115]); SBoxE1(W[116],W[117],W[118],W[119]); - SBoxE8(W[120],W[121],W[122],W[123]); SBoxE7(W[124],W[125],W[126],W[127]); - SBoxE6(W[128],W[129],W[130],W[131]); SBoxE5(W[132],W[133],W[134],W[135]); + SBoxE1(W[ 20],W[ 21],W[ 22],W[ 23]); + SBoxE1(W[ 52],W[ 53],W[ 54],W[ 55]); + SBoxE1(W[ 84],W[ 85],W[ 86],W[ 87]); + SBoxE1(W[116],W[117],W[118],W[119]); + + SBoxE2(W[ 16],W[ 17],W[ 18],W[ 19]); + SBoxE2(W[ 48],W[ 49],W[ 50],W[ 51]); + SBoxE2(W[ 80],W[ 81],W[ 82],W[ 83]); + SBoxE2(W[112],W[113],W[114],W[115]); + + SBoxE3(W[ 12],W[ 13],W[ 14],W[ 15]); + SBoxE3(W[ 44],W[ 45],W[ 46],W[ 47]); + SBoxE3(W[ 76],W[ 77],W[ 78],W[ 79]); + SBoxE3(W[108],W[109],W[110],W[111]); + + SBoxE4(W[ 8],W[ 9],W[ 10],W[ 11]); + SBoxE4(W[ 40],W[ 41],W[ 42],W[ 43]); + SBoxE4(W[ 72],W[ 73],W[ 74],W[ 75]); + SBoxE4(W[104],W[105],W[106],W[107]); SBoxE4(W[136],W[137],W[138],W[139]); + SBoxE5(W[ 36],W[ 37],W[ 38],W[ 39]); + SBoxE5(W[ 68],W[ 69],W[ 70],W[ 71]); + SBoxE5(W[100],W[101],W[102],W[103]); + SBoxE5(W[132],W[133],W[134],W[135]); + + SBoxE6(W[ 32],W[ 33],W[ 34],W[ 35]); + SBoxE6(W[ 64],W[ 65],W[ 66],W[ 67]); + SBoxE6(W[ 96],W[ 97],W[ 98],W[ 99]); + SBoxE6(W[128],W[129],W[130],W[131]); + + SBoxE7(W[ 28],W[ 29],W[ 30],W[ 31]); + SBoxE7(W[ 60],W[ 61],W[ 62],W[ 63]); + SBoxE7(W[ 92],W[ 93],W[ 
94],W[ 95]); + SBoxE7(W[124],W[125],W[126],W[127]); + + SBoxE8(W[ 24],W[ 25],W[ 26],W[ 27]); + SBoxE8(W[ 56],W[ 57],W[ 58],W[ 59]); + SBoxE8(W[ 88],W[ 89],W[ 90],W[ 91]); + SBoxE8(W[120],W[121],W[122],W[123]); + m_round_key.assign(W.begin() + 8, W.end()); } diff --git a/src/lib/block/threefish/threefish.cpp b/src/lib/block/threefish/threefish.cpp index f592021fb..2acdef020 100644 --- a/src/lib/block/threefish/threefish.cpp +++ b/src/lib/block/threefish/threefish.cpp @@ -122,16 +122,10 @@ void Threefish_512::encrypt_n(const byte in[], byte out[], size_t blocks) const } #endif - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u64bit X0 = load_le<u64bit>(in, 0); - u64bit X1 = load_le<u64bit>(in, 1); - u64bit X2 = load_le<u64bit>(in, 2); - u64bit X3 = load_le<u64bit>(in, 3); - u64bit X4 = load_le<u64bit>(in, 4); - u64bit X5 = load_le<u64bit>(in, 5); - u64bit X6 = load_le<u64bit>(in, 6); - u64bit X7 = load_le<u64bit>(in, 7); + u64bit X0, X1, X2, X3, X4, X5, X6, X7; + load_le(in + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); THREEFISH_INJECT_KEY(0); @@ -145,10 +139,7 @@ void Threefish_512::encrypt_n(const byte in[], byte out[], size_t blocks) const THREEFISH_ENC_8_ROUNDS(15,16); THREEFISH_ENC_8_ROUNDS(17,18); - store_le(out, X0, X1, X2, X3, X4, X5, X6, X7); - - in += 64; - out += 64; + store_le(out + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); } } @@ -211,16 +202,10 @@ void Threefish_512::decrypt_n(const byte in[], byte out[], size_t blocks) const THREEFISH_INJECT_KEY(R2); \ } while(0) - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u64bit X0 = load_le<u64bit>(in, 0); - u64bit X1 = load_le<u64bit>(in, 1); - u64bit X2 = load_le<u64bit>(in, 2); - u64bit X3 = load_le<u64bit>(in, 3); - u64bit X4 = load_le<u64bit>(in, 4); - u64bit X5 = load_le<u64bit>(in, 5); - u64bit X6 = load_le<u64bit>(in, 6); - u64bit X7 = load_le<u64bit>(in, 7); + u64bit X0, X1, X2, X3, X4, X5, X6, X7; + 
load_le(in + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); THREEFISH_INJECT_KEY(18); @@ -234,10 +219,7 @@ void Threefish_512::decrypt_n(const byte in[], byte out[], size_t blocks) const THREEFISH_DEC_8_ROUNDS(3,2); THREEFISH_DEC_8_ROUNDS(1,0); - store_le(out, X0, X1, X2, X3, X4, X5, X6, X7); - - in += 64; - out += 64; + store_le(out + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); } #undef THREEFISH_DEC_8_ROUNDS diff --git a/src/lib/block/twofish/twofish.cpp b/src/lib/block/twofish/twofish.cpp index 336d73a03..a98ae8e70 100644 --- a/src/lib/block/twofish/twofish.cpp +++ b/src/lib/block/twofish/twofish.cpp @@ -19,12 +19,15 @@ namespace Botan { */ void Twofish::encrypt_n(const byte in[], byte out[], size_t blocks) const { - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit A = load_le<u32bit>(in, 0) ^ m_RK[0]; - u32bit B = load_le<u32bit>(in, 1) ^ m_RK[1]; - u32bit C = load_le<u32bit>(in, 2) ^ m_RK[2]; - u32bit D = load_le<u32bit>(in, 3) ^ m_RK[3]; + u32bit A, B, C, D; + load_le(in + BLOCK_SIZE*i, A, B, C, D); + + A ^= m_RK[0]; + B ^= m_RK[1]; + C ^= m_RK[2]; + D ^= m_RK[3]; for(size_t j = 0; j != 16; j += 2) { @@ -58,10 +61,7 @@ void Twofish::encrypt_n(const byte in[], byte out[], size_t blocks) const A ^= m_RK[6]; B ^= m_RK[7]; - store_le(out, C, D, A, B); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_le(out + BLOCK_SIZE*i, C, D, A, B); } } @@ -70,12 +70,15 @@ void Twofish::encrypt_n(const byte in[], byte out[], size_t blocks) const */ void Twofish::decrypt_n(const byte in[], byte out[], size_t blocks) const { - for(size_t i = 0; i != blocks; ++i) + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) { - u32bit A = load_le<u32bit>(in, 0) ^ m_RK[4]; - u32bit B = load_le<u32bit>(in, 1) ^ m_RK[5]; - u32bit C = load_le<u32bit>(in, 2) ^ m_RK[6]; - u32bit D = load_le<u32bit>(in, 3) ^ m_RK[7]; + u32bit A, B, C, D; + load_le(in + BLOCK_SIZE*i, A, B, C, D); + + A ^= m_RK[4]; + B ^= m_RK[5]; + C ^= m_RK[6]; + D ^= m_RK[7]; 
for(size_t j = 0; j != 16; j += 2) { @@ -109,10 +112,7 @@ void Twofish::decrypt_n(const byte in[], byte out[], size_t blocks) const A ^= m_RK[2]; B ^= m_RK[3]; - store_le(out, C, D, A, B); - - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_le(out + BLOCK_SIZE*i, C, D, A, B); } } @@ -139,7 +139,7 @@ void Twofish::key_schedule(const byte key[], size_t length) m_SB[768+i] = MDS3[Q1[Q1[i]^S[ 3]]^S[ 7]]; } - for(size_t i = 0; i != 40; i += 2) + BOTAN_PARALLEL_FOR(size_t i = 0; i < 40; i += 2) { u32bit X = MDS0[Q0[Q0[i ]^key[ 8]]^key[ 0]] ^ MDS1[Q0[Q1[i ]^key[ 9]]^key[ 1]] ^ @@ -166,7 +166,7 @@ void Twofish::key_schedule(const byte key[], size_t length) m_SB[768+i] = MDS3[Q1[Q1[Q0[i]^S[ 3]]^S[ 7]]^S[11]]; } - for(size_t i = 0; i != 40; i += 2) + BOTAN_PARALLEL_FOR(size_t i = 0; i < 40; i += 2) { u32bit X = MDS0[Q0[Q0[Q1[i ]^key[16]]^key[ 8]]^key[ 0]] ^ MDS1[Q0[Q1[Q1[i ]^key[17]]^key[ 9]]^key[ 1]] ^ @@ -193,7 +193,7 @@ void Twofish::key_schedule(const byte key[], size_t length) m_SB[768+i] = MDS3[Q1[Q1[Q0[Q1[i]^S[ 3]]^S[ 7]]^S[11]]^S[15]]; } - for(size_t i = 0; i != 40; i += 2) + BOTAN_PARALLEL_FOR(size_t i = 0; i < 40; i += 2) { u32bit X = MDS0[Q0[Q0[Q1[Q1[i ]^key[24]]^key[16]]^key[ 8]]^key[ 0]] ^ MDS1[Q0[Q1[Q1[Q0[i ]^key[25]]^key[17]]^key[ 9]]^key[ 1]] ^ diff --git a/src/lib/block/xtea/xtea.cpp b/src/lib/block/xtea/xtea.cpp index 333406d9b..4e5ca7e7c 100644 --- a/src/lib/block/xtea/xtea.cpp +++ b/src/lib/block/xtea/xtea.cpp @@ -1,6 +1,6 @@ /* * XTEA -* (C) 1999-2009 Jack Lloyd +* (C) 1999-2009,2016 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -10,80 +10,49 @@ namespace Botan { -namespace { - -void xtea_encrypt_4(const byte in[32], byte out[32], const u32bit EK[64]) - { - u32bit L0, R0, L1, R1, L2, R2, L3, R3; - load_be(in, L0, R0, L1, R1, L2, R2, L3, R3); - - for(size_t i = 0; i != 32; ++i) - { - L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[2*i]; - L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[2*i]; - L2 += (((R2 << 4) ^ (R2 >> 5)) 
+ R2) ^ EK[2*i]; - L3 += (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[2*i]; - - R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[2*i+1]; - R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[2*i+1]; - R2 += (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[2*i+1]; - R3 += (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[2*i+1]; - } - - store_be(out, L0, R0, L1, R1, L2, R2, L3, R3); - } - -void xtea_decrypt_4(const byte in[32], byte out[32], const u32bit EK[64]) - { - u32bit L0, R0, L1, R1, L2, R2, L3, R3; - load_be(in, L0, R0, L1, R1, L2, R2, L3, R3); - - for(size_t i = 0; i != 32; ++i) - { - R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[63 - 2*i]; - R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[63 - 2*i]; - R2 -= (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[63 - 2*i]; - R3 -= (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[63 - 2*i]; - - L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[62 - 2*i]; - L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[62 - 2*i]; - L2 -= (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[62 - 2*i]; - L3 -= (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[62 - 2*i]; - } - - store_be(out, L0, R0, L1, R1, L2, R2, L3, R3); - } - -} - /* * XTEA Encryption */ void XTEA::encrypt_n(const byte in[], byte out[], size_t blocks) const { - while(blocks >= 4) - { - xtea_encrypt_4(in, out, &(this->m_EK[0])); - in += 4 * BLOCK_SIZE; - out += 4 * BLOCK_SIZE; - blocks -= 4; - } + const u32bit* EK = &m_EK[0]; - for(size_t i = 0; i != blocks; ++i) + const size_t blocks4 = blocks / 4; + const size_t blocks_left = blocks % 4; + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++) { - u32bit L = load_be<u32bit>(in, 0); - u32bit R = load_be<u32bit>(in, 1); + u32bit L0, R0, L1, R1, L2, R2, L3, R3; + load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); - for(size_t j = 0; j != 32; ++j) + for(size_t r = 0; r != 32; ++r) { - L += (((R << 4) ^ (R >> 5)) + R) ^ m_EK[2*j]; - R += (((L << 4) ^ (L >> 5)) + L) ^ m_EK[2*j+1]; + L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[2*r]; + L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[2*r]; + L2 += (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[2*r]; + L3 += 
(((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[2*r]; + + R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[2*r+1]; + R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[2*r+1]; + R2 += (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[2*r+1]; + R3 += (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[2*r+1]; } - store_be(out, L, R); + store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); + } + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i) + { + u32bit L, R; + load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R); + + for(size_t r = 0; r != 32; ++r) + { + L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*r]; + R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*r+1]; + } - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R); } } @@ -92,29 +61,44 @@ void XTEA::encrypt_n(const byte in[], byte out[], size_t blocks) const */ void XTEA::decrypt_n(const byte in[], byte out[], size_t blocks) const { - while(blocks >= 4) - { - xtea_decrypt_4(in, out, &(this->m_EK[0])); - in += 4 * BLOCK_SIZE; - out += 4 * BLOCK_SIZE; - blocks -= 4; - } + const u32bit* EK = &m_EK[0]; - for(size_t i = 0; i != blocks; ++i) + const size_t blocks4 = blocks / 4; + const size_t blocks_left = blocks % 4; + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++) { - u32bit L = load_be<u32bit>(in, 0); - u32bit R = load_be<u32bit>(in, 1); + u32bit L0, R0, L1, R1, L2, R2, L3, R3; + load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); - for(size_t j = 0; j != 32; ++j) + for(size_t r = 0; r != 32; ++r) { - R -= (((L << 4) ^ (L >> 5)) + L) ^ m_EK[63 - 2*j]; - L -= (((R << 4) ^ (R >> 5)) + R) ^ m_EK[62 - 2*j]; + R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[63 - 2*r]; + R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[63 - 2*r]; + R2 -= (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[63 - 2*r]; + R3 -= (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[63 - 2*r]; + + L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[62 - 2*r]; + L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[62 - 2*r]; + L2 -= (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[62 - 2*r]; + L3 -= (((R3 << 4) ^ (R3 >> 5)) + R3) ^ 
EK[62 - 2*r]; } - store_be(out, L, R); + store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); + } + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i) + { + u32bit L, R; + load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R); + + for(size_t r = 0; r != 32; ++r) + { + R -= (((L << 4) ^ (L >> 5)) + L) ^ EK[63 - 2*r]; + L -= (((R << 4) ^ (R >> 5)) + R) ^ EK[62 - 2*r]; + } - in += BLOCK_SIZE; - out += BLOCK_SIZE; + store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R); } } diff --git a/src/lib/utils/compiler.h b/src/lib/utils/compiler.h new file mode 100644 index 000000000..50441208b --- /dev/null +++ b/src/lib/utils/compiler.h @@ -0,0 +1,169 @@ +/* +* Define useful compiler-specific macros +* (C) 2016 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_UTIL_COMPILER_FLAGS_H__ +#define BOTAN_UTIL_COMPILER_FLAGS_H__ + +/* Should we use GCC-style inline assembler? */ +#if !defined(BOTAN_USE_GCC_INLINE_ASM) && defined(__GNUC__) + #define BOTAN_USE_GCC_INLINE_ASM 1 +#endif + +/* +* Define BOTAN_GCC_VERSION +*/ +#ifdef __GNUC__ + #define BOTAN_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__) +#else + #define BOTAN_GCC_VERSION 0 +#endif + +/* +* Define BOTAN_CLANG_VERSION +*/ +#ifdef __clang__ + #define BOTAN_CLANG_VERSION (__clang_major__ * 10 + __clang_minor__) +#else + #define BOTAN_CLANG_VERSION 0 +#endif + +/* +* Define BOTAN_FUNC_ISA +*/ +#if defined(__GNUG__) || defined(__clang__) + #define BOTAN_FUNC_ISA(isa) __attribute__ ((target(isa))) +#else + #define BOTAN_FUNC_ISA(isa) +#endif + +/* +* Define BOTAN_WARN_UNUSED_RESULT +*/ +#if defined(__GNUG__) || defined(__clang__) + #define BOTAN_WARN_UNUSED_RESULT __attribute__ ((warn_unused_result)) +#else + #define BOTAN_WARN_UNUSED_RESULT +#endif + +/* +* Define BOTAN_DEPRECATED(msg): expands to the compiler's deprecation attribute carrying msg, or to nothing +*/ +#if !defined(BOTAN_NO_DEPRECATED_WARNINGS) + + #if defined(__clang__) + #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated(msg))) + + #elif defined(_MSC_VER)
+ #define BOTAN_DEPRECATED(msg) __declspec(deprecated(msg)) + + #elif defined(__GNUG__) + // msg supported since GCC 4.5, earliest we support is 4.8 + #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated(msg))) + #endif + +#endif + +#if !defined(BOTAN_DEPRECATED) + #define BOTAN_DEPRECATED(msg) +#endif + +/* +* Define BOTAN_NORETURN +*/ +#if !defined(BOTAN_NORETURN) + + #if defined (__clang__) || defined (__GNUG__) + #define BOTAN_NORETURN __attribute__ ((__noreturn__)) + + #elif defined (_MSC_VER) + #define BOTAN_NORETURN __declspec(noreturn) + + #else + #define BOTAN_NORETURN + #endif + +#endif + +/* +* Define BOTAN_CURRENT_FUNCTION +*/ +#if defined(_MSC_VER) + #define BOTAN_CURRENT_FUNCTION __FUNCTION__ +#else + #define BOTAN_CURRENT_FUNCTION __func__ +#endif + +/* +* Define BOTAN_NOEXCEPT (for MSVC 2013) +*/ +#if defined(_MSC_VER) && (_MSC_VER < 1900) + // noexcept is not supported in VS 2013 + #include <yvals.h> + #define BOTAN_NOEXCEPT _NOEXCEPT +#else + #define BOTAN_NOEXCEPT noexcept +#endif + +/* +* Define BOTAN_PARALLEL_FOR +*/ +#if !defined(BOTAN_PARALLEL_FOR) + +#if defined(BOTAN_TARGET_HAS_CILKPLUS) + #define BOTAN_PARALLEL_FOR _Cilk_for +#elif defined(BOTAN_TARGET_HAS_OPENMP) + #define BOTAN_PARALLEL_FOR _Pragma("omp parallel for") for +#else + #define BOTAN_PARALLEL_FOR for +#endif + +#endif + +/* +* Define BOTAN_PARALLEL_SIMD_FOR (a plain `for`, prefixed by a simd/ivdep pragma where one is selected) +*/ +#if !defined(BOTAN_PARALLEL_SIMD_FOR) + +#if defined(BOTAN_TARGET_HAS_CILKPLUS) + #define BOTAN_PARALLEL_SIMD_FOR _Pragma("simd") for +#elif defined(BOTAN_TARGET_HAS_OPENMP) + #define BOTAN_PARALLEL_SIMD_FOR _Pragma("omp simd") for +#elif defined(BOTAN_TARGET_COMPILER_IS_GCC) + #define BOTAN_PARALLEL_SIMD_FOR _Pragma("GCC ivdep") for +#else + #define BOTAN_PARALLEL_SIMD_FOR for +#endif + +#endif + +/* +* Define BOTAN_PARALLEL_SPAWN +*/ +#if !defined(BOTAN_PARALLEL_SPAWN) + +#if defined(BOTAN_TARGET_HAS_CILKPLUS) + #define BOTAN_PARALLEL_SPAWN _Cilk_spawn +#else + #define BOTAN_PARALLEL_SPAWN +#endif + +#endif + +/* +*
Define BOTAN_PARALLEL_SYNC +*/ +#if !defined(BOTAN_PARALLEL_SYNC) + +#if defined(BOTAN_TARGET_HAS_CILKPLUS) + #define BOTAN_PARALLEL_SYNC _Cilk_sync +#else + #define BOTAN_PARALLEL_SYNC BOTAN_FORCE_SEMICOLON +#endif + +#endif + +#endif diff --git a/src/lib/utils/info.txt b/src/lib/utils/info.txt index 75a428a83..820dd407d 100644 --- a/src/lib/utils/info.txt +++ b/src/lib/utils/info.txt @@ -8,6 +8,7 @@ bswap.h calendar.h charset.h cpuid.h +compiler.h data_src.h database.h exceptn.h diff --git a/src/lib/utils/loadstor.h b/src/lib/utils/loadstor.h index 9ae9fda0e..15ff6a708 100644 --- a/src/lib/utils/loadstor.h +++ b/src/lib/utils/loadstor.h @@ -324,10 +324,10 @@ inline void load_le(T out[], { if(count > 0) { -#if defined(BOTAN_TARGET_CPU_HAS_KNOWN_ENDIANNESS) +#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) + std::memcpy(out, in, sizeof(T)*count); +#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) std::memcpy(out, in, sizeof(T)*count); - -#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) const size_t blocks = count - (count % 4); const size_t left = count - blocks; @@ -336,8 +336,6 @@ inline void load_le(T out[], for(size_t i = 0; i != left; ++i) out[blocks+i] = reverse_bytes(out[blocks+i]); -#endif - #else for(size_t i = 0; i != count; ++i) out[i] = load_le<T>(in, i); @@ -416,10 +414,10 @@ inline void load_be(T out[], { if(count > 0) { -#if defined(BOTAN_TARGET_CPU_HAS_KNOWN_ENDIANNESS) +#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) + std::memcpy(out, in, sizeof(T)*count); +#elif defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) std::memcpy(out, in, sizeof(T)*count); - -#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) const size_t blocks = count - (count % 4); const size_t left = count - blocks; @@ -428,8 +426,6 @@ inline void load_be(T out[], for(size_t i = 0; i != left; ++i) out[blocks+i] = reverse_bytes(out[blocks+i]); -#endif - #else for(size_t i = 0; i != count; ++i) out[i] = load_be<T>(in, i); |