about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jack Lloyd <[email protected]> 2016-11-27 14:54:30 -0500
committer Jack Lloyd <[email protected]> 2016-11-27 14:54:30 -0500
commit bd693b4746d5d8ccd65f07e67995ecf7cedacaa2 (patch)
tree 94babf2a8306dd0bf78a0da48f28ceca8a10080e
parent 9f28af95363cffa33e1745cc94814f86e937edce (diff)
parent a17e3bfa187adb5c6d7d80c78d5df8c1f146c647 (diff)
Merge GH #590 Initial Cilk/OpenMP support
-rwxr-xr-x configure.py 20
-rw-r--r-- src/build-data/buildh.in 170
-rw-r--r-- src/build-data/cc/clang.txt 2
-rw-r--r-- src/build-data/cc/gcc.txt 3
-rw-r--r-- src/lib/block/aes/aes.cpp 48
-rw-r--r-- src/lib/block/aes/aes_ssse3/aes_ssse3.cpp 4
-rw-r--r-- src/lib/block/blowfish/blowfish.cpp 22
-rw-r--r-- src/lib/block/camellia/camellia.cpp 22
-rw-r--r-- src/lib/block/cast/cast128.cpp 22
-rw-r--r-- src/lib/block/des/des.cpp 30
-rw-r--r-- src/lib/block/idea/idea.cpp 8
-rw-r--r-- src/lib/block/serpent/serpent.cpp 81
-rw-r--r-- src/lib/block/threefish/threefish.cpp 34
-rw-r--r-- src/lib/block/twofish/twofish.cpp 42
-rw-r--r-- src/lib/block/xtea/xtea.cpp 142
-rw-r--r-- src/lib/utils/compiler.h 169
-rw-r--r-- src/lib/utils/info.txt 1
-rw-r--r-- src/lib/utils/loadstor.h 16
18 files changed, 445 insertions, 391 deletions
diff --git a/configure.py b/configure.py
index 0fabeed3e..789377129 100755
--- a/configure.py
+++ b/configure.py
@@ -323,6 +323,11 @@ def process_command_line(args):
build_group.add_option('--with-external-includedir', metavar='DIR', default='',
help='use DIR for external includes')
+ build_group.add_option('--with-openmp', default=False, action='store_true',
+ help='enable use of OpenMP')
+ build_group.add_option('--with-cilkplus', default=False, action='store_true',
+ help='enable use of Cilk Plus')
+
link_methods = ['symlink', 'hardlink', 'copy']
build_group.add_option('--link-method', default=None, metavar='METHOD',
choices=link_methods,
@@ -850,6 +855,11 @@ class ArchInfo(object):
if options.with_valgrind:
macros.append('HAS_VALGRIND')
+ if options.with_openmp:
+ macros.append('TARGET_HAS_OPENMP')
+ if options.with_cilkplus:
+ macros.append('TARGET_HAS_CILKPLUS')
+
return macros
class CompilerInfo(object):
@@ -953,6 +963,16 @@ class CompilerInfo(object):
raise Exception('No sanitizer handling for %s' % (self.basename))
abi_link.append(self.sanitizer_flags)
+ if options.with_openmp:
+ if 'openmp' not in self.mach_abi_linking:
+ raise Exception('No support for OpenMP for %s' % (self.basename))
+ abi_link.append(self.mach_abi_linking['openmp'])
+
+ if options.with_cilkplus:
+ if 'cilkplus' not in self.mach_abi_linking:
+ raise Exception('No support for Cilk Plus for %s' % (self.basename))
+ abi_link.append(self.mach_abi_linking['cilkplus'])
+
abi_flags = ' '.join(sorted(abi_link))
if options.cc_abi_flags != '':
diff --git a/src/build-data/buildh.in b/src/build-data/buildh.in
index 56b70e060..ba6eee97d 100644
--- a/src/build-data/buildh.in
+++ b/src/build-data/buildh.in
@@ -21,8 +21,10 @@
#define BOTAN_VERSION_VC_REVISION "%{version_vc_rev}"
#define BOTAN_DISTRIBUTION_INFO "%{distribution_info}"
-%{unsafe_fuzzer_mode_define}
+/* How many bits per limb in a BigInt */
+#define BOTAN_MP_WORD_BITS %{mp_bits}
+%{unsafe_fuzzer_mode_define}
#define BOTAN_INSTALL_PREFIX R"(%{prefix})"
#define BOTAN_INSTALL_HEADER_DIR "%{includedir}/botan-%{version_major}.%{version_minor}"
@@ -33,6 +35,28 @@
#define BOTAN_DLL %{visibility_attribute}
#endif
+/* Target identification and feature test macros */
+%{target_os_defines}
+
+%{target_cpu_defines}
+
+%{target_compiler_defines}
+
+/*
+* Module availability definitions
+*/
+%{module_defines}
+
+/*
+* Local/misc configuration options (if any) follow
+*/
+%{local_config}
+%{misc_config}
+
+/*
+* Things you can edit (but probably shouldn't)
+*/
+
/* How much to allocate for a buffer of no particular size */
#define BOTAN_DEFAULT_BUFFER_SIZE 1024
@@ -49,12 +73,6 @@
*/
#define BOTAN_MLOCK_ALLOCATOR_MAX_LOCKED_KB 512
-/* Multiplier on a block cipher's native parallelism */
-#define BOTAN_BLOCK_CIPHER_PAR_MULT 4
-
-/* How many bits per limb in a BigInt */
-#define BOTAN_MP_WORD_BITS %{mp_bits}
-
/*
* If enabled uses memset via volatile function pointer to zero memory,
* otherwise does a byte at a time write via a volatile pointer.
@@ -114,6 +132,10 @@
{ "timestamp", "rdseed", "rdrand", "proc_info", \
"darwin_secrandom", "dev_random", "win32_cryptoapi", "proc_walk", "system_stats" }
+
+/* Multiplier on a block cipher's native parallelism */
+#define BOTAN_BLOCK_CIPHER_PAR_MULT 4
+
/*
* These control the RNG used by the system RNG interface
*/
@@ -156,32 +178,6 @@ Each read generates 32 bits of output
#define BOTAN_ENTROPY_RDSEED_RETRIES 20
/*
-* Compiler and target specific flags
-*/
-
-/* Should we use GCC-style inline assembler? */
-#if !defined(BOTAN_USE_GCC_INLINE_ASM) && defined(__GNUG__)
- #define BOTAN_USE_GCC_INLINE_ASM 1
-#endif
-
-#ifdef __GNUC__
- #define BOTAN_GCC_VERSION \
- (__GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__)
-#else
- #define BOTAN_GCC_VERSION 0
-#endif
-
-/* Target identification and feature test macros */
-%{target_os_defines}
-
-%{target_cpu_defines}
-
-#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) || \
- defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- #define BOTAN_TARGET_CPU_HAS_KNOWN_ENDIANNESS
-#endif
-
-/*
* If no way of dynamically determining the cache line size for the
* system exists, this value is used as the default. Used by the side
* channel countermeasures rather than for alignment purposes, so it is
@@ -192,93 +188,6 @@ Each read generates 32 bits of output
#define BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE 32
#endif
-%{target_compiler_defines}
-
-
-#if defined(__GNUG__) || defined(__clang__)
- #define BOTAN_FUNC_ISA(isa) __attribute__ ((target(isa)))
-#else
- #define BOTAN_FUNC_ISA(isa)
-#endif
-
-#if defined(__GNUG__) || defined(__clang__)
- #define BOTAN_WARN_UNUSED_RESULT __attribute__ ((warn_unused_result))
-#else
- #define BOTAN_WARN_UNUSED_RESULT
-#endif
-
-/*
-* Compile-time deprecation warnings
-*/
-#if !defined(BOTAN_NO_DEPRECATED_WARNINGS)
-
- #if defined(__clang__)
- #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated))
-
- #elif defined(_MSC_VER)
- #define BOTAN_DEPRECATED(msg) __declspec(deprecated(msg))
-
- #elif defined(__GNUG__)
-
- #if BOTAN_GCC_VERSION >= 450
- #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
- #else
- #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated))
- #endif
-
- #endif
-
-#endif
-
-#if !defined(BOTAN_NORETURN)
-
- #if defined (__clang__) || defined (__GNUG__)
- #define BOTAN_NORETURN __attribute__ ((__noreturn__))
-
- #elif defined (_MSC_VER)
- #define BOTAN_NORETURN __declspec(noreturn)
-
- #else
- #define BOTAN_NORETURN
-
- #endif
-
-#endif
-
-
-#if defined(_MSC_VER)
- #define BOTAN_CURRENT_FUNCTION __FUNCTION__
-#else
- #define BOTAN_CURRENT_FUNCTION __func__
-#endif
-
-#if !defined(BOTAN_DEPRECATED)
- #define BOTAN_DEPRECATED(msg)
-#endif
-
-#if defined(_MSC_VER) && (_MSC_VER < 1900)
- // noexcept is not supported in VS 2013
- #include <yvals.h>
- #define BOTAN_NOEXCEPT _NOEXCEPT
-#else
- #define BOTAN_NOEXCEPT noexcept
-#endif
-
-/*
-* Module availability definitions
-*/
-%{module_defines}
-
-/*
-* Local configuration options (if any) follow
-*/
-%{local_config}
-
-/*
-* Miscellaneous configuration options (if any) follow
-*/
-%{misc_config}
-
/**
* Controls how AutoSeeded_RNG is instantiated
*/
@@ -301,20 +210,21 @@ Each read generates 32 bits of output
// The struct is only declared to force the semicolon, it is never defined.
#define BOTAN_FORCE_SEMICOLON struct BOTAN_DUMMY_STRUCT
-#if defined(BOTAN_TARGET_ARCH_IS_X86_64) && (\
- (defined(_MSC_VER) && !defined(_WIN64)) || \
- (defined(__clang__) && !defined(__x86_64__)) || \
- (defined(__GNUG__) && !defined(__x86_64__)) \
-)
+// Check for a common build problem:
+
+#if defined(BOTAN_TARGET_ARCH_IS_X86_64) && ((defined(_MSC_VER) && !defined(_WIN64)) || \
+ (defined(__clang__) && !defined(__x86_64__)) || \
+ (defined(__GNUG__) && !defined(__x86_64__)))
#error "Trying to compile Botan configured as x86_64 with non-x86_64 compiler."
#endif
-#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (\
- (defined(_MSC_VER) && defined(_WIN64)) || \
- (defined(__clang__) && !defined(__i386__)) || \
- (defined(__GNUG__) && !defined(__i386__)) \
-)
+#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && ((defined(_MSC_VER) && defined(_WIN64)) || \
+ (defined(__clang__) && !defined(__i386__)) || \
+ (defined(__GNUG__) && !defined(__i386__)))
+
#error "Trying to compile Botan configured as x86_32 with non-x86_32 compiler."
#endif
+#include <botan/compiler.h>
+
#endif
diff --git a/src/build-data/cc/clang.txt b/src/build-data/cc/clang.txt
index c4a85658f..055315c3b 100644
--- a/src/build-data/cc/clang.txt
+++ b/src/build-data/cc/clang.txt
@@ -73,6 +73,8 @@ ivybridge -> "-march=core-avx-i"
<mach_abi_linking>
all -> "-pthread"
+openmp -> "-fopenmp"
+
x86_32 -> "-m32"
x86_64 -> "-m64"
ppc64 -> "-m64"
diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt
index b88454ce6..0a53e15c1 100644
--- a/src/build-data/cc/gcc.txt
+++ b/src/build-data/cc/gcc.txt
@@ -120,6 +120,9 @@ all_x86_64 -> "-momit-leaf-frame-pointer"
<mach_abi_linking>
all -> "-pthread -fstack-protector"
+cilkplus -> "-fcilkplus"
+openmp -> "-fopenmp"
+
mips64 -> "-mabi=64"
s390 -> "-m31"
s390x -> "-m64"
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp
index 39f5bd0db..8c7000135 100644
--- a/src/lib/block/aes/aes.cpp
+++ b/src/lib/block/aes/aes.cpp
@@ -168,12 +168,15 @@ void aes_encrypt_n(const byte in[], byte out[],
}
Z &= TE[82]; // this is zero, which hopefully the compiler cannot deduce
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit T0 = load_be<u32bit>(in, 0) ^ EK[0];
- u32bit T1 = load_be<u32bit>(in, 1) ^ EK[1];
- u32bit T2 = load_be<u32bit>(in, 2) ^ EK[2];
- u32bit T3 = load_be<u32bit>(in, 3) ^ EK[3];
+ u32bit T0, T1, T2, T3;
+ load_be(in + 16*i, T0, T1, T2, T3);
+
+ T0 ^= EK[0];
+ T1 ^= EK[1];
+ T2 ^= EK[2];
+ T3 ^= EK[3];
T0 ^= Z;
@@ -226,25 +229,22 @@ void aes_encrypt_n(const byte in[], byte out[],
TE[get_byte(2, T1) + 512] ^ TE[get_byte(3, T2) + 768];
}
- out[ 0] = SE[get_byte(0, B0)] ^ ME[0];
- out[ 1] = SE[get_byte(1, B1)] ^ ME[1];
- out[ 2] = SE[get_byte(2, B2)] ^ ME[2];
- out[ 3] = SE[get_byte(3, B3)] ^ ME[3];
- out[ 4] = SE[get_byte(0, B1)] ^ ME[4];
- out[ 5] = SE[get_byte(1, B2)] ^ ME[5];
- out[ 6] = SE[get_byte(2, B3)] ^ ME[6];
- out[ 7] = SE[get_byte(3, B0)] ^ ME[7];
- out[ 8] = SE[get_byte(0, B2)] ^ ME[8];
- out[ 9] = SE[get_byte(1, B3)] ^ ME[9];
- out[10] = SE[get_byte(2, B0)] ^ ME[10];
- out[11] = SE[get_byte(3, B1)] ^ ME[11];
- out[12] = SE[get_byte(0, B3)] ^ ME[12];
- out[13] = SE[get_byte(1, B0)] ^ ME[13];
- out[14] = SE[get_byte(2, B1)] ^ ME[14];
- out[15] = SE[get_byte(3, B2)] ^ ME[15];
-
- in += 16;
- out += 16;
+ out[16*i+ 0] = SE[get_byte(0, B0)] ^ ME[0];
+ out[16*i+ 1] = SE[get_byte(1, B1)] ^ ME[1];
+ out[16*i+ 2] = SE[get_byte(2, B2)] ^ ME[2];
+ out[16*i+ 3] = SE[get_byte(3, B3)] ^ ME[3];
+ out[16*i+ 4] = SE[get_byte(0, B1)] ^ ME[4];
+ out[16*i+ 5] = SE[get_byte(1, B2)] ^ ME[5];
+ out[16*i+ 6] = SE[get_byte(2, B3)] ^ ME[6];
+ out[16*i+ 7] = SE[get_byte(3, B0)] ^ ME[7];
+ out[16*i+ 8] = SE[get_byte(0, B2)] ^ ME[8];
+ out[16*i+ 9] = SE[get_byte(1, B3)] ^ ME[9];
+ out[16*i+10] = SE[get_byte(2, B0)] ^ ME[10];
+ out[16*i+11] = SE[get_byte(3, B1)] ^ ME[11];
+ out[16*i+12] = SE[get_byte(0, B3)] ^ ME[12];
+ out[16*i+13] = SE[get_byte(1, B0)] ^ ME[13];
+ out[16*i+14] = SE[get_byte(2, B1)] ^ ME[14];
+ out[16*i+15] = SE[get_byte(3, B2)] ^ ME[15];
}
}
diff --git a/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp b/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp
index ef24795bb..d8c7e7314 100644
--- a/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp
+++ b/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp
@@ -355,7 +355,7 @@ void AES_128::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
CT::poison(in, blocks * block_size());
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
__m128i B = _mm_loadu_si128(in_mm + i);
_mm_storeu_si128(out_mm + i, aes_ssse3_encrypt(B, keys, 10));
@@ -378,7 +378,7 @@ void AES_128::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
CT::poison(in, blocks * block_size());
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
__m128i B = _mm_loadu_si128(in_mm + i);
_mm_storeu_si128(out_mm + i, aes_ssse3_decrypt(B, keys, 10));
diff --git a/src/lib/block/blowfish/blowfish.cpp b/src/lib/block/blowfish/blowfish.cpp
index 7a06cf797..69d345baa 100644
--- a/src/lib/block/blowfish/blowfish.cpp
+++ b/src/lib/block/blowfish/blowfish.cpp
@@ -202,10 +202,10 @@ void Blowfish::encrypt_n(const byte in[], byte out[], size_t blocks) const
const u32bit* S3 = &m_S[512];
const u32bit* S4 = &m_S[768];
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit L = load_be<u32bit>(in, 0);
- u32bit R = load_be<u32bit>(in, 1);
+ u32bit L, R;
+ load_be(in + BLOCK_SIZE*i, L, R);
for(size_t j = 0; j != 16; j += 2)
{
@@ -220,10 +220,7 @@ void Blowfish::encrypt_n(const byte in[], byte out[], size_t blocks) const
L ^= m_P[16]; R ^= m_P[17];
- store_be(out, R, L);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(out + BLOCK_SIZE*i, R, L);
}
}
@@ -237,10 +234,10 @@ void Blowfish::decrypt_n(const byte in[], byte out[], size_t blocks) const
const u32bit* S3 = &m_S[512];
const u32bit* S4 = &m_S[768];
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit L = load_be<u32bit>(in, 0);
- u32bit R = load_be<u32bit>(in, 1);
+ u32bit L, R;
+ load_be(in + BLOCK_SIZE*i, L, R);
for(size_t j = 17; j != 1; j -= 2)
{
@@ -255,10 +252,7 @@ void Blowfish::decrypt_n(const byte in[], byte out[], size_t blocks) const
L ^= m_P[1]; R ^= m_P[0];
- store_be(out, R, L);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(out + BLOCK_SIZE*i, R, L);
}
}
diff --git a/src/lib/block/camellia/camellia.cpp b/src/lib/block/camellia/camellia.cpp
index ac5d57d4e..5ac13b9ab 100644
--- a/src/lib/block/camellia/camellia.cpp
+++ b/src/lib/block/camellia/camellia.cpp
@@ -645,10 +645,10 @@ inline u64bit FLINV(u64bit v, u64bit K)
void encrypt(const byte in[], byte out[], size_t blocks,
const secure_vector<u64bit>& SK, const size_t rounds)
{
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u64bit D1 = load_be<u64bit>(in, 0);
- u64bit D2 = load_be<u64bit>(in, 1);
+ u64bit D1, D2;
+ load_be(in + 16*i, D1, D2);
const u64bit* K = SK.data();
@@ -676,10 +676,7 @@ void encrypt(const byte in[], byte out[], size_t blocks,
D2 ^= *K++;
D1 ^= *K++;
- store_be(out, D2, D1);
-
- in += 16;
- out += 16;
+ store_be(out + 16*i, D2, D1);
}
}
@@ -689,10 +686,10 @@ void encrypt(const byte in[], byte out[], size_t blocks,
void decrypt(const byte in[], byte out[], size_t blocks,
const secure_vector<u64bit>& SK, const size_t rounds)
{
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u64bit D1 = load_be<u64bit>(in, 0);
- u64bit D2 = load_be<u64bit>(in, 1);
+ u64bit D1, D2;
+ load_be(in + 16*i, D1, D2);
const u64bit* K = &SK[SK.size()-1];
@@ -720,10 +717,7 @@ void decrypt(const byte in[], byte out[], size_t blocks,
D1 ^= *K--;
D2 ^= *K;
- store_be(out, D2, D1);
-
- in += 16;
- out += 16;
+ store_be(out + 16*i, D2, D1);
}
}
diff --git a/src/lib/block/cast/cast128.cpp b/src/lib/block/cast/cast128.cpp
index 53f7d4611..96c4f45a7 100644
--- a/src/lib/block/cast/cast128.cpp
+++ b/src/lib/block/cast/cast128.cpp
@@ -50,10 +50,10 @@ inline void R3(u32bit& L, u32bit R, u32bit MK, byte RK)
*/
void CAST_128::encrypt_n(const byte in[], byte out[], size_t blocks) const
{
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit L = load_be<u32bit>(in, 0);
- u32bit R = load_be<u32bit>(in, 1);
+ u32bit L, R;
+ load_be(in + BLOCK_SIZE*i, L, R);
R1(L, R, m_MK[ 0], m_RK[ 0]);
R2(R, L, m_MK[ 1], m_RK[ 1]);
@@ -72,10 +72,7 @@ void CAST_128::encrypt_n(const byte in[], byte out[], size_t blocks) const
R3(L, R, m_MK[14], m_RK[14]);
R1(R, L, m_MK[15], m_RK[15]);
- store_be(out, R, L);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(out + BLOCK_SIZE*i, R, L);
}
}
@@ -84,10 +81,10 @@ void CAST_128::encrypt_n(const byte in[], byte out[], size_t blocks) const
*/
void CAST_128::decrypt_n(const byte in[], byte out[], size_t blocks) const
{
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit L = load_be<u32bit>(in, 0);
- u32bit R = load_be<u32bit>(in, 1);
+ u32bit L, R;
+ load_be(in + BLOCK_SIZE*i, L, R);
R1(L, R, m_MK[15], m_RK[15]);
R3(R, L, m_MK[14], m_RK[14]);
@@ -106,10 +103,7 @@ void CAST_128::decrypt_n(const byte in[], byte out[], size_t blocks) const
R2(L, R, m_MK[ 1], m_RK[ 1]);
R1(R, L, m_MK[ 0], m_RK[ 0]);
- store_be(out, R, L);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(out + BLOCK_SIZE*i, R, L);
}
}
diff --git a/src/lib/block/des/des.cpp b/src/lib/block/des/des.cpp
index 88671df8d..a55c43ec7 100644
--- a/src/lib/block/des/des.cpp
+++ b/src/lib/block/des/des.cpp
@@ -144,12 +144,12 @@ void des_decrypt(u32bit& L, u32bit& R,
*/
void DES::encrypt_n(const byte in[], byte out[], size_t blocks) const
{
- for(size_t i = 0; i != blocks; ++i)
+ for(size_t i = 0; i < blocks; ++i)
{
- u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) |
- (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) |
- (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) |
- (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] );
+ u64bit T = (DES_IPTAB1[in[8*i+0]] ) | (DES_IPTAB1[in[8*i+1]] << 1) |
+ (DES_IPTAB1[in[8*i+2]] << 2) | (DES_IPTAB1[in[8*i+3]] << 3) |
+ (DES_IPTAB1[in[8*i+4]] << 4) | (DES_IPTAB1[in[8*i+5]] << 5) |
+ (DES_IPTAB1[in[8*i+6]] << 6) | (DES_IPTAB2[in[8*i+7]] );
u32bit L = static_cast<u32bit>(T >> 32);
u32bit R = static_cast<u32bit>(T);
@@ -162,10 +162,7 @@ void DES::encrypt_n(const byte in[], byte out[], size_t blocks) const
(DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] );
T = rotate_left(T, 32);
- store_be(T, out);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(T, out + 8*i);
}
}
@@ -174,12 +171,12 @@ void DES::encrypt_n(const byte in[], byte out[], size_t blocks) const
*/
void DES::decrypt_n(const byte in[], byte out[], size_t blocks) const
{
- for(size_t i = 0; i != blocks; ++i)
+ for(size_t i = 0; i < blocks; ++i)
{
- u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) |
- (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) |
- (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) |
- (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] );
+ u64bit T = (DES_IPTAB1[in[BLOCK_SIZE*i+0]] ) | (DES_IPTAB1[in[BLOCK_SIZE*i+1]] << 1) |
+ (DES_IPTAB1[in[BLOCK_SIZE*i+2]] << 2) | (DES_IPTAB1[in[BLOCK_SIZE*i+3]] << 3) |
+ (DES_IPTAB1[in[BLOCK_SIZE*i+4]] << 4) | (DES_IPTAB1[in[BLOCK_SIZE*i+5]] << 5) |
+ (DES_IPTAB1[in[BLOCK_SIZE*i+6]] << 6) | (DES_IPTAB2[in[BLOCK_SIZE*i+7]] );
u32bit L = static_cast<u32bit>(T >> 32);
u32bit R = static_cast<u32bit>(T);
@@ -193,10 +190,7 @@ void DES::decrypt_n(const byte in[], byte out[], size_t blocks) const
T = rotate_left(T, 32);
- store_be(T, out);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(T, out + BLOCK_SIZE*i);
}
}
diff --git a/src/lib/block/idea/idea.cpp b/src/lib/block/idea/idea.cpp
index 85cc5e757..1fe25d599 100644
--- a/src/lib/block/idea/idea.cpp
+++ b/src/lib/block/idea/idea.cpp
@@ -67,12 +67,10 @@ void idea_op(const byte in[], byte out[], size_t blocks, const u16bit K[52])
CT::poison(out, blocks * 8);
CT::poison(K, 52);
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u16bit X1 = load_be<u16bit>(in + BLOCK_SIZE*i, 0);
- u16bit X2 = load_be<u16bit>(in + BLOCK_SIZE*i, 1);
- u16bit X3 = load_be<u16bit>(in + BLOCK_SIZE*i, 2);
- u16bit X4 = load_be<u16bit>(in + BLOCK_SIZE*i, 3);
+ u16bit X1, X2, X3, X4;
+ load_be(in + BLOCK_SIZE*i, X1, X2, X3, X4);
for(size_t j = 0; j != 8; ++j)
{
diff --git a/src/lib/block/serpent/serpent.cpp b/src/lib/block/serpent/serpent.cpp
index 07088211d..a1326b888 100644
--- a/src/lib/block/serpent/serpent.cpp
+++ b/src/lib/block/serpent/serpent.cpp
@@ -70,12 +70,10 @@ void Serpent::encrypt_n(const byte in[], byte out[], size_t blocks) const
}
#endif
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit B0 = load_le<u32bit>(in, 0);
- u32bit B1 = load_le<u32bit>(in, 1);
- u32bit B2 = load_le<u32bit>(in, 2);
- u32bit B3 = load_le<u32bit>(in, 3);
+ u32bit B0, B1, B2, B3;
+ load_le(in + 16*i, B0, B1, B2, B3);
key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -110,10 +108,7 @@ void Serpent::encrypt_n(const byte in[], byte out[], size_t blocks) const
key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3);
- store_le(out, B0, B1, B2, B3);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_le(out + 16*i, B0, B1, B2, B3);
}
}
@@ -135,12 +130,10 @@ void Serpent::decrypt_n(const byte in[], byte out[], size_t blocks) const
}
#endif
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit B0 = load_le<u32bit>(in, 0);
- u32bit B1 = load_le<u32bit>(in, 1);
- u32bit B2 = load_le<u32bit>(in, 2);
- u32bit B3 = load_le<u32bit>(in, 3);
+ u32bit B0, B1, B2, B3;
+ load_le(in + 16*i, B0, B1, B2, B3);
key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
@@ -175,10 +168,7 @@ void Serpent::decrypt_n(const byte in[], byte out[], size_t blocks) const
i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);
- store_le(out, B0, B1, B2, B3);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_le(out + 16*i, B0, B1, B2, B3);
}
}
@@ -205,24 +195,47 @@ void Serpent::key_schedule(const byte key[], size_t length)
W[i] = rotate_left(wi, 11);
}
- SBoxE4(W[ 8],W[ 9],W[ 10],W[ 11]); SBoxE3(W[ 12],W[ 13],W[ 14],W[ 15]);
- SBoxE2(W[ 16],W[ 17],W[ 18],W[ 19]); SBoxE1(W[ 20],W[ 21],W[ 22],W[ 23]);
- SBoxE8(W[ 24],W[ 25],W[ 26],W[ 27]); SBoxE7(W[ 28],W[ 29],W[ 30],W[ 31]);
- SBoxE6(W[ 32],W[ 33],W[ 34],W[ 35]); SBoxE5(W[ 36],W[ 37],W[ 38],W[ 39]);
- SBoxE4(W[ 40],W[ 41],W[ 42],W[ 43]); SBoxE3(W[ 44],W[ 45],W[ 46],W[ 47]);
- SBoxE2(W[ 48],W[ 49],W[ 50],W[ 51]); SBoxE1(W[ 52],W[ 53],W[ 54],W[ 55]);
- SBoxE8(W[ 56],W[ 57],W[ 58],W[ 59]); SBoxE7(W[ 60],W[ 61],W[ 62],W[ 63]);
- SBoxE6(W[ 64],W[ 65],W[ 66],W[ 67]); SBoxE5(W[ 68],W[ 69],W[ 70],W[ 71]);
- SBoxE4(W[ 72],W[ 73],W[ 74],W[ 75]); SBoxE3(W[ 76],W[ 77],W[ 78],W[ 79]);
- SBoxE2(W[ 80],W[ 81],W[ 82],W[ 83]); SBoxE1(W[ 84],W[ 85],W[ 86],W[ 87]);
- SBoxE8(W[ 88],W[ 89],W[ 90],W[ 91]); SBoxE7(W[ 92],W[ 93],W[ 94],W[ 95]);
- SBoxE6(W[ 96],W[ 97],W[ 98],W[ 99]); SBoxE5(W[100],W[101],W[102],W[103]);
- SBoxE4(W[104],W[105],W[106],W[107]); SBoxE3(W[108],W[109],W[110],W[111]);
- SBoxE2(W[112],W[113],W[114],W[115]); SBoxE1(W[116],W[117],W[118],W[119]);
- SBoxE8(W[120],W[121],W[122],W[123]); SBoxE7(W[124],W[125],W[126],W[127]);
- SBoxE6(W[128],W[129],W[130],W[131]); SBoxE5(W[132],W[133],W[134],W[135]);
+ SBoxE1(W[ 20],W[ 21],W[ 22],W[ 23]);
+ SBoxE1(W[ 52],W[ 53],W[ 54],W[ 55]);
+ SBoxE1(W[ 84],W[ 85],W[ 86],W[ 87]);
+ SBoxE1(W[116],W[117],W[118],W[119]);
+
+ SBoxE2(W[ 16],W[ 17],W[ 18],W[ 19]);
+ SBoxE2(W[ 48],W[ 49],W[ 50],W[ 51]);
+ SBoxE2(W[ 80],W[ 81],W[ 82],W[ 83]);
+ SBoxE2(W[112],W[113],W[114],W[115]);
+
+ SBoxE3(W[ 12],W[ 13],W[ 14],W[ 15]);
+ SBoxE3(W[ 44],W[ 45],W[ 46],W[ 47]);
+ SBoxE3(W[ 76],W[ 77],W[ 78],W[ 79]);
+ SBoxE3(W[108],W[109],W[110],W[111]);
+
+ SBoxE4(W[ 8],W[ 9],W[ 10],W[ 11]);
+ SBoxE4(W[ 40],W[ 41],W[ 42],W[ 43]);
+ SBoxE4(W[ 72],W[ 73],W[ 74],W[ 75]);
+ SBoxE4(W[104],W[105],W[106],W[107]);
SBoxE4(W[136],W[137],W[138],W[139]);
+ SBoxE5(W[ 36],W[ 37],W[ 38],W[ 39]);
+ SBoxE5(W[ 68],W[ 69],W[ 70],W[ 71]);
+ SBoxE5(W[100],W[101],W[102],W[103]);
+ SBoxE5(W[132],W[133],W[134],W[135]);
+
+ SBoxE6(W[ 32],W[ 33],W[ 34],W[ 35]);
+ SBoxE6(W[ 64],W[ 65],W[ 66],W[ 67]);
+ SBoxE6(W[ 96],W[ 97],W[ 98],W[ 99]);
+ SBoxE6(W[128],W[129],W[130],W[131]);
+
+ SBoxE7(W[ 28],W[ 29],W[ 30],W[ 31]);
+ SBoxE7(W[ 60],W[ 61],W[ 62],W[ 63]);
+ SBoxE7(W[ 92],W[ 93],W[ 94],W[ 95]);
+ SBoxE7(W[124],W[125],W[126],W[127]);
+
+ SBoxE8(W[ 24],W[ 25],W[ 26],W[ 27]);
+ SBoxE8(W[ 56],W[ 57],W[ 58],W[ 59]);
+ SBoxE8(W[ 88],W[ 89],W[ 90],W[ 91]);
+ SBoxE8(W[120],W[121],W[122],W[123]);
+
m_round_key.assign(W.begin() + 8, W.end());
}
diff --git a/src/lib/block/threefish/threefish.cpp b/src/lib/block/threefish/threefish.cpp
index f592021fb..2acdef020 100644
--- a/src/lib/block/threefish/threefish.cpp
+++ b/src/lib/block/threefish/threefish.cpp
@@ -122,16 +122,10 @@ void Threefish_512::encrypt_n(const byte in[], byte out[], size_t blocks) const
}
#endif
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u64bit X0 = load_le<u64bit>(in, 0);
- u64bit X1 = load_le<u64bit>(in, 1);
- u64bit X2 = load_le<u64bit>(in, 2);
- u64bit X3 = load_le<u64bit>(in, 3);
- u64bit X4 = load_le<u64bit>(in, 4);
- u64bit X5 = load_le<u64bit>(in, 5);
- u64bit X6 = load_le<u64bit>(in, 6);
- u64bit X7 = load_le<u64bit>(in, 7);
+ u64bit X0, X1, X2, X3, X4, X5, X6, X7;
+ load_le(in + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7);
THREEFISH_INJECT_KEY(0);
@@ -145,10 +139,7 @@ void Threefish_512::encrypt_n(const byte in[], byte out[], size_t blocks) const
THREEFISH_ENC_8_ROUNDS(15,16);
THREEFISH_ENC_8_ROUNDS(17,18);
- store_le(out, X0, X1, X2, X3, X4, X5, X6, X7);
-
- in += 64;
- out += 64;
+ store_le(out + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7);
}
}
@@ -211,16 +202,10 @@ void Threefish_512::decrypt_n(const byte in[], byte out[], size_t blocks) const
THREEFISH_INJECT_KEY(R2); \
} while(0)
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u64bit X0 = load_le<u64bit>(in, 0);
- u64bit X1 = load_le<u64bit>(in, 1);
- u64bit X2 = load_le<u64bit>(in, 2);
- u64bit X3 = load_le<u64bit>(in, 3);
- u64bit X4 = load_le<u64bit>(in, 4);
- u64bit X5 = load_le<u64bit>(in, 5);
- u64bit X6 = load_le<u64bit>(in, 6);
- u64bit X7 = load_le<u64bit>(in, 7);
+ u64bit X0, X1, X2, X3, X4, X5, X6, X7;
+ load_le(in + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7);
THREEFISH_INJECT_KEY(18);
@@ -234,10 +219,7 @@ void Threefish_512::decrypt_n(const byte in[], byte out[], size_t blocks) const
THREEFISH_DEC_8_ROUNDS(3,2);
THREEFISH_DEC_8_ROUNDS(1,0);
- store_le(out, X0, X1, X2, X3, X4, X5, X6, X7);
-
- in += 64;
- out += 64;
+ store_le(out + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7);
}
#undef THREEFISH_DEC_8_ROUNDS
diff --git a/src/lib/block/twofish/twofish.cpp b/src/lib/block/twofish/twofish.cpp
index 336d73a03..a98ae8e70 100644
--- a/src/lib/block/twofish/twofish.cpp
+++ b/src/lib/block/twofish/twofish.cpp
@@ -19,12 +19,15 @@ namespace Botan {
*/
void Twofish::encrypt_n(const byte in[], byte out[], size_t blocks) const
{
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit A = load_le<u32bit>(in, 0) ^ m_RK[0];
- u32bit B = load_le<u32bit>(in, 1) ^ m_RK[1];
- u32bit C = load_le<u32bit>(in, 2) ^ m_RK[2];
- u32bit D = load_le<u32bit>(in, 3) ^ m_RK[3];
+ u32bit A, B, C, D;
+ load_le(in + BLOCK_SIZE*i, A, B, C, D);
+
+ A ^= m_RK[0];
+ B ^= m_RK[1];
+ C ^= m_RK[2];
+ D ^= m_RK[3];
for(size_t j = 0; j != 16; j += 2)
{
@@ -58,10 +61,7 @@ void Twofish::encrypt_n(const byte in[], byte out[], size_t blocks) const
A ^= m_RK[6];
B ^= m_RK[7];
- store_le(out, C, D, A, B);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_le(out + BLOCK_SIZE*i, C, D, A, B);
}
}
@@ -70,12 +70,15 @@ void Twofish::encrypt_n(const byte in[], byte out[], size_t blocks) const
*/
void Twofish::decrypt_n(const byte in[], byte out[], size_t blocks) const
{
- for(size_t i = 0; i != blocks; ++i)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
{
- u32bit A = load_le<u32bit>(in, 0) ^ m_RK[4];
- u32bit B = load_le<u32bit>(in, 1) ^ m_RK[5];
- u32bit C = load_le<u32bit>(in, 2) ^ m_RK[6];
- u32bit D = load_le<u32bit>(in, 3) ^ m_RK[7];
+ u32bit A, B, C, D;
+ load_le(in + BLOCK_SIZE*i, A, B, C, D);
+
+ A ^= m_RK[4];
+ B ^= m_RK[5];
+ C ^= m_RK[6];
+ D ^= m_RK[7];
for(size_t j = 0; j != 16; j += 2)
{
@@ -109,10 +112,7 @@ void Twofish::decrypt_n(const byte in[], byte out[], size_t blocks) const
A ^= m_RK[2];
B ^= m_RK[3];
- store_le(out, C, D, A, B);
-
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_le(out + BLOCK_SIZE*i, C, D, A, B);
}
}
@@ -139,7 +139,7 @@ void Twofish::key_schedule(const byte key[], size_t length)
m_SB[768+i] = MDS3[Q1[Q1[i]^S[ 3]]^S[ 7]];
}
- for(size_t i = 0; i != 40; i += 2)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < 40; i += 2)
{
u32bit X = MDS0[Q0[Q0[i ]^key[ 8]]^key[ 0]] ^
MDS1[Q0[Q1[i ]^key[ 9]]^key[ 1]] ^
@@ -166,7 +166,7 @@ void Twofish::key_schedule(const byte key[], size_t length)
m_SB[768+i] = MDS3[Q1[Q1[Q0[i]^S[ 3]]^S[ 7]]^S[11]];
}
- for(size_t i = 0; i != 40; i += 2)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < 40; i += 2)
{
u32bit X = MDS0[Q0[Q0[Q1[i ]^key[16]]^key[ 8]]^key[ 0]] ^
MDS1[Q0[Q1[Q1[i ]^key[17]]^key[ 9]]^key[ 1]] ^
@@ -193,7 +193,7 @@ void Twofish::key_schedule(const byte key[], size_t length)
m_SB[768+i] = MDS3[Q1[Q1[Q0[Q1[i]^S[ 3]]^S[ 7]]^S[11]]^S[15]];
}
- for(size_t i = 0; i != 40; i += 2)
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < 40; i += 2)
{
u32bit X = MDS0[Q0[Q0[Q1[Q1[i ]^key[24]]^key[16]]^key[ 8]]^key[ 0]] ^
MDS1[Q0[Q1[Q1[Q0[i ]^key[25]]^key[17]]^key[ 9]]^key[ 1]] ^
diff --git a/src/lib/block/xtea/xtea.cpp b/src/lib/block/xtea/xtea.cpp
index 333406d9b..4e5ca7e7c 100644
--- a/src/lib/block/xtea/xtea.cpp
+++ b/src/lib/block/xtea/xtea.cpp
@@ -1,6 +1,6 @@
/*
* XTEA
-* (C) 1999-2009 Jack Lloyd
+* (C) 1999-2009,2016 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -10,80 +10,49 @@
namespace Botan {
-namespace {
-
-void xtea_encrypt_4(const byte in[32], byte out[32], const u32bit EK[64])
- {
- u32bit L0, R0, L1, R1, L2, R2, L3, R3;
- load_be(in, L0, R0, L1, R1, L2, R2, L3, R3);
-
- for(size_t i = 0; i != 32; ++i)
- {
- L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[2*i];
- L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[2*i];
- L2 += (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[2*i];
- L3 += (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[2*i];
-
- R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[2*i+1];
- R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[2*i+1];
- R2 += (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[2*i+1];
- R3 += (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[2*i+1];
- }
-
- store_be(out, L0, R0, L1, R1, L2, R2, L3, R3);
- }
-
-void xtea_decrypt_4(const byte in[32], byte out[32], const u32bit EK[64])
- {
- u32bit L0, R0, L1, R1, L2, R2, L3, R3;
- load_be(in, L0, R0, L1, R1, L2, R2, L3, R3);
-
- for(size_t i = 0; i != 32; ++i)
- {
- R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[63 - 2*i];
- R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[63 - 2*i];
- R2 -= (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[63 - 2*i];
- R3 -= (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[63 - 2*i];
-
- L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[62 - 2*i];
- L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[62 - 2*i];
- L2 -= (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[62 - 2*i];
- L3 -= (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[62 - 2*i];
- }
-
- store_be(out, L0, R0, L1, R1, L2, R2, L3, R3);
- }
-
-}
-
/*
* XTEA Encryption
*/
void XTEA::encrypt_n(const byte in[], byte out[], size_t blocks) const
{
- while(blocks >= 4)
- {
- xtea_encrypt_4(in, out, &(this->m_EK[0]));
- in += 4 * BLOCK_SIZE;
- out += 4 * BLOCK_SIZE;
- blocks -= 4;
- }
+ const u32bit* EK = &m_EK[0]; // pointer to the first element of m_EK, read by both loops below
- for(size_t i = 0; i != blocks; ++i)
+ const size_t blocks4 = blocks / 4; // number of 4-block groups handled by the first loop
+ const size_t blocks_left = blocks % 4; // leftover blocks handled one at a time by the second loop
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++) // iteration i only touches offset 4*BLOCK_SIZE*i of in and out
{
- u32bit L = load_be<u32bit>(in, 0);
- u32bit R = load_be<u32bit>(in, 1);
+ u32bit L0, R0, L1, R1, L2, R2, L3, R3;
+ load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3);
- for(size_t j = 0; j != 32; ++j)
+ for(size_t r = 0; r != 32; ++r) // 32 iterations, each using EK[2*r] then EK[2*r+1] on all four L/R pairs
{
- L += (((R << 4) ^ (R >> 5)) + R) ^ m_EK[2*j];
- R += (((L << 4) ^ (L >> 5)) + L) ^ m_EK[2*j+1];
+ L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[2*r];
+ L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[2*r];
+ L2 += (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[2*r];
+ L3 += (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[2*r];
+
+ R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[2*r+1];
+ R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[2*r+1];
+ R2 += (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[2*r+1];
+ R3 += (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[2*r+1];
}
- store_be(out, L, R);
+ store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3);
+ }
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i) // at most 3 iterations, since blocks_left is blocks % 4
+ {
+ u32bit L, R;
+ load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R); // skip the 4*blocks4 blocks already handled by the first loop
+
+ for(size_t r = 0; r != 32; ++r) // same 32-iteration schedule as above, applied to a single L/R pair
+ {
+ L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*r];
+ R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*r+1];
+ }
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R);
}
}
@@ -92,29 +61,44 @@ void XTEA::encrypt_n(const byte in[], byte out[], size_t blocks) const
*/
void XTEA::decrypt_n(const byte in[], byte out[], size_t blocks) const
{
- while(blocks >= 4)
- {
- xtea_decrypt_4(in, out, &(this->m_EK[0]));
- in += 4 * BLOCK_SIZE;
- out += 4 * BLOCK_SIZE;
- blocks -= 4;
- }
+ const u32bit* EK = &m_EK[0]; // pointer to the first element of m_EK, read by both loops below
- for(size_t i = 0; i != blocks; ++i)
+ const size_t blocks4 = blocks / 4; // number of 4-block groups handled by the first loop
+ const size_t blocks_left = blocks % 4; // leftover blocks handled one at a time by the second loop
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++) // iteration i only touches offset 4*BLOCK_SIZE*i of in and out
{
- u32bit L = load_be<u32bit>(in, 0);
- u32bit R = load_be<u32bit>(in, 1);
+ u32bit L0, R0, L1, R1, L2, R2, L3, R3;
+ load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3);
- for(size_t j = 0; j != 32; ++j)
+ for(size_t r = 0; r != 32; ++r) // 32 iterations, reading EK from index 63 down to index 0
{
- R -= (((L << 4) ^ (L >> 5)) + L) ^ m_EK[63 - 2*j];
- L -= (((R << 4) ^ (R >> 5)) + R) ^ m_EK[62 - 2*j];
+ R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[63 - 2*r];
+ R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[63 - 2*r];
+ R2 -= (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[63 - 2*r];
+ R3 -= (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[63 - 2*r];
+
+ L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[62 - 2*r];
+ L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[62 - 2*r];
+ L2 -= (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[62 - 2*r];
+ L3 -= (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[62 - 2*r];
}
- store_be(out, L, R);
+ store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3);
+ }
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i) // at most 3 iterations, since blocks_left is blocks % 4
+ {
+ u32bit L, R;
+ load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R); // skip the 4*blocks4 blocks already handled by the first loop
+
+ for(size_t r = 0; r != 32; ++r) // same reversed schedule as above, applied to a single L/R pair
+ {
+ R -= (((L << 4) ^ (L >> 5)) + L) ^ EK[63 - 2*r];
+ L -= (((R << 4) ^ (R >> 5)) + R) ^ EK[62 - 2*r];
+ }
- in += BLOCK_SIZE;
- out += BLOCK_SIZE;
+ store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R);
}
}
diff --git a/src/lib/utils/compiler.h b/src/lib/utils/compiler.h
new file mode 100644
index 000000000..50441208b
--- /dev/null
+++ b/src/lib/utils/compiler.h
@@ -0,0 +1,169 @@
+/*
+* Define useful compiler-specific macros
+* (C) 2016 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_UTIL_COMPILER_FLAGS_H__
+#define BOTAN_UTIL_COMPILER_FLAGS_H__
+
+/* Should we use GCC-style inline assembler? Set to 1 whenever __GNUC__ is defined, unless the macro was already defined. */
+#if !defined(BOTAN_USE_GCC_INLINE_ASM) && defined(__GNUC__)
+ #define BOTAN_USE_GCC_INLINE_ASM 1
+#endif
+
+/*
+* Define BOTAN_GCC_VERSION as major*100 + minor*10 + patchlevel (4.8.2 gives 482), or 0 when __GNUC__ is not defined. NOTE(review): a minor or patchlevel of 10 or more carries into the next digit (4.10.0 and 5.0.0 both give 500).
+*/
+#ifdef __GNUC__
+ #define BOTAN_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__)
+#else
+ #define BOTAN_GCC_VERSION 0
+#endif
+
+/*
+* Define BOTAN_CLANG_VERSION as major*10 + minor (3.8 gives 38), or 0 when __clang__ is not defined
+*/
+#ifdef __clang__
+ #define BOTAN_CLANG_VERSION (__clang_major__ * 10 + __clang_minor__)
+#else
+ #define BOTAN_CLANG_VERSION 0
+#endif
+
+/*
+* Define BOTAN_FUNC_ISA(isa): expands to the target(isa) function attribute when __GNUG__ or __clang__ is defined, and to nothing otherwise
+*/
+#if defined(__GNUG__) || defined(__clang__)
+ #define BOTAN_FUNC_ISA(isa) __attribute__ ((target(isa)))
+#else
+ #define BOTAN_FUNC_ISA(isa)
+#endif
+
+/*
+* Define BOTAN_WARN_UNUSED_RESULT: expands to the warn_unused_result attribute when __GNUG__ or __clang__ is defined, and to nothing otherwise
+*/
+#if defined(__GNUG__) || defined(__clang__)
+ #define BOTAN_WARN_UNUSED_RESULT __attribute__ ((warn_unused_result))
+#else
+ #define BOTAN_WARN_UNUSED_RESULT
+#endif
+
+/*
+* Define BOTAN_DEPRECATED(msg): marks a declaration as deprecated and hands msg to the compiler; expands to nothing when BOTAN_NO_DEPRECATED_WARNINGS is defined or no branch below matches
+*/
+#if !defined(BOTAN_NO_DEPRECATED_WARNINGS)
+ // msg is forwarded to the compiler on every branch below
+ #if defined(__clang__)
+ #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
+
+ #elif defined(_MSC_VER)
+ #define BOTAN_DEPRECATED(msg) __declspec(deprecated(msg))
+
+ #elif defined(__GNUG__)
+ // msg supported since GCC 4.5, earliest we support is 4.8
+ #define BOTAN_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
+ #endif
+
+#endif
+
+#if !defined(BOTAN_DEPRECATED)
+ #define BOTAN_DEPRECATED(msg)
+#endif
+
+/*
+* Define BOTAN_NORETURN, unless already defined: the __noreturn__ attribute for Clang/GCC, __declspec(noreturn) for MSVC, nothing otherwise
+*/
+#if !defined(BOTAN_NORETURN)
+
+ #if defined (__clang__) || defined (__GNUG__)
+ #define BOTAN_NORETURN __attribute__ ((__noreturn__))
+
+ #elif defined (_MSC_VER)
+ #define BOTAN_NORETURN __declspec(noreturn)
+
+ #else
+ #define BOTAN_NORETURN
+ #endif
+
+#endif
+
+/*
+* Define BOTAN_CURRENT_FUNCTION: __FUNCTION__ when _MSC_VER is defined, __func__ otherwise
+*/
+#if defined(_MSC_VER)
+ #define BOTAN_CURRENT_FUNCTION __FUNCTION__
+#else
+ #define BOTAN_CURRENT_FUNCTION __func__
+#endif
+
+/*
+* Define BOTAN_NOEXCEPT: _NOEXCEPT when _MSC_VER is below 1900 (for MSVC 2013), the noexcept keyword otherwise
+*/
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+ // noexcept is not supported in VS 2013; presumably <yvals.h> is what supplies _NOEXCEPT (TODO confirm)
+ #include <yvals.h>
+ #define BOTAN_NOEXCEPT _NOEXCEPT
+#else
+ #define BOTAN_NOEXCEPT noexcept
+#endif
+
+/*
+* Define BOTAN_PARALLEL_FOR, unless already defined: _Cilk_for under Cilk Plus, a for preceded by the "omp parallel for" pragma under OpenMP, a plain for otherwise
+*/
+#if !defined(BOTAN_PARALLEL_FOR)
+
+#if defined(BOTAN_TARGET_HAS_CILKPLUS)
+ #define BOTAN_PARALLEL_FOR _Cilk_for
+#elif defined(BOTAN_TARGET_HAS_OPENMP)
+ #define BOTAN_PARALLEL_FOR _Pragma("omp parallel for") for
+#else
+ #define BOTAN_PARALLEL_FOR for
+#endif
+
+#endif
+
+/*
+* Define BOTAN_PARALLEL_SIMD_FOR, unless already defined: a for preceded by a vectorization pragma ("simd" under Cilk Plus, "omp simd" under OpenMP, "GCC ivdep" when the compiler is GCC), a plain for otherwise
+*/
+#if !defined(BOTAN_PARALLEL_SIMD_FOR)
+
+#if defined(BOTAN_TARGET_HAS_CILKPLUS)
+ #define BOTAN_PARALLEL_SIMD_FOR _Pragma("simd") for
+#elif defined(BOTAN_TARGET_HAS_OPENMP)
+ #define BOTAN_PARALLEL_SIMD_FOR _Pragma("omp simd") for
+#elif defined(BOTAN_TARGET_COMPILER_IS_GCC)
+ #define BOTAN_PARALLEL_SIMD_FOR _Pragma("GCC ivdep") for
+#else
+ #define BOTAN_PARALLEL_SIMD_FOR for
+#endif
+
+#endif
+
+/*
+* Define BOTAN_PARALLEL_SPAWN, unless already defined: _Cilk_spawn under Cilk Plus, nothing otherwise
+*/
+#if !defined(BOTAN_PARALLEL_SPAWN)
+
+#if defined(BOTAN_TARGET_HAS_CILKPLUS)
+ #define BOTAN_PARALLEL_SPAWN _Cilk_spawn
+#else
+ #define BOTAN_PARALLEL_SPAWN
+#endif
+
+#endif
+
+/*
+* Define BOTAN_PARALLEL_SYNC, unless already defined: _Cilk_sync under Cilk Plus, BOTAN_FORCE_SEMICOLON otherwise. NOTE(review): BOTAN_FORCE_SEMICOLON is not defined in this header; confirm it is provided wherever BOTAN_PARALLEL_SYNC is used.
+*/
+#if !defined(BOTAN_PARALLEL_SYNC)
+
+#if defined(BOTAN_TARGET_HAS_CILKPLUS)
+ #define BOTAN_PARALLEL_SYNC _Cilk_sync
+#else
+ #define BOTAN_PARALLEL_SYNC BOTAN_FORCE_SEMICOLON
+#endif
+
+#endif
+
+#endif
diff --git a/src/lib/utils/info.txt b/src/lib/utils/info.txt
index 75a428a83..820dd407d 100644
--- a/src/lib/utils/info.txt
+++ b/src/lib/utils/info.txt
@@ -8,6 +8,7 @@ bswap.h
calendar.h
charset.h
cpuid.h
+compiler.h
data_src.h
database.h
exceptn.h
diff --git a/src/lib/utils/loadstor.h b/src/lib/utils/loadstor.h
index 9ae9fda0e..15ff6a708 100644
--- a/src/lib/utils/loadstor.h
+++ b/src/lib/utils/loadstor.h
@@ -324,10 +324,10 @@ inline void load_le(T out[],
{
if(count > 0)
{
-#if defined(BOTAN_TARGET_CPU_HAS_KNOWN_ENDIANNESS)
+#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
+ std::memcpy(out, in, sizeof(T)*count);
+#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
std::memcpy(out, in, sizeof(T)*count);
-
-#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
const size_t blocks = count - (count % 4);
const size_t left = count - blocks;
@@ -336,8 +336,6 @@ inline void load_le(T out[],
for(size_t i = 0; i != left; ++i)
out[blocks+i] = reverse_bytes(out[blocks+i]);
-#endif
-
#else
for(size_t i = 0; i != count; ++i)
out[i] = load_le<T>(in, i);
@@ -416,10 +414,10 @@ inline void load_be(T out[],
{
if(count > 0)
{
-#if defined(BOTAN_TARGET_CPU_HAS_KNOWN_ENDIANNESS)
+#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
+ std::memcpy(out, in, sizeof(T)*count);
+#elif defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
std::memcpy(out, in, sizeof(T)*count);
-
-#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
const size_t blocks = count - (count % 4);
const size_t left = count - blocks;
@@ -428,8 +426,6 @@ inline void load_be(T out[],
for(size_t i = 0; i != left; ++i)
out[blocks+i] = reverse_bytes(out[blocks+i]);
-#endif
-
#else
for(size_t i = 0; i != count; ++i)
out[i] = load_be<T>(in, i);