diff options
author | lloyd <[email protected]> | 2007-05-31 03:25:19 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2007-05-31 03:25:19 +0000 |
commit | 55608e7dd1aa593944f967f2549564e4f42b654e (patch) | |
tree | ec2ec03a762a6dac82eb608487d5394370135624 | |
parent | 22ecdc45a0efa4c444d0b7010b7cd743aeb68c57 (diff) |
Write functions to handle loading and saving words a block at a time, taking into
account endian differences.
The current code does not take advantage of the knowledge of which endianness
we are running on; an optimization suggested by Yves Jerschow is to use (unsafe)
casts to speed up the load/store operations. This turns out to provide large
performance increases (30% or more) in some cases.
Even without the unsafe casts, this version seems to average a few percent
faster, probably because the longer loading loops have been partially or
fully unrolled.
This also makes the code implementing low-level algorithms like ciphers and
hashes a bit more succinct.
41 files changed, 449 insertions, 431 deletions
diff --git a/include/adler32.h b/include/adler32.h index 4a6975e90..4beb3bdb6 100644 --- a/include/adler32.h +++ b/include/adler32.h @@ -25,7 +25,7 @@ class Adler32 : public HashFunction void add_data(const byte[], u32bit); void final_result(byte[]); void hash(const byte[], u32bit); - u32bit S1, S2; + u16bit S1, S2; }; } diff --git a/include/bit_ops.h b/include/bit_ops.h index 34f7365b2..0636cac41 100644 --- a/include/bit_ops.h +++ b/include/bit_ops.h @@ -59,6 +59,146 @@ u32bit low_bit(u64bit); u32bit significant_bytes(u64bit); u32bit hamming_weight(u64bit); +/************************************************* +* Endian-Specific Word Loading Operations * +*************************************************/ +template<typename T> +inline T load_be(const byte in[], u32bit off) + { + in += off * sizeof(T); + T out = 0; + for(u32bit j = 0; j != sizeof(T); j++) + out = (out << 8) | in[j]; + return out; + } + +template<typename T> +inline T load_le(const byte in[], u32bit off) + { + in += off * sizeof(T); + T out = 0; + for(u32bit j = 0; j != sizeof(T); j++) + out = (out << 8) | in[sizeof(T)-1-j]; + return out; + } + +template<> +inline u32bit load_be<u32bit>(const byte in[], u32bit off) + { + in += off * sizeof(u32bit); + return make_u32bit(in[0], in[1], in[2], in[3]); + } + +template<> +inline u32bit load_le<u32bit>(const byte in[], u32bit off) + { + in += off * sizeof(u32bit); + return make_u32bit(in[3], in[2], in[1], in[0]); + } + +template<> +inline u64bit load_be<u64bit>(const byte in[], u32bit off) + { + in += off * sizeof(u64bit); + return make_u64bit(in[0], in[1], in[2], in[3], + in[4], in[5], in[6], in[7]); + } + +template<> +inline u64bit load_le<u64bit>(const byte in[], u32bit off) + { + in += off * sizeof(u64bit); + return make_u64bit(in[7], in[6], in[5], in[4], + in[3], in[2], in[1], in[0]); + } + +/************************************************* +* Endian-Specific Word Storing Operations * +*************************************************/ +inline void 
store_be(u16bit in, byte out[2]) + { + out[0] = get_byte(0, in); + out[1] = get_byte(1, in); + } + +inline void store_le(u16bit in, byte out[2]) + { + out[0] = get_byte(1, in); + out[1] = get_byte(0, in); + } + +inline void store_be(u32bit in, byte out[4]) + { + out[0] = get_byte(0, in); + out[1] = get_byte(1, in); + out[2] = get_byte(2, in); + out[3] = get_byte(3, in); + } + +inline void store_le(u32bit in, byte out[4]) + { + out[0] = get_byte(3, in); + out[1] = get_byte(2, in); + out[2] = get_byte(1, in); + out[3] = get_byte(0, in); + } + +inline void store_be(u64bit in, byte out[8]) + { + out[0] = get_byte(0, in); + out[1] = get_byte(1, in); + out[2] = get_byte(2, in); + out[3] = get_byte(3, in); + out[4] = get_byte(4, in); + out[5] = get_byte(5, in); + out[6] = get_byte(6, in); + out[7] = get_byte(7, in); + } + +inline void store_le(u64bit in, byte out[8]) + { + out[0] = get_byte(7, in); + out[1] = get_byte(6, in); + out[2] = get_byte(5, in); + out[3] = get_byte(4, in); + out[4] = get_byte(3, in); + out[5] = get_byte(2, in); + out[6] = get_byte(1, in); + out[7] = get_byte(0, in); + } + +template<typename T> +inline void store_le(byte out[], T a, T b) + { + store_le(a, out + (0 * sizeof(T))); + store_le(b, out + (1 * sizeof(T))); + } + +template<typename T> +inline void store_be(byte out[], T a, T b) + { + store_be(a, out + (0 * sizeof(T))); + store_be(b, out + (1 * sizeof(T))); + } + +template<typename T> +inline void store_le(byte out[], T a, T b, T c, T d) + { + store_le(a, out + (0 * sizeof(T))); + store_le(b, out + (1 * sizeof(T))); + store_le(c, out + (2 * sizeof(T))); + store_le(d, out + (3 * sizeof(T))); + } + +template<typename T> +inline void store_be(byte out[], T a, T b, T c, T d) + { + store_be(a, out + (0 * sizeof(T))); + store_be(b, out + (1 * sizeof(T))); + store_be(c, out + (2 * sizeof(T))); + store_be(d, out + (3 * sizeof(T))); + } + } #endif diff --git a/include/botan.h b/include/botan.h index 8ae97ace3..007bf411e 100644 --- a/include/botan.h 
+++ b/include/botan.h @@ -9,5 +9,4 @@ #include <botan/lookup.h> #include <botan/rng.h> #include <botan/version.h> -#include <botan/bit_ops.h> #include <botan/parsing.h> diff --git a/include/cast256.h b/include/cast256.h index d55344e99..152580cc5 100644 --- a/include/cast256.h +++ b/include/cast256.h @@ -24,11 +24,10 @@ class CAST_256 : public BlockCipher void enc(const byte[], byte[]) const; void dec(const byte[], byte[]) const; void key(const byte[], u32bit); - void round1(u32bit&, u32bit, u32bit, u32bit) const; - void round2(u32bit&, u32bit, u32bit, u32bit) const; - void round3(u32bit&, u32bit, u32bit, u32bit) const; + static const u32bit KEY_MASK[192]; static const byte KEY_ROT[32]; + SecureBuffer<u32bit, 48> MK; SecureBuffer<byte, 48> RK; }; diff --git a/include/square.h b/include/square.h index 4070f9c47..702fabad5 100644 --- a/include/square.h +++ b/include/square.h @@ -24,11 +24,13 @@ class Square : public BlockCipher void enc(const byte[], byte[]) const; void dec(const byte[], byte[]) const; void key(const byte[], u32bit); + static void transform(u32bit[4]); - static byte mul(byte, byte); + static const byte SE[256], SD[256], Log[256], ALog[255]; static const u32bit TE0[256], TE1[256], TE2[256], TE3[256], TD0[256], TD1[256], TD2[256], TD3[256]; + SecureBuffer<u32bit, 28> EK, DK; SecureBuffer<byte, 32> ME, MD; }; diff --git a/src/adler32.cpp b/src/adler32.cpp index b27fa247b..51f6cbea3 100644 --- a/src/adler32.cpp +++ b/src/adler32.cpp @@ -65,10 +65,7 @@ void Adler32::add_data(const byte input[], u32bit length) *************************************************/ void Adler32::final_result(byte output[]) { - output[0] = get_byte(2, S2); - output[1] = get_byte(3, S2); - output[2] = get_byte(2, S1); - output[3] = get_byte(3, S1); + store_be(output, S2, S1); clear(); } diff --git a/src/aes.cpp b/src/aes.cpp index f89ebf529..b345763cb 100644 --- a/src/aes.cpp +++ b/src/aes.cpp @@ -134,7 +134,8 @@ void AES::key(const byte key[], u32bit length) const u32bit X = 
length / 4; for(u32bit j = 0; j != X; ++j) - XEK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + XEK[j] = load_be<u32bit>(key, j); + for(u32bit j = X; j < 4*(ROUNDS+1); j += X) { XEK[j] = XEK[j-X] ^ S(rotate_left(XEK[j-1], 8)) ^ RC[(j-X)/X]; diff --git a/src/blowfish.cpp b/src/blowfish.cpp index 384a12461..8fd43ee7e 100644 --- a/src/blowfish.cpp +++ b/src/blowfish.cpp @@ -13,8 +13,8 @@ namespace Botan { *************************************************/ void Blowfish::enc(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0); + u32bit R = load_be<u32bit>(in, 1); for(u32bit j = 0; j != 16; j += 2) { @@ -29,10 +29,7 @@ void Blowfish::enc(const byte in[], byte out[]) const L ^= P[16]; R ^= P[17]; - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* @@ -40,8 +37,8 @@ void Blowfish::enc(const byte in[], byte out[]) const *************************************************/ void Blowfish::dec(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0); + u32bit R = load_be<u32bit>(in, 1); for(u32bit j = 17; j != 1; j -= 2) { @@ -56,10 +53,7 @@ void Blowfish::dec(const byte in[], byte out[]) const L ^= P[1]; R ^= P[0]; - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* @@ -68,9 +62,11 @@ void Blowfish::dec(const byte in[], byte out[]) const void Blowfish::key(const byte 
key[], u32bit length) { clear(); + for(u32bit j = 0, k = 0; j != 18; ++j, k += 4) P[j] ^= make_u32bit(key[(k ) % length], key[(k+1) % length], - key[(k+2) % length], key[(k+3) % length]); + key[(k+2) % length], key[(k+3) % length]); + u32bit L = 0, R = 0; generate_sbox(P, 18, L, R); generate_sbox(S1, 256, L, R); diff --git a/src/cast128.cpp b/src/cast128.cpp index ec87afb08..481897810 100644 --- a/src/cast128.cpp +++ b/src/cast128.cpp @@ -47,8 +47,8 @@ inline void R3(u32bit& L, u32bit R, u32bit MK, u32bit RK) *************************************************/ void CAST_128::enc(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0); + u32bit R = load_be<u32bit>(in, 1); R1(L, R, MK[ 0], RK[ 0]); R2(R, L, MK[ 1], RK[ 1]); @@ -67,10 +67,7 @@ void CAST_128::enc(const byte in[], byte out[]) const R3(L, R, MK[14], RK[14]); R1(R, L, MK[15], RK[15]); - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* @@ -78,8 +75,8 @@ void CAST_128::enc(const byte in[], byte out[]) const *************************************************/ void CAST_128::dec(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0); + u32bit R = load_be<u32bit>(in, 1); R1(L, R, MK[15], RK[15]); R3(R, L, MK[14], RK[14]); @@ -98,10 +95,7 @@ void CAST_128::dec(const byte in[], byte out[]) const R2(L, R, MK[ 1], RK[ 1]); R1(R, L, MK[ 0], RK[ 0]); - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + 
store_be(out, R, L); } /************************************************* diff --git a/src/cast256.cpp b/src/cast256.cpp index 21ccbea63..1064ed0de 100644 --- a/src/cast256.cpp +++ b/src/cast256.cpp @@ -8,15 +8,50 @@ namespace Botan { +namespace { + +/************************************************* +* CAST-256 Round Type 1 * +*************************************************/ +void round1(u32bit& out, u32bit in, u32bit mask, u32bit rot) + { + u32bit temp = rotate_left(mask + in, rot); + out ^= (CAST_SBOX1[get_byte(0, temp)] ^ CAST_SBOX2[get_byte(1, temp)]) - + CAST_SBOX3[get_byte(2, temp)] + CAST_SBOX4[get_byte(3, temp)]; + } + +/************************************************* +* CAST-256 Round Type 2 * +*************************************************/ +void round2(u32bit& out, u32bit in, u32bit mask, u32bit rot) + { + u32bit temp = rotate_left(mask ^ in, rot); + out ^= (CAST_SBOX1[get_byte(0, temp)] - CAST_SBOX2[get_byte(1, temp)] + + CAST_SBOX3[get_byte(2, temp)]) ^ CAST_SBOX4[get_byte(3, temp)]; + } + +/************************************************* +* CAST-256 Round Type 3 * +*************************************************/ +void round3(u32bit& out, u32bit in, u32bit mask, u32bit rot) + { + u32bit temp = rotate_left(mask - in, rot); + out ^= ((CAST_SBOX1[get_byte(0, temp)] + CAST_SBOX2[get_byte(1, temp)]) ^ + CAST_SBOX3[get_byte(2, temp)]) - CAST_SBOX4[get_byte(3, temp)]; + } + +} + /************************************************* * CAST-256 Encryption * *************************************************/ void CAST_256::enc(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[ 0], in[ 1], in[ 2], in[ 3]), - B = make_u32bit(in[ 4], in[ 5], in[ 6], in[ 7]), - C = make_u32bit(in[ 8], in[ 9], in[10], in[11]), - D = make_u32bit(in[12], in[13], in[14], in[15]); + u32bit A = load_be<u32bit>(in, 0); + u32bit B = load_be<u32bit>(in, 1); + u32bit C = load_be<u32bit>(in, 2); + u32bit D = load_be<u32bit>(in, 3); + round1(C, D, MK[ 0], RK[ 0]); 
round2(B, C, MK[ 1], RK[ 1]); round3(A, B, MK[ 2], RK[ 2]); round1(D, A, MK[ 3], RK[ 3]); round1(C, D, MK[ 4], RK[ 4]); round2(B, C, MK[ 5], RK[ 5]); @@ -41,14 +76,8 @@ void CAST_256::enc(const byte in[], byte out[]) const round2(B, C, MK[41], RK[41]); round1(C, D, MK[40], RK[40]); round1(D, A, MK[47], RK[47]); round3(A, B, MK[46], RK[46]); round2(B, C, MK[45], RK[45]); round1(C, D, MK[44], RK[44]); - out[ 0] = get_byte(0, A); out[ 1] = get_byte(1, A); - out[ 2] = get_byte(2, A); out[ 3] = get_byte(3, A); - out[ 4] = get_byte(0, B); out[ 5] = get_byte(1, B); - out[ 6] = get_byte(2, B); out[ 7] = get_byte(3, B); - out[ 8] = get_byte(0, C); out[ 9] = get_byte(1, C); - out[10] = get_byte(2, C); out[11] = get_byte(3, C); - out[12] = get_byte(0, D); out[13] = get_byte(1, D); - out[14] = get_byte(2, D); out[15] = get_byte(3, D); + + store_be(out, A, B, C, D); } /************************************************* @@ -56,10 +85,11 @@ void CAST_256::enc(const byte in[], byte out[]) const *************************************************/ void CAST_256::dec(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[ 0], in[ 1], in[ 2], in[ 3]), - B = make_u32bit(in[ 4], in[ 5], in[ 6], in[ 7]), - C = make_u32bit(in[ 8], in[ 9], in[10], in[11]), - D = make_u32bit(in[12], in[13], in[14], in[15]); + u32bit A = load_be<u32bit>(in, 0); + u32bit B = load_be<u32bit>(in, 1); + u32bit C = load_be<u32bit>(in, 2); + u32bit D = load_be<u32bit>(in, 3); + round1(C, D, MK[44], RK[44]); round2(B, C, MK[45], RK[45]); round3(A, B, MK[46], RK[46]); round1(D, A, MK[47], RK[47]); round1(C, D, MK[40], RK[40]); round2(B, C, MK[41], RK[41]); @@ -84,44 +114,8 @@ void CAST_256::dec(const byte in[], byte out[]) const round2(B, C, MK[ 5], RK[ 5]); round1(C, D, MK[ 4], RK[ 4]); round1(D, A, MK[ 3], RK[ 3]); round3(A, B, MK[ 2], RK[ 2]); round2(B, C, MK[ 1], RK[ 1]); round1(C, D, MK[ 0], RK[ 0]); - out[ 0] = get_byte(0, A); out[ 1] = get_byte(1, A); - out[ 2] = get_byte(2, A); out[ 3] = get_byte(3, 
A); - out[ 4] = get_byte(0, B); out[ 5] = get_byte(1, B); - out[ 6] = get_byte(2, B); out[ 7] = get_byte(3, B); - out[ 8] = get_byte(0, C); out[ 9] = get_byte(1, C); - out[10] = get_byte(2, C); out[11] = get_byte(3, C); - out[12] = get_byte(0, D); out[13] = get_byte(1, D); - out[14] = get_byte(2, D); out[15] = get_byte(3, D); - } -/************************************************* -* CAST-256 Round Type 1 * -*************************************************/ -void CAST_256::round1(u32bit& out, u32bit in, u32bit mask, u32bit rot) const - { - u32bit temp = rotate_left(mask + in, rot); - out ^= (CAST_SBOX1[get_byte(0, temp)] ^ CAST_SBOX2[get_byte(1, temp)]) - - CAST_SBOX3[get_byte(2, temp)] + CAST_SBOX4[get_byte(3, temp)]; - } - -/************************************************* -* CAST-256 Round Type 2 * -*************************************************/ -void CAST_256::round2(u32bit& out, u32bit in, u32bit mask, u32bit rot) const - { - u32bit temp = rotate_left(mask ^ in, rot); - out ^= (CAST_SBOX1[get_byte(0, temp)] - CAST_SBOX2[get_byte(1, temp)] + - CAST_SBOX3[get_byte(2, temp)]) ^ CAST_SBOX4[get_byte(3, temp)]; - } - -/************************************************* -* CAST-256 Round Type 3 * -*************************************************/ -void CAST_256::round3(u32bit& out, u32bit in, u32bit mask, u32bit rot) const - { - u32bit temp = rotate_left(mask - in, rot); - out ^= ((CAST_SBOX1[get_byte(0, temp)] + CAST_SBOX2[get_byte(1, temp)]) ^ - CAST_SBOX3[get_byte(2, temp)]) - CAST_SBOX4[get_byte(3, temp)]; + store_be(out, A, B, C, D); } /************************************************* diff --git a/src/crc32.cpp b/src/crc32.cpp index b10a2eebe..e897cbc02 100644 --- a/src/crc32.cpp +++ b/src/crc32.cpp @@ -93,8 +93,7 @@ void CRC32::add_data(const byte input[], u32bit length) void CRC32::final_result(byte output[]) { crc ^= 0xFFFFFFFF; - for(u32bit j = 0; j != 4; ++j) - output[j] = get_byte(j, crc); + store_be(crc, output); clear(); } diff --git a/src/des.cpp 
b/src/des.cpp index 47bdb8f56..e8f173c5c 100644 --- a/src/des.cpp +++ b/src/des.cpp @@ -13,17 +13,13 @@ namespace Botan { *************************************************/ void DES::enc(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0), R = load_be<u32bit>(in, 1); IP(L, R); raw_encrypt(L, R); FP(L, R); - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* @@ -31,17 +27,13 @@ void DES::enc(const byte in[], byte out[]) const *************************************************/ void DES::dec(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0), R = load_be<u32bit>(in, 1); IP(L, R); raw_decrypt(L, R); FP(L, R); - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* @@ -193,8 +185,7 @@ void DES::key(const byte key[], u32bit) *************************************************/ void TripleDES::enc(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0), R = load_be<u32bit>(in, 1); DES::IP(L, R); des1.raw_encrypt(L, R); @@ -202,10 +193,7 @@ void TripleDES::enc(const byte in[], byte out[]) const des3.raw_encrypt(L, R); DES::FP(L, R); - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] 
= get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* @@ -213,8 +201,7 @@ void TripleDES::enc(const byte in[], byte out[]) const *************************************************/ void TripleDES::dec(const byte in[], byte out[]) const { - u32bit L = make_u32bit(in[0], in[1], in[2], in[3]), - R = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0), R = load_be<u32bit>(in, 1); DES::IP(L, R); des3.raw_decrypt(L, R); @@ -222,10 +209,7 @@ void TripleDES::dec(const byte in[], byte out[]) const des1.raw_decrypt(L, R); DES::FP(L, R); - out[0] = get_byte(0, R); out[1] = get_byte(1, R); - out[2] = get_byte(2, R); out[3] = get_byte(3, R); - out[4] = get_byte(0, L); out[5] = get_byte(1, L); - out[6] = get_byte(2, L); out[7] = get_byte(3, L); + store_be(out, R, L); } /************************************************* diff --git a/src/fork256.cpp b/src/fork256.cpp index abd8e98b3..af2d7a756 100644 --- a/src/fork256.cpp +++ b/src/fork256.cpp @@ -62,7 +62,7 @@ void FORK_256::hash(const byte input[]) H1 = H2 = H3 = H4 = digest[7]; for(u32bit j = 0; j != 16; ++j) - M[j] = make_u32bit(input[4*j], input[4*j+1], input[4*j+2], input[4*j+3]); + M[j] = load_be<u32bit>(input, j); step(A1, B1, C1, D1, E1, F1, G1, H1, M[ 0], M[ 1], DELTA[ 0], DELTA[ 1]); step(A2, B2, C2, D2, E2, F2, G2, H2, M[14], M[15], DELTA[15], DELTA[14]); @@ -119,8 +119,8 @@ void FORK_256::hash(const byte input[]) *************************************************/ void FORK_256::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(j % 4, digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_be(digest[j/4], output + j); } /************************************************* diff --git a/src/gost.cpp b/src/gost.cpp index a8a295c4b..d999d0d2d 100644 --- a/src/gost.cpp +++ b/src/gost.cpp @@ -13,8 +13,7 @@ namespace Botan { 
*************************************************/ void GOST::enc(const byte in[], byte out[]) const { - u32bit N1 = make_u32bit(in[3], in[2], in[1], in[0]), - N2 = make_u32bit(in[7], in[6], in[5], in[4]); + u32bit N1 = load_le<u32bit>(in, 0), N2 = load_le<u32bit>(in, 1); for(u32bit j = 0; j != 32; j += 2) { @@ -29,10 +28,7 @@ void GOST::enc(const byte in[], byte out[]) const SBOX3[get_byte(2, T0)] | SBOX4[get_byte(3, T0)]; } - out[0] = get_byte(3, N2); out[1] = get_byte(2, N2); - out[2] = get_byte(1, N2); out[3] = get_byte(0, N2); - out[4] = get_byte(3, N1); out[5] = get_byte(2, N1); - out[6] = get_byte(1, N1); out[7] = get_byte(0, N1); + store_le(out, N2, N1); } /************************************************* @@ -40,8 +36,7 @@ void GOST::enc(const byte in[], byte out[]) const *************************************************/ void GOST::dec(const byte in[], byte out[]) const { - u32bit N1 = make_u32bit(in[3], in[2], in[1], in[0]), - N2 = make_u32bit(in[7], in[6], in[5], in[4]); + u32bit N1 = load_le<u32bit>(in, 0), N2 = load_le<u32bit>(in, 1); for(u32bit j = 0; j != 32; j += 2) { @@ -56,10 +51,7 @@ void GOST::dec(const byte in[], byte out[]) const SBOX3[get_byte(2, T0)] | SBOX4[get_byte(3, T0)]; } - out[0] = get_byte(3, N2); out[1] = get_byte(2, N2); - out[2] = get_byte(1, N2); out[3] = get_byte(0, N2); - out[4] = get_byte(3, N1); out[5] = get_byte(2, N1); - out[6] = get_byte(1, N1); out[7] = get_byte(0, N1); + store_le(out, N2, N1); } /************************************************* @@ -69,7 +61,7 @@ void GOST::key(const byte key[], u32bit) { for(u32bit j = 0; j != 8; ++j) { - u32bit K = make_u32bit(key[4*j+3], key[4*j+2], key[4*j+1], key[4*j]); + u32bit K = load_le<u32bit>(key, j); EK[j] = EK[j+8] = EK[j+16] = K; } diff --git a/src/has160.cpp b/src/has160.cpp index ceb5a8b6b..7c6721903 100644 --- a/src/has160.cpp +++ b/src/has160.cpp @@ -58,7 +58,7 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, void HAS_160::hash(const byte input[]) 
{ for(u32bit j = 0; j != 16; ++j) - X[j] = make_u32bit(input[4*j+3], input[4*j+2], input[4*j+1], input[4*j]); + X[j] = load_le<u32bit>(input, j); u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3], E = digest[4]; @@ -120,8 +120,8 @@ void HAS_160::hash(const byte input[]) *************************************************/ void HAS_160::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(3 - (j % 4), digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_le(digest[j/4], output + j); } /************************************************* diff --git a/src/idea.cpp b/src/idea.cpp index 1f2facbb8..ed142ca9b 100644 --- a/src/idea.cpp +++ b/src/idea.cpp @@ -33,8 +33,10 @@ inline void mul(u16bit& a, u16bit b) *************************************************/ void IDEA::enc(const byte in[], byte out[]) const { - u16bit X1 = make_u16bit(in[0], in[1]), X2 = make_u16bit(in[2], in[3]), - X3 = make_u16bit(in[4], in[5]), X4 = make_u16bit(in[6], in[7]); + u16bit X1 = load_be<u16bit>(in, 0); + u16bit X2 = load_be<u16bit>(in, 1); + u16bit X3 = load_be<u16bit>(in, 2); + u16bit X4 = load_be<u16bit>(in, 3); for(u32bit j = 0; j != 8; ++j) { @@ -57,10 +59,7 @@ void IDEA::enc(const byte in[], byte out[]) const mul(X1, EK[48]); X2 += EK[50]; X3 += EK[49]; mul(X4, EK[51]); - out[0] = get_byte(0, X1); out[1] = get_byte(1, X1); - out[2] = get_byte(0, X3); out[3] = get_byte(1, X3); - out[4] = get_byte(0, X2); out[5] = get_byte(1, X2); - out[6] = get_byte(0, X4); out[7] = get_byte(1, X4); + store_be(out, X1, X3, X2, X4); } /************************************************* @@ -68,8 +67,11 @@ void IDEA::enc(const byte in[], byte out[]) const *************************************************/ void IDEA::dec(const byte in[], byte out[]) const { - u16bit X1 = make_u16bit(in[0], in[1]), X2 = make_u16bit(in[2], in[3]), - X3 = make_u16bit(in[4], in[5]), X4 = make_u16bit(in[6], in[7]); + u16bit X1 = load_be<u16bit>(in, 0); + u16bit 
X2 = load_be<u16bit>(in, 1); + u16bit X3 = load_be<u16bit>(in, 2); + u16bit X4 = load_be<u16bit>(in, 3); + for(u32bit j = 0; j != 8; ++j) { mul(X1, DK[6*j+0]); @@ -91,10 +93,7 @@ void IDEA::dec(const byte in[], byte out[]) const mul(X1, DK[48]); X2 += DK[50]; X3 += DK[49]; mul(X4, DK[51]); - out[0] = get_byte(0, X1); out[1] = get_byte(1, X1); - out[2] = get_byte(0, X3); out[3] = get_byte(1, X3); - out[4] = get_byte(0, X2); out[5] = get_byte(1, X2); - out[6] = get_byte(0, X4); out[7] = get_byte(1, X4); + store_be(out, X1, X3, X2, X4); } /************************************************* @@ -125,17 +124,20 @@ u16bit IDEA::mul_inv(u16bit x) void IDEA::key(const byte key[], u32bit) { for(u32bit j = 0; j != 8; ++j) - EK[j] = make_u16bit(key[2*j], key[2*j+1]); + EK[j] = load_be<u16bit>(key, j); + for(u32bit j = 1, k = 8, offset = 0; k != 52; j %= 8, ++j, ++k) { EK[j+7+offset] = (u16bit)((EK[(j % 8) + offset] << 9) | (EK[((j+1) % 8) + offset] >> 7)); offset += (j == 8) ? 8 : 0; } + DK[51] = mul_inv(EK[3]); DK[50] = (u16bit)-EK[2]; DK[49] = (u16bit)-EK[1]; DK[48] = mul_inv(EK[0]); + for(u32bit j = 1, k = 4, counter = 47; j != 8; ++j, k += 6) { DK[counter--] = EK[k+1]; @@ -145,6 +147,7 @@ void IDEA::key(const byte key[], u32bit) DK[counter--] = (u16bit)-EK[k+4]; DK[counter--] = mul_inv(EK[k+2]); } + DK[5] = EK[47]; DK[4] = EK[46]; DK[3] = mul_inv(EK[51]); diff --git a/src/kasumi.cpp b/src/kasumi.cpp index 8730c45fc..43eff7311 100644 --- a/src/kasumi.cpp +++ b/src/kasumi.cpp @@ -33,8 +33,10 @@ u16bit FI(u16bit I, u16bit K) *************************************************/ void KASUMI::enc(const byte in[], byte out[]) const { - u16bit B0 = make_u16bit(in[0], in[1]), B1 = make_u16bit(in[2], in[3]), - B2 = make_u16bit(in[4], in[5]), B3 = make_u16bit(in[6], in[7]); + u16bit B0 = load_be<u16bit>(in, 0); + u16bit B1 = load_be<u16bit>(in, 1); + u16bit B2 = load_be<u16bit>(in, 2); + u16bit B3 = load_be<u16bit>(in, 3); for(u32bit j = 0; j != 8; j += 2) { @@ -61,10 +63,7 @@ void 
KASUMI::enc(const byte in[], byte out[]) const B1 ^= R; } - out[0] = get_byte(0, B0); out[1] = get_byte(1, B0); - out[2] = get_byte(0, B1); out[3] = get_byte(1, B1); - out[4] = get_byte(0, B2); out[5] = get_byte(1, B2); - out[6] = get_byte(0, B3); out[7] = get_byte(1, B3); + store_be(out, B0, B1, B2, B3); } /************************************************* @@ -72,8 +71,10 @@ void KASUMI::enc(const byte in[], byte out[]) const *************************************************/ void KASUMI::dec(const byte in[], byte out[]) const { - u16bit B0 = make_u16bit(in[0], in[1]), B1 = make_u16bit(in[2], in[3]), - B2 = make_u16bit(in[4], in[5]), B3 = make_u16bit(in[6], in[7]); + u16bit B0 = load_be<u16bit>(in, 0); + u16bit B1 = load_be<u16bit>(in, 1); + u16bit B2 = load_be<u16bit>(in, 2); + u16bit B3 = load_be<u16bit>(in, 3); for(u32bit j = 0; j != 8; j += 2) { @@ -102,10 +103,7 @@ void KASUMI::dec(const byte in[], byte out[]) const B3 ^= R; } - out[0] = get_byte(0, B0); out[1] = get_byte(1, B0); - out[2] = get_byte(0, B1); out[3] = get_byte(1, B1); - out[4] = get_byte(0, B2); out[5] = get_byte(1, B2); - out[6] = get_byte(0, B3); out[7] = get_byte(1, B3); + store_be(out, B0, B1, B2, B3); } /************************************************* @@ -119,7 +117,7 @@ void KASUMI::key(const byte key[], u32bit) SecureBuffer<u16bit, 16> K; for(u32bit j = 0; j != 8; ++j) { - K[j] = make_u16bit(key[2*j], key[2*j+1]); + K[j] = load_be<u16bit>(key, j); K[j+8] = K[j] ^ RC[j]; } diff --git a/src/mars.cpp b/src/mars.cpp index cdc992da5..34327d717 100644 --- a/src/mars.cpp +++ b/src/mars.cpp @@ -49,10 +49,10 @@ u32bit gen_mask(u32bit input) *************************************************/ void MARS::enc(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]) + EK[0], - B = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]) + EK[1], - C = make_u32bit(in[11], in[10], in[ 9], in[ 8]) + EK[2], - D = make_u32bit(in[15], in[14], in[13], in[12]) + EK[3]; + u32bit A = 
load_le<u32bit>(in, 0) + EK[0]; + u32bit B = load_le<u32bit>(in, 1) + EK[1]; + u32bit C = load_le<u32bit>(in, 2) + EK[2]; + u32bit D = load_le<u32bit>(in, 3) + EK[3]; forward_mix(A, B, C, D); @@ -78,14 +78,7 @@ void MARS::enc(const byte in[], byte out[]) const A -= EK[36]; B -= EK[37]; C -= EK[38]; D -= EK[39]; - out[ 0] = get_byte(3, A); out[ 1] = get_byte(2, A); - out[ 2] = get_byte(1, A); out[ 3] = get_byte(0, A); - out[ 4] = get_byte(3, B); out[ 5] = get_byte(2, B); - out[ 6] = get_byte(1, B); out[ 7] = get_byte(0, B); - out[ 8] = get_byte(3, C); out[ 9] = get_byte(2, C); - out[10] = get_byte(1, C); out[11] = get_byte(0, C); - out[12] = get_byte(3, D); out[13] = get_byte(2, D); - out[14] = get_byte(1, D); out[15] = get_byte(0, D); + store_le(out, A, B, C, D); } /************************************************* @@ -93,10 +86,10 @@ void MARS::enc(const byte in[], byte out[]) const *************************************************/ void MARS::dec(const byte in[], byte out[]) const { - u32bit D = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]) + EK[36], - C = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]) + EK[37], - B = make_u32bit(in[11], in[10], in[ 9], in[ 8]) + EK[38], - A = make_u32bit(in[15], in[14], in[13], in[12]) + EK[39]; + u32bit A = load_le<u32bit>(in, 3) + EK[39]; + u32bit B = load_le<u32bit>(in, 2) + EK[38]; + u32bit C = load_le<u32bit>(in, 1) + EK[37]; + u32bit D = load_le<u32bit>(in, 0) + EK[36]; forward_mix(A, B, C, D); @@ -122,14 +115,7 @@ void MARS::dec(const byte in[], byte out[]) const A -= EK[3]; B -= EK[2]; C -= EK[1]; D -= EK[0]; - out[ 0] = get_byte(3, D); out[ 1] = get_byte(2, D); - out[ 2] = get_byte(1, D); out[ 3] = get_byte(0, D); - out[ 4] = get_byte(3, C); out[ 5] = get_byte(2, C); - out[ 6] = get_byte(1, C); out[ 7] = get_byte(0, C); - out[ 8] = get_byte(3, B); out[ 9] = get_byte(2, B); - out[10] = get_byte(1, B); out[11] = get_byte(0, B); - out[12] = get_byte(3, A); out[13] = get_byte(2, A); - out[14] = get_byte(1, A); out[15] = 
get_byte(0, A); + store_le(out, D, C, B, A); } /************************************************* @@ -230,7 +216,7 @@ void MARS::key(const byte key[], u32bit length) { SecureBuffer<u32bit, 15> T; for(u32bit j = 0; j != length / 4; ++j) - T[j] = make_u32bit(key[4*j+3], key[4*j+2], key[4*j+1], key[4*j]); + T[j] = load_le<u32bit>(key, j); T[length / 4] = length / 4; for(u32bit j = 0; j != 4; ++j) diff --git a/src/md4.cpp b/src/md4.cpp index a0a32b179..b4cc4ce17 100644 --- a/src/md4.cpp +++ b/src/md4.cpp @@ -45,7 +45,7 @@ inline void HH(u32bit& A, u32bit B, u32bit C, u32bit D, u32bit M, byte S) void MD4::hash(const byte input[]) { for(u32bit j = 0; j != 16; ++j) - M[j] = make_u32bit(input[4*j+3], input[4*j+2], input[4*j+1], input[4*j]); + M[j] = load_le<u32bit>(input, j); u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3]; @@ -78,8 +78,8 @@ void MD4::hash(const byte input[]) *************************************************/ void MD4::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(3 - (j % 4), digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_le(digest[j/4], output + j); } /************************************************* diff --git a/src/md5.cpp b/src/md5.cpp index 51cd7fe82..ec3703de5 100644 --- a/src/md5.cpp +++ b/src/md5.cpp @@ -58,7 +58,7 @@ inline void II(u32bit& A, u32bit B, u32bit C, u32bit D, u32bit msg, void MD5::hash(const byte input[]) { for(u32bit j = 0; j != 16; ++j) - M[j] = make_u32bit(input[4*j+3], input[4*j+2], input[4*j+1], input[4*j]); + M[j] = load_le<u32bit>(input, j); u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3]; @@ -106,8 +106,8 @@ void MD5::hash(const byte input[]) *************************************************/ void MD5::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(3 - (j % 4), digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_le(digest[j/4], output + j); } 
/************************************************* diff --git a/src/mdx_hash.cpp b/src/mdx_hash.cpp index 8fbad2580..f8020e2dc 100644 --- a/src/mdx_hash.cpp +++ b/src/mdx_hash.cpp @@ -90,11 +90,13 @@ void MDx_HashFunction::write_count(byte out[]) { if(COUNT_SIZE < 8) throw Invalid_State("MDx_HashFunction::write_count: COUNT_SIZE < 8"); - for(u32bit j = 0; j != 8; ++j) - { - const u32bit choose = (BIG_BYTE_ENDIAN ? (j % 8) : (7 - (j % 8))); - out[j+COUNT_SIZE-8] = get_byte(choose, 8 * count); - } + + const u64bit bit_count = count * 8; + + if(BIG_BYTE_ENDIAN) + store_be(bit_count, out + COUNT_SIZE - 8); + else + store_le(bit_count, out + COUNT_SIZE - 8); } } diff --git a/src/misty1.cpp b/src/misty1.cpp index f0f54c476..4df3ecc76 100644 --- a/src/misty1.cpp +++ b/src/misty1.cpp @@ -30,8 +30,10 @@ u16bit FI(u16bit input, u16bit key7, u16bit key9) *************************************************/ void MISTY1::enc(const byte in[], byte out[]) const { - u16bit B0 = make_u16bit(in[0], in[1]), B1 = make_u16bit(in[2], in[3]), - B2 = make_u16bit(in[4], in[5]), B3 = make_u16bit(in[6], in[7]); + u16bit B0 = load_be<u16bit>(in, 0); + u16bit B1 = load_be<u16bit>(in, 1); + u16bit B2 = load_be<u16bit>(in, 2); + u16bit B3 = load_be<u16bit>(in, 3); for(u32bit j = 0; j != 12; j += 3) { @@ -64,10 +66,7 @@ void MISTY1::enc(const byte in[], byte out[]) const B3 ^= B2 & EK[98]; B2 ^= B3 | EK[99]; - out[0] = get_byte(0, B2); out[1] = get_byte(1, B2); - out[2] = get_byte(0, B3); out[3] = get_byte(1, B3); - out[4] = get_byte(0, B0); out[5] = get_byte(1, B0); - out[6] = get_byte(0, B1); out[7] = get_byte(1, B1); + store_be(out, B2, B3, B0, B1); } /************************************************* @@ -75,8 +74,10 @@ void MISTY1::enc(const byte in[], byte out[]) const *************************************************/ void MISTY1::dec(const byte in[], byte out[]) const { - u16bit B0 = make_u16bit(in[4], in[5]), B1 = make_u16bit(in[6], in[7]), - B2 = make_u16bit(in[0], in[1]), B3 = 
make_u16bit(in[2], in[3]); + u16bit B0 = load_be<u16bit>(in, 2); + u16bit B1 = load_be<u16bit>(in, 3); + u16bit B2 = load_be<u16bit>(in, 0); + u16bit B3 = load_be<u16bit>(in, 1); for(u32bit j = 0; j != 12; j += 3) { @@ -109,10 +110,7 @@ void MISTY1::dec(const byte in[], byte out[]) const B0 ^= B1 | DK[98]; B1 ^= B0 & DK[99]; - out[0] = get_byte(0, B0); out[1] = get_byte(1, B0); - out[2] = get_byte(0, B1); out[3] = get_byte(1, B1); - out[4] = get_byte(0, B2); out[5] = get_byte(1, B2); - out[6] = get_byte(0, B3); out[7] = get_byte(1, B3); + store_be(out, B0, B1, B2, B3); } /************************************************* @@ -122,7 +120,8 @@ void MISTY1::key(const byte key[], u32bit length) { SecureBuffer<u16bit, 32> KS; for(u32bit j = 0; j != length / 2; ++j) - KS[j] = make_u16bit(key[2*j], key[2*j+1]); + KS[j] = load_be<u16bit>(key, j); + for(u32bit j = 0; j != 8; ++j) { KS[j+ 8] = FI(KS[j], KS[(j+1) % 8] >> 9, KS[(j+1) % 8] & 0x1FF); diff --git a/src/prf_x942.cpp b/src/prf_x942.cpp index 6e67aab8b..fd29aecdd 100644 --- a/src/prf_x942.cpp +++ b/src/prf_x942.cpp @@ -21,9 +21,7 @@ namespace { MemoryVector<byte> encode_x942_int(u32bit n) { byte n_buf[4] = { 0 }; - for(u32bit j = 0; j != 4; ++j) - n_buf[j] = get_byte(j, n); - + store_be(n, n_buf); return DER_Encoder().encode(n_buf, 4, OCTET_STRING).get_contents(); } diff --git a/src/randpool.cpp b/src/randpool.cpp index e041cf873..ed60b385a 100644 --- a/src/randpool.cpp +++ b/src/randpool.cpp @@ -9,6 +9,8 @@ #include <botan/util.h> #include <algorithm> +#include <assert.h> + namespace Botan { namespace { @@ -63,8 +65,7 @@ void Randpool::update_buffer() for(u32bit j = 0; j != counter.size(); ++j) if(++counter[j]) break; - for(u32bit j = 0; j != 8; ++j) - counter[j+4] = get_byte(j, timestamp); + store_be(timestamp, counter + 4); SecureVector<byte> mac_val = randpool_prf(mac, GEN_OUTPUT, counter, counter.size()); diff --git a/src/rc2.cpp b/src/rc2.cpp index fd6b4ccc6..e59e7d669 100644 --- a/src/rc2.cpp +++ b/src/rc2.cpp 
@@ -13,8 +13,10 @@ namespace Botan { *************************************************/ void RC2::enc(const byte in[], byte out[]) const { - u16bit R0 = make_u16bit(in[1], in[0]), R1 = make_u16bit(in[3], in[2]), - R2 = make_u16bit(in[5], in[4]), R3 = make_u16bit(in[7], in[6]); + u16bit R0 = load_le<u16bit>(in, 0); + u16bit R1 = load_le<u16bit>(in, 1); + u16bit R2 = load_le<u16bit>(in, 2); + u16bit R3 = load_le<u16bit>(in, 3); for(u32bit j = 0; j != 16; ++j) { @@ -39,10 +41,7 @@ void RC2::enc(const byte in[], byte out[]) const } } - out[0] = get_byte(1, R0); out[1] = get_byte(0, R0); - out[2] = get_byte(1, R1); out[3] = get_byte(0, R1); - out[4] = get_byte(1, R2); out[5] = get_byte(0, R2); - out[6] = get_byte(1, R3); out[7] = get_byte(0, R3); + store_le(out, R0, R1, R2, R3); } /************************************************* @@ -50,8 +49,10 @@ void RC2::enc(const byte in[], byte out[]) const *************************************************/ void RC2::dec(const byte in[], byte out[]) const { - u16bit R0 = make_u16bit(in[1], in[0]), R1 = make_u16bit(in[3], in[2]), - R2 = make_u16bit(in[5], in[4]), R3 = make_u16bit(in[7], in[6]); + u16bit R0 = load_le<u16bit>(in, 0); + u16bit R1 = load_le<u16bit>(in, 1); + u16bit R2 = load_le<u16bit>(in, 2); + u16bit R3 = load_le<u16bit>(in, 3); for(u32bit j = 0; j != 16; ++j) { @@ -76,10 +77,7 @@ void RC2::dec(const byte in[], byte out[]) const } } - out[0] = get_byte(1, R0); out[1] = get_byte(0, R0); - out[2] = get_byte(1, R1); out[3] = get_byte(0, R1); - out[4] = get_byte(1, R2); out[5] = get_byte(0, R2); - out[6] = get_byte(1, R3); out[7] = get_byte(0, R3); + store_le(out, R0, R1, R2, R3); } /************************************************* @@ -121,7 +119,7 @@ void RC2::key(const byte key[], u32bit length) L[j] = TABLE[L[j+1] ^ L[j+length]]; for(u32bit j = 0; j != 64; ++j) - K[j] = make_u16bit(L[2*j+1], L[2*j]); + K[j] = load_le<u16bit>(L, j); } /************************************************* diff --git a/src/rc5.cpp 
b/src/rc5.cpp index 261529ea8..3e87dc8ab 100644 --- a/src/rc5.cpp +++ b/src/rc5.cpp @@ -15,8 +15,8 @@ namespace Botan { *************************************************/ void RC5::enc(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[3], in[2], in[1], in[0]), - B = make_u32bit(in[7], in[6], in[5], in[4]); + u32bit A = load_le<u32bit>(in, 0), B = load_le<u32bit>(in, 1); + A += S[0]; B += S[1]; for(u32bit j = 0; j != ROUNDS; j += 4) { @@ -29,10 +29,8 @@ void RC5::enc(const byte in[], byte out[]) const A = rotate_left(A ^ B, B % 32) + S[2*j+8]; B = rotate_left(B ^ A, A % 32) + S[2*j+9]; } - out[0] = get_byte(3, A); out[1] = get_byte(2, A); - out[2] = get_byte(1, A); out[3] = get_byte(0, A); - out[4] = get_byte(3, B); out[5] = get_byte(2, B); - out[6] = get_byte(1, B); out[7] = get_byte(0, B); + + store_le(out, A, B); } /************************************************* @@ -40,8 +38,8 @@ void RC5::enc(const byte in[], byte out[]) const *************************************************/ void RC5::dec(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[3], in[2], in[1], in[0]), - B = make_u32bit(in[7], in[6], in[5], in[4]); + u32bit A = load_le<u32bit>(in, 0), B = load_le<u32bit>(in, 1); + for(u32bit j = ROUNDS; j != 0; j -= 4) { B = rotate_right(B - S[2*j+1], A % 32) ^ A; @@ -54,10 +52,8 @@ void RC5::dec(const byte in[], byte out[]) const A = rotate_right(A - S[2*j-6], B % 32) ^ B; } B -= S[1]; A -= S[0]; - out[0] = get_byte(3, A); out[1] = get_byte(2, A); - out[2] = get_byte(1, A); out[3] = get_byte(0, A); - out[4] = get_byte(3, B); out[5] = get_byte(2, B); - out[6] = get_byte(1, B); out[7] = get_byte(0, B); + + store_le(out, A, B); } /************************************************* diff --git a/src/rc6.cpp b/src/rc6.cpp index 622cb3f16..e7c8a4725 100644 --- a/src/rc6.cpp +++ b/src/rc6.cpp @@ -14,10 +14,10 @@ namespace Botan { *************************************************/ void RC6::enc(const byte in[], byte out[]) const { - u32bit A 
= make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]), - B = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]), - C = make_u32bit(in[11], in[10], in[ 9], in[ 8]), - D = make_u32bit(in[15], in[14], in[13], in[12]); + u32bit A = load_le<u32bit>(in, 0); + u32bit B = load_le<u32bit>(in, 1); + u32bit C = load_le<u32bit>(in, 2); + u32bit D = load_le<u32bit>(in, 3); B += S[0]; D += S[1]; @@ -48,14 +48,7 @@ void RC6::enc(const byte in[], byte out[]) const A += S[42]; C += S[43]; - out[ 0] = get_byte(3, A); out[ 1] = get_byte(2, A); - out[ 2] = get_byte(1, A); out[ 3] = get_byte(0, A); - out[ 4] = get_byte(3, B); out[ 5] = get_byte(2, B); - out[ 6] = get_byte(1, B); out[ 7] = get_byte(0, B); - out[ 8] = get_byte(3, C); out[ 9] = get_byte(2, C); - out[10] = get_byte(1, C); out[11] = get_byte(0, C); - out[12] = get_byte(3, D); out[13] = get_byte(2, D); - out[14] = get_byte(1, D); out[15] = get_byte(0, D); + store_le(out, A, B, C, D); } /************************************************* @@ -63,10 +56,10 @@ void RC6::enc(const byte in[], byte out[]) const *************************************************/ void RC6::dec(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]), - B = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]), - C = make_u32bit(in[11], in[10], in[ 9], in[ 8]), - D = make_u32bit(in[15], in[14], in[13], in[12]); + u32bit A = load_le<u32bit>(in, 0); + u32bit B = load_le<u32bit>(in, 1); + u32bit C = load_le<u32bit>(in, 2); + u32bit D = load_le<u32bit>(in, 3); C -= S[43]; A -= S[42]; @@ -97,14 +90,7 @@ void RC6::dec(const byte in[], byte out[]) const D -= S[1]; B -= S[0]; - out[ 0] = get_byte(3, A); out[ 1] = get_byte(2, A); - out[ 2] = get_byte(1, A); out[ 3] = get_byte(0, A); - out[ 4] = get_byte(3, B); out[ 5] = get_byte(2, B); - out[ 6] = get_byte(1, B); out[ 7] = get_byte(0, B); - out[ 8] = get_byte(3, C); out[ 9] = get_byte(2, C); - out[10] = get_byte(1, C); out[11] = get_byte(0, C); - out[12] = get_byte(3, D); out[13] = get_byte(2, D); - 
out[14] = get_byte(1, D); out[15] = get_byte(0, D); + store_le(out, A, B, C, D); } /************************************************* diff --git a/src/rmd128.cpp b/src/rmd128.cpp index 1614de3a2..ea7c11342 100644 --- a/src/rmd128.cpp +++ b/src/rmd128.cpp @@ -58,7 +58,7 @@ inline void F4(u32bit& A, u32bit B, u32bit C, u32bit D, void RIPEMD_128::hash(const byte input[]) { for(u32bit j = 0; j != 16; ++j) - M[j] = make_u32bit(input[4*j+3], input[4*j+2], input[4*j+1], input[4*j]); + M[j] = load_le<u32bit>(input, j); u32bit A1 = digest[0], A2 = A1, B1 = digest[1], B2 = B1, C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1; @@ -145,8 +145,8 @@ void RIPEMD_128::hash(const byte input[]) *************************************************/ void RIPEMD_128::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(3 - (j % 4), digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_le(digest[j/4], output + j); } /************************************************* diff --git a/src/rmd160.cpp b/src/rmd160.cpp index 6cc80d999..e092b19a2 100644 --- a/src/rmd160.cpp +++ b/src/rmd160.cpp @@ -73,7 +73,7 @@ inline void F5(u32bit& A, u32bit B, u32bit& C, u32bit D, u32bit E, void RIPEMD_160::hash(const byte input[]) { for(u32bit j = 0; j != 16; ++j) - M[j] = make_u32bit(input[4*j+3], input[4*j+2], input[4*j+1], input[4*j]); + M[j] = load_le<u32bit>(input, j); u32bit A1 = digest[0], A2 = A1, B1 = digest[1], B2 = B1, C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1, @@ -179,8 +179,8 @@ void RIPEMD_160::hash(const byte input[]) *************************************************/ void RIPEMD_160::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(3 - (j % 4), digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_le(digest[j/4], output + j); } /************************************************* diff --git a/src/seed.cpp b/src/seed.cpp index 84d033c06..9ed05b28f 100644 --- a/src/seed.cpp 
+++ b/src/seed.cpp @@ -22,10 +22,10 @@ u32bit SEED::G_FUNC::operator()(u32bit X) const *************************************************/ void SEED::enc(const byte in[], byte out[]) const { - u32bit B0 = make_u32bit(in[ 0], in[ 1], in[ 2], in[ 3]), - B1 = make_u32bit(in[ 4], in[ 5], in[ 6], in[ 7]), - B2 = make_u32bit(in[ 8], in[ 9], in[10], in[11]), - B3 = make_u32bit(in[12], in[13], in[14], in[15]); + u32bit B0 = load_be<u32bit>(in, 0); + u32bit B1 = load_be<u32bit>(in, 1); + u32bit B2 = load_be<u32bit>(in, 2); + u32bit B3 = load_be<u32bit>(in, 3); G_FUNC G; @@ -48,14 +48,7 @@ void SEED::enc(const byte in[], byte out[]) const B2 ^= T0 + T1; } - out[ 0] = get_byte(0, B2); out[ 1] = get_byte(1, B2); - out[ 2] = get_byte(2, B2); out[ 3] = get_byte(3, B2); - out[ 4] = get_byte(0, B3); out[ 5] = get_byte(1, B3); - out[ 6] = get_byte(2, B3); out[ 7] = get_byte(3, B3); - out[ 8] = get_byte(0, B0); out[ 9] = get_byte(1, B0); - out[10] = get_byte(2, B0); out[11] = get_byte(3, B0); - out[12] = get_byte(0, B1); out[13] = get_byte(1, B1); - out[14] = get_byte(2, B1); out[15] = get_byte(3, B1); + store_be(out, B2, B3, B0, B1); } /************************************************* @@ -63,10 +56,10 @@ void SEED::enc(const byte in[], byte out[]) const *************************************************/ void SEED::dec(const byte in[], byte out[]) const { - u32bit B0 = make_u32bit(in[ 0], in[ 1], in[ 2], in[ 3]), - B1 = make_u32bit(in[ 4], in[ 5], in[ 6], in[ 7]), - B2 = make_u32bit(in[ 8], in[ 9], in[10], in[11]), - B3 = make_u32bit(in[12], in[13], in[14], in[15]); + u32bit B0 = load_be<u32bit>(in, 0); + u32bit B1 = load_be<u32bit>(in, 1); + u32bit B2 = load_be<u32bit>(in, 2); + u32bit B3 = load_be<u32bit>(in, 3); G_FUNC G; @@ -89,14 +82,7 @@ void SEED::dec(const byte in[], byte out[]) const B2 ^= T0 + T1; } - out[ 0] = get_byte(0, B2); out[ 1] = get_byte(1, B2); - out[ 2] = get_byte(2, B2); out[ 3] = get_byte(3, B2); - out[ 4] = get_byte(0, B3); out[ 5] = get_byte(1, B3); - out[ 
6] = get_byte(2, B3); out[ 7] = get_byte(3, B3); - out[ 8] = get_byte(0, B0); out[ 9] = get_byte(1, B0); - out[10] = get_byte(2, B0); out[11] = get_byte(3, B0); - out[12] = get_byte(0, B1); out[13] = get_byte(1, B1); - out[14] = get_byte(2, B1); out[15] = get_byte(3, B1); + store_be(out, B2, B3, B0, B1); } /************************************************* @@ -114,7 +100,7 @@ void SEED::key(const byte key[], u32bit) SecureBuffer<u32bit, 4> WK; for(u32bit j = 0; j != 4; ++j) - WK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + WK[j] = load_be<u32bit>(key, j); G_FUNC G; diff --git a/src/serpent.cpp b/src/serpent.cpp index d17c8d0a7..6bd7132a8 100644 --- a/src/serpent.cpp +++ b/src/serpent.cpp @@ -242,10 +242,11 @@ inline void i_transform(u32bit& B0, u32bit& B1, u32bit& B2, u32bit& B3) *************************************************/ void Serpent::enc(const byte in[], byte out[]) const { - u32bit B0 = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]), - B1 = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]), - B2 = make_u32bit(in[11], in[10], in[ 9], in[ 8]), - B3 = make_u32bit(in[15], in[14], in[13], in[12]); + u32bit B0 = load_le<u32bit>(in, 0); + u32bit B1 = load_le<u32bit>(in, 1); + u32bit B2 = load_le<u32bit>(in, 2); + u32bit B3 = load_le<u32bit>(in, 3); + key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -278,14 +279,8 @@ void Serpent::enc(const byte in[], byte out[]) const key_xor(29,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); - out[ 0] = get_byte(3, B0); out[ 1] = get_byte(2, B0); - out[ 2] = get_byte(1, B0); out[ 3] = get_byte(0, B0); - out[ 4] = get_byte(3, B1); out[ 5] = get_byte(2, B1); - out[ 6] = get_byte(1, B1); out[ 7] = 
get_byte(0, B1); - out[ 8] = get_byte(3, B2); out[ 9] = get_byte(2, B2); - out[10] = get_byte(1, B2); out[11] = get_byte(0, B2); - out[12] = get_byte(3, B3); out[13] = get_byte(2, B3); - out[14] = get_byte(1, B3); out[15] = get_byte(0, B3); + + store_le(out, B0, B1, B2, B3); } /************************************************* @@ -293,10 +288,11 @@ void Serpent::enc(const byte in[], byte out[]) const *************************************************/ void Serpent::dec(const byte in[], byte out[]) const { - u32bit B0 = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]), - B1 = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]), - B2 = make_u32bit(in[11], in[10], in[ 9], in[ 8]), - B3 = make_u32bit(in[15], in[14], in[13], in[12]); + u32bit B0 = load_le<u32bit>(in, 0); + u32bit B1 = load_le<u32bit>(in, 1); + u32bit B2 = load_le<u32bit>(in, 2); + u32bit B3 = load_le<u32bit>(in, 3); + key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); @@ -329,14 +325,8 @@ void Serpent::dec(const byte in[], byte out[]) const i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); - out[ 0] = get_byte(3, B0); out[ 1] = get_byte(2, B0); - out[ 2] = get_byte(1, B0); out[ 3] = get_byte(0, B0); - out[ 4] = get_byte(3, B1); out[ 5] = get_byte(2, B1); - out[ 6] = get_byte(1, B1); out[ 7] = get_byte(0, B1); - out[ 8] = get_byte(3, B2); out[ 9] = get_byte(2, B2); - out[10] = get_byte(1, B2); out[11] = get_byte(0, B2); - out[12] = get_byte(3, B3); out[13] = get_byte(2, B3); - out[14] = get_byte(1, B3); out[15] = get_byte(0, B3); + + store_le(out, B0, B1, B2, B3); } /************************************************* @@ -348,7 +338,8 @@ void Serpent::key(const byte key[], u32bit length) 
SecureBuffer<u32bit, 140> W; for(u32bit j = 0; j != length / 4; ++j) - W[j] = make_u32bit(key[4*j+3], key[4*j+2], key[4*j+1], key[4*j]); + W[j] = load_le<u32bit>(key, j); + W[length / 4] |= u32bit(1) << ((length%4)*8); for(u32bit j = 8; j != 140; ++j) W[j] = rotate_left(W[j-8] ^ W[j-5] ^ W[j-3] ^ W[j-1] ^ PHI ^ (j-8), 11); diff --git a/src/sha160.cpp b/src/sha160.cpp index 9f6ba6960..7581f3ea0 100644 --- a/src/sha160.cpp +++ b/src/sha160.cpp @@ -54,7 +54,7 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, u32bit msg) void SHA_160::hash(const byte input[]) { for(u32bit j = 0; j != 16; ++j) - W[j] = make_u32bit(input[4*j], input[4*j+1], input[4*j+2], input[4*j+3]); + W[j] = load_be<u32bit>(input, j); for(u32bit j = 16; j != 80; ++j) W[j] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1); @@ -102,8 +102,8 @@ void SHA_160::hash(const byte input[]) *************************************************/ void SHA_160::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(j % 4, digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + store_be(digest[j/4], output + j); } /************************************************* diff --git a/src/sha256.cpp b/src/sha256.cpp index 1a98d4560..ae9849a57 100644 --- a/src/sha256.cpp +++ b/src/sha256.cpp @@ -47,7 +47,7 @@ inline void F1(u32bit A, u32bit B, u32bit C, u32bit& D, void SHA_256::hash(const byte input[]) { for(u32bit j = 0; j != 16; ++j) - W[j] = make_u32bit(input[4*j], input[4*j+1], input[4*j+2], input[4*j+3]); + W[j] = load_be<u32bit>(input, j); for(u32bit j = 16; j != 64; ++j) W[j] = sigma(W[j- 2], 17, 19, 10) + W[j- 7] + sigma(W[j-15], 7, 18, 3) + W[j-16]; @@ -99,8 +99,8 @@ void SHA_256::hash(const byte input[]) *************************************************/ void SHA_256::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(j % 4, digest[j/4]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4) + 
store_be(digest[j/4], output + j); } /************************************************* diff --git a/src/skipjack.cpp b/src/skipjack.cpp index 35d0e6010..969841b53 100644 --- a/src/skipjack.cpp +++ b/src/skipjack.cpp @@ -13,8 +13,10 @@ namespace Botan { *************************************************/ void Skipjack::enc(const byte in[], byte out[]) const { - u16bit W1 = make_u16bit(in[7], in[6]), W2 = make_u16bit(in[5], in[4]), - W3 = make_u16bit(in[3], in[2]), W4 = make_u16bit(in[1], in[0]); + u16bit W1 = load_le<u16bit>(in, 3); + u16bit W2 = load_le<u16bit>(in, 2); + u16bit W3 = load_le<u16bit>(in, 1); + u16bit W4 = load_le<u16bit>(in, 0); step_A(W1,W4, 1); step_A(W4,W3, 2); step_A(W3,W2, 3); step_A(W2,W1, 4); step_A(W1,W4, 5); step_A(W4,W3, 6); step_A(W3,W2, 7); step_A(W2,W1, 8); @@ -28,10 +30,7 @@ void Skipjack::enc(const byte in[], byte out[]) const step_B(W1,W2,25); step_B(W4,W1,26); step_B(W3,W4,27); step_B(W2,W3,28); step_B(W1,W2,29); step_B(W4,W1,30); step_B(W3,W4,31); step_B(W2,W3,32); - out[0] = get_byte(1, W4); out[1] = get_byte(0, W4); - out[2] = get_byte(1, W3); out[3] = get_byte(0, W3); - out[4] = get_byte(1, W2); out[5] = get_byte(0, W2); - out[6] = get_byte(1, W1); out[7] = get_byte(0, W1); + store_le(out, W4, W3, W2, W1); } /************************************************* @@ -39,8 +38,10 @@ void Skipjack::enc(const byte in[], byte out[]) const *************************************************/ void Skipjack::dec(const byte in[], byte out[]) const { - u16bit W1 = make_u16bit(in[7], in[6]), W2 = make_u16bit(in[5], in[4]), - W3 = make_u16bit(in[3], in[2]), W4 = make_u16bit(in[1], in[0]); + u16bit W1 = load_le<u16bit>(in, 3); + u16bit W2 = load_le<u16bit>(in, 2); + u16bit W3 = load_le<u16bit>(in, 1); + u16bit W4 = load_le<u16bit>(in, 0); step_Bi(W2,W3,32); step_Bi(W3,W4,31); step_Bi(W4,W1,30); step_Bi(W1,W2,29); step_Bi(W2,W3,28); step_Bi(W3,W4,27); step_Bi(W4,W1,26); step_Bi(W1,W2,25); @@ -54,10 +55,7 @@ void Skipjack::dec(const byte in[], byte 
out[]) const step_Ai(W1,W2, 8); step_Ai(W2,W3, 7); step_Ai(W3,W4, 6); step_Ai(W4,W1, 5); step_Ai(W1,W2, 4); step_Ai(W2,W3, 3); step_Ai(W3,W4, 2); step_Ai(W4,W1, 1); - out[0] = get_byte(1, W4); out[1] = get_byte(0, W4); - out[2] = get_byte(1, W3); out[3] = get_byte(0, W3); - out[4] = get_byte(1, W2); out[5] = get_byte(0, W2); - out[6] = get_byte(1, W1); out[7] = get_byte(0, W1); + store_le(out, W4, W3, W2, W1); } /************************************************* diff --git a/src/square.cpp b/src/square.cpp index 7d7cf1da5..988e56ef5 100644 --- a/src/square.cpp +++ b/src/square.cpp @@ -117,7 +117,7 @@ void Square::key(const byte key[], u32bit) { SecureBuffer<u32bit, 36> XEK, XDK; for(u32bit j = 0; j != 4; ++j) - XEK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + XEK[j] = load_be<u32bit>(key, j); for(u32bit j = 0; j != 8; ++j) { XEK[4*j+4] = XEK[4*j ] ^ rotate_left(XEK[4*j+3], 8) ^ (0x01000000 << j); @@ -149,27 +149,25 @@ void Square::transform(u32bit round_key[4]) { 0x03, 0x02, 0x01, 0x01 }, { 0x01, 0x03, 0x02, 0x01 }, { 0x01, 0x01, 0x03, 0x02 } }; - SecureBuffer<byte, 4> A[4], B[4]; - for(u32bit j = 0; j != 4; ++j) - for(u32bit k = 0; k != 4; ++k) - A[j][k] = get_byte(k, round_key[j]); + for(u32bit j = 0; j != 4; ++j) + { + SecureBuffer<byte, 4> A, B; + + store_be(round_key[j], A); + for(u32bit k = 0; k != 4; ++k) for(u32bit l = 0; l != 4; ++l) - B[j][k] ^= mul(A[j][l], G[l][k]); - for(u32bit j = 0; j != 4; ++j) - round_key[j] = make_u32bit(B[j][0], B[j][1], B[j][2], B[j][3]); - } + { + const byte a = A[l]; + const byte b = G[l][k]; -/************************************************* -* Multiply in GF(2^8) * -*************************************************/ -byte Square::mul(byte a, byte b) - { - if(a && b) - return ALog[(Log[a] + Log[b]) % 255]; - else - return 0; + if(a && b) + B[k] ^= ALog[(Log[a] + Log[b]) % 255]; + } + + round_key[j] = load_be<u32bit>(B.begin(), 0); + } } /************************************************* diff --git 
a/src/tea.cpp b/src/tea.cpp index 9b04aba11..aa04b1df8 100644 --- a/src/tea.cpp +++ b/src/tea.cpp @@ -13,8 +13,8 @@ namespace Botan { *************************************************/ void TEA::enc(const byte in[], byte out[]) const { - u32bit left = make_u32bit(in[0], in[1], in[2], in[3]), - right = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit left = load_be<u32bit>(in, 0), right = load_be<u32bit>(in, 1); + u32bit sum = 0; for(u32bit j = 0; j != 32; ++j) { @@ -22,10 +22,8 @@ void TEA::enc(const byte in[], byte out[]) const left += ((right << 4) + K[0]) ^ (right + sum) ^ ((right >> 5) + K[1]); right += ((left << 4) + K[2]) ^ (left + sum) ^ ((left >> 5) + K[3]); } - out[0] = get_byte(0, left); out[1] = get_byte(1, left); - out[2] = get_byte(2, left); out[3] = get_byte(3, left); - out[4] = get_byte(0, right); out[5] = get_byte(1, right); - out[6] = get_byte(2, right); out[7] = get_byte(3, right); + + store_be(out, left, right); } /************************************************* @@ -33,8 +31,8 @@ void TEA::enc(const byte in[], byte out[]) const *************************************************/ void TEA::dec(const byte in[], byte out[]) const { - u32bit left = make_u32bit(in[0], in[1], in[2], in[3]), - right = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit left = load_be<u32bit>(in, 0), right = load_be<u32bit>(in, 1); + u32bit sum = 0xC6EF3720; for(u32bit j = 0; j != 32; ++j) { @@ -42,10 +40,8 @@ void TEA::dec(const byte in[], byte out[]) const left -= ((right << 4) + K[0]) ^ (right + sum) ^ ((right >> 5) + K[1]); sum -= 0x9E3779B9; } - out[0] = get_byte(0, left); out[1] = get_byte(1, left); - out[2] = get_byte(2, left); out[3] = get_byte(3, left); - out[4] = get_byte(0, right); out[5] = get_byte(1, right); - out[6] = get_byte(2, right); out[7] = get_byte(3, right); + + store_be(out, left, right); } /************************************************* @@ -54,7 +50,7 @@ void TEA::dec(const byte in[], byte out[]) const void TEA::key(const byte key[], u32bit) { 
for(u32bit j = 0; j != 4; ++j) - K[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + K[j] = load_be<u32bit>(key, j); } } diff --git a/src/tiger.cpp b/src/tiger.cpp index 3df507853..a4dd657b2 100644 --- a/src/tiger.cpp +++ b/src/tiger.cpp @@ -15,9 +15,8 @@ namespace Botan { void Tiger::hash(const byte input[]) { for(u32bit j = 0; j != 8; ++j) - X[j] = make_u64bit(input[8*j+7], input[8*j+6], input[8*j+5], - input[8*j+4], input[8*j+3], input[8*j+2], - input[8*j+1], input[8*j]); + X[j] = load_le<u64bit>(input, j); + u64bit A = digest[0], B = digest[1], C = digest[2]; pass(A, B, C, X, 5); mix(X); diff --git a/src/twofish.cpp b/src/twofish.cpp index 08d2e93a6..25359f635 100644 --- a/src/twofish.cpp +++ b/src/twofish.cpp @@ -13,12 +13,10 @@ namespace Botan { *************************************************/ void Twofish::enc(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]), - B = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]), - C = make_u32bit(in[11], in[10], in[ 9], in[ 8]), - D = make_u32bit(in[15], in[14], in[13], in[12]); - - A ^= round_key[0]; B ^= round_key[1]; C ^= round_key[2]; D ^= round_key[3]; + u32bit A = load_le<u32bit>(in, 0) ^ round_key[0]; + u32bit B = load_le<u32bit>(in, 1) ^ round_key[1]; + u32bit C = load_le<u32bit>(in, 2) ^ round_key[2]; + u32bit D = load_le<u32bit>(in, 3) ^ round_key[3]; for(u32bit j = 0; j != 16; j += 2) { @@ -47,16 +45,12 @@ void Twofish::enc(const byte in[], byte out[]) const B = rotate_left(B, 1) ^ Y; } - C ^= round_key[4]; D ^= round_key[5]; A ^= round_key[6]; B ^= round_key[7]; + C ^= round_key[4]; + D ^= round_key[5]; + A ^= round_key[6]; + B ^= round_key[7]; - out[ 0] = get_byte(3, C); out[ 1] = get_byte(2, C); - out[ 2] = get_byte(1, C); out[ 3] = get_byte(0, C); - out[ 4] = get_byte(3, D); out[ 5] = get_byte(2, D); - out[ 6] = get_byte(1, D); out[ 7] = get_byte(0, D); - out[ 8] = get_byte(3, A); out[ 9] = get_byte(2, A); - out[10] = get_byte(1, A); out[11] = 
get_byte(0, A); - out[12] = get_byte(3, B); out[13] = get_byte(2, B); - out[14] = get_byte(1, B); out[15] = get_byte(0, B); + store_le(out, C, D, A, B); } /************************************************* @@ -64,12 +58,10 @@ void Twofish::enc(const byte in[], byte out[]) const *************************************************/ void Twofish::dec(const byte in[], byte out[]) const { - u32bit A = make_u32bit(in[ 3], in[ 2], in[ 1], in[ 0]), - B = make_u32bit(in[ 7], in[ 6], in[ 5], in[ 4]), - C = make_u32bit(in[11], in[10], in[ 9], in[ 8]), - D = make_u32bit(in[15], in[14], in[13], in[12]); - - A ^= round_key[4]; B ^= round_key[5]; C ^= round_key[6]; D ^= round_key[7]; + u32bit A = load_le<u32bit>(in, 0) ^ round_key[4]; + u32bit B = load_le<u32bit>(in, 1) ^ round_key[5]; + u32bit C = load_le<u32bit>(in, 2) ^ round_key[6]; + u32bit D = load_le<u32bit>(in, 3) ^ round_key[7]; for(u32bit j = 0; j != 16; j += 2) { @@ -100,14 +92,7 @@ void Twofish::dec(const byte in[], byte out[]) const C ^= round_key[0]; D ^= round_key[1]; A ^= round_key[2]; B ^= round_key[3]; - out[ 0] = get_byte(3, C); out[ 1] = get_byte(2, C); - out[ 2] = get_byte(1, C); out[ 3] = get_byte(0, C); - out[ 4] = get_byte(3, D); out[ 5] = get_byte(2, D); - out[ 6] = get_byte(1, D); out[ 7] = get_byte(0, D); - out[ 8] = get_byte(3, A); out[ 9] = get_byte(2, A); - out[10] = get_byte(1, A); out[11] = get_byte(0, A); - out[12] = get_byte(3, B); out[13] = get_byte(2, B); - out[14] = get_byte(1, B); out[15] = get_byte(0, B); + store_le(out, C, D, A, B); } /************************************************* diff --git a/src/whrlpool.cpp b/src/whrlpool.cpp index 48cd79e5f..960095d9b 100644 --- a/src/whrlpool.cpp +++ b/src/whrlpool.cpp @@ -22,8 +22,7 @@ void Whirlpool::hash(const byte in[]) }; for(u32bit j = 0; j != 8; ++j) - M[j] = make_u64bit(in[8*j+0], in[8*j+1], in[8*j+2], in[8*j+3], - in[8*j+4], in[8*j+5], in[8*j+6], in[8*j+7]); + M[j] = load_be<u64bit>(in, j); u64bit K0, K1, K2, K3, K4, K5, K6, K7; K0 = 
digest[0]; K1 = digest[1]; K2 = digest[2]; K3 = digest[3]; @@ -124,8 +123,8 @@ void Whirlpool::hash(const byte in[]) *************************************************/ void Whirlpool::copy_out(byte output[]) { - for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) - output[j] = get_byte(j % 8, digest[j/8]); + for(u32bit j = 0; j != OUTPUT_LENGTH; j += 8) + store_be(digest[j/8], output + j); } /************************************************* diff --git a/src/wid_wake.cpp b/src/wid_wake.cpp index 6002138c7..fe3fd8dab 100644 --- a/src/wid_wake.cpp +++ b/src/wid_wake.cpp @@ -30,15 +30,15 @@ void WiderWake_41_BE::cipher(const byte in[], byte out[], u32bit length) *************************************************/ void WiderWake_41_BE::generate(u32bit length) { - u32bit R0 = state[0], R1 = state[1], R2 = state[2], - R3 = state[3], R4 = state[4]; + u32bit R0 = state[0], R1 = state[1], + R2 = state[2], R3 = state[3], + R4 = state[4]; for(u32bit j = 0; j != length; j += 8) { u32bit R0a; - buffer[j+0] = get_byte(0, R3); buffer[j+1] = get_byte(1, R3); - buffer[j+2] = get_byte(2, R3); buffer[j+3] = get_byte(3, R3); + store_be(R3, buffer + j); R0a = R4 + R3; R3 += R2; R2 += R1; R1 += R0; R0a = (R0a >> 8) ^ T[(R0a & 0xFF)]; @@ -47,8 +47,7 @@ void WiderWake_41_BE::generate(u32bit length) R3 = (R3 >> 8) ^ T[(R3 & 0xFF)]; R4 = R0; R0 = R0a; - buffer[j+4] = get_byte(0, R3); buffer[j+5] = get_byte(1, R3); - buffer[j+6] = get_byte(2, R3); buffer[j+7] = get_byte(3, R3); + store_be(R3, buffer + j + 4); R0a = R4 + R3; R3 += R2; R2 += R1; R1 += R0; R0a = (R0a >> 8) ^ T[(R0a & 0xFF)]; @@ -57,7 +56,13 @@ void WiderWake_41_BE::generate(u32bit length) R3 = (R3 >> 8) ^ T[(R3 & 0xFF)]; R4 = R0; R0 = R0a; } - state[0] = R0; state[1] = R1; state[2] = R2; state[3] = R3; state[4] = R4; + + state[0] = R0; + state[1] = R1; + state[2] = R2; + state[3] = R3; + state[4] = R4; + position = 0; } @@ -67,7 +72,7 @@ void WiderWake_41_BE::generate(u32bit length) void WiderWake_41_BE::key(const byte key[], u32bit) { 
for(u32bit j = 0; j != 4; ++j) - t_key[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + t_key[j] = load_be<u32bit>(key, j); static const u32bit MAGIC[8] = { 0x726A8F3B, 0xE69A3B5C, 0xD3C71FE5, 0xAB3C73D2, @@ -116,9 +121,9 @@ void WiderWake_41_BE::resync(const byte iv[], u32bit length) for(u32bit j = 0; j != 4; ++j) state[j] = t_key[j]; - state[4] = make_u32bit(iv[0], iv[1], iv[2], iv[3]); + state[4] = load_be<u32bit>(iv, 0); state[0] ^= state[4]; - state[2] ^= make_u32bit(iv[4], iv[5], iv[6], iv[7]); + state[2] ^= load_be<u32bit>(iv, 1); generate(8*4); generate(buffer.size()); diff --git a/src/xtea.cpp b/src/xtea.cpp index 06626cfda..5ecf3054e 100644 --- a/src/xtea.cpp +++ b/src/xtea.cpp @@ -14,17 +14,15 @@ namespace Botan { *************************************************/ void XTEA::enc(const byte in[], byte out[]) const { - u32bit left = make_u32bit(in[0], in[1], in[2], in[3]), - right = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0), R = load_be<u32bit>(in, 1); + for(u32bit j = 0; j != 32; ++j) { - left += (((right << 4) ^ (right >> 5)) + right) ^ EK[2*j]; - right += (((left << 4) ^ (left >> 5)) + left) ^ EK[2*j+1]; + L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*j]; + R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*j+1]; } - out[0] = get_byte(0, left); out[1] = get_byte(1, left); - out[2] = get_byte(2, left); out[3] = get_byte(3, left); - out[4] = get_byte(0, right); out[5] = get_byte(1, right); - out[6] = get_byte(2, right); out[7] = get_byte(3, right); + + store_be(out, L, R); } /************************************************* @@ -32,17 +30,15 @@ void XTEA::enc(const byte in[], byte out[]) const *************************************************/ void XTEA::dec(const byte in[], byte out[]) const { - u32bit left = make_u32bit(in[0], in[1], in[2], in[3]), - right = make_u32bit(in[4], in[5], in[6], in[7]); + u32bit L = load_be<u32bit>(in, 0), R = load_be<u32bit>(in, 1); + for(u32bit j = 32; j > 0; --j) { - right -= (((left << 
4) ^ (left >> 5)) + left) ^ EK[2*j - 1]; - left -= (((right << 4) ^ (right >> 5)) + right) ^ EK[2*j - 2]; + R -= (((L << 4) ^ (L >> 5)) + L) ^ EK[2*j - 1]; + L -= (((R << 4) ^ (R >> 5)) + R) ^ EK[2*j - 2]; } - out[0] = get_byte(0, left); out[1] = get_byte(1, left); - out[2] = get_byte(2, left); out[3] = get_byte(3, left); - out[4] = get_byte(0, right); out[5] = get_byte(1, right); - out[6] = get_byte(2, right); out[7] = get_byte(3, right); + + store_be(out, L, R); } /************************************************* @@ -73,7 +69,8 @@ void XTEA::key(const byte key[], u32bit) SecureBuffer<u32bit, 4> UK; for(u32bit j = 0; j != 4; ++j) - UK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + UK[j] = load_be<u32bit>(key, j); + for(u32bit j = 0; j != 64; ++j) EK[j] = DELTAS[j] + UK[KEY_INDEX[j]]; } |