about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/lib/block/aes/aes.cpp | 60
-rw-r--r-- src/lib/block/aria/aria.cpp | 12
-rw-r--r-- src/lib/block/camellia/camellia.cpp | 16
-rw-r--r-- src/lib/block/cast/cast128.cpp | 6
-rw-r--r-- src/lib/block/cast/cast256.cpp | 24
-rw-r--r-- src/lib/block/des/des.cpp | 24
-rw-r--r-- src/lib/block/gost_28147/gost_28147.cpp | 25
-rw-r--r-- src/lib/block/gost_28147/gost_28147.h | 6
-rw-r--r-- src/lib/block/kasumi/kasumi.cpp | 26
-rw-r--r-- src/lib/block/noekeon/noekeon.cpp | 44
-rw-r--r-- src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp | 38
-rw-r--r-- src/lib/block/serpent/serpent.cpp | 22
-rw-r--r-- src/lib/block/serpent/serpent_simd/serpent_simd.cpp | 24
-rw-r--r-- src/lib/block/shacal2/shacal2.cpp | 12
-rw-r--r-- src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp | 8
-rw-r--r-- src/lib/block/sm4/sm4.cpp | 4
-rw-r--r-- src/lib/block/threefish/threefish.cpp | 16
-rw-r--r-- src/lib/block/twofish/twofish.cpp | 28
-rw-r--r-- src/lib/hash/blake2/blake2b.cpp | 8
-rw-r--r-- src/lib/hash/md4/md4.cpp | 120
-rw-r--r-- src/lib/hash/md4/md4.h | 7
-rw-r--r-- src/lib/hash/md5/md5.cpp | 102
-rw-r--r-- src/lib/hash/rmd160/rmd160.cpp | 229
-rw-r--r-- src/lib/hash/sha1/sha160.cpp | 32
-rw-r--r-- src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp | 16
-rw-r--r-- src/lib/hash/sha2_32/sha2_32.cpp | 8
-rw-r--r-- src/lib/hash/sha2_64/sha2_64.cpp | 47
-rw-r--r-- src/lib/hash/sha3/sha3.cpp | 58
-rw-r--r-- src/lib/hash/sm3/sm3.cpp | 124
-rw-r--r-- src/lib/mac/siphash/siphash.cpp | 12
-rw-r--r-- src/lib/stream/chacha/chacha.cpp | 12
-rw-r--r-- src/lib/stream/salsa20/salsa20.cpp | 20
-rw-r--r-- src/lib/utils/bswap.h | 6
-rw-r--r-- src/lib/utils/rotate.h | 85
-rw-r--r-- src/lib/utils/simd/simd_32.h | 87
-rw-r--r-- src/tests/test_simd.cpp | 32
36 files changed, 739 insertions, 661 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp
index 71a8c6a44..1893ab4a0 100644
--- a/src/lib/block/aes/aes.cpp
+++ b/src/lib/block/aes/aes.cpp
@@ -114,9 +114,9 @@ const std::vector<uint32_t>& AES_TE()
const uint32_t x = make_uint32(xtime(s), s, s, xtime3(s));
TE[i] = x;
- TE[i+256] = rotate_right(x, 8);
- TE[i+512] = rotate_right(x, 16);
- TE[i+768] = rotate_right(x, 24);
+ TE[i+256] = rotr< 8>(x);
+ TE[i+512] = rotr<16>(x);
+ TE[i+768] = rotr<24>(x);
}
return TE;
};
@@ -135,9 +135,9 @@ const std::vector<uint32_t>& AES_TD()
const uint32_t x = make_uint32(xtime14(s), xtime9(s), xtime13(s), xtime11(s));
TD[i] = x;
- TD[i+256] = rotate_right(x, 8);
- TD[i+512] = rotate_right(x, 16);
- TD[i+768] = rotate_right(x, 24);
+ TD[i+256] = rotr< 8>(x);
+ TD[i+512] = rotr<16>(x);
+ TD[i+768] = rotr<24>(x);
}
return TD;
};
@@ -188,24 +188,24 @@ void aes_encrypt_n(const uint8_t in[], uint8_t out[],
*/
uint32_t B0 = TE[get_byte(0, T0)] ^
- rotate_right(TE[get_byte(1, T1)], 8) ^
- rotate_right(TE[get_byte(2, T2)], 16) ^
- rotate_right(TE[get_byte(3, T3)], 24) ^ EK[4];
+ rotr< 8>(TE[get_byte(1, T1)]) ^
+ rotr<16>(TE[get_byte(2, T2)]) ^
+ rotr<24>(TE[get_byte(3, T3)]) ^ EK[4];
uint32_t B1 = TE[get_byte(0, T1)] ^
- rotate_right(TE[get_byte(1, T2)], 8) ^
- rotate_right(TE[get_byte(2, T3)], 16) ^
- rotate_right(TE[get_byte(3, T0)], 24) ^ EK[5];
+ rotr< 8>(TE[get_byte(1, T2)]) ^
+ rotr<16>(TE[get_byte(2, T3)]) ^
+ rotr<24>(TE[get_byte(3, T0)]) ^ EK[5];
uint32_t B2 = TE[get_byte(0, T2)] ^
- rotate_right(TE[get_byte(1, T3)], 8) ^
- rotate_right(TE[get_byte(2, T0)], 16) ^
- rotate_right(TE[get_byte(3, T1)], 24) ^ EK[6];
+ rotr< 8>(TE[get_byte(1, T3)]) ^
+ rotr<16>(TE[get_byte(2, T0)]) ^
+ rotr<24>(TE[get_byte(3, T1)]) ^ EK[6];
uint32_t B3 = TE[get_byte(0, T3)] ^
- rotate_right(TE[get_byte(1, T0)], 8) ^
- rotate_right(TE[get_byte(2, T1)], 16) ^
- rotate_right(TE[get_byte(3, T2)], 24) ^ EK[7];
+ rotr< 8>(TE[get_byte(1, T0)]) ^
+ rotr<16>(TE[get_byte(2, T1)]) ^
+ rotr<24>(TE[get_byte(3, T2)]) ^ EK[7];
for(size_t r = 2*4; r < EK.size(); r += 2*4)
{
@@ -276,24 +276,24 @@ void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks,
T0 ^= Z;
uint32_t B0 = TD[get_byte(0, T0)] ^
- rotate_right(TD[get_byte(1, T3)], 8) ^
- rotate_right(TD[get_byte(2, T2)], 16) ^
- rotate_right(TD[get_byte(3, T1)], 24) ^ DK[4];
+ rotr< 8>(TD[get_byte(1, T3)]) ^
+ rotr<16>(TD[get_byte(2, T2)]) ^
+ rotr<24>(TD[get_byte(3, T1)]) ^ DK[4];
uint32_t B1 = TD[get_byte(0, T1)] ^
- rotate_right(TD[get_byte(1, T0)], 8) ^
- rotate_right(TD[get_byte(2, T3)], 16) ^
- rotate_right(TD[get_byte(3, T2)], 24) ^ DK[5];
+ rotr< 8>(TD[get_byte(1, T0)]) ^
+ rotr<16>(TD[get_byte(2, T3)]) ^
+ rotr<24>(TD[get_byte(3, T2)]) ^ DK[5];
uint32_t B2 = TD[get_byte(0, T2)] ^
- rotate_right(TD[get_byte(1, T1)], 8) ^
- rotate_right(TD[get_byte(2, T0)], 16) ^
- rotate_right(TD[get_byte(3, T3)], 24) ^ DK[6];
+ rotr< 8>(TD[get_byte(1, T1)]) ^
+ rotr<16>(TD[get_byte(2, T0)]) ^
+ rotr<24>(TD[get_byte(3, T3)]) ^ DK[6];
uint32_t B3 = TD[get_byte(0, T3)] ^
- rotate_right(TD[get_byte(1, T2)], 8) ^
- rotate_right(TD[get_byte(2, T1)], 16) ^
- rotate_right(TD[get_byte(3, T0)], 24) ^ DK[7];
+ rotr< 8>(TD[get_byte(1, T2)]) ^
+ rotr<16>(TD[get_byte(2, T1)]) ^
+ rotr<24>(TD[get_byte(3, T0)]) ^ DK[7];
for(size_t r = 2*4; r < DK.size(); r += 2*4)
{
diff --git a/src/lib/block/aria/aria.cpp b/src/lib/block/aria/aria.cpp
index 5b449722a..1583dd7d3 100644
--- a/src/lib/block/aria/aria.cpp
+++ b/src/lib/block/aria/aria.cpp
@@ -183,7 +183,7 @@ inline void ARIA_FO(uint32_t& T0, uint32_t& T1, uint32_t& T2, uint32_t& T3)
T1 ^= T2;
T1 = ((T1 << 8) & 0xFF00FF00) | ((T1 >> 8) & 0x00FF00FF);
- T2 = rotate_right(T2, 16);
+ T2 = rotr<16>(T2);
T3 = reverse_bytes(T3);
T1 ^= T2;
@@ -205,7 +205,7 @@ inline void ARIA_FE(uint32_t& T0, uint32_t& T1, uint32_t& T2, uint32_t& T3)
T1 ^= T2;
T3 = ((T3 << 8) & 0xFF00FF00) | ((T3 >> 8) & 0x00FF00FF);
- T0 = rotate_right(T0, 16);
+ T0 = rotr<16>(T0);
T1 = reverse_bytes(T1);
T1 ^= T2;
@@ -411,9 +411,9 @@ void key_schedule(secure_vector<uint32_t>& ERK,
{
for(size_t j = 0; j != 4; ++j)
{
- DRK[i+j] = rotate_right(DRK[i+j], 8) ^
- rotate_right(DRK[i+j], 16) ^
- rotate_right(DRK[i+j], 24);
+ DRK[i+j] = rotr<8>(DRK[i+j]) ^
+ rotr<16>(DRK[i+j]) ^
+ rotr<24>(DRK[i+j]);
}
DRK[i+1] ^= DRK[i+2]; DRK[i+2] ^= DRK[i+3];
@@ -421,7 +421,7 @@ void key_schedule(secure_vector<uint32_t>& ERK,
DRK[i+2] ^= DRK[i+0]; DRK[i+1] ^= DRK[i+2];
DRK[i+1] = ((DRK[i+1] << 8) & 0xFF00FF00) | ((DRK[i+1] >> 8) & 0x00FF00FF);
- DRK[i+2] = rotate_right(DRK[i+2], 16);
+ DRK[i+2] = rotr<16>(DRK[i+2]);
DRK[i+3] = reverse_bytes(DRK[i+3]);
DRK[i+1] ^= DRK[i+2]; DRK[i+2] ^= DRK[i+3];
diff --git a/src/lib/block/camellia/camellia.cpp b/src/lib/block/camellia/camellia.cpp
index ea84fa313..89db6f8b9 100644
--- a/src/lib/block/camellia/camellia.cpp
+++ b/src/lib/block/camellia/camellia.cpp
@@ -577,12 +577,12 @@ uint64_t F_SLOW(uint64_t v, uint64_t K)
const uint64_t x = v ^ K;
const uint8_t t1 = SBOX[get_byte(0, x)];
- const uint8_t t2 = rotate_left(SBOX[get_byte(1, x)], 1);
- const uint8_t t3 = rotate_left(SBOX[get_byte(2, x)], 7);
- const uint8_t t4 = SBOX[rotate_left(get_byte(3, x), 1)];
- const uint8_t t5 = rotate_left(SBOX[get_byte(4, x)], 1);
- const uint8_t t6 = rotate_left(SBOX[get_byte(5, x)], 7);
- const uint8_t t7 = SBOX[rotate_left(get_byte(6, x), 1)];
+ const uint8_t t2 = rotl<1>(SBOX[get_byte(1, x)]);
+ const uint8_t t3 = rotl<7>(SBOX[get_byte(2, x)]);
+ const uint8_t t4 = SBOX[rotl<1>(get_byte(3, x))];
+ const uint8_t t5 = rotl<1>(SBOX[get_byte(4, x)]);
+ const uint8_t t6 = rotl<7>(SBOX[get_byte(5, x)]);
+ const uint8_t t7 = SBOX[rotl<1>(get_byte(6, x))];
const uint8_t t8 = SBOX[get_byte(7, x)];
const uint8_t y1 = t1 ^ t3 ^ t4 ^ t6 ^ t7 ^ t8;
@@ -619,7 +619,7 @@ inline uint64_t FL(uint64_t v, uint64_t K)
const uint32_t k1 = static_cast<uint32_t>(K >> 32);
const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);
- x2 ^= rotate_left(x1 & k1, 1);
+ x2 ^= rotl<1>(x1 & k1);
x1 ^= (x2 | k2);
return ((static_cast<uint64_t>(x1) << 32) | x2);
@@ -634,7 +634,7 @@ inline uint64_t FLINV(uint64_t v, uint64_t K)
const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);
x1 ^= (x2 | k2);
- x2 ^= rotate_left(x1 & k1, 1);
+ x2 ^= rotl<1>(x1 & k1);
return ((static_cast<uint64_t>(x1) << 32) | x2);
}
diff --git a/src/lib/block/cast/cast128.cpp b/src/lib/block/cast/cast128.cpp
index d6ded8cd2..d54d0614e 100644
--- a/src/lib/block/cast/cast128.cpp
+++ b/src/lib/block/cast/cast128.cpp
@@ -18,7 +18,7 @@ namespace {
*/
inline uint32_t R1(uint32_t R, uint32_t MK, uint8_t RK)
{
- uint32_t T = rotate_left(MK + R, RK);
+ const uint32_t T = rotl_var(MK + R, RK);
return (CAST_SBOX1[get_byte(0, T)] ^ CAST_SBOX2[get_byte(1, T)]) -
CAST_SBOX3[get_byte(2, T)] + CAST_SBOX4[get_byte(3, T)];
}
@@ -28,7 +28,7 @@ inline uint32_t R1(uint32_t R, uint32_t MK, uint8_t RK)
*/
inline uint32_t R2(uint32_t R, uint32_t MK, uint8_t RK)
{
- uint32_t T = rotate_left(MK ^ R, RK);
+ const uint32_t T = rotl_var(MK ^ R, RK);
return (CAST_SBOX1[get_byte(0, T)] - CAST_SBOX2[get_byte(1, T)] +
CAST_SBOX3[get_byte(2, T)]) ^ CAST_SBOX4[get_byte(3, T)];
}
@@ -38,7 +38,7 @@ inline uint32_t R2(uint32_t R, uint32_t MK, uint8_t RK)
*/
inline uint32_t R3(uint32_t R, uint32_t MK, uint8_t RK)
{
- uint32_t T = rotate_left(MK - R, RK);
+ const uint32_t T = rotl_var(MK - R, RK);
return ((CAST_SBOX1[get_byte(0, T)] + CAST_SBOX2[get_byte(1, T)]) ^
CAST_SBOX3[get_byte(2, T)]) - CAST_SBOX4[get_byte(3, T)];
}
diff --git a/src/lib/block/cast/cast256.cpp b/src/lib/block/cast/cast256.cpp
index a4a7dbd36..b4aa49166 100644
--- a/src/lib/block/cast/cast256.cpp
+++ b/src/lib/block/cast/cast256.cpp
@@ -16,31 +16,31 @@ namespace {
/*
* CAST-256 Round Type 1
*/
-void round1(uint32_t& out, uint32_t in, uint32_t mask, uint32_t rot)
+void round1(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK)
{
- uint32_t temp = rotate_left(mask + in, rot);
- out ^= (CAST_SBOX1[get_byte(0, temp)] ^ CAST_SBOX2[get_byte(1, temp)]) -
- CAST_SBOX3[get_byte(2, temp)] + CAST_SBOX4[get_byte(3, temp)];
+ const uint32_t T = rotl_var(MK + in, RK);
+ out ^= (CAST_SBOX1[get_byte(0, T)] ^ CAST_SBOX2[get_byte(1, T)]) -
+ CAST_SBOX3[get_byte(2, T)] + CAST_SBOX4[get_byte(3, T)];
}
/*
* CAST-256 Round Type 2
*/
-void round2(uint32_t& out, uint32_t in, uint32_t mask, uint32_t rot)
+void round2(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK)
{
- uint32_t temp = rotate_left(mask ^ in, rot);
- out ^= (CAST_SBOX1[get_byte(0, temp)] - CAST_SBOX2[get_byte(1, temp)] +
- CAST_SBOX3[get_byte(2, temp)]) ^ CAST_SBOX4[get_byte(3, temp)];
+ const uint32_t T = rotl_var(MK ^ in, RK);
+ out ^= (CAST_SBOX1[get_byte(0, T)] - CAST_SBOX2[get_byte(1, T)] +
+ CAST_SBOX3[get_byte(2, T)]) ^ CAST_SBOX4[get_byte(3, T)];
}
/*
* CAST-256 Round Type 3
*/
-void round3(uint32_t& out, uint32_t in, uint32_t mask, uint32_t rot)
+void round3(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK)
{
- uint32_t temp = rotate_left(mask - in, rot);
- out ^= ((CAST_SBOX1[get_byte(0, temp)] + CAST_SBOX2[get_byte(1, temp)]) ^
- CAST_SBOX3[get_byte(2, temp)]) - CAST_SBOX4[get_byte(3, temp)];
+ const uint32_t T = rotl_var(MK - in, RK);
+ out ^= ((CAST_SBOX1[get_byte(0, T)] + CAST_SBOX2[get_byte(1, T)]) ^
+ CAST_SBOX3[get_byte(2, T)]) - CAST_SBOX4[get_byte(3, T)];
}
}
diff --git a/src/lib/block/des/des.cpp b/src/lib/block/des/des.cpp
index 44f315047..15c2adb66 100644
--- a/src/lib/block/des/des.cpp
+++ b/src/lib/block/des/des.cpp
@@ -91,16 +91,16 @@ void des_encrypt(uint32_t& L, uint32_t& R,
{
uint32_t T0, T1;
- T0 = rotate_right(R, 4) ^ round_key[2*i];
- T1 = R ^ round_key[2*i + 1];
+ T0 = rotr<4>(R) ^ round_key[2*i];
+ T1 = R ^ round_key[2*i + 1];
L ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^
DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^
DES_SPBOX5[get_byte(2, T0)] ^ DES_SPBOX6[get_byte(2, T1)] ^
DES_SPBOX7[get_byte(3, T0)] ^ DES_SPBOX8[get_byte(3, T1)];
- T0 = rotate_right(L, 4) ^ round_key[2*i + 2];
- T1 = L ^ round_key[2*i + 3];
+ T0 = rotr<4>(L) ^ round_key[2*i + 2];
+ T1 = L ^ round_key[2*i + 3];
R ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^
DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^
@@ -119,16 +119,16 @@ void des_decrypt(uint32_t& L, uint32_t& R,
{
uint32_t T0, T1;
- T0 = rotate_right(R, 4) ^ round_key[2*i - 2];
- T1 = R ^ round_key[2*i - 1];
+ T0 = rotr<4>(R) ^ round_key[2*i - 2];
+ T1 = R ^ round_key[2*i - 1];
L ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^
DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^
DES_SPBOX5[get_byte(2, T0)] ^ DES_SPBOX6[get_byte(2, T1)] ^
DES_SPBOX7[get_byte(3, T0)] ^ DES_SPBOX8[get_byte(3, T1)];
- T0 = rotate_right(L, 4) ^ round_key[2*i - 4];
- T1 = L ^ round_key[2*i - 3];
+ T0 = rotr<4>(L) ^ round_key[2*i - 4];
+ T1 = L ^ round_key[2*i - 3];
R ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^
DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^
@@ -160,7 +160,7 @@ void DES::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
(DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) |
(DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) |
(DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] );
- T = rotate_left(T, 32);
+ T = rotl<32>(T);
store_be(T, out + 8*i);
}
@@ -188,7 +188,7 @@ void DES::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
(DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) |
(DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] );
- T = rotate_left(T, 32);
+ T = rotl<32>(T);
store_be(T, out + BLOCK_SIZE*i);
}
@@ -232,7 +232,7 @@ void TripleDES::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) cons
(DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) |
(DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] );
- T = rotate_left(T, 32);
+ T = rotl<32>(T);
store_be(T, out);
@@ -265,7 +265,7 @@ void TripleDES::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) cons
(DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) |
(DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] );
- T = rotate_left(T, 32);
+ T = rotl<32>(T);
store_be(T, out);
diff --git a/src/lib/block/gost_28147/gost_28147.cpp b/src/lib/block/gost_28147/gost_28147.cpp
index f73ac5910..ffe9b5d66 100644
--- a/src/lib/block/gost_28147/gost_28147.cpp
+++ b/src/lib/block/gost_28147/gost_28147.cpp
@@ -12,11 +12,17 @@ namespace Botan {
uint8_t GOST_28147_89_Params::sbox_entry(size_t row, size_t col) const
{
- uint8_t x = m_sboxes[4 * col + (row / 2)];
-
+ const uint8_t x = m_sboxes[4 * col + (row / 2)];
return (row % 2 == 0) ? (x >> 4) : (x & 0x0F);
}
+uint8_t GOST_28147_89_Params::sbox_pair(size_t row, size_t col) const
+ {
+ const uint8_t x = m_sboxes[4 * (col % 16) + row];
+ const uint8_t y = m_sboxes[4 * (col / 16) + row];
+ return (x >> 4) | (y << 4);
+ }
+
GOST_28147_89_Params::GOST_28147_89_Params(const std::string& n) : m_name(n)
{
// Encoded in the packed format from RFC 4357
@@ -53,13 +59,14 @@ GOST_28147_89_Params::GOST_28147_89_Params(const std::string& n) : m_name(n)
GOST_28147_89::GOST_28147_89(const GOST_28147_89_Params& param) : m_SBOX(1024)
{
// Convert the parallel 4x4 sboxes into larger word-based sboxes
- for(size_t i = 0; i != 4; ++i)
- for(size_t j = 0; j != 256; ++j)
- {
- const uint32_t T = (param.sbox_entry(2*i , j % 16)) |
- (param.sbox_entry(2*i+1, j / 16) << 4);
- m_SBOX[256*i+j] = rotate_left(T, (11+8*i) % 32);
- }
+
+ for(size_t i = 0; i != 256; ++i)
+ {
+ m_SBOX[i ] = rotl<11, uint32_t>(param.sbox_pair(0, i));
+ m_SBOX[i+256] = rotl<19, uint32_t>(param.sbox_pair(1, i));
+ m_SBOX[i+512] = rotl<27, uint32_t>(param.sbox_pair(2, i));
+ m_SBOX[i+768] = rotl< 3, uint32_t>(param.sbox_pair(3, i));
+ }
}
std::string GOST_28147_89::name() const
diff --git a/src/lib/block/gost_28147/gost_28147.h b/src/lib/block/gost_28147/gost_28147.h
index 09581191e..34e45779b 100644
--- a/src/lib/block/gost_28147/gost_28147.h
+++ b/src/lib/block/gost_28147/gost_28147.h
@@ -34,6 +34,12 @@ class BOTAN_PUBLIC_API(2,0) GOST_28147_89_Params final
std::string param_name() const { return m_name; }
/**
+ * Return a representation used for building larger tables
+ * For internal use
+ */
+ uint8_t sbox_pair(size_t row, size_t col) const;
+
+ /**
* Default GOST parameters are the ones given in GOST R 34.11 for
* testing purposes; these sboxes are also used by Crypto++, and,
* at least according to Wikipedia, the Central Bank of Russian
diff --git a/src/lib/block/kasumi/kasumi.cpp b/src/lib/block/kasumi/kasumi.cpp
index ed2524e0b..a9b5d8274 100644
--- a/src/lib/block/kasumi/kasumi.cpp
+++ b/src/lib/block/kasumi/kasumi.cpp
@@ -121,8 +121,8 @@ void KASUMI::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint16_t* K = &m_EK[8*j];
- uint16_t R = B1 ^ (rotate_left(B0, 1) & K[0]);
- uint16_t L = B0 ^ (rotate_left(R, 1) | K[1]);
+ uint16_t R = B1 ^ (rotl<1>(B0) & K[0]);
+ uint16_t L = B0 ^ (rotl<1>(R) | K[1]);
L = FI(L ^ K[ 2], K[ 3]) ^ R;
R = FI(R ^ K[ 4], K[ 5]) ^ L;
@@ -135,8 +135,8 @@ void KASUMI::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
L = FI(L ^ K[12], K[13]) ^ R;
R = FI(R ^ K[14], K[15]) ^ L;
- R ^= (rotate_left(L, 1) & K[8]);
- L ^= (rotate_left(R, 1) | K[9]);
+ R ^= (rotl<1>(L) & K[8]);
+ L ^= (rotl<1>(R) | K[9]);
B0 ^= L;
B1 ^= R;
@@ -171,14 +171,14 @@ void KASUMI::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
R = FI(R ^ K[12], K[13]) ^ L;
L = FI(L ^ K[14], K[15]) ^ R;
- L ^= (rotate_left(R, 1) & K[8]);
- R ^= (rotate_left(L, 1) | K[9]);
+ L ^= (rotl<1>(R) & K[8]);
+ R ^= (rotl<1>(L) | K[9]);
R = B0 ^= R;
L = B1 ^= L;
- L ^= (rotate_left(R, 1) & K[0]);
- R ^= (rotate_left(L, 1) | K[1]);
+ L ^= (rotl<1>(R) & K[0]);
+ R ^= (rotl<1>(L) | K[1]);
R = FI(R ^ K[2], K[3]) ^ L;
L = FI(L ^ K[4], K[5]) ^ R;
@@ -214,13 +214,13 @@ void KASUMI::key_schedule(const uint8_t key[], size_t)
for(size_t i = 0; i != 8; ++i)
{
- m_EK[8*i ] = rotate_left(K[(i+0) % 8 ], 2);
- m_EK[8*i+1] = rotate_left(K[(i+2) % 8 + 8], 1);
- m_EK[8*i+2] = rotate_left(K[(i+1) % 8 ], 5);
+ m_EK[8*i ] = rotl<2>(K[(i+0) % 8]);
+ m_EK[8*i+1] = rotl<1>(K[(i+2) % 8 + 8]);
+ m_EK[8*i+2] = rotl<5>(K[(i+1) % 8]);
m_EK[8*i+3] = K[(i+4) % 8 + 8];
- m_EK[8*i+4] = rotate_left(K[(i+5) % 8 ], 8);
+ m_EK[8*i+4] = rotl<8>(K[(i+5) % 8]);
m_EK[8*i+5] = K[(i+3) % 8 + 8];
- m_EK[8*i+6] = rotate_left(K[(i+6) % 8 ], 13);
+ m_EK[8*i+6] = rotl<13>(K[(i+6) % 8]);
m_EK[8*i+7] = K[(i+7) % 8 + 8];
}
}
diff --git a/src/lib/block/noekeon/noekeon.cpp b/src/lib/block/noekeon/noekeon.cpp
index c82badd4c..a7f60a0fd 100644
--- a/src/lib/block/noekeon/noekeon.cpp
+++ b/src/lib/block/noekeon/noekeon.cpp
@@ -21,7 +21,7 @@ inline void theta(uint32_t& A0, uint32_t& A1,
const uint32_t EK[4])
{
uint32_t T = A0 ^ A2;
- T ^= rotate_left(T, 8) ^ rotate_right(T, 8);
+ T ^= rotl<8>(T) ^ rotr<8>(T);
A1 ^= T;
A3 ^= T;
@@ -31,7 +31,7 @@ inline void theta(uint32_t& A0, uint32_t& A1,
A3 ^= EK[3];
T = A1 ^ A3;
- T ^= rotate_left(T, 8) ^ rotate_right(T, 8);
+ T ^= rotl<8>(T) ^ rotr<8>(T);
A0 ^= T;
A2 ^= T;
}
@@ -43,12 +43,12 @@ inline void theta(uint32_t& A0, uint32_t& A1,
uint32_t& A2, uint32_t& A3)
{
uint32_t T = A0 ^ A2;
- T ^= rotate_left(T, 8) ^ rotate_right(T, 8);
+ T ^= rotl<8>(T) ^ rotr<8>(T);
A1 ^= T;
A3 ^= T;
T = A1 ^ A3;
- T ^= rotate_left(T, 8) ^ rotate_right(T, 8);
+ T ^= rotl<8>(T) ^ rotr<8>(T);
A0 ^= T;
A2 ^= T;
}
@@ -135,15 +135,15 @@ void Noekeon::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
A0 ^= RC[j];
theta(A0, A1, A2, A3, m_EK.data());
- A1 = rotate_left(A1, 1);
- A2 = rotate_left(A2, 5);
- A3 = rotate_left(A3, 2);
+ A1 = rotl<1>(A1);
+ A2 = rotl<5>(A2);
+ A3 = rotl<2>(A3);
gamma(A0, A1, A2, A3);
- A1 = rotate_right(A1, 1);
- A2 = rotate_right(A2, 5);
- A3 = rotate_right(A3, 2);
+ A1 = rotr<1>(A1);
+ A2 = rotr<5>(A2);
+ A3 = rotr<2>(A3);
}
A0 ^= RC[16];
@@ -186,15 +186,15 @@ void Noekeon::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
theta(A0, A1, A2, A3, m_DK.data());
A0 ^= RC[j];
- A1 = rotate_left(A1, 1);
- A2 = rotate_left(A2, 5);
- A3 = rotate_left(A3, 2);
+ A1 = rotl<1>(A1);
+ A2 = rotl<5>(A2);
+ A3 = rotl<2>(A3);
gamma(A0, A1, A2, A3);
- A1 = rotate_right(A1, 1);
- A2 = rotate_right(A2, 5);
- A3 = rotate_right(A3, 2);
+ A1 = rotr<1>(A1);
+ A2 = rotr<5>(A2);
+ A3 = rotr<2>(A3);
}
theta(A0, A1, A2, A3, m_DK.data());
@@ -222,15 +222,15 @@ void Noekeon::key_schedule(const uint8_t key[], size_t)
A0 ^= RC[i];
theta(A0, A1, A2, A3);
- A1 = rotate_left(A1, 1);
- A2 = rotate_left(A2, 5);
- A3 = rotate_left(A3, 2);
+ A1 = rotl<1>(A1);
+ A2 = rotl<5>(A2);
+ A3 = rotl<2>(A3);
gamma(A0, A1, A2, A3);
- A1 = rotate_right(A1, 1);
- A2 = rotate_right(A2, 5);
- A3 = rotate_right(A3, 2);
+ A1 = rotr<1>(A1);
+ A2 = rotr<5>(A2);
+ A3 = rotr<2>(A3);
}
A0 ^= RC[16];
diff --git a/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp b/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp
index a77ba7b8c..f9a696d29 100644
--- a/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp
+++ b/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp
@@ -16,12 +16,7 @@ namespace Botan {
#define NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3) \
do { \
SIMD_32 T = A0 ^ A2; \
- SIMD_32 T_l8 = T; \
- SIMD_32 T_r8 = T; \
- T_l8.rotate_left(8); \
- T_r8.rotate_right(8); \
- T ^= T_l8; \
- T ^= T_r8; \
+ T ^= T.rotl<8>() ^ T.rotr<8>(); \
A1 ^= T; \
A3 ^= T; \
\
@@ -31,12 +26,7 @@ namespace Botan {
A3 ^= K3; \
\
T = A1 ^ A3; \
- T_l8 = T; \
- T_r8 = T; \
- T_l8.rotate_left(8); \
- T_r8.rotate_right(8); \
- T ^= T_l8; \
- T ^= T_r8; \
+ T ^= T.rotl<8>() ^ T.rotr<8>(); \
A0 ^= T; \
A2 ^= T; \
} while(0)
@@ -83,15 +73,15 @@ void Noekeon::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const
NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3);
- A1.rotate_left(1);
- A2.rotate_left(5);
- A3.rotate_left(2);
+ A1 = A1.rotl<1>();
+ A2 = A2.rotl<5>();
+ A3 = A3.rotl<2>();
NOK_SIMD_GAMMA(A0, A1, A2, A3);
- A1.rotate_right(1);
- A2.rotate_right(5);
- A3.rotate_right(2);
+ A1 = A1.rotr<1>();
+ A2 = A2.rotr<5>();
+ A3 = A3.rotr<2>();
}
A0 ^= SIMD_32::splat(RC[16]);
@@ -128,15 +118,15 @@ void Noekeon::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const
A0 ^= SIMD_32::splat(RC[16-i]);
- A1.rotate_left(1);
- A2.rotate_left(5);
- A3.rotate_left(2);
+ A1 = A1.rotl<1>();
+ A2 = A2.rotl<5>();
+ A3 = A3.rotl<2>();
NOK_SIMD_GAMMA(A0, A1, A2, A3);
- A1.rotate_right(1);
- A2.rotate_right(5);
- A3.rotate_right(2);
+ A1 = A1.rotr<1>();
+ A2 = A2.rotr<5>();
+ A3 = A3.rotr<2>();
}
NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3);
diff --git a/src/lib/block/serpent/serpent.cpp b/src/lib/block/serpent/serpent.cpp
index 93af81231..6e1d79766 100644
--- a/src/lib/block/serpent/serpent.cpp
+++ b/src/lib/block/serpent/serpent.cpp
@@ -22,11 +22,11 @@ namespace {
*/
inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
{
- B0 = rotate_left(B0, 13); B2 = rotate_left(B2, 3);
- B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3);
- B1 = rotate_left(B1, 1); B3 = rotate_left(B3, 7);
- B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7);
- B0 = rotate_left(B0, 5); B2 = rotate_left(B2, 22);
+ B0 = rotl<13>(B0); B2 = rotl<3>(B2);
+ B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3);
+ B1 = rotl<1>(B1); B3 = rotl<7>(B3);
+ B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7);
+ B0 = rotl<5>(B0); B2 = rotl<22>(B2);
}
/*
@@ -34,11 +34,11 @@ inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
*/
inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
{
- B2 = rotate_right(B2, 22); B0 = rotate_right(B0, 5);
- B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3;
- B3 = rotate_right(B3, 7); B1 = rotate_right(B1, 1);
- B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2;
- B2 = rotate_right(B2, 3); B0 = rotate_right(B0, 13);
+ B2 = rotr<22>(B2); B0 = rotr<5>(B0);
+ B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3;
+ B3 = rotr<7>(B3); B1 = rotr<1>(B1);
+ B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2;
+ B2 = rotr<3>(B2); B0 = rotr<13>(B0);
}
}
@@ -192,7 +192,7 @@ void Serpent::key_schedule(const uint8_t key[], size_t length)
for(size_t i = 8; i != 140; ++i)
{
uint32_t wi = W[i-8] ^ W[i-5] ^ W[i-3] ^ W[i-1] ^ PHI ^ uint32_t(i-8);
- W[i] = rotate_left(wi, 11);
+ W[i] = rotl<11>(wi);
}
SBoxE1(W[ 20],W[ 21],W[ 22],W[ 23]);
diff --git a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
index 94b3cf9ad..b184b0d4a 100644
--- a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
+++ b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
@@ -24,30 +24,30 @@ namespace Botan {
*/
#define transform(B0, B1, B2, B3) \
do { \
- B0.rotate_left(13); \
- B2.rotate_left(3); \
+ B0 = B0.rotl<13>(); \
+ B2 = B2.rotl<3>(); \
B1 ^= B0 ^ B2; \
B3 ^= B2 ^ (B0 << 3); \
- B1.rotate_left(1); \
- B3.rotate_left(7); \
+ B1 = B1.rotl<1>(); \
+ B3 = B3.rotl<7>(); \
B0 ^= B1 ^ B3; \
B2 ^= B3 ^ (B1 << 7); \
- B0.rotate_left(5); \
- B2.rotate_left(22); \
+ B0 = B0.rotl<5>(); \
+ B2 = B2.rotl<22>(); \
} while(0);
#define i_transform(B0, B1, B2, B3) \
do { \
- B2.rotate_right(22); \
- B0.rotate_right(5); \
+ B2 = B2.rotr<22>(); \
+ B0 = B0.rotr<5>(); \
B2 ^= B3 ^ (B1 << 7); \
B0 ^= B1 ^ B3; \
- B3.rotate_right(7); \
- B1.rotate_right(1); \
+ B3 = B3.rotr<7>(); \
+ B1 = B1.rotr<1>(); \
B3 ^= B2 ^ (B0 << 3); \
B1 ^= B0 ^ B2; \
- B2.rotate_right(3); \
- B0.rotate_right(13); \
+ B2 = B2.rotr<3>(); \
+ B0 = B0.rotr<13>(); \
} while(0);
/*
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp
index 30ad711db..12c87c426 100644
--- a/src/lib/block/shacal2/shacal2.cpp
+++ b/src/lib/block/shacal2/shacal2.cpp
@@ -17,8 +17,8 @@ inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
uint32_t E, uint32_t F, uint32_t G, uint32_t& H,
uint32_t RK)
{
- const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22);
- const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25);
+ const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);
+ const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);
H += E_rho + ((E & F) ^ (~E & G)) + RK;
D += H;
@@ -29,8 +29,8 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
uint32_t E, uint32_t F, uint32_t G, uint32_t& H,
uint32_t RK)
{
- const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22);
- const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25);
+ const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);
+ const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);
H -= A_rho + ((A & B) | ((A | B) & C));
D -= H;
@@ -175,8 +175,8 @@ void SHACAL2::key_schedule(const uint8_t key[], size_t len)
for(size_t i = 16; i != 64; ++i)
{
- const uint32_t sigma0_15 = rotate_right(m_RK[i-15], 7) ^ rotate_right(m_RK[i-15], 18) ^ (m_RK[i-15] >> 3);
- const uint32_t sigma1_2 = rotate_right(m_RK[i-2], 17) ^ rotate_right(m_RK[i-2], 19) ^ (m_RK[i-2] >> 10);
+ const uint32_t sigma0_15 = rotr<7>(m_RK[i-15]) ^ rotr<18>(m_RK[i-15]) ^ (m_RK[i-15] >> 3);
+ const uint32_t sigma1_2 = rotr<17>(m_RK[i-2]) ^ rotr<19>(m_RK[i-2]) ^ (m_RK[i-2] >> 10);
m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2;
}
diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
index a4324c8fb..bdcac1482 100644
--- a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
+++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
@@ -17,9 +17,9 @@ void SHACAL2_Fwd(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32&
const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H,
uint32_t RK)
{
- H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+ H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
D += H;
- H += A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+ H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
}
inline
@@ -27,9 +27,9 @@ void SHACAL2_Rev(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32&
const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H,
uint32_t RK)
{
- H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+ H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
D -= H;
- H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+ H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
}
}
diff --git a/src/lib/block/sm4/sm4.cpp b/src/lib/block/sm4/sm4.cpp
index 979491566..42c865faf 100644
--- a/src/lib/block/sm4/sm4.cpp
+++ b/src/lib/block/sm4/sm4.cpp
@@ -46,7 +46,7 @@ inline uint32_t T(uint32_t b)
const uint32_t t = make_uint32(SBOX[b0], SBOX[b1], SBOX[b2], SBOX[b3]);
// L linear transform
- return t ^ rotate_left(t, 2) ^ rotate_left(t, 10) ^ rotate_left(t, 18) ^ rotate_left(t, 24);
+ return t ^ rotl<2>(t) ^ rotl<10>(t) ^ rotl<18>(t) ^ rotl<24>(t);
}
// Variant of T for key schedule
@@ -59,7 +59,7 @@ inline uint32_t Tp(uint32_t b)
const uint32_t t = make_uint32(SBOX[b0], SBOX[b1], SBOX[b2], SBOX[b3]);
// L' linear transform
- return t ^ rotate_left(t, 13) ^ rotate_left(t, 23);
+ return t ^ rotl<13>(t) ^ rotl<23>(t);
}
}
diff --git a/src/lib/block/threefish/threefish.cpp b/src/lib/block/threefish/threefish.cpp
index 99ce135d5..937a673fd 100644
--- a/src/lib/block/threefish/threefish.cpp
+++ b/src/lib/block/threefish/threefish.cpp
@@ -17,10 +17,10 @@ namespace Botan {
X1 += X5; \
X2 += X6; \
X3 += X7; \
- X4 = rotate_left(X4, ROT1); \
- X5 = rotate_left(X5, ROT2); \
- X6 = rotate_left(X6, ROT3); \
- X7 = rotate_left(X7, ROT4); \
+ X4 = rotl<ROT1>(X4); \
+ X5 = rotl<ROT2>(X5); \
+ X6 = rotl<ROT3>(X6); \
+ X7 = rotl<ROT4>(X7); \
X4 ^= X0; \
X5 ^= X1; \
X6 ^= X2; \
@@ -177,10 +177,10 @@ void Threefish_512::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
X5 ^= X1; \
X6 ^= X2; \
X7 ^= X3; \
- X4 = rotate_right(X4, ROT1); \
- X5 = rotate_right(X5, ROT2); \
- X6 = rotate_right(X6, ROT3); \
- X7 = rotate_right(X7, ROT4); \
+ X4 = rotr<ROT1>(X4); \
+ X5 = rotr<ROT2>(X5); \
+ X6 = rotr<ROT3>(X6); \
+ X7 = rotr<ROT4>(X7); \
X0 -= X4; \
X1 -= X5; \
X2 -= X6; \
diff --git a/src/lib/block/twofish/twofish.cpp b/src/lib/block/twofish/twofish.cpp
index 51ef01ea9..3a09af8da 100644
--- a/src/lib/block/twofish/twofish.cpp
+++ b/src/lib/block/twofish/twofish.cpp
@@ -41,8 +41,8 @@ void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
Y += X + m_RK[2*j + 9];
X += m_RK[2*j + 8];
- C = rotate_right(C ^ X, 1);
- D = rotate_left(D, 1) ^ Y;
+ C = rotr<1>(C ^ X);
+ D = rotl<1>(D) ^ Y;
X = m_SB[ get_byte(3, C)] ^ m_SB[256+get_byte(2, C)] ^
m_SB[512+get_byte(1, C)] ^ m_SB[768+get_byte(0, C)];
@@ -52,8 +52,8 @@ void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
Y += X + m_RK[2*j + 11];
X += m_RK[2*j + 10];
- A = rotate_right(A ^ X, 1);
- B = rotate_left(B, 1) ^ Y;
+ A = rotr<1>(A ^ X);
+ B = rotl<1>(B) ^ Y;
}
C ^= m_RK[4];
@@ -92,8 +92,8 @@ void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
Y += X + m_RK[39 - 2*j];
X += m_RK[38 - 2*j];
- C = rotate_left(C, 1) ^ X;
- D = rotate_right(D ^ Y, 1);
+ C = rotl<1>(C) ^ X;
+ D = rotr<1>(D ^ Y);
X = m_SB[ get_byte(3, C)] ^ m_SB[256+get_byte(2, C)] ^
m_SB[512+get_byte(1, C)] ^ m_SB[768+get_byte(0, C)];
@@ -103,8 +103,8 @@ void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
Y += X + m_RK[37 - 2*j];
X += m_RK[36 - 2*j];
- A = rotate_left(A, 1) ^ X;
- B = rotate_right(B ^ Y, 1);
+ A = rotl<1>(A) ^ X;
+ B = rotr<1>(B ^ Y);
}
C ^= m_RK[0];
@@ -167,11 +167,11 @@ void Twofish::key_schedule(const uint8_t key[], size_t length)
MDS1[Q0[Q1[i+1]^key[13]]^key[ 5]] ^
MDS2[Q1[Q0[i+1]^key[14]]^key[ 6]] ^
MDS3[Q1[Q1[i+1]^key[15]]^key[ 7]];
- Y = rotate_left(Y, 8);
+ Y = rotl<8>(Y);
X += Y; Y += X;
m_RK[i] = X;
- m_RK[i+1] = rotate_left(Y, 9);
+ m_RK[i+1] = rotl<9>(Y);
}
}
else if(length == 24)
@@ -194,11 +194,11 @@ void Twofish::key_schedule(const uint8_t key[], size_t length)
MDS1[Q0[Q1[Q1[i+1]^key[21]]^key[13]]^key[ 5]] ^
MDS2[Q1[Q0[Q0[i+1]^key[22]]^key[14]]^key[ 6]] ^
MDS3[Q1[Q1[Q0[i+1]^key[23]]^key[15]]^key[ 7]];
- Y = rotate_left(Y, 8);
+ Y = rotl<8>(Y);
X += Y; Y += X;
m_RK[i] = X;
- m_RK[i+1] = rotate_left(Y, 9);
+ m_RK[i+1] = rotl<9>(Y);
}
}
else if(length == 32)
@@ -221,11 +221,11 @@ void Twofish::key_schedule(const uint8_t key[], size_t length)
MDS1[Q0[Q1[Q1[Q0[i+1]^key[29]]^key[21]]^key[13]]^key[ 5]] ^
MDS2[Q1[Q0[Q0[Q0[i+1]^key[30]]^key[22]]^key[14]]^key[ 6]] ^
MDS3[Q1[Q1[Q0[Q1[i+1]^key[31]]^key[23]]^key[15]]^key[ 7]];
- Y = rotate_left(Y, 8);
+ Y = rotl<8>(Y);
X += Y; Y += X;
m_RK[i] = X;
- m_RK[i+1] = rotate_left(Y, 9);
+ m_RK[i+1] = rotl<9>(Y);
}
}
}
diff --git a/src/lib/hash/blake2/blake2b.cpp b/src/lib/hash/blake2/blake2b.cpp
index 85171b16b..79a30de3d 100644
--- a/src/lib/hash/blake2/blake2b.cpp
+++ b/src/lib/hash/blake2/blake2b.cpp
@@ -93,13 +93,13 @@ void Blake2b::compress(bool lastblock)
#define G(r, i, a, b, c, d) \
do { \
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
- d = rotate_right<uint64_t>(d ^ a, 32); \
+ d = rotr<32>(d ^ a); \
c = c + d; \
- b = rotate_right<uint64_t>(b ^ c, 24); \
+ b = rotr<24>(b ^ c); \
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
- d = rotate_right<uint64_t>(d ^ a, 16); \
+ d = rotr<16>(d ^ a); \
c = c + d; \
- b = rotate_right<uint64_t>(b ^ c, 63); \
+ b = rotr<63>(b ^ c); \
} while(0)
#define ROUND(r) \
diff --git a/src/lib/hash/md4/md4.cpp b/src/lib/hash/md4/md4.cpp
index 79f3a2d13..1c048a6b3 100644
--- a/src/lib/hash/md4/md4.cpp
+++ b/src/lib/hash/md4/md4.cpp
@@ -16,31 +16,55 @@ std::unique_ptr<HashFunction> MD4::copy_state() const
namespace {
-/*
-* MD4 FF Function
-*/
-inline void FF(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint8_t S)
+inline void FF4(uint32_t& A, uint32_t& B, uint32_t& C, uint32_t& D,
+ uint32_t M0, uint32_t M1, uint32_t M2, uint32_t M3)
+
{
- A += (D ^ (B & (C ^ D))) + M;
- A = rotate_left(A, S);
+ A += (D ^ (B & (C ^ D))) + M0;
+ A = rotl<3>(A);
+
+ D += (C ^ (A & (B ^ C))) + M1;
+ D = rotl<7>(D);
+
+ C += (B ^ (D & (A ^ B))) + M2;
+ C = rotl<11>(C);
+
+ B += (A ^ (C & (D ^ A))) + M3;
+ B = rotl<19>(B);
}
-/*
-* MD4 GG Function
-*/
-inline void GG(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint8_t S)
+inline void GG4(uint32_t& A, uint32_t& B, uint32_t& C, uint32_t& D,
+ uint32_t M0, uint32_t M1, uint32_t M2, uint32_t M3)
+
{
- A += ((B & C) | (D & (B | C))) + M + 0x5A827999;
- A = rotate_left(A, S);
+ A += ((B & C) | (D & (B | C))) + M0 + 0x5A827999;
+ A = rotl<3>(A);
+
+ D += ((A & B) | (C & (A | B))) + M1 + 0x5A827999;
+ D = rotl<5>(D);
+
+ C += ((D & A) | (B & (D | A))) + M2 + 0x5A827999;
+ C = rotl<9>(C);
+
+ B += ((C & D) | (A & (C | D))) + M3 + 0x5A827999;
+ B = rotl<13>(B);
}
-/*
-* MD4 HH Function
-*/
-inline void HH(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint8_t S)
+inline void HH4(uint32_t& A, uint32_t& B, uint32_t& C, uint32_t& D,
+ uint32_t M0, uint32_t M1, uint32_t M2, uint32_t M3)
+
{
- A += (B ^ C ^ D) + M + 0x6ED9EBA1;
- A = rotate_left(A, S);
+ A += (B ^ C ^ D) + M0 + 0x6ED9EBA1;
+ A = rotl<3>(A);
+
+ D += (A ^ B ^ C) + M1 + 0x6ED9EBA1;
+ D = rotl<9>(D);
+
+ C += (A ^ B ^ D) + M2 + 0x6ED9EBA1;
+ C = rotl<11>(C);
+
+ B += (A ^ C ^ D) + M3 + 0x6ED9EBA1;
+ B = rotl<15>(B);
}
}
@@ -54,34 +78,37 @@ void MD4::compress_n(const uint8_t input[], size_t blocks)
for(size_t i = 0; i != blocks; ++i)
{
- load_le(m_M.data(), input, m_M.size());
-
- FF(A,B,C,D,m_M[ 0], 3); FF(D,A,B,C,m_M[ 1], 7);
- FF(C,D,A,B,m_M[ 2],11); FF(B,C,D,A,m_M[ 3],19);
- FF(A,B,C,D,m_M[ 4], 3); FF(D,A,B,C,m_M[ 5], 7);
- FF(C,D,A,B,m_M[ 6],11); FF(B,C,D,A,m_M[ 7],19);
- FF(A,B,C,D,m_M[ 8], 3); FF(D,A,B,C,m_M[ 9], 7);
- FF(C,D,A,B,m_M[10],11); FF(B,C,D,A,m_M[11],19);
- FF(A,B,C,D,m_M[12], 3); FF(D,A,B,C,m_M[13], 7);
- FF(C,D,A,B,m_M[14],11); FF(B,C,D,A,m_M[15],19);
-
- GG(A,B,C,D,m_M[ 0], 3); GG(D,A,B,C,m_M[ 4], 5);
- GG(C,D,A,B,m_M[ 8], 9); GG(B,C,D,A,m_M[12],13);
- GG(A,B,C,D,m_M[ 1], 3); GG(D,A,B,C,m_M[ 5], 5);
- GG(C,D,A,B,m_M[ 9], 9); GG(B,C,D,A,m_M[13],13);
- GG(A,B,C,D,m_M[ 2], 3); GG(D,A,B,C,m_M[ 6], 5);
- GG(C,D,A,B,m_M[10], 9); GG(B,C,D,A,m_M[14],13);
- GG(A,B,C,D,m_M[ 3], 3); GG(D,A,B,C,m_M[ 7], 5);
- GG(C,D,A,B,m_M[11], 9); GG(B,C,D,A,m_M[15],13);
-
- HH(A,B,C,D,m_M[ 0], 3); HH(D,A,B,C,m_M[ 8], 9);
- HH(C,D,A,B,m_M[ 4],11); HH(B,C,D,A,m_M[12],15);
- HH(A,B,C,D,m_M[ 2], 3); HH(D,A,B,C,m_M[10], 9);
- HH(C,D,A,B,m_M[ 6],11); HH(B,C,D,A,m_M[14],15);
- HH(A,B,C,D,m_M[ 1], 3); HH(D,A,B,C,m_M[ 9], 9);
- HH(C,D,A,B,m_M[ 5],11); HH(B,C,D,A,m_M[13],15);
- HH(A,B,C,D,m_M[ 3], 3); HH(D,A,B,C,m_M[11], 9);
- HH(C,D,A,B,m_M[ 7],11); HH(B,C,D,A,m_M[15],15);
+ uint32_t M00 = load_le<uint32_t>(input, 0);
+ uint32_t M01 = load_le<uint32_t>(input, 1);
+ uint32_t M02 = load_le<uint32_t>(input, 2);
+ uint32_t M03 = load_le<uint32_t>(input, 3);
+ uint32_t M04 = load_le<uint32_t>(input, 4);
+ uint32_t M05 = load_le<uint32_t>(input, 5);
+ uint32_t M06 = load_le<uint32_t>(input, 6);
+ uint32_t M07 = load_le<uint32_t>(input, 7);
+ uint32_t M08 = load_le<uint32_t>(input, 8);
+ uint32_t M09 = load_le<uint32_t>(input, 9);
+ uint32_t M10 = load_le<uint32_t>(input, 10);
+ uint32_t M11 = load_le<uint32_t>(input, 11);
+ uint32_t M12 = load_le<uint32_t>(input, 12);
+ uint32_t M13 = load_le<uint32_t>(input, 13);
+ uint32_t M14 = load_le<uint32_t>(input, 14);
+ uint32_t M15 = load_le<uint32_t>(input, 15);
+
+ FF4(A, B, C, D, M00, M01, M02, M03);
+ FF4(A, B, C, D, M04, M05, M06, M07);
+ FF4(A, B, C, D, M08, M09, M10, M11);
+ FF4(A, B, C, D, M12, M13, M14, M15);
+
+ GG4(A, B, C, D, M00, M04, M08, M12);
+ GG4(A, B, C, D, M01, M05, M09, M13);
+ GG4(A, B, C, D, M02, M06, M10, M14);
+ GG4(A, B, C, D, M03, M07, M11, M15);
+
+ HH4(A, B, C, D, M00, M08, M04, M12);
+ HH4(A, B, C, D, M02, M10, M06, M14);
+ HH4(A, B, C, D, M01, M09, M05, M13);
+ HH4(A, B, C, D, M03, M11, M07, M15);
A = (m_digest[0] += A);
B = (m_digest[1] += B);
@@ -106,7 +133,6 @@ void MD4::copy_out(uint8_t output[])
void MD4::clear()
{
MDx_HashFunction::clear();
- zeroise(m_M);
m_digest[0] = 0x67452301;
m_digest[1] = 0xEFCDAB89;
m_digest[2] = 0x98BADCFE;
diff --git a/src/lib/hash/md4/md4.h b/src/lib/hash/md4/md4.h
index c51cb1682..1dd857bce 100644
--- a/src/lib/hash/md4/md4.h
+++ b/src/lib/hash/md4/md4.h
@@ -25,7 +25,7 @@ class BOTAN_PUBLIC_API(2,0) MD4 final : public MDx_HashFunction
void clear() override;
- MD4() : MDx_HashFunction(64, false, true), m_M(16), m_digest(4)
+ MD4() : MDx_HashFunction(64, false, true), m_digest(4)
{ clear(); }
private:
@@ -33,11 +33,6 @@ class BOTAN_PUBLIC_API(2,0) MD4 final : public MDx_HashFunction
void copy_out(uint8_t[]) override;
/**
- * The message buffer
- */
- secure_vector<uint32_t> m_M;
-
- /**
* The digest value
*/
secure_vector<uint32_t> m_digest;
diff --git a/src/lib/hash/md5/md5.cpp b/src/lib/hash/md5/md5.cpp
index 174443a67..de75ab2d1 100644
--- a/src/lib/hash/md5/md5.cpp
+++ b/src/lib/hash/md5/md5.cpp
@@ -19,41 +19,41 @@ namespace {
/*
* MD5 FF Function
*/
-inline void FF(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg,
- uint8_t S, uint32_t magic)
+template<size_t S>
+inline void FF(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M)
{
- A += (D ^ (B & (C ^ D))) + msg + magic;
- A = rotate_left(A, S) + B;
+ A += (D ^ (B & (C ^ D))) + M;
+ A = rotl<S>(A) + B;
}
/*
* MD5 GG Function
*/
-inline void GG(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg,
- uint8_t S, uint32_t magic)
+template<size_t S>
+inline void GG(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M)
{
- A += (C ^ (D & (B ^ C))) + msg + magic;
- A = rotate_left(A, S) + B;
+ A += (C ^ (D & (B ^ C))) + M;
+ A = rotl<S>(A) + B;
}
/*
* MD5 HH Function
*/
-inline void HH(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg,
- uint8_t S, uint32_t magic)
+template<size_t S>
+inline void HH(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M)
{
- A += (B ^ C ^ D) + msg + magic;
- A = rotate_left(A, S) + B;
+ A += (B ^ C ^ D) + M;
+ A = rotl<S>(A) + B;
}
/*
* MD5 II Function
*/
-inline void II(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg,
- uint8_t S, uint32_t magic)
+template<size_t S>
+inline void II(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M)
{
- A += (C ^ (B | ~D)) + msg + magic;
- A = rotate_left(A, S) + B;
+ A += (C ^ (B | ~D)) + M;
+ A = rotl<S>(A) + B;
}
}
@@ -69,41 +69,41 @@ void MD5::compress_n(const uint8_t input[], size_t blocks)
{
load_le(m_M.data(), input, m_M.size());
- FF(A,B,C,D,m_M[ 0], 7,0xD76AA478); FF(D,A,B,C,m_M[ 1],12,0xE8C7B756);
- FF(C,D,A,B,m_M[ 2],17,0x242070DB); FF(B,C,D,A,m_M[ 3],22,0xC1BDCEEE);
- FF(A,B,C,D,m_M[ 4], 7,0xF57C0FAF); FF(D,A,B,C,m_M[ 5],12,0x4787C62A);
- FF(C,D,A,B,m_M[ 6],17,0xA8304613); FF(B,C,D,A,m_M[ 7],22,0xFD469501);
- FF(A,B,C,D,m_M[ 8], 7,0x698098D8); FF(D,A,B,C,m_M[ 9],12,0x8B44F7AF);
- FF(C,D,A,B,m_M[10],17,0xFFFF5BB1); FF(B,C,D,A,m_M[11],22,0x895CD7BE);
- FF(A,B,C,D,m_M[12], 7,0x6B901122); FF(D,A,B,C,m_M[13],12,0xFD987193);
- FF(C,D,A,B,m_M[14],17,0xA679438E); FF(B,C,D,A,m_M[15],22,0x49B40821);
-
- GG(A,B,C,D,m_M[ 1], 5,0xF61E2562); GG(D,A,B,C,m_M[ 6], 9,0xC040B340);
- GG(C,D,A,B,m_M[11],14,0x265E5A51); GG(B,C,D,A,m_M[ 0],20,0xE9B6C7AA);
- GG(A,B,C,D,m_M[ 5], 5,0xD62F105D); GG(D,A,B,C,m_M[10], 9,0x02441453);
- GG(C,D,A,B,m_M[15],14,0xD8A1E681); GG(B,C,D,A,m_M[ 4],20,0xE7D3FBC8);
- GG(A,B,C,D,m_M[ 9], 5,0x21E1CDE6); GG(D,A,B,C,m_M[14], 9,0xC33707D6);
- GG(C,D,A,B,m_M[ 3],14,0xF4D50D87); GG(B,C,D,A,m_M[ 8],20,0x455A14ED);
- GG(A,B,C,D,m_M[13], 5,0xA9E3E905); GG(D,A,B,C,m_M[ 2], 9,0xFCEFA3F8);
- GG(C,D,A,B,m_M[ 7],14,0x676F02D9); GG(B,C,D,A,m_M[12],20,0x8D2A4C8A);
-
- HH(A,B,C,D,m_M[ 5], 4,0xFFFA3942); HH(D,A,B,C,m_M[ 8],11,0x8771F681);
- HH(C,D,A,B,m_M[11],16,0x6D9D6122); HH(B,C,D,A,m_M[14],23,0xFDE5380C);
- HH(A,B,C,D,m_M[ 1], 4,0xA4BEEA44); HH(D,A,B,C,m_M[ 4],11,0x4BDECFA9);
- HH(C,D,A,B,m_M[ 7],16,0xF6BB4B60); HH(B,C,D,A,m_M[10],23,0xBEBFBC70);
- HH(A,B,C,D,m_M[13], 4,0x289B7EC6); HH(D,A,B,C,m_M[ 0],11,0xEAA127FA);
- HH(C,D,A,B,m_M[ 3],16,0xD4EF3085); HH(B,C,D,A,m_M[ 6],23,0x04881D05);
- HH(A,B,C,D,m_M[ 9], 4,0xD9D4D039); HH(D,A,B,C,m_M[12],11,0xE6DB99E5);
- HH(C,D,A,B,m_M[15],16,0x1FA27CF8); HH(B,C,D,A,m_M[ 2],23,0xC4AC5665);
-
- II(A,B,C,D,m_M[ 0], 6,0xF4292244); II(D,A,B,C,m_M[ 7],10,0x432AFF97);
- II(C,D,A,B,m_M[14],15,0xAB9423A7); II(B,C,D,A,m_M[ 5],21,0xFC93A039);
- II(A,B,C,D,m_M[12], 6,0x655B59C3); II(D,A,B,C,m_M[ 3],10,0x8F0CCC92);
- II(C,D,A,B,m_M[10],15,0xFFEFF47D); II(B,C,D,A,m_M[ 1],21,0x85845DD1);
- II(A,B,C,D,m_M[ 8], 6,0x6FA87E4F); II(D,A,B,C,m_M[15],10,0xFE2CE6E0);
- II(C,D,A,B,m_M[ 6],15,0xA3014314); II(B,C,D,A,m_M[13],21,0x4E0811A1);
- II(A,B,C,D,m_M[ 4], 6,0xF7537E82); II(D,A,B,C,m_M[11],10,0xBD3AF235);
- II(C,D,A,B,m_M[ 2],15,0x2AD7D2BB); II(B,C,D,A,m_M[ 9],21,0xEB86D391);
+ FF< 7>(A,B,C,D,m_M[ 0]+0xD76AA478); FF<12>(D,A,B,C,m_M[ 1]+0xE8C7B756);
+ FF<17>(C,D,A,B,m_M[ 2]+0x242070DB); FF<22>(B,C,D,A,m_M[ 3]+0xC1BDCEEE);
+ FF< 7>(A,B,C,D,m_M[ 4]+0xF57C0FAF); FF<12>(D,A,B,C,m_M[ 5]+0x4787C62A);
+ FF<17>(C,D,A,B,m_M[ 6]+0xA8304613); FF<22>(B,C,D,A,m_M[ 7]+0xFD469501);
+ FF< 7>(A,B,C,D,m_M[ 8]+0x698098D8); FF<12>(D,A,B,C,m_M[ 9]+0x8B44F7AF);
+ FF<17>(C,D,A,B,m_M[10]+0xFFFF5BB1); FF<22>(B,C,D,A,m_M[11]+0x895CD7BE);
+ FF< 7>(A,B,C,D,m_M[12]+0x6B901122); FF<12>(D,A,B,C,m_M[13]+0xFD987193);
+ FF<17>(C,D,A,B,m_M[14]+0xA679438E); FF<22>(B,C,D,A,m_M[15]+0x49B40821);
+
+ GG< 5>(A,B,C,D,m_M[ 1]+0xF61E2562); GG< 9>(D,A,B,C,m_M[ 6]+0xC040B340);
+ GG<14>(C,D,A,B,m_M[11]+0x265E5A51); GG<20>(B,C,D,A,m_M[ 0]+0xE9B6C7AA);
+ GG< 5>(A,B,C,D,m_M[ 5]+0xD62F105D); GG< 9>(D,A,B,C,m_M[10]+0x02441453);
+ GG<14>(C,D,A,B,m_M[15]+0xD8A1E681); GG<20>(B,C,D,A,m_M[ 4]+0xE7D3FBC8);
+ GG< 5>(A,B,C,D,m_M[ 9]+0x21E1CDE6); GG< 9>(D,A,B,C,m_M[14]+0xC33707D6);
+ GG<14>(C,D,A,B,m_M[ 3]+0xF4D50D87); GG<20>(B,C,D,A,m_M[ 8]+0x455A14ED);
+ GG< 5>(A,B,C,D,m_M[13]+0xA9E3E905); GG< 9>(D,A,B,C,m_M[ 2]+0xFCEFA3F8);
+ GG<14>(C,D,A,B,m_M[ 7]+0x676F02D9); GG<20>(B,C,D,A,m_M[12]+0x8D2A4C8A);
+
+ HH< 4>(A,B,C,D,m_M[ 5]+0xFFFA3942); HH<11>(D,A,B,C,m_M[ 8]+0x8771F681);
+ HH<16>(C,D,A,B,m_M[11]+0x6D9D6122); HH<23>(B,C,D,A,m_M[14]+0xFDE5380C);
+ HH< 4>(A,B,C,D,m_M[ 1]+0xA4BEEA44); HH<11>(D,A,B,C,m_M[ 4]+0x4BDECFA9);
+ HH<16>(C,D,A,B,m_M[ 7]+0xF6BB4B60); HH<23>(B,C,D,A,m_M[10]+0xBEBFBC70);
+ HH< 4>(A,B,C,D,m_M[13]+0x289B7EC6); HH<11>(D,A,B,C,m_M[ 0]+0xEAA127FA);
+ HH<16>(C,D,A,B,m_M[ 3]+0xD4EF3085); HH<23>(B,C,D,A,m_M[ 6]+0x04881D05);
+ HH< 4>(A,B,C,D,m_M[ 9]+0xD9D4D039); HH<11>(D,A,B,C,m_M[12]+0xE6DB99E5);
+ HH<16>(C,D,A,B,m_M[15]+0x1FA27CF8); HH<23>(B,C,D,A,m_M[ 2]+0xC4AC5665);
+
+ II< 6>(A,B,C,D,m_M[ 0]+0xF4292244); II<10>(D,A,B,C,m_M[ 7]+0x432AFF97);
+ II<15>(C,D,A,B,m_M[14]+0xAB9423A7); II<21>(B,C,D,A,m_M[ 5]+0xFC93A039);
+ II< 6>(A,B,C,D,m_M[12]+0x655B59C3); II<10>(D,A,B,C,m_M[ 3]+0x8F0CCC92);
+ II<15>(C,D,A,B,m_M[10]+0xFFEFF47D); II<21>(B,C,D,A,m_M[ 1]+0x85845DD1);
+ II< 6>(A,B,C,D,m_M[ 8]+0x6FA87E4F); II<10>(D,A,B,C,m_M[15]+0xFE2CE6E0);
+ II<15>(C,D,A,B,m_M[ 6]+0xA3014314); II<21>(B,C,D,A,m_M[13]+0x4E0811A1);
+ II< 6>(A,B,C,D,m_M[ 4]+0xF7537E82); II<10>(D,A,B,C,m_M[11]+0xBD3AF235);
+ II<15>(C,D,A,B,m_M[ 2]+0x2AD7D2BB); II<21>(B,C,D,A,m_M[ 9]+0xEB86D391);
A = (m_digest[0] += A);
B = (m_digest[1] += B);
diff --git a/src/lib/hash/rmd160/rmd160.cpp b/src/lib/hash/rmd160/rmd160.cpp
index 95f96c281..4c84ff422 100644
--- a/src/lib/hash/rmd160/rmd160.cpp
+++ b/src/lib/hash/rmd160/rmd160.cpp
@@ -19,56 +19,61 @@ namespace {
/*
* RIPEMD-160 F1 Function
*/
+template<size_t S>
inline void F1(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E,
- uint32_t msg, uint32_t shift)
+ uint32_t M)
{
- A += (B ^ C ^ D) + msg;
- A = rotate_left(A, shift) + E;
- C = rotate_left(C, 10);
+ A += (B ^ C ^ D) + M;
+ A = rotl<S>(A) + E;
+ C = rotl<10>(C);
}
/*
* RIPEMD-160 F2 Function
*/
+template<size_t S>
inline void F2(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E,
- uint32_t msg, uint32_t shift, uint32_t magic)
+ uint32_t M)
{
- A += (D ^ (B & (C ^ D))) + msg + magic;
- A = rotate_left(A, shift) + E;
- C = rotate_left(C, 10);
+ A += (D ^ (B & (C ^ D))) + M;
+ A = rotl<S>(A) + E;
+ C = rotl<10>(C);
}
/*
* RIPEMD-160 F3 Function
*/
+template<size_t S>
inline void F3(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E,
- uint32_t msg, uint32_t shift, uint32_t magic)
+ uint32_t M)
{
- A += (D ^ (B | ~C)) + msg + magic;
- A = rotate_left(A, shift) + E;
- C = rotate_left(C, 10);
+ A += (D ^ (B | ~C)) + M;
+ A = rotl<S>(A) + E;
+ C = rotl<10>(C);
}
/*
* RIPEMD-160 F4 Function
*/
+template<size_t S>
inline void F4(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E,
- uint32_t msg, uint32_t shift, uint32_t magic)
+ uint32_t M)
{
- A += (C ^ (D & (B ^ C))) + msg + magic;
- A = rotate_left(A, shift) + E;
- C = rotate_left(C, 10);
+ A += (C ^ (D & (B ^ C))) + M;
+ A = rotl<S>(A) + E;
+ C = rotl<10>(C);
}
/*
* RIPEMD-160 F5 Function
*/
+template<size_t S>
inline void F5(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E,
- uint32_t msg, uint32_t shift, uint32_t magic)
+ uint32_t M)
{
- A += (B ^ (C | ~D)) + msg + magic;
- A = rotate_left(A, shift) + E;
- C = rotate_left(C, 10);
+ A += (B ^ (C | ~D)) + M;
+ A = rotl<S>(A) + E;
+ C = rotl<10>(C);
}
}
@@ -79,102 +84,104 @@ inline void F5(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E,
void RIPEMD_160::compress_n(const uint8_t input[], size_t blocks)
{
const uint32_t MAGIC2 = 0x5A827999, MAGIC3 = 0x6ED9EBA1,
- MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0xA953FD4E,
- MAGIC6 = 0x50A28BE6, MAGIC7 = 0x5C4DD124,
- MAGIC8 = 0x6D703EF3, MAGIC9 = 0x7A6D76E9;
+ MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0xA953FD4E,
+ MAGIC6 = 0x50A28BE6, MAGIC7 = 0x5C4DD124,
+ MAGIC8 = 0x6D703EF3, MAGIC9 = 0x7A6D76E9;
for(size_t i = 0; i != blocks; ++i)
{
load_le(m_M.data(), input, m_M.size());
- uint32_t A1 = m_digest[0], A2 = A1, B1 = m_digest[1], B2 = B1,
- C1 = m_digest[2], C2 = C1, D1 = m_digest[3], D2 = D1,
- E1 = m_digest[4], E2 = E1;
-
- F1(A1,B1,C1,D1,E1,m_M[ 0],11 ); F5(A2,B2,C2,D2,E2,m_M[ 5], 8,MAGIC6);
- F1(E1,A1,B1,C1,D1,m_M[ 1],14 ); F5(E2,A2,B2,C2,D2,m_M[14], 9,MAGIC6);
- F1(D1,E1,A1,B1,C1,m_M[ 2],15 ); F5(D2,E2,A2,B2,C2,m_M[ 7], 9,MAGIC6);
- F1(C1,D1,E1,A1,B1,m_M[ 3],12 ); F5(C2,D2,E2,A2,B2,m_M[ 0],11,MAGIC6);
- F1(B1,C1,D1,E1,A1,m_M[ 4], 5 ); F5(B2,C2,D2,E2,A2,m_M[ 9],13,MAGIC6);
- F1(A1,B1,C1,D1,E1,m_M[ 5], 8 ); F5(A2,B2,C2,D2,E2,m_M[ 2],15,MAGIC6);
- F1(E1,A1,B1,C1,D1,m_M[ 6], 7 ); F5(E2,A2,B2,C2,D2,m_M[11],15,MAGIC6);
- F1(D1,E1,A1,B1,C1,m_M[ 7], 9 ); F5(D2,E2,A2,B2,C2,m_M[ 4], 5,MAGIC6);
- F1(C1,D1,E1,A1,B1,m_M[ 8],11 ); F5(C2,D2,E2,A2,B2,m_M[13], 7,MAGIC6);
- F1(B1,C1,D1,E1,A1,m_M[ 9],13 ); F5(B2,C2,D2,E2,A2,m_M[ 6], 7,MAGIC6);
- F1(A1,B1,C1,D1,E1,m_M[10],14 ); F5(A2,B2,C2,D2,E2,m_M[15], 8,MAGIC6);
- F1(E1,A1,B1,C1,D1,m_M[11],15 ); F5(E2,A2,B2,C2,D2,m_M[ 8],11,MAGIC6);
- F1(D1,E1,A1,B1,C1,m_M[12], 6 ); F5(D2,E2,A2,B2,C2,m_M[ 1],14,MAGIC6);
- F1(C1,D1,E1,A1,B1,m_M[13], 7 ); F5(C2,D2,E2,A2,B2,m_M[10],14,MAGIC6);
- F1(B1,C1,D1,E1,A1,m_M[14], 9 ); F5(B2,C2,D2,E2,A2,m_M[ 3],12,MAGIC6);
- F1(A1,B1,C1,D1,E1,m_M[15], 8 ); F5(A2,B2,C2,D2,E2,m_M[12], 6,MAGIC6);
-
- F2(E1,A1,B1,C1,D1,m_M[ 7], 7,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[ 6], 9,MAGIC7);
- F2(D1,E1,A1,B1,C1,m_M[ 4], 6,MAGIC2); F4(D2,E2,A2,B2,C2,m_M[11],13,MAGIC7);
- F2(C1,D1,E1,A1,B1,m_M[13], 8,MAGIC2); F4(C2,D2,E2,A2,B2,m_M[ 3],15,MAGIC7);
- F2(B1,C1,D1,E1,A1,m_M[ 1],13,MAGIC2); F4(B2,C2,D2,E2,A2,m_M[ 7], 7,MAGIC7);
- F2(A1,B1,C1,D1,E1,m_M[10],11,MAGIC2); F4(A2,B2,C2,D2,E2,m_M[ 0],12,MAGIC7);
- F2(E1,A1,B1,C1,D1,m_M[ 6], 9,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[13], 8,MAGIC7);
- F2(D1,E1,A1,B1,C1,m_M[15], 7,MAGIC2); F4(D2,E2,A2,B2,C2,m_M[ 5], 9,MAGIC7);
- F2(C1,D1,E1,A1,B1,m_M[ 3],15,MAGIC2); F4(C2,D2,E2,A2,B2,m_M[10],11,MAGIC7);
- F2(B1,C1,D1,E1,A1,m_M[12], 7,MAGIC2); F4(B2,C2,D2,E2,A2,m_M[14], 7,MAGIC7);
- F2(A1,B1,C1,D1,E1,m_M[ 0],12,MAGIC2); F4(A2,B2,C2,D2,E2,m_M[15], 7,MAGIC7);
- F2(E1,A1,B1,C1,D1,m_M[ 9],15,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[ 8],12,MAGIC7);
- F2(D1,E1,A1,B1,C1,m_M[ 5], 9,MAGIC2); F4(D2,E2,A2,B2,C2,m_M[12], 7,MAGIC7);
- F2(C1,D1,E1,A1,B1,m_M[ 2],11,MAGIC2); F4(C2,D2,E2,A2,B2,m_M[ 4], 6,MAGIC7);
- F2(B1,C1,D1,E1,A1,m_M[14], 7,MAGIC2); F4(B2,C2,D2,E2,A2,m_M[ 9],15,MAGIC7);
- F2(A1,B1,C1,D1,E1,m_M[11],13,MAGIC2); F4(A2,B2,C2,D2,E2,m_M[ 1],13,MAGIC7);
- F2(E1,A1,B1,C1,D1,m_M[ 8],12,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[ 2],11,MAGIC7);
-
- F3(D1,E1,A1,B1,C1,m_M[ 3],11,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[15], 9,MAGIC8);
- F3(C1,D1,E1,A1,B1,m_M[10],13,MAGIC3); F3(C2,D2,E2,A2,B2,m_M[ 5], 7,MAGIC8);
- F3(B1,C1,D1,E1,A1,m_M[14], 6,MAGIC3); F3(B2,C2,D2,E2,A2,m_M[ 1],15,MAGIC8);
- F3(A1,B1,C1,D1,E1,m_M[ 4], 7,MAGIC3); F3(A2,B2,C2,D2,E2,m_M[ 3],11,MAGIC8);
- F3(E1,A1,B1,C1,D1,m_M[ 9],14,MAGIC3); F3(E2,A2,B2,C2,D2,m_M[ 7], 8,MAGIC8);
- F3(D1,E1,A1,B1,C1,m_M[15], 9,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[14], 6,MAGIC8);
- F3(C1,D1,E1,A1,B1,m_M[ 8],13,MAGIC3); F3(C2,D2,E2,A2,B2,m_M[ 6], 6,MAGIC8);
- F3(B1,C1,D1,E1,A1,m_M[ 1],15,MAGIC3); F3(B2,C2,D2,E2,A2,m_M[ 9],14,MAGIC8);
- F3(A1,B1,C1,D1,E1,m_M[ 2],14,MAGIC3); F3(A2,B2,C2,D2,E2,m_M[11],12,MAGIC8);
- F3(E1,A1,B1,C1,D1,m_M[ 7], 8,MAGIC3); F3(E2,A2,B2,C2,D2,m_M[ 8],13,MAGIC8);
- F3(D1,E1,A1,B1,C1,m_M[ 0],13,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[12], 5,MAGIC8);
- F3(C1,D1,E1,A1,B1,m_M[ 6], 6,MAGIC3); F3(C2,D2,E2,A2,B2,m_M[ 2],14,MAGIC8);
- F3(B1,C1,D1,E1,A1,m_M[13], 5,MAGIC3); F3(B2,C2,D2,E2,A2,m_M[10],13,MAGIC8);
- F3(A1,B1,C1,D1,E1,m_M[11],12,MAGIC3); F3(A2,B2,C2,D2,E2,m_M[ 0],13,MAGIC8);
- F3(E1,A1,B1,C1,D1,m_M[ 5], 7,MAGIC3); F3(E2,A2,B2,C2,D2,m_M[ 4], 7,MAGIC8);
- F3(D1,E1,A1,B1,C1,m_M[12], 5,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[13], 5,MAGIC8);
-
- F4(C1,D1,E1,A1,B1,m_M[ 1],11,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[ 8],15,MAGIC9);
- F4(B1,C1,D1,E1,A1,m_M[ 9],12,MAGIC4); F2(B2,C2,D2,E2,A2,m_M[ 6], 5,MAGIC9);
- F4(A1,B1,C1,D1,E1,m_M[11],14,MAGIC4); F2(A2,B2,C2,D2,E2,m_M[ 4], 8,MAGIC9);
- F4(E1,A1,B1,C1,D1,m_M[10],15,MAGIC4); F2(E2,A2,B2,C2,D2,m_M[ 1],11,MAGIC9);
- F4(D1,E1,A1,B1,C1,m_M[ 0],14,MAGIC4); F2(D2,E2,A2,B2,C2,m_M[ 3],14,MAGIC9);
- F4(C1,D1,E1,A1,B1,m_M[ 8],15,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[11],14,MAGIC9);
- F4(B1,C1,D1,E1,A1,m_M[12], 9,MAGIC4); F2(B2,C2,D2,E2,A2,m_M[15], 6,MAGIC9);
- F4(A1,B1,C1,D1,E1,m_M[ 4], 8,MAGIC4); F2(A2,B2,C2,D2,E2,m_M[ 0],14,MAGIC9);
- F4(E1,A1,B1,C1,D1,m_M[13], 9,MAGIC4); F2(E2,A2,B2,C2,D2,m_M[ 5], 6,MAGIC9);
- F4(D1,E1,A1,B1,C1,m_M[ 3],14,MAGIC4); F2(D2,E2,A2,B2,C2,m_M[12], 9,MAGIC9);
- F4(C1,D1,E1,A1,B1,m_M[ 7], 5,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[ 2],12,MAGIC9);
- F4(B1,C1,D1,E1,A1,m_M[15], 6,MAGIC4); F2(B2,C2,D2,E2,A2,m_M[13], 9,MAGIC9);
- F4(A1,B1,C1,D1,E1,m_M[14], 8,MAGIC4); F2(A2,B2,C2,D2,E2,m_M[ 9],12,MAGIC9);
- F4(E1,A1,B1,C1,D1,m_M[ 5], 6,MAGIC4); F2(E2,A2,B2,C2,D2,m_M[ 7], 5,MAGIC9);
- F4(D1,E1,A1,B1,C1,m_M[ 6], 5,MAGIC4); F2(D2,E2,A2,B2,C2,m_M[10],15,MAGIC9);
- F4(C1,D1,E1,A1,B1,m_M[ 2],12,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[14], 8,MAGIC9);
-
- F5(B1,C1,D1,E1,A1,m_M[ 4], 9,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[12], 8 );
- F5(A1,B1,C1,D1,E1,m_M[ 0],15,MAGIC5); F1(A2,B2,C2,D2,E2,m_M[15], 5 );
- F5(E1,A1,B1,C1,D1,m_M[ 5], 5,MAGIC5); F1(E2,A2,B2,C2,D2,m_M[10],12 );
- F5(D1,E1,A1,B1,C1,m_M[ 9],11,MAGIC5); F1(D2,E2,A2,B2,C2,m_M[ 4], 9 );
- F5(C1,D1,E1,A1,B1,m_M[ 7], 6,MAGIC5); F1(C2,D2,E2,A2,B2,m_M[ 1],12 );
- F5(B1,C1,D1,E1,A1,m_M[12], 8,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[ 5], 5 );
- F5(A1,B1,C1,D1,E1,m_M[ 2],13,MAGIC5); F1(A2,B2,C2,D2,E2,m_M[ 8],14 );
- F5(E1,A1,B1,C1,D1,m_M[10],12,MAGIC5); F1(E2,A2,B2,C2,D2,m_M[ 7], 6 );
- F5(D1,E1,A1,B1,C1,m_M[14], 5,MAGIC5); F1(D2,E2,A2,B2,C2,m_M[ 6], 8 );
- F5(C1,D1,E1,A1,B1,m_M[ 1],12,MAGIC5); F1(C2,D2,E2,A2,B2,m_M[ 2],13 );
- F5(B1,C1,D1,E1,A1,m_M[ 3],13,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[13], 6 );
- F5(A1,B1,C1,D1,E1,m_M[ 8],14,MAGIC5); F1(A2,B2,C2,D2,E2,m_M[14], 5 );
- F5(E1,A1,B1,C1,D1,m_M[11],11,MAGIC5); F1(E2,A2,B2,C2,D2,m_M[ 0],15 );
- F5(D1,E1,A1,B1,C1,m_M[ 6], 8,MAGIC5); F1(D2,E2,A2,B2,C2,m_M[ 3],13 );
- F5(C1,D1,E1,A1,B1,m_M[15], 5,MAGIC5); F1(C2,D2,E2,A2,B2,m_M[ 9],11 );
- F5(B1,C1,D1,E1,A1,m_M[13], 6,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[11],11 );
+ uint32_t A1 = m_digest[0], A2 = A1,
+ B1 = m_digest[1], B2 = B1,
+ C1 = m_digest[2], C2 = C1,
+ D1 = m_digest[3], D2 = D1,
+ E1 = m_digest[4], E2 = E1;
+
+ F1<11>(A1,B1,C1,D1,E1,m_M[ 0] ); F5< 8>(A2,B2,C2,D2,E2,m_M[ 5]+MAGIC6);
+ F1<14>(E1,A1,B1,C1,D1,m_M[ 1] ); F5< 9>(E2,A2,B2,C2,D2,m_M[14]+MAGIC6);
+ F1<15>(D1,E1,A1,B1,C1,m_M[ 2] ); F5< 9>(D2,E2,A2,B2,C2,m_M[ 7]+MAGIC6);
+ F1<12>(C1,D1,E1,A1,B1,m_M[ 3] ); F5<11>(C2,D2,E2,A2,B2,m_M[ 0]+MAGIC6);
+ F1< 5>(B1,C1,D1,E1,A1,m_M[ 4] ); F5<13>(B2,C2,D2,E2,A2,m_M[ 9]+MAGIC6);
+ F1< 8>(A1,B1,C1,D1,E1,m_M[ 5] ); F5<15>(A2,B2,C2,D2,E2,m_M[ 2]+MAGIC6);
+ F1< 7>(E1,A1,B1,C1,D1,m_M[ 6] ); F5<15>(E2,A2,B2,C2,D2,m_M[11]+MAGIC6);
+ F1< 9>(D1,E1,A1,B1,C1,m_M[ 7] ); F5< 5>(D2,E2,A2,B2,C2,m_M[ 4]+MAGIC6);
+ F1<11>(C1,D1,E1,A1,B1,m_M[ 8] ); F5< 7>(C2,D2,E2,A2,B2,m_M[13]+MAGIC6);
+ F1<13>(B1,C1,D1,E1,A1,m_M[ 9] ); F5< 7>(B2,C2,D2,E2,A2,m_M[ 6]+MAGIC6);
+ F1<14>(A1,B1,C1,D1,E1,m_M[10] ); F5< 8>(A2,B2,C2,D2,E2,m_M[15]+MAGIC6);
+ F1<15>(E1,A1,B1,C1,D1,m_M[11] ); F5<11>(E2,A2,B2,C2,D2,m_M[ 8]+MAGIC6);
+ F1< 6>(D1,E1,A1,B1,C1,m_M[12] ); F5<14>(D2,E2,A2,B2,C2,m_M[ 1]+MAGIC6);
+ F1< 7>(C1,D1,E1,A1,B1,m_M[13] ); F5<14>(C2,D2,E2,A2,B2,m_M[10]+MAGIC6);
+ F1< 9>(B1,C1,D1,E1,A1,m_M[14] ); F5<12>(B2,C2,D2,E2,A2,m_M[ 3]+MAGIC6);
+ F1< 8>(A1,B1,C1,D1,E1,m_M[15] ); F5< 6>(A2,B2,C2,D2,E2,m_M[12]+MAGIC6);
+
+ F2< 7>(E1,A1,B1,C1,D1,m_M[ 7]+MAGIC2); F4< 9>(E2,A2,B2,C2,D2,m_M[ 6]+MAGIC7);
+ F2< 6>(D1,E1,A1,B1,C1,m_M[ 4]+MAGIC2); F4<13>(D2,E2,A2,B2,C2,m_M[11]+MAGIC7);
+ F2< 8>(C1,D1,E1,A1,B1,m_M[13]+MAGIC2); F4<15>(C2,D2,E2,A2,B2,m_M[ 3]+MAGIC7);
+ F2<13>(B1,C1,D1,E1,A1,m_M[ 1]+MAGIC2); F4< 7>(B2,C2,D2,E2,A2,m_M[ 7]+MAGIC7);
+ F2<11>(A1,B1,C1,D1,E1,m_M[10]+MAGIC2); F4<12>(A2,B2,C2,D2,E2,m_M[ 0]+MAGIC7);
+ F2< 9>(E1,A1,B1,C1,D1,m_M[ 6]+MAGIC2); F4< 8>(E2,A2,B2,C2,D2,m_M[13]+MAGIC7);
+ F2< 7>(D1,E1,A1,B1,C1,m_M[15]+MAGIC2); F4< 9>(D2,E2,A2,B2,C2,m_M[ 5]+MAGIC7);
+ F2<15>(C1,D1,E1,A1,B1,m_M[ 3]+MAGIC2); F4<11>(C2,D2,E2,A2,B2,m_M[10]+MAGIC7);
+ F2< 7>(B1,C1,D1,E1,A1,m_M[12]+MAGIC2); F4< 7>(B2,C2,D2,E2,A2,m_M[14]+MAGIC7);
+ F2<12>(A1,B1,C1,D1,E1,m_M[ 0]+MAGIC2); F4< 7>(A2,B2,C2,D2,E2,m_M[15]+MAGIC7);
+ F2<15>(E1,A1,B1,C1,D1,m_M[ 9]+MAGIC2); F4<12>(E2,A2,B2,C2,D2,m_M[ 8]+MAGIC7);
+ F2< 9>(D1,E1,A1,B1,C1,m_M[ 5]+MAGIC2); F4< 7>(D2,E2,A2,B2,C2,m_M[12]+MAGIC7);
+ F2<11>(C1,D1,E1,A1,B1,m_M[ 2]+MAGIC2); F4< 6>(C2,D2,E2,A2,B2,m_M[ 4]+MAGIC7);
+ F2< 7>(B1,C1,D1,E1,A1,m_M[14]+MAGIC2); F4<15>(B2,C2,D2,E2,A2,m_M[ 9]+MAGIC7);
+ F2<13>(A1,B1,C1,D1,E1,m_M[11]+MAGIC2); F4<13>(A2,B2,C2,D2,E2,m_M[ 1]+MAGIC7);
+ F2<12>(E1,A1,B1,C1,D1,m_M[ 8]+MAGIC2); F4<11>(E2,A2,B2,C2,D2,m_M[ 2]+MAGIC7);
+
+ F3<11>(D1,E1,A1,B1,C1,m_M[ 3]+MAGIC3); F3< 9>(D2,E2,A2,B2,C2,m_M[15]+MAGIC8);
+ F3<13>(C1,D1,E1,A1,B1,m_M[10]+MAGIC3); F3< 7>(C2,D2,E2,A2,B2,m_M[ 5]+MAGIC8);
+ F3< 6>(B1,C1,D1,E1,A1,m_M[14]+MAGIC3); F3<15>(B2,C2,D2,E2,A2,m_M[ 1]+MAGIC8);
+ F3< 7>(A1,B1,C1,D1,E1,m_M[ 4]+MAGIC3); F3<11>(A2,B2,C2,D2,E2,m_M[ 3]+MAGIC8);
+ F3<14>(E1,A1,B1,C1,D1,m_M[ 9]+MAGIC3); F3< 8>(E2,A2,B2,C2,D2,m_M[ 7]+MAGIC8);
+ F3< 9>(D1,E1,A1,B1,C1,m_M[15]+MAGIC3); F3< 6>(D2,E2,A2,B2,C2,m_M[14]+MAGIC8);
+ F3<13>(C1,D1,E1,A1,B1,m_M[ 8]+MAGIC3); F3< 6>(C2,D2,E2,A2,B2,m_M[ 6]+MAGIC8);
+ F3<15>(B1,C1,D1,E1,A1,m_M[ 1]+MAGIC3); F3<14>(B2,C2,D2,E2,A2,m_M[ 9]+MAGIC8);
+ F3<14>(A1,B1,C1,D1,E1,m_M[ 2]+MAGIC3); F3<12>(A2,B2,C2,D2,E2,m_M[11]+MAGIC8);
+ F3< 8>(E1,A1,B1,C1,D1,m_M[ 7]+MAGIC3); F3<13>(E2,A2,B2,C2,D2,m_M[ 8]+MAGIC8);
+ F3<13>(D1,E1,A1,B1,C1,m_M[ 0]+MAGIC3); F3< 5>(D2,E2,A2,B2,C2,m_M[12]+MAGIC8);
+ F3< 6>(C1,D1,E1,A1,B1,m_M[ 6]+MAGIC3); F3<14>(C2,D2,E2,A2,B2,m_M[ 2]+MAGIC8);
+ F3< 5>(B1,C1,D1,E1,A1,m_M[13]+MAGIC3); F3<13>(B2,C2,D2,E2,A2,m_M[10]+MAGIC8);
+ F3<12>(A1,B1,C1,D1,E1,m_M[11]+MAGIC3); F3<13>(A2,B2,C2,D2,E2,m_M[ 0]+MAGIC8);
+ F3< 7>(E1,A1,B1,C1,D1,m_M[ 5]+MAGIC3); F3< 7>(E2,A2,B2,C2,D2,m_M[ 4]+MAGIC8);
+ F3< 5>(D1,E1,A1,B1,C1,m_M[12]+MAGIC3); F3< 5>(D2,E2,A2,B2,C2,m_M[13]+MAGIC8);
+
+ F4<11>(C1,D1,E1,A1,B1,m_M[ 1]+MAGIC4); F2<15>(C2,D2,E2,A2,B2,m_M[ 8]+MAGIC9);
+ F4<12>(B1,C1,D1,E1,A1,m_M[ 9]+MAGIC4); F2< 5>(B2,C2,D2,E2,A2,m_M[ 6]+MAGIC9);
+ F4<14>(A1,B1,C1,D1,E1,m_M[11]+MAGIC4); F2< 8>(A2,B2,C2,D2,E2,m_M[ 4]+MAGIC9);
+ F4<15>(E1,A1,B1,C1,D1,m_M[10]+MAGIC4); F2<11>(E2,A2,B2,C2,D2,m_M[ 1]+MAGIC9);
+ F4<14>(D1,E1,A1,B1,C1,m_M[ 0]+MAGIC4); F2<14>(D2,E2,A2,B2,C2,m_M[ 3]+MAGIC9);
+ F4<15>(C1,D1,E1,A1,B1,m_M[ 8]+MAGIC4); F2<14>(C2,D2,E2,A2,B2,m_M[11]+MAGIC9);
+ F4< 9>(B1,C1,D1,E1,A1,m_M[12]+MAGIC4); F2< 6>(B2,C2,D2,E2,A2,m_M[15]+MAGIC9);
+ F4< 8>(A1,B1,C1,D1,E1,m_M[ 4]+MAGIC4); F2<14>(A2,B2,C2,D2,E2,m_M[ 0]+MAGIC9);
+ F4< 9>(E1,A1,B1,C1,D1,m_M[13]+MAGIC4); F2< 6>(E2,A2,B2,C2,D2,m_M[ 5]+MAGIC9);
+ F4<14>(D1,E1,A1,B1,C1,m_M[ 3]+MAGIC4); F2< 9>(D2,E2,A2,B2,C2,m_M[12]+MAGIC9);
+ F4< 5>(C1,D1,E1,A1,B1,m_M[ 7]+MAGIC4); F2<12>(C2,D2,E2,A2,B2,m_M[ 2]+MAGIC9);
+ F4< 6>(B1,C1,D1,E1,A1,m_M[15]+MAGIC4); F2< 9>(B2,C2,D2,E2,A2,m_M[13]+MAGIC9);
+ F4< 8>(A1,B1,C1,D1,E1,m_M[14]+MAGIC4); F2<12>(A2,B2,C2,D2,E2,m_M[ 9]+MAGIC9);
+ F4< 6>(E1,A1,B1,C1,D1,m_M[ 5]+MAGIC4); F2< 5>(E2,A2,B2,C2,D2,m_M[ 7]+MAGIC9);
+ F4< 5>(D1,E1,A1,B1,C1,m_M[ 6]+MAGIC4); F2<15>(D2,E2,A2,B2,C2,m_M[10]+MAGIC9);
+ F4<12>(C1,D1,E1,A1,B1,m_M[ 2]+MAGIC4); F2< 8>(C2,D2,E2,A2,B2,m_M[14]+MAGIC9);
+
+ F5< 9>(B1,C1,D1,E1,A1,m_M[ 4]+MAGIC5); F1< 8>(B2,C2,D2,E2,A2,m_M[12] );
+ F5<15>(A1,B1,C1,D1,E1,m_M[ 0]+MAGIC5); F1< 5>(A2,B2,C2,D2,E2,m_M[15] );
+ F5< 5>(E1,A1,B1,C1,D1,m_M[ 5]+MAGIC5); F1<12>(E2,A2,B2,C2,D2,m_M[10] );
+ F5<11>(D1,E1,A1,B1,C1,m_M[ 9]+MAGIC5); F1< 9>(D2,E2,A2,B2,C2,m_M[ 4] );
+ F5< 6>(C1,D1,E1,A1,B1,m_M[ 7]+MAGIC5); F1<12>(C2,D2,E2,A2,B2,m_M[ 1] );
+ F5< 8>(B1,C1,D1,E1,A1,m_M[12]+MAGIC5); F1< 5>(B2,C2,D2,E2,A2,m_M[ 5] );
+ F5<13>(A1,B1,C1,D1,E1,m_M[ 2]+MAGIC5); F1<14>(A2,B2,C2,D2,E2,m_M[ 8] );
+ F5<12>(E1,A1,B1,C1,D1,m_M[10]+MAGIC5); F1< 6>(E2,A2,B2,C2,D2,m_M[ 7] );
+ F5< 5>(D1,E1,A1,B1,C1,m_M[14]+MAGIC5); F1< 8>(D2,E2,A2,B2,C2,m_M[ 6] );
+ F5<12>(C1,D1,E1,A1,B1,m_M[ 1]+MAGIC5); F1<13>(C2,D2,E2,A2,B2,m_M[ 2] );
+ F5<13>(B1,C1,D1,E1,A1,m_M[ 3]+MAGIC5); F1< 6>(B2,C2,D2,E2,A2,m_M[13] );
+ F5<14>(A1,B1,C1,D1,E1,m_M[ 8]+MAGIC5); F1< 5>(A2,B2,C2,D2,E2,m_M[14] );
+ F5<11>(E1,A1,B1,C1,D1,m_M[11]+MAGIC5); F1<15>(E2,A2,B2,C2,D2,m_M[ 0] );
+ F5< 8>(D1,E1,A1,B1,C1,m_M[ 6]+MAGIC5); F1<13>(D2,E2,A2,B2,C2,m_M[ 3] );
+ F5< 5>(C1,D1,E1,A1,B1,m_M[15]+MAGIC5); F1<11>(C2,D2,E2,A2,B2,m_M[ 9] );
+ F5< 6>(B1,C1,D1,E1,A1,m_M[13]+MAGIC5); F1<11>(B2,C2,D2,E2,A2,m_M[11] );
C1 = m_digest[1] + C1 + D2;
m_digest[1] = m_digest[2] + D1 + E2;
diff --git a/src/lib/hash/sha1/sha160.cpp b/src/lib/hash/sha1/sha160.cpp
index fcca67341..8c12a4f04 100644
--- a/src/lib/hash/sha1/sha160.cpp
+++ b/src/lib/hash/sha1/sha160.cpp
@@ -24,8 +24,8 @@ namespace {
*/
inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += (D ^ (B & (C ^ D))) + msg + 0x5A827999 + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += (D ^ (B & (C ^ D))) + msg + 0x5A827999 + rotl<5>(A);
+ B = rotl<30>(B);
}
/*
@@ -33,8 +33,8 @@ inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin
*/
inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += (B ^ C ^ D) + msg + 0x6ED9EBA1 + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += (B ^ C ^ D) + msg + 0x6ED9EBA1 + rotl<5>(A);
+ B = rotl<30>(B);
}
/*
@@ -42,8 +42,8 @@ inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin
*/
inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += ((B & C) | ((B | C) & D)) + msg + 0x8F1BBCDC + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += ((B & C) | ((B | C) & D)) + msg + 0x8F1BBCDC + rotl<5>(A);
+ B = rotl<30>(B);
}
/*
@@ -51,8 +51,8 @@ inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin
*/
inline void F4(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += (B ^ C ^ D) + msg + 0xCA62C1D6 + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += (B ^ C ^ D) + msg + 0xCA62C1D6 + rotl<5>(A);
+ B = rotl<30>(B);
}
}
@@ -99,14 +99,14 @@ void SHA_160::compress_n(const uint8_t input[], size_t blocks)
for(size_t j = 16; j != 80; j += 8)
{
- m_W[j ] = rotate_left((m_W[j-3] ^ m_W[j-8] ^ m_W[j-14] ^ m_W[j-16]), 1);
- m_W[j+1] = rotate_left((m_W[j-2] ^ m_W[j-7] ^ m_W[j-13] ^ m_W[j-15]), 1);
- m_W[j+2] = rotate_left((m_W[j-1] ^ m_W[j-6] ^ m_W[j-12] ^ m_W[j-14]), 1);
- m_W[j+3] = rotate_left((m_W[j ] ^ m_W[j-5] ^ m_W[j-11] ^ m_W[j-13]), 1);
- m_W[j+4] = rotate_left((m_W[j+1] ^ m_W[j-4] ^ m_W[j-10] ^ m_W[j-12]), 1);
- m_W[j+5] = rotate_left((m_W[j+2] ^ m_W[j-3] ^ m_W[j- 9] ^ m_W[j-11]), 1);
- m_W[j+6] = rotate_left((m_W[j+3] ^ m_W[j-2] ^ m_W[j- 8] ^ m_W[j-10]), 1);
- m_W[j+7] = rotate_left((m_W[j+4] ^ m_W[j-1] ^ m_W[j- 7] ^ m_W[j- 9]), 1);
+ m_W[j ] = rotl<1>(m_W[j-3] ^ m_W[j-8] ^ m_W[j-14] ^ m_W[j-16]);
+ m_W[j+1] = rotl<1>(m_W[j-2] ^ m_W[j-7] ^ m_W[j-13] ^ m_W[j-15]);
+ m_W[j+2] = rotl<1>(m_W[j-1] ^ m_W[j-6] ^ m_W[j-12] ^ m_W[j-14]);
+ m_W[j+3] = rotl<1>(m_W[j ] ^ m_W[j-5] ^ m_W[j-11] ^ m_W[j-13]);
+ m_W[j+4] = rotl<1>(m_W[j+1] ^ m_W[j-4] ^ m_W[j-10] ^ m_W[j-12]);
+ m_W[j+5] = rotl<1>(m_W[j+2] ^ m_W[j-3] ^ m_W[j- 9] ^ m_W[j-11]);
+ m_W[j+6] = rotl<1>(m_W[j+3] ^ m_W[j-2] ^ m_W[j- 8] ^ m_W[j-10]);
+ m_W[j+7] = rotl<1>(m_W[j+4] ^ m_W[j-1] ^ m_W[j- 7] ^ m_W[j- 9]);
}
F1(A, B, C, D, E, m_W[ 0]); F1(E, A, B, C, D, m_W[ 1]);
diff --git a/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp b/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp
index 8c7785051..0b7f8f837 100644
--- a/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp
+++ b/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp
@@ -113,8 +113,8 @@ W0 = W[t]..W[t+3]
*/
inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += (D ^ (B & (C ^ D))) + msg + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += (D ^ (B & (C ^ D))) + msg + rotl<5>(A);
+ B = rotl<30>(B);
}
/*
@@ -122,8 +122,8 @@ inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin
*/
inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += (B ^ C ^ D) + msg + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += (B ^ C ^ D) + msg + rotl<5>(A);
+ B = rotl<30>(B);
}
/*
@@ -131,8 +131,8 @@ inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin
*/
inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += ((B & C) | ((B | C) & D)) + msg + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += ((B & C) | ((B | C) & D)) + msg + rotl<5>(A);
+ B = rotl<30>(B);
}
/*
@@ -140,8 +140,8 @@ inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin
*/
inline void F4(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
{
- E += (B ^ C ^ D) + msg + rotate_left(A, 5);
- B = rotate_left(B, 30);
+ E += (B ^ C ^ D) + msg + rotl<5>(A);
+ B = rotl<30>(B);
}
}
diff --git a/src/lib/hash/sha2_32/sha2_32.cpp b/src/lib/hash/sha2_32/sha2_32.cpp
index 281e6ed2b..0710747d0 100644
--- a/src/lib/hash/sha2_32/sha2_32.cpp
+++ b/src/lib/hash/sha2_32/sha2_32.cpp
@@ -28,10 +28,10 @@ std::unique_ptr<HashFunction> SHA_256::copy_state() const
* even though it is much faster if inlined.
*/
#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do { \
- uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22); \
- uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25); \
- uint32_t M2_sigma = rotate_right(M2, 17) ^ rotate_right(M2, 19) ^ (M2 >> 10); \
- uint32_t M4_sigma = rotate_right(M4, 7) ^ rotate_right(M4, 18) ^ (M4 >> 3); \
+ uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); \
+ uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); \
+ uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ (M2 >> 10); \
+ uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ (M4 >> 3); \
H += magic + E_rho + ((E & F) ^ (~E & G)) + M1; \
D += H; \
H += A_rho + ((A & B) | ((A | B) & C)); \
diff --git a/src/lib/hash/sha2_64/sha2_64.cpp b/src/lib/hash/sha2_64/sha2_64.cpp
index 8e01b6b4d..45992e996 100644
--- a/src/lib/hash/sha2_64/sha2_64.cpp
+++ b/src/lib/hash/sha2_64/sha2_64.cpp
@@ -26,44 +26,29 @@ std::unique_ptr<HashFunction> SHA_512_256::copy_state() const
namespace {
-namespace SHA2_64 {
-
-/*
-* SHA-{384,512} Rho Function
-*/
-inline uint64_t rho(uint64_t X, uint32_t rot1, uint32_t rot2, uint32_t rot3)
- {
- return (rotate_right(X, rot1) ^ rotate_right(X, rot2) ^
- rotate_right(X, rot3));
- }
-
-/*
-* SHA-{384,512} Sigma Function
-*/
-inline uint64_t sigma(uint64_t X, uint32_t rot1, uint32_t rot2, uint32_t shift)
- {
- return (rotate_right(X, rot1) ^ rotate_right(X, rot2) ^ (X >> shift));
- }
-
/*
* SHA-512 F1 Function
*
* Use a macro as many compilers won't inline a function this big,
* even though it is much faster if inlined.
*/
-#define SHA2_64_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) \
- do { \
- H += magic + rho(E, 14, 18, 41) + ((E & F) ^ (~E & G)) + M1; \
- D += H; \
- H += rho(A, 28, 34, 39) + ((A & B) | ((A | B) & C)); \
- M1 += sigma(M2, 19, 61, 6) + M3 + sigma(M4, 1, 8, 7); \
+#define SHA2_64_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) \
+ do { \
+ const uint64_t E_rho = rotr<14>(E) ^ rotr<18>(E) ^ rotr<41>(E); \
+ const uint64_t A_rho = rotr<28>(A) ^ rotr<34>(A) ^ rotr<39>(A); \
+ const uint64_t M2_sigma = rotr<19>(M2) ^ rotr<61>(M2) ^ (M2 >> 6); \
+ const uint64_t M4_sigma = rotr<1>(M4) ^ rotr<8>(M4) ^ (M4 >> 7); \
+ H += magic + E_rho + ((E & F) ^ (~E & G)) + M1; \
+ D += H; \
+ H += A_rho + ((A & B) | ((A | B) & C)); \
+ M1 += M2_sigma + M3 + M4_sigma; \
} while(0);
/*
* SHA-{384,512} Compression Function
*/
-void compress(secure_vector<uint64_t>& digest,
- const uint8_t input[], size_t blocks)
+void SHA64_compress(secure_vector<uint64_t>& digest,
+ const uint8_t input[], size_t blocks)
{
uint64_t A = digest[0], B = digest[1], C = digest[2],
D = digest[3], E = digest[4], F = digest[5],
@@ -184,21 +169,19 @@ void compress(secure_vector<uint64_t>& digest,
}
-}
-
void SHA_512_256::compress_n(const uint8_t input[], size_t blocks)
{
- SHA2_64::compress(m_digest, input, blocks);
+ SHA64_compress(m_digest, input, blocks);
}
void SHA_384::compress_n(const uint8_t input[], size_t blocks)
{
- SHA2_64::compress(m_digest, input, blocks);
+ SHA64_compress(m_digest, input, blocks);
}
void SHA_512::compress_n(const uint8_t input[], size_t blocks)
{
- SHA2_64::compress(m_digest, input, blocks);
+ SHA64_compress(m_digest, input, blocks);
}
void SHA_512_256::copy_out(uint8_t output[])
diff --git a/src/lib/hash/sha3/sha3.cpp b/src/lib/hash/sha3/sha3.cpp
index e829c3f70..1556e5498 100644
--- a/src/lib/hash/sha3/sha3.cpp
+++ b/src/lib/hash/sha3/sha3.cpp
@@ -37,37 +37,37 @@ void SHA_3::permute(uint64_t A[25])
const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
- const uint64_t D0 = rotate_left(C0, 1) ^ C3;
- const uint64_t D1 = rotate_left(C1, 1) ^ C4;
- const uint64_t D2 = rotate_left(C2, 1) ^ C0;
- const uint64_t D3 = rotate_left(C3, 1) ^ C1;
- const uint64_t D4 = rotate_left(C4, 1) ^ C2;
+ const uint64_t D0 = rotl<1>(C0) ^ C3;
+ const uint64_t D1 = rotl<1>(C1) ^ C4;
+ const uint64_t D2 = rotl<1>(C2) ^ C0;
+ const uint64_t D3 = rotl<1>(C3) ^ C1;
+ const uint64_t D4 = rotl<1>(C4) ^ C2;
const uint64_t B00 = A[ 0] ^ D1;
- const uint64_t B10 = rotate_left(A[ 1] ^ D2, 1);
- const uint64_t B20 = rotate_left(A[ 2] ^ D3, 62);
- const uint64_t B05 = rotate_left(A[ 3] ^ D4, 28);
- const uint64_t B15 = rotate_left(A[ 4] ^ D0, 27);
- const uint64_t B16 = rotate_left(A[ 5] ^ D1, 36);
- const uint64_t B01 = rotate_left(A[ 6] ^ D2, 44);
- const uint64_t B11 = rotate_left(A[ 7] ^ D3, 6);
- const uint64_t B21 = rotate_left(A[ 8] ^ D4, 55);
- const uint64_t B06 = rotate_left(A[ 9] ^ D0, 20);
- const uint64_t B07 = rotate_left(A[10] ^ D1, 3);
- const uint64_t B17 = rotate_left(A[11] ^ D2, 10);
- const uint64_t B02 = rotate_left(A[12] ^ D3, 43);
- const uint64_t B12 = rotate_left(A[13] ^ D4, 25);
- const uint64_t B22 = rotate_left(A[14] ^ D0, 39);
- const uint64_t B23 = rotate_left(A[15] ^ D1, 41);
- const uint64_t B08 = rotate_left(A[16] ^ D2, 45);
- const uint64_t B18 = rotate_left(A[17] ^ D3, 15);
- const uint64_t B03 = rotate_left(A[18] ^ D4, 21);
- const uint64_t B13 = rotate_left(A[19] ^ D0, 8);
- const uint64_t B14 = rotate_left(A[20] ^ D1, 18);
- const uint64_t B24 = rotate_left(A[21] ^ D2, 2);
- const uint64_t B09 = rotate_left(A[22] ^ D3, 61);
- const uint64_t B19 = rotate_left(A[23] ^ D4, 56);
- const uint64_t B04 = rotate_left(A[24] ^ D0, 14);
+ const uint64_t B10 = rotl<1>(A[ 1] ^ D2);
+ const uint64_t B20 = rotl<62>(A[ 2] ^ D3);
+ const uint64_t B05 = rotl<28>(A[ 3] ^ D4);
+ const uint64_t B15 = rotl<27>(A[ 4] ^ D0);
+ const uint64_t B16 = rotl<36>(A[ 5] ^ D1);
+ const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
+ const uint64_t B11 = rotl<6>(A[ 7] ^ D3);
+ const uint64_t B21 = rotl<55>(A[ 8] ^ D4);
+ const uint64_t B06 = rotl<20>(A[ 9] ^ D0);
+ const uint64_t B07 = rotl<3>(A[10] ^ D1);
+ const uint64_t B17 = rotl<10>(A[11] ^ D2);
+ const uint64_t B02 = rotl<43>(A[12] ^ D3);
+ const uint64_t B12 = rotl<25>(A[13] ^ D4);
+ const uint64_t B22 = rotl<39>(A[14] ^ D0);
+ const uint64_t B23 = rotl<41>(A[15] ^ D1);
+ const uint64_t B08 = rotl<45>(A[16] ^ D2);
+ const uint64_t B18 = rotl<15>(A[17] ^ D3);
+ const uint64_t B03 = rotl<21>(A[18] ^ D4);
+ const uint64_t B13 = rotl<8>(A[19] ^ D0);
+ const uint64_t B14 = rotl<18>(A[20] ^ D1);
+ const uint64_t B24 = rotl<2>(A[21] ^ D2);
+ const uint64_t B09 = rotl<61>(A[22] ^ D3);
+ const uint64_t B19 = rotl<56>(A[23] ^ D4);
+ const uint64_t B04 = rotl<14>(A[24] ^ D0);
A[ 0] = B00 ^ (~B01 & B02);
A[ 1] = B01 ^ (~B02 & B03);
diff --git a/src/lib/hash/sm3/sm3.cpp b/src/lib/hash/sm3/sm3.cpp
index aeb8f2e47..c3220d243 100644
--- a/src/lib/hash/sm3/sm3.cpp
+++ b/src/lib/hash/sm3/sm3.cpp
@@ -23,12 +23,12 @@ const uint32_t SM3_IV[] = {
inline uint32_t P0(uint32_t X)
{
- return X ^ rotate_left(X, 9) ^ rotate_left(X, 17);
+ return X ^ rotl<9>(X) ^ rotl<17>(X);
}
inline uint32_t P1(uint32_t X)
{
- return X ^ rotate_left(X, 15) ^ rotate_left(X, 23);
+ return X ^ rotl<15>(X) ^ rotl<23>(X);
}
inline uint32_t FF1(uint32_t X, uint32_t Y, uint32_t Z)
@@ -47,14 +47,14 @@ inline void R1(uint32_t A, uint32_t& B, uint32_t C, uint32_t& D,
uint32_t E, uint32_t& F, uint32_t G, uint32_t& H,
uint32_t TJ, uint32_t Wi, uint32_t Wj)
{
- const uint32_t A12 = rotate_left(A, 12);
- const uint32_t SS1 = rotate_left(A12 + E + TJ, 7);
+ const uint32_t A12 = rotl<12>(A);
+ const uint32_t SS1 = rotl<7>(A12 + E + TJ);
const uint32_t TT1 = (A ^ B ^ C) + D + (SS1 ^ A12) + Wj;
const uint32_t TT2 = (E ^ F ^ G) + H + SS1 + Wi;
- B = rotate_left(B, 9);
+ B = rotl<9>(B);
D = TT1;
- F = rotate_left(F, 19);
+ F = rotl<19>(F);
H = P0(TT2);
}
@@ -62,14 +62,14 @@ inline void R2(uint32_t A, uint32_t& B, uint32_t C, uint32_t& D,
uint32_t E, uint32_t& F, uint32_t G, uint32_t& H,
uint32_t TJ, uint32_t Wi, uint32_t Wj)
{
- const uint32_t A12 = rotate_left(A, 12);
- const uint32_t SS1 = rotate_left(A12 + E + TJ, 7);
+ const uint32_t A12 = rotl<12>(A);
+ const uint32_t SS1 = rotl<7>(A12 + E + TJ);
const uint32_t TT1 = FF1(A, B, C) + D + (SS1 ^ A12) + Wj;
const uint32_t TT2 = GG1(E, F, G) + H + SS1 + Wi;
- B = rotate_left(B, 9);
+ B = rotl<9>(B);
D = TT1;
- F = rotate_left(F, 19);
+ F = rotl<19>(F);
H = P0(TT2);
}
@@ -105,58 +105,58 @@ void SM3::compress_n(const uint8_t input[], size_t blocks)
W[15] = load_be<uint32_t>(input, 15);
// Message Extension (b)
- W[16] = P1(W[ 0] ^ W[ 7] ^ rotate_left(W[13], 15)) ^ rotate_left(W[ 3], 7) ^ W[10];
- W[17] = P1(W[ 1] ^ W[ 8] ^ rotate_left(W[14], 15)) ^ rotate_left(W[ 4], 7) ^ W[11];
- W[18] = P1(W[ 2] ^ W[ 9] ^ rotate_left(W[15], 15)) ^ rotate_left(W[ 5], 7) ^ W[12];
- W[19] = P1(W[ 3] ^ W[10] ^ rotate_left(W[16], 15)) ^ rotate_left(W[ 6], 7) ^ W[13];
- W[20] = P1(W[ 4] ^ W[11] ^ rotate_left(W[17], 15)) ^ rotate_left(W[ 7], 7) ^ W[14];
- W[21] = P1(W[ 5] ^ W[12] ^ rotate_left(W[18], 15)) ^ rotate_left(W[ 8], 7) ^ W[15];
- W[22] = P1(W[ 6] ^ W[13] ^ rotate_left(W[19], 15)) ^ rotate_left(W[ 9], 7) ^ W[16];
- W[23] = P1(W[ 7] ^ W[14] ^ rotate_left(W[20], 15)) ^ rotate_left(W[10], 7) ^ W[17];
- W[24] = P1(W[ 8] ^ W[15] ^ rotate_left(W[21], 15)) ^ rotate_left(W[11], 7) ^ W[18];
- W[25] = P1(W[ 9] ^ W[16] ^ rotate_left(W[22], 15)) ^ rotate_left(W[12], 7) ^ W[19];
- W[26] = P1(W[10] ^ W[17] ^ rotate_left(W[23], 15)) ^ rotate_left(W[13], 7) ^ W[20];
- W[27] = P1(W[11] ^ W[18] ^ rotate_left(W[24], 15)) ^ rotate_left(W[14], 7) ^ W[21];
- W[28] = P1(W[12] ^ W[19] ^ rotate_left(W[25], 15)) ^ rotate_left(W[15], 7) ^ W[22];
- W[29] = P1(W[13] ^ W[20] ^ rotate_left(W[26], 15)) ^ rotate_left(W[16], 7) ^ W[23];
- W[30] = P1(W[14] ^ W[21] ^ rotate_left(W[27], 15)) ^ rotate_left(W[17], 7) ^ W[24];
- W[31] = P1(W[15] ^ W[22] ^ rotate_left(W[28], 15)) ^ rotate_left(W[18], 7) ^ W[25];
- W[32] = P1(W[16] ^ W[23] ^ rotate_left(W[29], 15)) ^ rotate_left(W[19], 7) ^ W[26];
- W[33] = P1(W[17] ^ W[24] ^ rotate_left(W[30], 15)) ^ rotate_left(W[20], 7) ^ W[27];
- W[34] = P1(W[18] ^ W[25] ^ rotate_left(W[31], 15)) ^ rotate_left(W[21], 7) ^ W[28];
- W[35] = P1(W[19] ^ W[26] ^ rotate_left(W[32], 15)) ^ rotate_left(W[22], 7) ^ W[29];
- W[36] = P1(W[20] ^ W[27] ^ rotate_left(W[33], 15)) ^ rotate_left(W[23], 7) ^ W[30];
- W[37] = P1(W[21] ^ W[28] ^ rotate_left(W[34], 15)) ^ rotate_left(W[24], 7) ^ W[31];
- W[38] = P1(W[22] ^ W[29] ^ rotate_left(W[35], 15)) ^ rotate_left(W[25], 7) ^ W[32];
- W[39] = P1(W[23] ^ W[30] ^ rotate_left(W[36], 15)) ^ rotate_left(W[26], 7) ^ W[33];
- W[40] = P1(W[24] ^ W[31] ^ rotate_left(W[37], 15)) ^ rotate_left(W[27], 7) ^ W[34];
- W[41] = P1(W[25] ^ W[32] ^ rotate_left(W[38], 15)) ^ rotate_left(W[28], 7) ^ W[35];
- W[42] = P1(W[26] ^ W[33] ^ rotate_left(W[39], 15)) ^ rotate_left(W[29], 7) ^ W[36];
- W[43] = P1(W[27] ^ W[34] ^ rotate_left(W[40], 15)) ^ rotate_left(W[30], 7) ^ W[37];
- W[44] = P1(W[28] ^ W[35] ^ rotate_left(W[41], 15)) ^ rotate_left(W[31], 7) ^ W[38];
- W[45] = P1(W[29] ^ W[36] ^ rotate_left(W[42], 15)) ^ rotate_left(W[32], 7) ^ W[39];
- W[46] = P1(W[30] ^ W[37] ^ rotate_left(W[43], 15)) ^ rotate_left(W[33], 7) ^ W[40];
- W[47] = P1(W[31] ^ W[38] ^ rotate_left(W[44], 15)) ^ rotate_left(W[34], 7) ^ W[41];
- W[48] = P1(W[32] ^ W[39] ^ rotate_left(W[45], 15)) ^ rotate_left(W[35], 7) ^ W[42];
- W[49] = P1(W[33] ^ W[40] ^ rotate_left(W[46], 15)) ^ rotate_left(W[36], 7) ^ W[43];
- W[50] = P1(W[34] ^ W[41] ^ rotate_left(W[47], 15)) ^ rotate_left(W[37], 7) ^ W[44];
- W[51] = P1(W[35] ^ W[42] ^ rotate_left(W[48], 15)) ^ rotate_left(W[38], 7) ^ W[45];
- W[52] = P1(W[36] ^ W[43] ^ rotate_left(W[49], 15)) ^ rotate_left(W[39], 7) ^ W[46];
- W[53] = P1(W[37] ^ W[44] ^ rotate_left(W[50], 15)) ^ rotate_left(W[40], 7) ^ W[47];
- W[54] = P1(W[38] ^ W[45] ^ rotate_left(W[51], 15)) ^ rotate_left(W[41], 7) ^ W[48];
- W[55] = P1(W[39] ^ W[46] ^ rotate_left(W[52], 15)) ^ rotate_left(W[42], 7) ^ W[49];
- W[56] = P1(W[40] ^ W[47] ^ rotate_left(W[53], 15)) ^ rotate_left(W[43], 7) ^ W[50];
- W[57] = P1(W[41] ^ W[48] ^ rotate_left(W[54], 15)) ^ rotate_left(W[44], 7) ^ W[51];
- W[58] = P1(W[42] ^ W[49] ^ rotate_left(W[55], 15)) ^ rotate_left(W[45], 7) ^ W[52];
- W[59] = P1(W[43] ^ W[50] ^ rotate_left(W[56], 15)) ^ rotate_left(W[46], 7) ^ W[53];
- W[60] = P1(W[44] ^ W[51] ^ rotate_left(W[57], 15)) ^ rotate_left(W[47], 7) ^ W[54];
- W[61] = P1(W[45] ^ W[52] ^ rotate_left(W[58], 15)) ^ rotate_left(W[48], 7) ^ W[55];
- W[62] = P1(W[46] ^ W[53] ^ rotate_left(W[59], 15)) ^ rotate_left(W[49], 7) ^ W[56];
- W[63] = P1(W[47] ^ W[54] ^ rotate_left(W[60], 15)) ^ rotate_left(W[50], 7) ^ W[57];
- W[64] = P1(W[48] ^ W[55] ^ rotate_left(W[61], 15)) ^ rotate_left(W[51], 7) ^ W[58];
- W[65] = P1(W[49] ^ W[56] ^ rotate_left(W[62], 15)) ^ rotate_left(W[52], 7) ^ W[59];
- W[66] = P1(W[50] ^ W[57] ^ rotate_left(W[63], 15)) ^ rotate_left(W[53], 7) ^ W[60];
- W[67] = P1(W[51] ^ W[58] ^ rotate_left(W[64], 15)) ^ rotate_left(W[54], 7) ^ W[61];
+ W[16] = P1(W[ 0] ^ W[ 7] ^ rotl<15>(W[13])) ^ rotl<7>(W[ 3]) ^ W[10];
+ W[17] = P1(W[ 1] ^ W[ 8] ^ rotl<15>(W[14])) ^ rotl<7>(W[ 4]) ^ W[11];
+ W[18] = P1(W[ 2] ^ W[ 9] ^ rotl<15>(W[15])) ^ rotl<7>(W[ 5]) ^ W[12];
+ W[19] = P1(W[ 3] ^ W[10] ^ rotl<15>(W[16])) ^ rotl<7>(W[ 6]) ^ W[13];
+ W[20] = P1(W[ 4] ^ W[11] ^ rotl<15>(W[17])) ^ rotl<7>(W[ 7]) ^ W[14];
+ W[21] = P1(W[ 5] ^ W[12] ^ rotl<15>(W[18])) ^ rotl<7>(W[ 8]) ^ W[15];
+ W[22] = P1(W[ 6] ^ W[13] ^ rotl<15>(W[19])) ^ rotl<7>(W[ 9]) ^ W[16];
+ W[23] = P1(W[ 7] ^ W[14] ^ rotl<15>(W[20])) ^ rotl<7>(W[10]) ^ W[17];
+ W[24] = P1(W[ 8] ^ W[15] ^ rotl<15>(W[21])) ^ rotl<7>(W[11]) ^ W[18];
+ W[25] = P1(W[ 9] ^ W[16] ^ rotl<15>(W[22])) ^ rotl<7>(W[12]) ^ W[19];
+ W[26] = P1(W[10] ^ W[17] ^ rotl<15>(W[23])) ^ rotl<7>(W[13]) ^ W[20];
+ W[27] = P1(W[11] ^ W[18] ^ rotl<15>(W[24])) ^ rotl<7>(W[14]) ^ W[21];
+ W[28] = P1(W[12] ^ W[19] ^ rotl<15>(W[25])) ^ rotl<7>(W[15]) ^ W[22];
+ W[29] = P1(W[13] ^ W[20] ^ rotl<15>(W[26])) ^ rotl<7>(W[16]) ^ W[23];
+ W[30] = P1(W[14] ^ W[21] ^ rotl<15>(W[27])) ^ rotl<7>(W[17]) ^ W[24];
+ W[31] = P1(W[15] ^ W[22] ^ rotl<15>(W[28])) ^ rotl<7>(W[18]) ^ W[25];
+ W[32] = P1(W[16] ^ W[23] ^ rotl<15>(W[29])) ^ rotl<7>(W[19]) ^ W[26];
+ W[33] = P1(W[17] ^ W[24] ^ rotl<15>(W[30])) ^ rotl<7>(W[20]) ^ W[27];
+ W[34] = P1(W[18] ^ W[25] ^ rotl<15>(W[31])) ^ rotl<7>(W[21]) ^ W[28];
+ W[35] = P1(W[19] ^ W[26] ^ rotl<15>(W[32])) ^ rotl<7>(W[22]) ^ W[29];
+ W[36] = P1(W[20] ^ W[27] ^ rotl<15>(W[33])) ^ rotl<7>(W[23]) ^ W[30];
+ W[37] = P1(W[21] ^ W[28] ^ rotl<15>(W[34])) ^ rotl<7>(W[24]) ^ W[31];
+ W[38] = P1(W[22] ^ W[29] ^ rotl<15>(W[35])) ^ rotl<7>(W[25]) ^ W[32];
+ W[39] = P1(W[23] ^ W[30] ^ rotl<15>(W[36])) ^ rotl<7>(W[26]) ^ W[33];
+ W[40] = P1(W[24] ^ W[31] ^ rotl<15>(W[37])) ^ rotl<7>(W[27]) ^ W[34];
+ W[41] = P1(W[25] ^ W[32] ^ rotl<15>(W[38])) ^ rotl<7>(W[28]) ^ W[35];
+ W[42] = P1(W[26] ^ W[33] ^ rotl<15>(W[39])) ^ rotl<7>(W[29]) ^ W[36];
+ W[43] = P1(W[27] ^ W[34] ^ rotl<15>(W[40])) ^ rotl<7>(W[30]) ^ W[37];
+ W[44] = P1(W[28] ^ W[35] ^ rotl<15>(W[41])) ^ rotl<7>(W[31]) ^ W[38];
+ W[45] = P1(W[29] ^ W[36] ^ rotl<15>(W[42])) ^ rotl<7>(W[32]) ^ W[39];
+ W[46] = P1(W[30] ^ W[37] ^ rotl<15>(W[43])) ^ rotl<7>(W[33]) ^ W[40];
+ W[47] = P1(W[31] ^ W[38] ^ rotl<15>(W[44])) ^ rotl<7>(W[34]) ^ W[41];
+ W[48] = P1(W[32] ^ W[39] ^ rotl<15>(W[45])) ^ rotl<7>(W[35]) ^ W[42];
+ W[49] = P1(W[33] ^ W[40] ^ rotl<15>(W[46])) ^ rotl<7>(W[36]) ^ W[43];
+ W[50] = P1(W[34] ^ W[41] ^ rotl<15>(W[47])) ^ rotl<7>(W[37]) ^ W[44];
+ W[51] = P1(W[35] ^ W[42] ^ rotl<15>(W[48])) ^ rotl<7>(W[38]) ^ W[45];
+ W[52] = P1(W[36] ^ W[43] ^ rotl<15>(W[49])) ^ rotl<7>(W[39]) ^ W[46];
+ W[53] = P1(W[37] ^ W[44] ^ rotl<15>(W[50])) ^ rotl<7>(W[40]) ^ W[47];
+ W[54] = P1(W[38] ^ W[45] ^ rotl<15>(W[51])) ^ rotl<7>(W[41]) ^ W[48];
+ W[55] = P1(W[39] ^ W[46] ^ rotl<15>(W[52])) ^ rotl<7>(W[42]) ^ W[49];
+ W[56] = P1(W[40] ^ W[47] ^ rotl<15>(W[53])) ^ rotl<7>(W[43]) ^ W[50];
+ W[57] = P1(W[41] ^ W[48] ^ rotl<15>(W[54])) ^ rotl<7>(W[44]) ^ W[51];
+ W[58] = P1(W[42] ^ W[49] ^ rotl<15>(W[55])) ^ rotl<7>(W[45]) ^ W[52];
+ W[59] = P1(W[43] ^ W[50] ^ rotl<15>(W[56])) ^ rotl<7>(W[46]) ^ W[53];
+ W[60] = P1(W[44] ^ W[51] ^ rotl<15>(W[57])) ^ rotl<7>(W[47]) ^ W[54];
+ W[61] = P1(W[45] ^ W[52] ^ rotl<15>(W[58])) ^ rotl<7>(W[48]) ^ W[55];
+ W[62] = P1(W[46] ^ W[53] ^ rotl<15>(W[59])) ^ rotl<7>(W[49]) ^ W[56];
+ W[63] = P1(W[47] ^ W[54] ^ rotl<15>(W[60])) ^ rotl<7>(W[50]) ^ W[57];
+ W[64] = P1(W[48] ^ W[55] ^ rotl<15>(W[61])) ^ rotl<7>(W[51]) ^ W[58];
+ W[65] = P1(W[49] ^ W[56] ^ rotl<15>(W[62])) ^ rotl<7>(W[52]) ^ W[59];
+ W[66] = P1(W[50] ^ W[57] ^ rotl<15>(W[63])) ^ rotl<7>(W[53]) ^ W[60];
+ W[67] = P1(W[51] ^ W[58] ^ rotl<15>(W[64])) ^ rotl<7>(W[54]) ^ W[61];
R1(A, B, C, D, E, F, G, H, 0x79CC4519, W[ 0], W[ 0] ^ W[ 4]);
R1(D, A, B, C, H, E, F, G, 0xF3988A32, W[ 1], W[ 1] ^ W[ 5]);
diff --git a/src/lib/mac/siphash/siphash.cpp b/src/lib/mac/siphash/siphash.cpp
index 54adcd5a5..255a35493 100644
--- a/src/lib/mac/siphash/siphash.cpp
+++ b/src/lib/mac/siphash/siphash.cpp
@@ -19,16 +19,16 @@ void SipRounds(uint64_t M, secure_vector<uint64_t>& V, size_t r)
for(size_t i = 0; i != r; ++i)
{
V0 += V1; V2 += V3;
- V1 = rotate_left(V1, 13);
- V3 = rotate_left(V3, 16);
+ V1 = rotl<13>(V1);
+ V3 = rotl<16>(V3);
V1 ^= V0; V3 ^= V2;
- V0 = rotate_left(V0, 32);
+ V0 = rotl<32>(V0);
V2 += V1; V0 += V3;
- V1 = rotate_left(V1, 17);
- V3 = rotate_left(V3, 21);
+ V1 = rotl<17>(V1);
+ V3 = rotl<21>(V3);
V1 ^= V2; V3 ^= V0;
- V2 = rotate_left(V2, 32);
+ V2 = rotl<32>(V2);
}
V0 ^= M;
diff --git a/src/lib/stream/chacha/chacha.cpp b/src/lib/stream/chacha/chacha.cpp
index d56f9e60a..0bbb47bcb 100644
--- a/src/lib/stream/chacha/chacha.cpp
+++ b/src/lib/stream/chacha/chacha.cpp
@@ -49,12 +49,12 @@ void ChaCha::chacha_x4(uint8_t output[64*4], uint32_t input[16], size_t rounds)
x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];
-#define CHACHA_QUARTER_ROUND(a, b, c, d) \
- do { \
- a += b; d ^= a; d = rotate_left(d, 16); \
- c += d; b ^= c; b = rotate_left(b, 12); \
- a += b; d ^= a; d = rotate_left(d, 8); \
- c += d; b ^= c; b = rotate_left(b, 7); \
+#define CHACHA_QUARTER_ROUND(a, b, c, d) \
+ do { \
+ a += b; d ^= a; d = rotl<16>(d); \
+ c += d; b ^= c; b = rotl<12>(b); \
+ a += b; d ^= a; d = rotl<8>(d); \
+ c += d; b ^= c; b = rotl<7>(b); \
} while(0)
for(size_t r = 0; r != rounds / 2; ++r)
diff --git a/src/lib/stream/salsa20/salsa20.cpp b/src/lib/stream/salsa20/salsa20.cpp
index 1c8846183..e27b2d2bb 100644
--- a/src/lib/stream/salsa20/salsa20.cpp
+++ b/src/lib/stream/salsa20/salsa20.cpp
@@ -14,10 +14,10 @@ namespace {
#define SALSA20_QUARTER_ROUND(x1, x2, x3, x4) \
do { \
- x2 ^= rotate_left(x1 + x4, 7); \
- x3 ^= rotate_left(x2 + x1, 9); \
- x4 ^= rotate_left(x3 + x2, 13); \
- x1 ^= rotate_left(x4 + x3, 18); \
+ x2 ^= rotl<7>(x1 + x4); \
+ x3 ^= rotl<9>(x2 + x1); \
+ x4 ^= rotl<13>(x3 + x2); \
+ x1 ^= rotl<18>(x4 + x3); \
} while(0)
/*
@@ -26,9 +26,9 @@ namespace {
void hsalsa20(uint32_t output[8], const uint32_t input[16])
{
uint32_t x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3],
- x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
- x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
- x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];
+ x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
+ x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
+ x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];
for(size_t i = 0; i != 10; ++i)
{
@@ -59,9 +59,9 @@ void hsalsa20(uint32_t output[8], const uint32_t input[16])
void salsa20(uint8_t output[64], const uint32_t input[16])
{
uint32_t x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3],
- x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
- x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
- x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];
+ x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
+ x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
+ x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];
for(size_t i = 0; i != 10; ++i)
{
diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h
index 23b3113ce..c1aa8b594 100644
--- a/src/lib/utils/bswap.h
+++ b/src/lib/utils/bswap.h
@@ -19,7 +19,7 @@ namespace Botan {
*/
inline uint16_t reverse_bytes(uint16_t val)
{
- return rotate_left(val, 8);
+ return rotl<8>(val);
}
/**
@@ -64,8 +64,8 @@ inline uint32_t reverse_bytes(uint32_t val)
#else
// Generic implementation
- return (rotate_right(val, 8) & 0xFF00FF00) |
- (rotate_left (val, 8) & 0x00FF00FF);
+ return (rotr<8>(val) & 0xFF00FF00) |
+ (rotl<8>(val) & 0x00FF00FF);
#endif
}
diff --git a/src/lib/utils/rotate.h b/src/lib/utils/rotate.h
index cb92daf96..4bb76c9ed 100644
--- a/src/lib/utils/rotate.h
+++ b/src/lib/utils/rotate.h
@@ -1,6 +1,6 @@
/*
* Word Rotation Operations
-* (C) 1999-2008 Jack Lloyd
+* (C) 1999-2008,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -13,27 +13,90 @@
namespace Botan {
/**
-* Bit rotation left
+* Bit rotation left by a compile-time constant amount
* @param input the input word
-* @param rot the number of bits to rotate
+* @return input rotated left by ROT bits
+*/
+template<size_t ROT, typename T>
+inline T rotl(T input)
+ {
+ static_assert(ROT > 0 && ROT < 8*sizeof(T), "Invalid rotation constant");
+ return static_cast<T>((input << ROT) | (input >> (8*sizeof(T) - ROT)));
+ }
+
+/**
+* Bit rotation right by a compile-time constant amount
+* @param input the input word
+* @return input rotated right by ROT bits
+*/
+template<size_t ROT, typename T>
+inline T rotr(T input)
+ {
+ static_assert(ROT > 0 && ROT < 8*sizeof(T), "Invalid rotation constant");
+ return static_cast<T>((input >> ROT) | (input << (8*sizeof(T) - ROT)));
+ }
+
+/**
+* Bit rotation left, variable rotation amount
+* @param input the input word
+* @param rot the number of bits to rotate, must be between 0 and sizeof(T)*8-1
* @return input rotated left by rot bits
*/
-template<typename T> inline T rotate_left(T input, size_t rot)
+template<typename T>
+inline T rotl_var(T input, size_t rot)
{
- rot %= 8 * sizeof(T);
- return (rot == 0) ? input : static_cast<T>((input << rot) | (input >> (8*sizeof(T)-rot)));;
+ return rot ? static_cast<T>((input << rot) | (input >> (sizeof(T)*8 - rot))) : input;
}
/**
-* Bit rotation right
+* Bit rotation right, variable rotation amount
* @param input the input word
-* @param rot the number of bits to rotate
+* @param rot the number of bits to rotate, must be between 0 and sizeof(T)*8-1
* @return input rotated right by rot bits
*/
-template<typename T> inline T rotate_right(T input, size_t rot)
+template<typename T>
+inline T rotr_var(T input, size_t rot)
+ {
+ return rot ? static_cast<T>((input >> rot) | (input << (sizeof(T)*8 - rot))) : input;
+ }
+
+#if BOTAN_USE_GCC_INLINE_ASM
+
+#if defined(BOTAN_TARGET_ARCH_IS_X86_64) || defined(BOTAN_TARGET_ARCH_IS_X86_32)
+
+template<>
+inline uint32_t rotl_var(uint32_t input, size_t rot)
+ {
+ asm("roll %1,%0" : "+r" (input) : "c" (static_cast<uint8_t>(rot)));
+ return input;
+ }
+
+template<>
+inline uint32_t rotr_var(uint32_t input, size_t rot)
+ {
+ asm("rorl %1,%0" : "+r" (input) : "c" (static_cast<uint8_t>(rot)));
+ return input;
+ }
+
+#endif
+
+#endif
+
+
+template<typename T>
+BOTAN_DEPRECATED("Use rotl<N> or rotl_var")
+inline T rotate_left(T input, size_t rot)
+ {
+ // rotl_var does not reduce rot itself, so reduce it modulo the bit width of T here
+ return rotl_var(input, rot % (8 * sizeof(T)));
+ }
+
+template<typename T>
+BOTAN_DEPRECATED("Use rotr<N> or rotr_var")
+inline T rotate_right(T input, size_t rot)
{
- rot %= 8 * sizeof(T);
- return (rot == 0) ? input : static_cast<T>((input >> rot) | (input << (8*sizeof(T)-rot)));
+ // rotr_var does not reduce rot itself, so reduce it modulo the bit width of T here
+ return rotr_var(input, rot % (8 * sizeof(T)));
}
}
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h
index 2a177b388..aca7a8e9a 100644
--- a/src/lib/utils/simd/simd_32.h
+++ b/src/lib/utils/simd/simd_32.h
@@ -281,56 +281,57 @@ class SIMD_4x32 final
/*
- Return rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3)
+ * This is used for SHA-2/SHACAL2
+ * Return rotr<ROT1>(x) ^ rotr<ROT2>(x) ^ rotr<ROT3>(x) for each 32-bit element x
*/
- SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const
+ template<size_t ROT1, size_t ROT2, size_t ROT3>
+ SIMD_4x32 rho() const
{
SIMD_4x32 res;
#if defined(BOTAN_SIMD_USE_SSE2)
- res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)),
- _mm_srli_epi32(m_sse, static_cast<int>(rot1)));
+ res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT1)),
+ _mm_srli_epi32(m_sse, static_cast<int>(ROT1)));
res.m_sse = _mm_xor_si128(
res.m_sse,
- _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)),
- _mm_srli_epi32(m_sse, static_cast<int>(rot2))));
+ _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT2)),
+ _mm_srli_epi32(m_sse, static_cast<int>(ROT2))));
res.m_sse = _mm_xor_si128(
res.m_sse,
- _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)),
- _mm_srli_epi32(m_sse, static_cast<int>(rot3))));
+ _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT3)),
+ _mm_srli_epi32(m_sse, static_cast<int>(ROT3))));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
- const unsigned int r1 = static_cast<unsigned int>(32-rot1);
- const unsigned int r2 = static_cast<unsigned int>(32-rot2);
- const unsigned int r3 = static_cast<unsigned int>(32-rot3);
+ const unsigned int r1 = static_cast<unsigned int>(32-ROT1);
+ const unsigned int r2 = static_cast<unsigned int>(32-ROT2);
+ const unsigned int r3 = static_cast<unsigned int>(32-ROT3);
res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1});
res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2}));
res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3}));
#elif defined(BOTAN_SIMD_USE_NEON)
- res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot1)),
- vshrq_n_u32(m_neon, static_cast<int>(rot1)));
+ res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT1)),
+ vshrq_n_u32(m_neon, static_cast<int>(ROT1)));
res.m_neon = veorq_u32(
res.m_neon,
- vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot2)),
- vshrq_n_u32(m_neon, static_cast<int>(rot2))));
+ vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT2)),
+ vshrq_n_u32(m_neon, static_cast<int>(ROT2))));
res.m_neon = veorq_u32(
res.m_neon,
- vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot3)),
- vshrq_n_u32(m_neon, static_cast<int>(rot3))));
+ vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT3)),
+ vshrq_n_u32(m_neon, static_cast<int>(ROT3))));
#else
for(size_t i = 0; i != 4; ++i)
{
- res.m_scalar[i] =
- Botan::rotate_right(m_scalar[i], rot1) ^
- Botan::rotate_right(m_scalar[i], rot2) ^
- Botan::rotate_right(m_scalar[i], rot3);
+ res.m_scalar[i] = Botan::rotr<ROT1>(m_scalar[i]) ^
+ Botan::rotr<ROT2>(m_scalar[i]) ^
+ Botan::rotr<ROT3>(m_scalar[i]);
}
#endif
@@ -338,38 +339,42 @@ class SIMD_4x32 final
}
/**
- * Rotate each element of SIMD register n bits left
+ * Left rotation by a compile-time constant
*/
- void rotate_left(size_t rot)
+ template<size_t ROT>
+ SIMD_4x32 rotl() const
{
+ static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
+
#if defined(BOTAN_SIMD_USE_SSE2)
- m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(rot)),
- _mm_srli_epi32(m_sse, static_cast<int>(32-rot)));
+ return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(ROT)),
+ _mm_srli_epi32(m_sse, static_cast<int>(32-ROT))));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
- const unsigned int r = static_cast<unsigned int>(rot);
- m_vmx = vec_rl(m_vmx, (__vector unsigned int){r, r, r, r});
+ const unsigned int r = static_cast<unsigned int>(ROT);
+ return SIMD_4x32(vec_rl(m_vmx, (__vector unsigned int){r, r, r, r}));
#elif defined(BOTAN_SIMD_USE_NEON)
- m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(rot)),
- vshrq_n_u32(m_neon, static_cast<int>(32-rot)));
+ return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(ROT)),
+ vshrq_n_u32(m_neon, static_cast<int>(32-ROT))));
#else
- m_scalar[0] = Botan::rotate_left(m_scalar[0], rot);
- m_scalar[1] = Botan::rotate_left(m_scalar[1], rot);
- m_scalar[2] = Botan::rotate_left(m_scalar[2], rot);
- m_scalar[3] = Botan::rotate_left(m_scalar[3], rot);
+ return SIMD_4x32(Botan::rotl<ROT>(m_scalar[0]),
+ Botan::rotl<ROT>(m_scalar[1]),
+ Botan::rotl<ROT>(m_scalar[2]),
+ Botan::rotl<ROT>(m_scalar[3]));
#endif
}
/**
- * Rotate each element of SIMD register n bits right
+ * Right rotation by a compile-time constant
*/
- void rotate_right(size_t rot)
+ template<size_t ROT>
+ SIMD_4x32 rotr() const
{
- rotate_left(32 - rot);
+ return this->rotl<32-ROT>();
}
/**
@@ -596,13 +601,11 @@ class SIMD_4x32 final
//return SIMD_4x32(vrev64q_u32(m_neon));
// FIXME this is really slow
- SIMD_4x32 ror8(m_neon);
- ror8.rotate_right(8);
- SIMD_4x32 rol8(m_neon);
- rol8.rotate_left(8);
+ SIMD_4x32 ror8 = this->rotr<8>();
+ SIMD_4x32 rol8 = this->rotl<8>();
- SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00);
- SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF);
+ const SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00);
+ const SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF);
return (ror8 & mask1) | (rol8 & mask2);
#else
// scalar
diff --git a/src/tests/test_simd.cpp b/src/tests/test_simd.cpp
index 6da743514..1f192c6b6 100644
--- a/src/tests/test_simd.cpp
+++ b/src/tests/test_simd.cpp
@@ -42,23 +42,21 @@ class SIMD_32_Tests final : public Test
const Botan::SIMD_4x32 input(pat1, pat2, pat3, pat4);
- Botan::SIMD_4x32 rol = input;
- rol.rotate_left(3);
-
- test_eq(result, "rotate_left", rol,
- Botan::rotate_left(pat1, 3),
- Botan::rotate_left(pat2, 3),
- Botan::rotate_left(pat3, 3),
- Botan::rotate_left(pat4, 3));
-
- Botan::SIMD_4x32 ror = input;
- ror.rotate_right(9);
-
- test_eq(result, "rotate_right", ror,
- Botan::rotate_right(pat1, 9),
- Botan::rotate_right(pat2, 9),
- Botan::rotate_right(pat3, 9),
- Botan::rotate_right(pat4, 9));
+ Botan::SIMD_4x32 rol = input.rotl<3>();
+
+ test_eq(result, "rotl", rol,
+ Botan::rotl<3>(pat1),
+ Botan::rotl<3>(pat2),
+ Botan::rotl<3>(pat3),
+ Botan::rotl<3>(pat4));
+
+ Botan::SIMD_4x32 ror = input.rotr<9>();
+
+ test_eq(result, "rotr", ror,
+ Botan::rotr<9>(pat1),
+ Botan::rotr<9>(pat2),
+ Botan::rotr<9>(pat3),
+ Botan::rotr<9>(pat4));
Botan::SIMD_4x32 add = input + splat;
test_eq(result, "add +", add, pat1 + pat1, pat2 + pat1, pat3 + pat1, pat4 + pat1);