diff options
author | Jack Lloyd <[email protected]> | 2017-10-12 13:23:21 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-10-12 13:23:21 -0400 |
commit | fa8b83578dcffc394b8449207f60662d7e562728 (patch) | |
tree | bc4bb70f1fc988d592c49ee031f7529f4162c66a /src/lib | |
parent | e100b238667a8fcf471b8c86f8f8c33bbe3426e7 (diff) | |
parent | d11f369b6a72f584499a883e0ee7cbb63723dc93 (diff) |
Merge GH #1247 Improve bit rotation functions
Diffstat (limited to 'src/lib')
35 files changed, 724 insertions, 644 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp index 71a8c6a44..1893ab4a0 100644 --- a/src/lib/block/aes/aes.cpp +++ b/src/lib/block/aes/aes.cpp @@ -114,9 +114,9 @@ const std::vector<uint32_t>& AES_TE() const uint32_t x = make_uint32(xtime(s), s, s, xtime3(s)); TE[i] = x; - TE[i+256] = rotate_right(x, 8); - TE[i+512] = rotate_right(x, 16); - TE[i+768] = rotate_right(x, 24); + TE[i+256] = rotr< 8>(x); + TE[i+512] = rotr<16>(x); + TE[i+768] = rotr<24>(x); } return TE; }; @@ -135,9 +135,9 @@ const std::vector<uint32_t>& AES_TD() const uint32_t x = make_uint32(xtime14(s), xtime9(s), xtime13(s), xtime11(s)); TD[i] = x; - TD[i+256] = rotate_right(x, 8); - TD[i+512] = rotate_right(x, 16); - TD[i+768] = rotate_right(x, 24); + TD[i+256] = rotr< 8>(x); + TD[i+512] = rotr<16>(x); + TD[i+768] = rotr<24>(x); } return TD; }; @@ -188,24 +188,24 @@ void aes_encrypt_n(const uint8_t in[], uint8_t out[], */ uint32_t B0 = TE[get_byte(0, T0)] ^ - rotate_right(TE[get_byte(1, T1)], 8) ^ - rotate_right(TE[get_byte(2, T2)], 16) ^ - rotate_right(TE[get_byte(3, T3)], 24) ^ EK[4]; + rotr< 8>(TE[get_byte(1, T1)]) ^ + rotr<16>(TE[get_byte(2, T2)]) ^ + rotr<24>(TE[get_byte(3, T3)]) ^ EK[4]; uint32_t B1 = TE[get_byte(0, T1)] ^ - rotate_right(TE[get_byte(1, T2)], 8) ^ - rotate_right(TE[get_byte(2, T3)], 16) ^ - rotate_right(TE[get_byte(3, T0)], 24) ^ EK[5]; + rotr< 8>(TE[get_byte(1, T2)]) ^ + rotr<16>(TE[get_byte(2, T3)]) ^ + rotr<24>(TE[get_byte(3, T0)]) ^ EK[5]; uint32_t B2 = TE[get_byte(0, T2)] ^ - rotate_right(TE[get_byte(1, T3)], 8) ^ - rotate_right(TE[get_byte(2, T0)], 16) ^ - rotate_right(TE[get_byte(3, T1)], 24) ^ EK[6]; + rotr< 8>(TE[get_byte(1, T3)]) ^ + rotr<16>(TE[get_byte(2, T0)]) ^ + rotr<24>(TE[get_byte(3, T1)]) ^ EK[6]; uint32_t B3 = TE[get_byte(0, T3)] ^ - rotate_right(TE[get_byte(1, T0)], 8) ^ - rotate_right(TE[get_byte(2, T1)], 16) ^ - rotate_right(TE[get_byte(3, T2)], 24) ^ EK[7]; + rotr< 8>(TE[get_byte(1, T0)]) ^ + rotr<16>(TE[get_byte(2, T1)]) ^ + rotr<24>(TE[get_byte(3, T2)]) ^ EK[7]; for(size_t r = 2*4; r < EK.size(); r += 2*4) { @@ -276,24 +276,24 @@ void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, T0 ^= Z; uint32_t B0 = TD[get_byte(0, T0)] ^ - rotate_right(TD[get_byte(1, T3)], 8) ^ - rotate_right(TD[get_byte(2, T2)], 16) ^ - rotate_right(TD[get_byte(3, T1)], 24) ^ DK[4]; + rotr< 8>(TD[get_byte(1, T3)]) ^ + rotr<16>(TD[get_byte(2, T2)]) ^ + rotr<24>(TD[get_byte(3, T1)]) ^ DK[4]; uint32_t B1 = TD[get_byte(0, T1)] ^ - rotate_right(TD[get_byte(1, T0)], 8) ^ - rotate_right(TD[get_byte(2, T3)], 16) ^ - rotate_right(TD[get_byte(3, T2)], 24) ^ DK[5]; + rotr< 8>(TD[get_byte(1, T0)]) ^ + rotr<16>(TD[get_byte(2, T3)]) ^ + rotr<24>(TD[get_byte(3, T2)]) ^ DK[5]; uint32_t B2 = TD[get_byte(0, T2)] ^ - rotate_right(TD[get_byte(1, T1)], 8) ^ - rotate_right(TD[get_byte(2, T0)], 16) ^ - rotate_right(TD[get_byte(3, T3)], 24) ^ DK[6]; + rotr< 8>(TD[get_byte(1, T1)]) ^ + rotr<16>(TD[get_byte(2, T0)]) ^ + rotr<24>(TD[get_byte(3, T3)]) ^ DK[6]; uint32_t B3 = TD[get_byte(0, T3)] ^ - rotate_right(TD[get_byte(1, T2)], 8) ^ - rotate_right(TD[get_byte(2, T1)], 16) ^ - rotate_right(TD[get_byte(3, T0)], 24) ^ DK[7]; + rotr< 8>(TD[get_byte(1, T2)]) ^ + rotr<16>(TD[get_byte(2, T1)]) ^ + rotr<24>(TD[get_byte(3, T0)]) ^ DK[7]; for(size_t r = 2*4; r < DK.size(); r += 2*4) { diff --git a/src/lib/block/aria/aria.cpp b/src/lib/block/aria/aria.cpp index 5b449722a..1583dd7d3 100644 --- a/src/lib/block/aria/aria.cpp +++ b/src/lib/block/aria/aria.cpp @@ -183,7 +183,7 @@ inline void ARIA_FO(uint32_t& T0, uint32_t& T1, uint32_t& T2, uint32_t& T3) T1 ^= T2; T1 = ((T1 << 8) & 0xFF00FF00) | ((T1 >> 8) & 0x00FF00FF); - T2 = rotate_right(T2, 16); + T2 = rotr<16>(T2); T3 = reverse_bytes(T3); T1 ^= T2; @@ -205,7 +205,7 @@ inline void ARIA_FE(uint32_t& T0, uint32_t& T1, uint32_t& T2, uint32_t& T3) T1 ^= T2; T3 = ((T3 << 8) & 0xFF00FF00) | ((T3 >> 8) & 0x00FF00FF); - T0 = rotate_right(T0, 16); + T0 = rotr<16>(T0); T1 = reverse_bytes(T1); T1 ^= T2; @@ -411,9 +411,9 @@ void key_schedule(secure_vector<uint32_t>& ERK, { for(size_t j = 0; j != 4; ++j) { - DRK[i+j] = rotate_right(DRK[i+j], 8) ^ - rotate_right(DRK[i+j], 16) ^ - rotate_right(DRK[i+j], 24); + DRK[i+j] = rotr<8>(DRK[i+j]) ^ + rotr<16>(DRK[i+j]) ^ + rotr<24>(DRK[i+j]); } DRK[i+1] ^= DRK[i+2]; DRK[i+2] ^= DRK[i+3]; @@ -421,7 +421,7 @@ void key_schedule(secure_vector<uint32_t>& ERK, DRK[i+2] ^= DRK[i+0]; DRK[i+1] ^= DRK[i+2]; DRK[i+1] = ((DRK[i+1] << 8) & 0xFF00FF00) | ((DRK[i+1] >> 8) & 0x00FF00FF); - DRK[i+2] = rotate_right(DRK[i+2], 16); + DRK[i+2] = rotr<16>(DRK[i+2]); DRK[i+3] = reverse_bytes(DRK[i+3]); DRK[i+1] ^= DRK[i+2]; DRK[i+2] ^= DRK[i+3]; diff --git a/src/lib/block/camellia/camellia.cpp b/src/lib/block/camellia/camellia.cpp index ea84fa313..89db6f8b9 100644 --- a/src/lib/block/camellia/camellia.cpp +++ b/src/lib/block/camellia/camellia.cpp @@ -577,12 +577,12 @@ uint64_t F_SLOW(uint64_t v, uint64_t K) const uint64_t x = v ^ K; const uint8_t t1 = SBOX[get_byte(0, x)]; - const uint8_t t2 = rotate_left(SBOX[get_byte(1, x)], 1); - const uint8_t t3 = rotate_left(SBOX[get_byte(2, x)], 7); - const uint8_t t4 = SBOX[rotate_left(get_byte(3, x), 1)]; - const uint8_t t5 = rotate_left(SBOX[get_byte(4, x)], 1); - const uint8_t t6 = rotate_left(SBOX[get_byte(5, x)], 7); - const uint8_t t7 = SBOX[rotate_left(get_byte(6, x), 1)]; + const uint8_t t2 = rotl<1>(SBOX[get_byte(1, x)]); + const uint8_t t3 = rotl<7>(SBOX[get_byte(2, x)]); + const uint8_t t4 = SBOX[rotl<1>(get_byte(3, x))]; + const uint8_t t5 = rotl<1>(SBOX[get_byte(4, x)]); + const uint8_t t6 = rotl<7>(SBOX[get_byte(5, x)]); + const uint8_t t7 = SBOX[rotl<1>(get_byte(6, x))]; const uint8_t t8 = SBOX[get_byte(7, x)]; const uint8_t y1 = t1 ^ t3 ^ t4 ^ t6 ^ t7 ^ t8; @@ -619,7 +619,7 @@ inline uint64_t FL(uint64_t v, uint64_t K) const uint32_t k1 = static_cast<uint32_t>(K >> 32); const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF); - x2 ^= rotate_left(x1 & k1, 1); + x2 ^= rotl<1>(x1 & k1); x1 ^= (x2 | k2); return ((static_cast<uint64_t>(x1) << 32) | x2); @@ -634,7 +634,7 @@ inline uint64_t FLINV(uint64_t v, uint64_t K) const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF); x1 ^= (x2 | k2); - x2 ^= rotate_left(x1 & k1, 1); + x2 ^= rotl<1>(x1 & k1); return ((static_cast<uint64_t>(x1) << 32) | x2); } diff --git a/src/lib/block/cast/cast128.cpp b/src/lib/block/cast/cast128.cpp index d6ded8cd2..d54d0614e 100644 --- a/src/lib/block/cast/cast128.cpp +++ b/src/lib/block/cast/cast128.cpp @@ -18,7 +18,7 @@ namespace { */ inline uint32_t R1(uint32_t R, uint32_t MK, uint8_t RK) { - uint32_t T = rotate_left(MK + R, RK); + const uint32_t T = rotl_var(MK + R, RK); return (CAST_SBOX1[get_byte(0, T)] ^ CAST_SBOX2[get_byte(1, T)]) - CAST_SBOX3[get_byte(2, T)] + CAST_SBOX4[get_byte(3, T)]; } @@ -28,7 +28,7 @@ inline uint32_t R1(uint32_t R, uint32_t MK, uint8_t RK) */ inline uint32_t R2(uint32_t R, uint32_t MK, uint8_t RK) { - uint32_t T = rotate_left(MK ^ R, RK); + const uint32_t T = rotl_var(MK ^ R, RK); return (CAST_SBOX1[get_byte(0, T)] - CAST_SBOX2[get_byte(1, T)] + CAST_SBOX3[get_byte(2, T)]) ^ CAST_SBOX4[get_byte(3, T)]; } @@ -38,7 +38,7 @@ inline uint32_t R2(uint32_t R, uint32_t MK, uint8_t RK) */ inline uint32_t R3(uint32_t R, uint32_t MK, uint8_t RK) { - uint32_t T = rotate_left(MK - R, RK); + const uint32_t T = rotl_var(MK - R, RK); return ((CAST_SBOX1[get_byte(0, T)] + CAST_SBOX2[get_byte(1, T)]) ^ CAST_SBOX3[get_byte(2, T)]) - CAST_SBOX4[get_byte(3, T)]; } diff --git a/src/lib/block/cast/cast256.cpp b/src/lib/block/cast/cast256.cpp index a4a7dbd36..b4aa49166 100644 --- a/src/lib/block/cast/cast256.cpp +++ b/src/lib/block/cast/cast256.cpp @@ -16,31 +16,31 @@ namespace { /* * CAST-256 Round Type 1 */ -void round1(uint32_t& out, uint32_t in, uint32_t mask, uint32_t rot) +void round1(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK) { - uint32_t temp = rotate_left(mask + in, rot); - out ^= (CAST_SBOX1[get_byte(0, temp)] ^ CAST_SBOX2[get_byte(1, temp)]) - - CAST_SBOX3[get_byte(2, temp)] + CAST_SBOX4[get_byte(3, temp)]; + const uint32_t T = rotl_var(MK + in, RK); + out ^= (CAST_SBOX1[get_byte(0, T)] ^ CAST_SBOX2[get_byte(1, T)]) - + CAST_SBOX3[get_byte(2, T)] + CAST_SBOX4[get_byte(3, T)]; } /* * CAST-256 Round Type 2 */ -void round2(uint32_t& out, uint32_t in, uint32_t mask, uint32_t rot) +void round2(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK) { - uint32_t temp = rotate_left(mask ^ in, rot); - out ^= (CAST_SBOX1[get_byte(0, temp)] - CAST_SBOX2[get_byte(1, temp)] + - CAST_SBOX3[get_byte(2, temp)]) ^ CAST_SBOX4[get_byte(3, temp)]; + const uint32_t T = rotl_var(MK ^ in, RK); + out ^= (CAST_SBOX1[get_byte(0, T)] - CAST_SBOX2[get_byte(1, T)] + + CAST_SBOX3[get_byte(2, T)]) ^ CAST_SBOX4[get_byte(3, T)]; } /* * CAST-256 Round Type 3 */ -void round3(uint32_t& out, uint32_t in, uint32_t mask, uint32_t rot) +void round3(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK) { - uint32_t temp = rotate_left(mask - in, rot); - out ^= ((CAST_SBOX1[get_byte(0, temp)] + CAST_SBOX2[get_byte(1, temp)]) ^ - CAST_SBOX3[get_byte(2, temp)]) - CAST_SBOX4[get_byte(3, temp)]; + const uint32_t T = rotl_var(MK - in, RK); + out ^= ((CAST_SBOX1[get_byte(0, T)] + CAST_SBOX2[get_byte(1, T)]) ^ + CAST_SBOX3[get_byte(2, T)]) - CAST_SBOX4[get_byte(3, T)]; } } diff --git a/src/lib/block/des/des.cpp b/src/lib/block/des/des.cpp index 44f315047..15c2adb66 100644 --- a/src/lib/block/des/des.cpp +++ b/src/lib/block/des/des.cpp @@ -91,16 +91,16 @@ void des_encrypt(uint32_t& L, uint32_t& R, { uint32_t T0, T1; - T0 = rotate_right(R, 4) ^ round_key[2*i]; - T1 = R ^ round_key[2*i + 1]; + T0 = rotr<4>(R) ^ round_key[2*i]; + T1 = R ^ round_key[2*i + 1]; L ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^ DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^ DES_SPBOX5[get_byte(2, T0)] ^ DES_SPBOX6[get_byte(2, T1)] ^ DES_SPBOX7[get_byte(3, T0)] ^ DES_SPBOX8[get_byte(3, T1)]; - T0 = rotate_right(L, 4) ^ round_key[2*i + 2]; - T1 = L ^ round_key[2*i + 3]; + T0 = rotr<4>(L) ^ round_key[2*i + 2]; + T1 = L ^ round_key[2*i + 3]; R ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^ DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^ @@ -119,16 +119,16 @@ void des_decrypt(uint32_t& L, uint32_t& R, { uint32_t T0, T1; - T0 = rotate_right(R, 4) ^ round_key[2*i - 2]; - T1 = R ^ round_key[2*i - 1]; + T0 = rotr<4>(R) ^ round_key[2*i - 2]; + T1 = R ^ round_key[2*i - 1]; L ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^ DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^ DES_SPBOX5[get_byte(2, T0)] ^ DES_SPBOX6[get_byte(2, T1)] ^ DES_SPBOX7[get_byte(3, T0)] ^ DES_SPBOX8[get_byte(3, T1)]; - T0 = rotate_right(L, 4) ^ round_key[2*i - 4]; - T1 = L ^ round_key[2*i - 3]; + T0 = rotr<4>(L) ^ round_key[2*i - 4]; + T1 = L ^ round_key[2*i - 3]; R ^= DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^ DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^ @@ -160,7 +160,7 @@ void DES::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = rotate_left(T, 32); + T = rotl<32>(T); store_be(T, out + 8*i); } @@ -188,7 +188,7 @@ void DES::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = rotate_left(T, 32); + T = rotl<32>(T); store_be(T, out + BLOCK_SIZE*i); } @@ -232,7 +232,7 @@ void TripleDES::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) cons (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = rotate_left(T, 32); + T = rotl<32>(T); store_be(T, out); @@ -265,7 +265,7 @@ void TripleDES::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) cons (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = rotate_left(T, 32); + T = rotl<32>(T); store_be(T, out); diff --git a/src/lib/block/gost_28147/gost_28147.cpp b/src/lib/block/gost_28147/gost_28147.cpp index f73ac5910..ffe9b5d66 100644 --- a/src/lib/block/gost_28147/gost_28147.cpp +++ b/src/lib/block/gost_28147/gost_28147.cpp @@ -12,11 +12,17 @@ namespace Botan { uint8_t GOST_28147_89_Params::sbox_entry(size_t row, size_t col) const { - uint8_t x = m_sboxes[4 * col + (row / 2)]; - + const uint8_t x = m_sboxes[4 * col + (row / 2)]; return (row % 2 == 0) ? (x >> 4) : (x & 0x0F); } +uint8_t GOST_28147_89_Params::sbox_pair(size_t row, size_t col) const + { + const uint8_t x = m_sboxes[4 * (col % 16) + row]; + const uint8_t y = m_sboxes[4 * (col / 16) + row]; + return (x >> 4) | (y << 4); + } + GOST_28147_89_Params::GOST_28147_89_Params(const std::string& n) : m_name(n) { // Encoded in the packed fromat from RFC 4357 @@ -53,13 +59,14 @@ GOST_28147_89_Params::GOST_28147_89_Params(const std::string& n) : m_name(n) GOST_28147_89::GOST_28147_89(const GOST_28147_89_Params& param) : m_SBOX(1024) { // Convert the parallel 4x4 sboxes into larger word-based sboxes - for(size_t i = 0; i != 4; ++i) - for(size_t j = 0; j != 256; ++j) - { - const uint32_t T = (param.sbox_entry(2*i , j % 16)) | - (param.sbox_entry(2*i+1, j / 16) << 4); - m_SBOX[256*i+j] = rotate_left(T, (11+8*i) % 32); - } + + for(size_t i = 0; i != 256; ++i) + { + m_SBOX[i ] = rotl<11, uint32_t>(param.sbox_pair(0, i)); + m_SBOX[i+256] = rotl<19, uint32_t>(param.sbox_pair(1, i)); + m_SBOX[i+512] = rotl<27, uint32_t>(param.sbox_pair(2, i)); + m_SBOX[i+768] = rotl< 3, uint32_t>(param.sbox_pair(3, i)); + } } std::string GOST_28147_89::name() const diff --git a/src/lib/block/gost_28147/gost_28147.h b/src/lib/block/gost_28147/gost_28147.h index 09581191e..34e45779b 100644 --- a/src/lib/block/gost_28147/gost_28147.h +++ b/src/lib/block/gost_28147/gost_28147.h @@ -34,6 +34,12 @@ class BOTAN_PUBLIC_API(2,0) GOST_28147_89_Params final std::string param_name() const { return m_name; } /** + * Return a representation used for building larger tables + * For internal use + */ + uint8_t sbox_pair(size_t row, size_t col) const; + + /** * Default GOST parameters are the ones given in GOST R 34.11 for * testing purposes; these sboxes are also used by Crypto++, and, * at least according to Wikipedia, the Central Bank of Russian diff --git a/src/lib/block/kasumi/kasumi.cpp b/src/lib/block/kasumi/kasumi.cpp index ed2524e0b..a9b5d8274 100644 --- a/src/lib/block/kasumi/kasumi.cpp +++ b/src/lib/block/kasumi/kasumi.cpp @@ -121,8 +121,8 @@ void KASUMI::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { const uint16_t* K = &m_EK[8*j]; - uint16_t R = B1 ^ (rotate_left(B0, 1) & K[0]); - uint16_t L = B0 ^ (rotate_left(R, 1) | K[1]); + uint16_t R = B1 ^ (rotl<1>(B0) & K[0]); + uint16_t L = B0 ^ (rotl<1>(R) | K[1]); L = FI(L ^ K[ 2], K[ 3]) ^ R; R = FI(R ^ K[ 4], K[ 5]) ^ L; @@ -135,8 +135,8 @@ void KASUMI::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const L = FI(L ^ K[12], K[13]) ^ R; R = FI(R ^ K[14], K[15]) ^ L; - R ^= (rotate_left(L, 1) & K[8]); - L ^= (rotate_left(R, 1) | K[9]); + R ^= (rotl<1>(L) & K[8]); + L ^= (rotl<1>(R) | K[9]); B0 ^= L; B1 ^= R; @@ -171,14 +171,14 @@ void KASUMI::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const R = FI(R ^ K[12], K[13]) ^ L; L = FI(L ^ K[14], K[15]) ^ R; - L ^= (rotate_left(R, 1) & K[8]); - R ^= (rotate_left(L, 1) | K[9]); + L ^= (rotl<1>(R) & K[8]); + R ^= (rotl<1>(L) | K[9]); R = B0 ^= R; L = B1 ^= L; - L ^= (rotate_left(R, 1) & K[0]); - R ^= (rotate_left(L, 1) | K[1]); + L ^= (rotl<1>(R) & K[0]); + R ^= (rotl<1>(L) | K[1]); R = FI(R ^ K[2], K[3]) ^ L; L = FI(L ^ K[4], K[5]) ^ R; @@ -214,13 +214,13 @@ void KASUMI::key_schedule(const uint8_t key[], size_t) for(size_t i = 0; i != 8; ++i) { - m_EK[8*i ] = rotate_left(K[(i+0) % 8 ], 2); - m_EK[8*i+1] = rotate_left(K[(i+2) % 8 + 8], 1); - m_EK[8*i+2] = rotate_left(K[(i+1) % 8 ], 5); + m_EK[8*i ] = rotl<2>(K[(i+0) % 8]); + m_EK[8*i+1] = rotl<1>(K[(i+2) % 8 + 8]); + m_EK[8*i+2] = rotl<5>(K[(i+1) % 8]); m_EK[8*i+3] = K[(i+4) % 8 + 8]; - m_EK[8*i+4] = rotate_left(K[(i+5) % 8 ], 8); + m_EK[8*i+4] = rotl<8>(K[(i+5) % 8]); m_EK[8*i+5] = K[(i+3) % 8 + 8]; - m_EK[8*i+6] = rotate_left(K[(i+6) % 8 ], 13); + m_EK[8*i+6] = rotl<13>(K[(i+6) % 8]); m_EK[8*i+7] = K[(i+7) % 8 + 8]; } } diff --git a/src/lib/block/noekeon/noekeon.cpp b/src/lib/block/noekeon/noekeon.cpp index c82badd4c..a7f60a0fd 100644 --- a/src/lib/block/noekeon/noekeon.cpp +++ b/src/lib/block/noekeon/noekeon.cpp @@ -21,7 +21,7 @@ inline void theta(uint32_t& A0, uint32_t& A1, const uint32_t EK[4]) { uint32_t T = A0 ^ A2; - T ^= rotate_left(T, 8) ^ rotate_right(T, 8); + T ^= rotl<8>(T) ^ rotr<8>(T); A1 ^= T; A3 ^= T; @@ -31,7 +31,7 @@ inline void theta(uint32_t& A0, uint32_t& A1, A3 ^= EK[3]; T = A1 ^ A3; - T ^= rotate_left(T, 8) ^ rotate_right(T, 8); + T ^= rotl<8>(T) ^ rotr<8>(T); A0 ^= T; A2 ^= T; } @@ -43,12 +43,12 @@ inline void theta(uint32_t& A0, uint32_t& A1, uint32_t& A2, uint32_t& A3) { uint32_t T = A0 ^ A2; - T ^= rotate_left(T, 8) ^ rotate_right(T, 8); + T ^= rotl<8>(T) ^ rotr<8>(T); A1 ^= T; A3 ^= T; T = A1 ^ A3; - T ^= rotate_left(T, 8) ^ rotate_right(T, 8); + T ^= rotl<8>(T) ^ rotr<8>(T); A0 ^= T; A2 ^= T; } @@ -135,15 +135,15 @@ void Noekeon::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const A0 ^= RC[j]; theta(A0, A1, A2, A3, m_EK.data()); - A1 = rotate_left(A1, 1); - A2 = rotate_left(A2, 5); - A3 = rotate_left(A3, 2); + A1 = rotl<1>(A1); + A2 = rotl<5>(A2); + A3 = rotl<2>(A3); gamma(A0, A1, A2, A3); - A1 = rotate_right(A1, 1); - A2 = rotate_right(A2, 5); - A3 = rotate_right(A3, 2); + A1 = rotr<1>(A1); + A2 = rotr<5>(A2); + A3 = rotr<2>(A3); } A0 ^= RC[16]; @@ -186,15 +186,15 @@ void Noekeon::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const theta(A0, A1, A2, A3, m_DK.data()); A0 ^= RC[j]; - A1 = rotate_left(A1, 1); - A2 = rotate_left(A2, 5); - A3 = rotate_left(A3, 2); + A1 = rotl<1>(A1); + A2 = rotl<5>(A2); + A3 = rotl<2>(A3); gamma(A0, A1, A2, A3); - A1 = rotate_right(A1, 1); - A2 = rotate_right(A2, 5); - A3 = rotate_right(A3, 2); + A1 = rotr<1>(A1); + A2 = rotr<5>(A2); + A3 = rotr<2>(A3); } theta(A0, A1, A2, A3, m_DK.data()); @@ -222,15 +222,15 @@ void Noekeon::key_schedule(const uint8_t key[], size_t) A0 ^= RC[i]; theta(A0, A1, A2, A3); - A1 = rotate_left(A1, 1); - A2 = rotate_left(A2, 5); - A3 = rotate_left(A3, 2); + A1 = rotl<1>(A1); + A2 = rotl<5>(A2); + A3 = rotl<2>(A3); gamma(A0, A1, A2, A3); - A1 = rotate_right(A1, 1); - A2 = rotate_right(A2, 5); - A3 = rotate_right(A3, 2); + A1 = rotr<1>(A1); + A2 = rotr<5>(A2); + A3 = rotr<2>(A3); } A0 ^= RC[16]; diff --git a/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp b/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp index a77ba7b8c..f9a696d29 100644 --- a/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp +++ b/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp @@ -16,12 +16,7 @@ namespace Botan { #define NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3) \ do { \ SIMD_32 T = A0 ^ A2; \ - SIMD_32 T_l8 = T; \ - SIMD_32 T_r8 = T; \ - T_l8.rotate_left(8); \ - T_r8.rotate_right(8); \ - T ^= T_l8; \ - T ^= T_r8; \ + T ^= T.rotl<8>() ^ T.rotr<8>(); \ A1 ^= T; \ A3 ^= T; \ \ @@ -31,12 +26,7 @@ namespace Botan { A3 ^= K3; \ \ T = A1 ^ A3; \ - T_l8 = T; \ - T_r8 = T; \ - T_l8.rotate_left(8); \ - T_r8.rotate_right(8); \ - T ^= T_l8; \ - T ^= T_r8; \ + T ^= T.rotl<8>() ^ T.rotr<8>(); \ A0 ^= T; \ A2 ^= T; \ } while(0) @@ -83,15 +73,15 @@ void Noekeon::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); - A1.rotate_left(1); - A2.rotate_left(5); - A3.rotate_left(2); + A1 = A1.rotl<1>(); + A2 = A2.rotl<5>(); + A3 = A3.rotl<2>(); NOK_SIMD_GAMMA(A0, A1, A2, A3); - A1.rotate_right(1); - A2.rotate_right(5); - A3.rotate_right(2); + A1 = A1.rotr<1>(); + A2 = A2.rotr<5>(); + A3 = A3.rotr<2>(); } A0 ^= SIMD_32::splat(RC[16]); @@ -128,15 +118,15 @@ void Noekeon::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const A0 ^= SIMD_32::splat(RC[16-i]); - A1.rotate_left(1); - A2.rotate_left(5); - A3.rotate_left(2); + A1 = A1.rotl<1>(); + A2 = A2.rotl<5>(); + A3 = A3.rotl<2>(); NOK_SIMD_GAMMA(A0, A1, A2, A3); - A1.rotate_right(1); - A2.rotate_right(5); - A3.rotate_right(2); + A1 = A1.rotr<1>(); + A2 = A2.rotr<5>(); + A3 = A3.rotr<2>(); } NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); diff --git a/src/lib/block/serpent/serpent.cpp b/src/lib/block/serpent/serpent.cpp index 93af81231..6e1d79766 100644 --- a/src/lib/block/serpent/serpent.cpp +++ b/src/lib/block/serpent/serpent.cpp @@ -22,11 +22,11 @@ namespace { */ inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) { - B0 = rotate_left(B0, 13); B2 = rotate_left(B2, 3); - B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); - B1 = rotate_left(B1, 1); B3 = rotate_left(B3, 7); - B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); - B0 = rotate_left(B0, 5); B2 = rotate_left(B2, 22); + B0 = rotl<13>(B0); B2 = rotl<3>(B2); + B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); + B1 = rotl<1>(B1); B3 = rotl<7>(B3); + B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); + B0 = rotl<5>(B0); B2 = rotl<22>(B2); } /* @@ -34,11 +34,11 @@ inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) */ inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) { - B2 = rotate_right(B2, 22); B0 = rotate_right(B0, 5); - B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; - B3 = rotate_right(B3, 7); B1 = rotate_right(B1, 1); - B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2; - B2 = rotate_right(B2, 3); B0 = rotate_right(B0, 13); + B2 = rotr<22>(B2); B0 = rotr<5>(B0); + B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; + B3 = rotr<7>(B3); B1 = rotr<1>(B1); + B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2; + B2 = rotr<3>(B2); B0 = rotr<13>(B0); } } @@ -192,7 +192,7 @@ void Serpent::key_schedule(const uint8_t key[], size_t length) for(size_t i = 8; i != 140; ++i) { uint32_t wi = W[i-8] ^ W[i-5] ^ W[i-3] ^ W[i-1] ^ PHI ^ uint32_t(i-8); - W[i] = rotate_left(wi, 11); + W[i] = rotl<11>(wi); } SBoxE1(W[ 20],W[ 21],W[ 22],W[ 23]); diff --git a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp index 94b3cf9ad..b184b0d4a 100644 --- a/src/lib/block/serpent/serpent_simd/serpent_simd.cpp +++ b/src/lib/block/serpent/serpent_simd/serpent_simd.cpp @@ -24,30 +24,30 @@ namespace Botan { */ #define transform(B0, B1, B2, B3) \ do { \ - B0.rotate_left(13); \ - B2.rotate_left(3); \ + B0 = B0.rotl<13>(); \ + B2 = B2.rotl<3>(); \ B1 ^= B0 ^ B2; \ B3 ^= B2 ^ (B0 << 3); \ - B1.rotate_left(1); \ - B3.rotate_left(7); \ + B1 = B1.rotl<1>(); \ + B3 = B3.rotl<7>(); \ B0 ^= B1 ^ B3; \ B2 ^= B3 ^ (B1 << 7); \ - B0.rotate_left(5); \ - B2.rotate_left(22); \ + B0 = B0.rotl<5>(); \ + B2 = B2.rotl<22>(); \ } while(0); #define i_transform(B0, B1, B2, B3) \ do { \ - B2.rotate_right(22); \ - B0.rotate_right(5); \ + B2 = B2.rotr<22>(); \ + B0 = B0.rotr<5>(); \ B2 ^= B3 ^ (B1 << 7); \ B0 ^= B1 ^ B3; \ - B3.rotate_right(7); \ - B1.rotate_right(1); \ + B3 = B3.rotr<7>(); \ + B1 = B1.rotr<1>(); \ B3 ^= B2 ^ (B0 << 3); \ B1 ^= B0 ^ B2; \ - B2.rotate_right(3); \ - B0.rotate_right(13); \ + B2 = B2.rotr<3>(); \ + B0 = B0.rotr<13>(); \ } while(0); /* diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp index 30ad711db..12c87c426 100644 --- a/src/lib/block/shacal2/shacal2.cpp +++ b/src/lib/block/shacal2/shacal2.cpp @@ -17,8 +17,8 @@ inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, uint32_t E, uint32_t F, uint32_t G, uint32_t& H, uint32_t RK) { - const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22); - const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25); + const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); + const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); H += E_rho + ((E & F) ^ (~E & G)) + RK; D += H; @@ -29,8 +29,8 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, uint32_t E, uint32_t F, uint32_t G, uint32_t& H, uint32_t RK) { - const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22); - const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25); + const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); + const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); H -= A_rho + ((A & B) | ((A | B) & C)); D -= H; @@ -175,8 +175,8 @@ void SHACAL2::key_schedule(const uint8_t key[], size_t len) for(size_t i = 16; i != 64; ++i) { - const uint32_t sigma0_15 = rotate_right(m_RK[i-15], 7) ^ rotate_right(m_RK[i-15], 18) ^ (m_RK[i-15] >> 3); - const uint32_t sigma1_2 = rotate_right(m_RK[i-2], 17) ^ rotate_right(m_RK[i-2], 19) ^ (m_RK[i-2] >> 10); + const uint32_t sigma0_15 = rotr<7>(m_RK[i-15]) ^ rotr<18>(m_RK[i-15]) ^ (m_RK[i-15] >> 3); + const uint32_t sigma1_2 = rotr<17>(m_RK[i-2]) ^ rotr<19>(m_RK[i-2]) ^ (m_RK[i-2] >> 10); m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2; } diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp index a4324c8fb..bdcac1482 100644 --- a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp +++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp @@ -17,9 +17,9 @@ void SHACAL2_Fwd(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32& const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H, uint32_t RK) { - H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK); + H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK); D += H; - H += A.rho(2,13,22) + ((A & B) | ((A | B) & C)); + H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); } inline @@ -27,9 +27,9 @@ void SHACAL2_Rev(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32& const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H, uint32_t RK) { - H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C)); + H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); D -= H; - H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK); + H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK); } } diff --git a/src/lib/block/sm4/sm4.cpp b/src/lib/block/sm4/sm4.cpp index 979491566..42c865faf 100644 --- a/src/lib/block/sm4/sm4.cpp +++ b/src/lib/block/sm4/sm4.cpp @@ -46,7 +46,7 @@ inline uint32_t T(uint32_t b) const uint32_t t = make_uint32(SBOX[b0], SBOX[b1], SBOX[b2], SBOX[b3]); // L linear transform - return t ^ rotate_left(t, 2) ^ rotate_left(t, 10) ^ rotate_left(t, 18) ^ rotate_left(t, 24); + return t ^ rotl<2>(t) ^ rotl<10>(t) ^ rotl<18>(t) ^ rotl<24>(t); } // Variant of T for key schedule @@ -59,7 +59,7 @@ inline uint32_t Tp(uint32_t b) const uint32_t t = make_uint32(SBOX[b0], SBOX[b1], SBOX[b2], SBOX[b3]); // L' linear transform - return t ^ rotate_left(t, 13) ^ rotate_left(t, 23); + return t ^ rotl<13>(t) ^ rotl<23>(t); } } diff --git a/src/lib/block/threefish/threefish.cpp b/src/lib/block/threefish/threefish.cpp index 99ce135d5..937a673fd 100644 --- a/src/lib/block/threefish/threefish.cpp +++ b/src/lib/block/threefish/threefish.cpp @@ -17,10 +17,10 @@ namespace Botan { X1 += X5; \ X2 += X6; \ X3 += X7; \ - X4 = rotate_left(X4, ROT1); \ - X5 = rotate_left(X5, ROT2); \ - X6 = rotate_left(X6, ROT3); \ - X7 = rotate_left(X7, ROT4); \ + X4 = rotl<ROT1>(X4); \ + X5 = rotl<ROT2>(X5); \ + X6 = rotl<ROT3>(X6); \ + X7 = rotl<ROT4>(X7); \ X4 ^= X0; \ X5 ^= X1; \ X6 ^= X2; \ @@ -177,10 +177,10 @@ void Threefish_512::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) X5 ^= X1; \ X6 ^= X2; \ X7 ^= X3; \ - X4 = rotate_right(X4, ROT1); \ - X5 = rotate_right(X5, ROT2); \ - X6 = rotate_right(X6, ROT3); \ - X7 = rotate_right(X7, ROT4); \ + X4 = rotr<ROT1>(X4); \ + X5 = rotr<ROT2>(X5); \ + X6 = rotr<ROT3>(X6); \ + X7 = rotr<ROT4>(X7); \ X0 -= X4; \ X1 -= X5; \ X2 -= X6; \ diff --git a/src/lib/block/twofish/twofish.cpp b/src/lib/block/twofish/twofish.cpp index 51ef01ea9..3a09af8da 100644 --- a/src/lib/block/twofish/twofish.cpp +++ b/src/lib/block/twofish/twofish.cpp @@ -41,8 +41,8 @@ void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const Y += X + m_RK[2*j + 9]; X += m_RK[2*j + 8]; - C = rotate_right(C ^ X, 1); - D = rotate_left(D, 1) ^ Y; + C = rotr<1>(C ^ X); + D = rotl<1>(D) ^ Y; X = m_SB[ get_byte(3, C)] ^ m_SB[256+get_byte(2, C)] ^ m_SB[512+get_byte(1, C)] ^ m_SB[768+get_byte(0, C)]; @@ -52,8 +52,8 @@ void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const Y += X + m_RK[2*j + 11]; X += m_RK[2*j + 10]; - A = rotate_right(A ^ X, 1); - B = rotate_left(B, 1) ^ Y; + A = rotr<1>(A ^ X); + B = rotl<1>(B) ^ Y; } C ^= m_RK[4]; @@ -92,8 +92,8 @@ void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const Y += X + m_RK[39 - 2*j]; X += m_RK[38 - 2*j]; - C = rotate_left(C, 1) ^ X; - D = rotate_right(D ^ Y, 1); + C = rotl<1>(C) ^ X; + D = rotr<1>(D ^ Y); X = m_SB[ get_byte(3, C)] ^ m_SB[256+get_byte(2, C)] ^ m_SB[512+get_byte(1, C)] ^ m_SB[768+get_byte(0, C)]; @@ -103,8 +103,8 @@ void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const Y += X + m_RK[37 - 2*j]; X += m_RK[36 - 2*j]; - A = rotate_left(A, 1) ^ X; - B = rotate_right(B ^ Y, 1); + A = rotl<1>(A) ^ X; + B = rotr<1>(B ^ Y); } C ^= m_RK[0]; @@ -167,11 +167,11 @@ void Twofish::key_schedule(const uint8_t key[], size_t length) MDS1[Q0[Q1[i+1]^key[13]]^key[ 5]] ^ MDS2[Q1[Q0[i+1]^key[14]]^key[ 6]] ^ MDS3[Q1[Q1[i+1]^key[15]]^key[ 7]]; - Y = rotate_left(Y, 8); + Y = rotl<8>(Y); X += Y; Y += X; m_RK[i] = X; - m_RK[i+1] = rotate_left(Y, 9); + m_RK[i+1] = rotl<9>(Y); } } else if(length == 24) @@ -194,11 +194,11 @@ void Twofish::key_schedule(const uint8_t key[], size_t length) MDS1[Q0[Q1[Q1[i+1]^key[21]]^key[13]]^key[ 5]] ^ MDS2[Q1[Q0[Q0[i+1]^key[22]]^key[14]]^key[ 6]] ^ MDS3[Q1[Q1[Q0[i+1]^key[23]]^key[15]]^key[ 7]]; - Y = rotate_left(Y, 8); + Y = rotl<8>(Y); X += Y; Y += X; m_RK[i] = X; - m_RK[i+1] = rotate_left(Y, 9); + m_RK[i+1] = rotl<9>(Y); } } else if(length == 32) @@ -221,11 +221,11 @@ void Twofish::key_schedule(const uint8_t key[], size_t length) MDS1[Q0[Q1[Q1[Q0[i+1]^key[29]]^key[21]]^key[13]]^key[ 5]] ^ MDS2[Q1[Q0[Q0[Q0[i+1]^key[30]]^key[22]]^key[14]]^key[ 6]] ^ MDS3[Q1[Q1[Q0[Q1[i+1]^key[31]]^key[23]]^key[15]]^key[ 7]]; - Y = rotate_left(Y, 8); + Y = rotl<8>(Y); X += Y; Y += X; m_RK[i] = X; - m_RK[i+1] = rotate_left(Y, 9); + m_RK[i+1] = rotl<9>(Y); } } } diff --git a/src/lib/hash/blake2/blake2b.cpp b/src/lib/hash/blake2/blake2b.cpp index 85171b16b..79a30de3d 100644 --- a/src/lib/hash/blake2/blake2b.cpp +++ b/src/lib/hash/blake2/blake2b.cpp @@ -93,13 +93,13 @@ void Blake2b::compress(bool lastblock) #define G(r, i, a, b, c, d) \ do { \ a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotate_right<uint64_t>(d ^ a, 32); \ + d = rotr<32>(d ^ a); \ c = c + d; \ - b = rotate_right<uint64_t>(b ^ c, 24); \ + b = rotr<24>(b ^ c); \ a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotate_right<uint64_t>(d ^ a, 16); \ + d = rotr<16>(d ^ a); \ c = c + d; \ - b = rotate_right<uint64_t>(b ^ c, 63); \ + b = rotr<63>(b ^ c); \ } while(0) #define ROUND(r) \ diff --git a/src/lib/hash/md4/md4.cpp b/src/lib/hash/md4/md4.cpp index 79f3a2d13..1c048a6b3 100644 --- a/src/lib/hash/md4/md4.cpp +++ b/src/lib/hash/md4/md4.cpp @@ -16,31 +16,55 @@ std::unique_ptr<HashFunction> MD4::copy_state() const namespace { -/* -* MD4 FF Function -*/ -inline void FF(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint8_t S) +inline void FF4(uint32_t& A, uint32_t& B, uint32_t& C, uint32_t& D, + uint32_t M0, uint32_t M1, uint32_t M2, uint32_t M3) + { - A += (D ^ (B & (C ^ D))) + M; - A = rotate_left(A, S); + A += (D ^ (B & (C ^ D))) + M0; + A = rotl<3>(A); + + D += (C ^ (A & (B ^ C))) + M1; + D = rotl<7>(D); + + C += (B ^ (D & (A ^ B))) + M2; + C = rotl<11>(C); + + B += (A ^ (C & (D ^ A))) + M3; + B = rotl<19>(B); } -/* -* MD4 GG Function -*/ -inline void GG(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint8_t S) +inline void GG4(uint32_t& A, uint32_t& B, uint32_t& C, uint32_t& D, + uint32_t M0, uint32_t M1, uint32_t M2, uint32_t M3) + { - A += ((B & C) | (D & (B | C))) + M + 0x5A827999; - A = rotate_left(A, S); + A += ((B & C) | (D & (B | C))) + M0 + 0x5A827999; + A = rotl<3>(A); + + D += ((A & B) | (C & (A | B))) + M1 + 0x5A827999; + D = rotl<5>(D); + + C += ((D & A) | (B & (D | A))) + M2 + 0x5A827999; + C = rotl<9>(C); + + B += ((C & D) | (A & (C | D))) + M3 + 0x5A827999; + B = rotl<13>(B); } -/* -* MD4 HH Function -*/ -inline void HH(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M, uint8_t S) +inline void HH4(uint32_t& A, uint32_t& B, uint32_t& C, uint32_t& D, + uint32_t M0, uint32_t M1, uint32_t M2, uint32_t M3) + { - A += (B ^ C ^ D) + M + 0x6ED9EBA1; - A = rotate_left(A, S); + A += (B ^ C ^ D) + M0 + 0x6ED9EBA1; + A = rotl<3>(A); + + D += (A ^ B ^ C) + M1 + 0x6ED9EBA1; + D = rotl<9>(D); + + C += (A ^ B ^ D) + M2 + 0x6ED9EBA1; + C = rotl<11>(C); + + B += (A ^ C ^ D) + M3 + 0x6ED9EBA1; + B = rotl<15>(B); } } @@ -54,34 +78,37 @@ void MD4::compress_n(const uint8_t input[], size_t blocks) for(size_t i = 0; i != blocks; ++i) { - load_le(m_M.data(), input, m_M.size()); - - FF(A,B,C,D,m_M[ 0], 3); FF(D,A,B,C,m_M[ 1], 7); - FF(C,D,A,B,m_M[ 2],11); FF(B,C,D,A,m_M[ 3],19); - FF(A,B,C,D,m_M[ 4], 3); FF(D,A,B,C,m_M[ 5], 7); - FF(C,D,A,B,m_M[ 6],11); FF(B,C,D,A,m_M[ 7],19); - FF(A,B,C,D,m_M[ 8], 3); FF(D,A,B,C,m_M[ 9], 7); - FF(C,D,A,B,m_M[10],11); FF(B,C,D,A,m_M[11],19); - FF(A,B,C,D,m_M[12], 3); FF(D,A,B,C,m_M[13], 7); - FF(C,D,A,B,m_M[14],11); FF(B,C,D,A,m_M[15],19); - - GG(A,B,C,D,m_M[ 0], 3); GG(D,A,B,C,m_M[ 4], 5); - GG(C,D,A,B,m_M[ 8], 9); GG(B,C,D,A,m_M[12],13); - GG(A,B,C,D,m_M[ 1], 3); GG(D,A,B,C,m_M[ 5], 5); - GG(C,D,A,B,m_M[ 9], 9); GG(B,C,D,A,m_M[13],13); - GG(A,B,C,D,m_M[ 2], 3); GG(D,A,B,C,m_M[ 6], 5); - GG(C,D,A,B,m_M[10], 9); GG(B,C,D,A,m_M[14],13); - GG(A,B,C,D,m_M[ 3], 3); GG(D,A,B,C,m_M[ 7], 5); - GG(C,D,A,B,m_M[11], 9); GG(B,C,D,A,m_M[15],13); - - HH(A,B,C,D,m_M[ 0], 3); HH(D,A,B,C,m_M[ 8], 9); - HH(C,D,A,B,m_M[ 4],11); HH(B,C,D,A,m_M[12],15); - HH(A,B,C,D,m_M[ 2], 3); HH(D,A,B,C,m_M[10], 9); - HH(C,D,A,B,m_M[ 6],11); HH(B,C,D,A,m_M[14],15); - HH(A,B,C,D,m_M[ 1], 3); HH(D,A,B,C,m_M[ 9], 9); - HH(C,D,A,B,m_M[ 5],11); HH(B,C,D,A,m_M[13],15); - HH(A,B,C,D,m_M[ 3], 3); HH(D,A,B,C,m_M[11], 9); - HH(C,D,A,B,m_M[ 7],11); HH(B,C,D,A,m_M[15],15); + uint32_t M00 = load_le<uint32_t>(input, 0); + uint32_t M01 = load_le<uint32_t>(input, 1); + uint32_t M02 = load_le<uint32_t>(input, 2); + uint32_t M03 = load_le<uint32_t>(input, 3); + uint32_t M04 = load_le<uint32_t>(input, 4); + uint32_t M05 = load_le<uint32_t>(input, 5); + uint32_t M06 = load_le<uint32_t>(input, 6); + uint32_t M07 = load_le<uint32_t>(input, 7); + uint32_t M08 = load_le<uint32_t>(input, 8); + uint32_t M09 = load_le<uint32_t>(input, 9); + uint32_t M10 = load_le<uint32_t>(input, 10); + uint32_t M11 = load_le<uint32_t>(input, 11); + uint32_t M12 = load_le<uint32_t>(input, 12); + uint32_t M13 = load_le<uint32_t>(input, 13); + uint32_t M14 = load_le<uint32_t>(input, 14); + uint32_t M15 = load_le<uint32_t>(input, 15); + + FF4(A, B, C, D, M00, M01, M02, M03); + FF4(A, B, C, D, M04, M05, M06, M07); + FF4(A, B, C, D, M08, M09, M10, M11); + FF4(A, B, C, D, M12, M13, M14, M15); + + GG4(A, B, C, D, M00, M04, M08, M12); + GG4(A, B, C, D, M01, M05, M09, M13); + GG4(A, B, C, D, M02, M06, M10, M14); + GG4(A, B, C, D, M03, M07, M11, M15); + + HH4(A, B, C, D, M00, M08, M04, M12); + HH4(A, B, C, D, M02, M10, M06, M14); + HH4(A, B, C, D, M01, M09, M05, M13); + HH4(A, B, C, D, M03, M11, M07, M15); A = (m_digest[0] += A); B = (m_digest[1] += B); @@ -106,7 +133,6 @@ void MD4::copy_out(uint8_t output[]) void MD4::clear() { MDx_HashFunction::clear(); - zeroise(m_M); m_digest[0] = 0x67452301; m_digest[1] = 0xEFCDAB89; m_digest[2] = 0x98BADCFE; diff --git a/src/lib/hash/md4/md4.h b/src/lib/hash/md4/md4.h index c51cb1682..1dd857bce 100644 --- a/src/lib/hash/md4/md4.h +++ b/src/lib/hash/md4/md4.h @@ -25,7 +25,7 @@ class BOTAN_PUBLIC_API(2,0) MD4 final : public MDx_HashFunction void clear() override; - MD4() : MDx_HashFunction(64, false, true), m_M(16), m_digest(4) + MD4() : MDx_HashFunction(64, false, true), m_digest(4) { clear(); } private: @@ -33,11 +33,6 @@ class BOTAN_PUBLIC_API(2,0) MD4 final : public MDx_HashFunction void copy_out(uint8_t[]) override; /** - * The message buffer - */ - secure_vector<uint32_t> m_M; - - /** * The digest value */ secure_vector<uint32_t> m_digest; diff --git a/src/lib/hash/md5/md5.cpp b/src/lib/hash/md5/md5.cpp index 174443a67..de75ab2d1 100644 --- a/src/lib/hash/md5/md5.cpp +++ b/src/lib/hash/md5/md5.cpp @@ -19,41 +19,41 @@ namespace { /* * MD5 FF Function */ -inline void FF(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg, - uint8_t S, uint32_t magic) +template<size_t S> +inline void FF(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M) { - A += (D ^ (B & (C ^ D))) + msg + magic; - A = rotate_left(A, S) + B; + A += (D ^ (B & (C ^ D))) + M; + A = rotl<S>(A) + B; } /* * MD5 GG Function */ -inline void GG(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg, - uint8_t S, uint32_t magic) +template<size_t S> +inline void GG(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M) { - A += (C ^ (D & (B ^ C))) + msg + magic; - A = rotate_left(A, S) + B; + A += (C ^ (D & (B ^ C))) + M; + A = rotl<S>(A) + B; } /* * MD5 HH Function */ -inline void HH(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg, - uint8_t S, uint32_t magic) +template<size_t S> +inline void HH(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M) { - A += (B ^ C ^ D) + msg + magic; - A = rotate_left(A, S) + B; + A += (B ^ C ^ D) + M; + A = rotl<S>(A) + B; } /* * MD5 II Function */ -inline void II(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t msg, - uint8_t S, uint32_t magic) +template<size_t S> +inline void II(uint32_t& A, uint32_t B, uint32_t C, uint32_t D, uint32_t M) { - A += (C ^ (B | ~D)) + msg + magic; - A = rotate_left(A, S) + B; + A += (C ^ (B | ~D)) + M; + A = rotl<S>(A) + B; } } @@ -69,41 +69,41 @@ void MD5::compress_n(const uint8_t input[], size_t blocks) { load_le(m_M.data(), input, m_M.size()); - FF(A,B,C,D,m_M[ 0], 7,0xD76AA478); FF(D,A,B,C,m_M[ 1],12,0xE8C7B756); - FF(C,D,A,B,m_M[ 2],17,0x242070DB); FF(B,C,D,A,m_M[ 3],22,0xC1BDCEEE); - FF(A,B,C,D,m_M[ 4], 7,0xF57C0FAF); FF(D,A,B,C,m_M[ 5],12,0x4787C62A); - FF(C,D,A,B,m_M[ 6],17,0xA8304613); FF(B,C,D,A,m_M[ 7],22,0xFD469501); - FF(A,B,C,D,m_M[ 8], 7,0x698098D8); FF(D,A,B,C,m_M[ 9],12,0x8B44F7AF); - FF(C,D,A,B,m_M[10],17,0xFFFF5BB1); FF(B,C,D,A,m_M[11],22,0x895CD7BE); - FF(A,B,C,D,m_M[12], 7,0x6B901122); FF(D,A,B,C,m_M[13],12,0xFD987193); - FF(C,D,A,B,m_M[14],17,0xA679438E); FF(B,C,D,A,m_M[15],22,0x49B40821); - - GG(A,B,C,D,m_M[ 1], 5,0xF61E2562); GG(D,A,B,C,m_M[ 6], 9,0xC040B340); - GG(C,D,A,B,m_M[11],14,0x265E5A51); GG(B,C,D,A,m_M[ 0],20,0xE9B6C7AA); - GG(A,B,C,D,m_M[ 5], 5,0xD62F105D); GG(D,A,B,C,m_M[10], 9,0x02441453); - GG(C,D,A,B,m_M[15],14,0xD8A1E681); GG(B,C,D,A,m_M[ 4],20,0xE7D3FBC8); - GG(A,B,C,D,m_M[ 9], 5,0x21E1CDE6); GG(D,A,B,C,m_M[14], 9,0xC33707D6); - GG(C,D,A,B,m_M[ 3],14,0xF4D50D87); GG(B,C,D,A,m_M[ 8],20,0x455A14ED); - GG(A,B,C,D,m_M[13], 5,0xA9E3E905); GG(D,A,B,C,m_M[ 2], 9,0xFCEFA3F8); - GG(C,D,A,B,m_M[ 7],14,0x676F02D9); GG(B,C,D,A,m_M[12],20,0x8D2A4C8A); - - HH(A,B,C,D,m_M[ 5], 4,0xFFFA3942); HH(D,A,B,C,m_M[ 8],11,0x8771F681); - HH(C,D,A,B,m_M[11],16,0x6D9D6122); HH(B,C,D,A,m_M[14],23,0xFDE5380C); - HH(A,B,C,D,m_M[ 1], 4,0xA4BEEA44); HH(D,A,B,C,m_M[ 4],11,0x4BDECFA9); - HH(C,D,A,B,m_M[ 7],16,0xF6BB4B60); HH(B,C,D,A,m_M[10],23,0xBEBFBC70); - HH(A,B,C,D,m_M[13], 4,0x289B7EC6); HH(D,A,B,C,m_M[ 0],11,0xEAA127FA); - HH(C,D,A,B,m_M[ 3],16,0xD4EF3085); HH(B,C,D,A,m_M[ 6],23,0x04881D05); - HH(A,B,C,D,m_M[ 9], 4,0xD9D4D039); HH(D,A,B,C,m_M[12],11,0xE6DB99E5); - HH(C,D,A,B,m_M[15],16,0x1FA27CF8); HH(B,C,D,A,m_M[ 2],23,0xC4AC5665); - - II(A,B,C,D,m_M[ 0], 6,0xF4292244); II(D,A,B,C,m_M[ 7],10,0x432AFF97); - II(C,D,A,B,m_M[14],15,0xAB9423A7); II(B,C,D,A,m_M[ 5],21,0xFC93A039); - II(A,B,C,D,m_M[12], 6,0x655B59C3); II(D,A,B,C,m_M[ 3],10,0x8F0CCC92); - II(C,D,A,B,m_M[10],15,0xFFEFF47D); II(B,C,D,A,m_M[ 1],21,0x85845DD1); - II(A,B,C,D,m_M[ 8], 6,0x6FA87E4F); II(D,A,B,C,m_M[15],10,0xFE2CE6E0); - II(C,D,A,B,m_M[ 6],15,0xA3014314); II(B,C,D,A,m_M[13],21,0x4E0811A1); - II(A,B,C,D,m_M[ 4], 6,0xF7537E82); II(D,A,B,C,m_M[11],10,0xBD3AF235); - II(C,D,A,B,m_M[ 2],15,0x2AD7D2BB); II(B,C,D,A,m_M[ 9],21,0xEB86D391); + FF< 7>(A,B,C,D,m_M[ 0]+0xD76AA478); FF<12>(D,A,B,C,m_M[ 1]+0xE8C7B756); + FF<17>(C,D,A,B,m_M[ 2]+0x242070DB); FF<22>(B,C,D,A,m_M[ 3]+0xC1BDCEEE); + FF< 7>(A,B,C,D,m_M[ 4]+0xF57C0FAF); FF<12>(D,A,B,C,m_M[ 5]+0x4787C62A); + FF<17>(C,D,A,B,m_M[ 6]+0xA8304613); FF<22>(B,C,D,A,m_M[ 7]+0xFD469501); + FF< 7>(A,B,C,D,m_M[ 8]+0x698098D8); FF<12>(D,A,B,C,m_M[ 9]+0x8B44F7AF); + FF<17>(C,D,A,B,m_M[10]+0xFFFF5BB1); FF<22>(B,C,D,A,m_M[11]+0x895CD7BE); + FF< 7>(A,B,C,D,m_M[12]+0x6B901122); FF<12>(D,A,B,C,m_M[13]+0xFD987193); + FF<17>(C,D,A,B,m_M[14]+0xA679438E); FF<22>(B,C,D,A,m_M[15]+0x49B40821); + + GG< 5>(A,B,C,D,m_M[ 1]+0xF61E2562); GG< 9>(D,A,B,C,m_M[ 6]+0xC040B340); + GG<14>(C,D,A,B,m_M[11]+0x265E5A51); GG<20>(B,C,D,A,m_M[ 0]+0xE9B6C7AA); + GG< 5>(A,B,C,D,m_M[ 5]+0xD62F105D); GG< 9>(D,A,B,C,m_M[10]+0x02441453); + GG<14>(C,D,A,B,m_M[15]+0xD8A1E681); GG<20>(B,C,D,A,m_M[ 4]+0xE7D3FBC8); + GG< 5>(A,B,C,D,m_M[ 9]+0x21E1CDE6); GG< 9>(D,A,B,C,m_M[14]+0xC33707D6); + GG<14>(C,D,A,B,m_M[ 3]+0xF4D50D87); GG<20>(B,C,D,A,m_M[ 8]+0x455A14ED); + GG< 5>(A,B,C,D,m_M[13]+0xA9E3E905); GG< 9>(D,A,B,C,m_M[ 2]+0xFCEFA3F8); + GG<14>(C,D,A,B,m_M[ 7]+0x676F02D9); GG<20>(B,C,D,A,m_M[12]+0x8D2A4C8A); + + HH< 4>(A,B,C,D,m_M[ 5]+0xFFFA3942); HH<11>(D,A,B,C,m_M[ 8]+0x8771F681); + HH<16>(C,D,A,B,m_M[11]+0x6D9D6122); HH<23>(B,C,D,A,m_M[14]+0xFDE5380C); + HH< 4>(A,B,C,D,m_M[ 1]+0xA4BEEA44); HH<11>(D,A,B,C,m_M[ 4]+0x4BDECFA9); + HH<16>(C,D,A,B,m_M[ 7]+0xF6BB4B60); HH<23>(B,C,D,A,m_M[10]+0xBEBFBC70); + HH< 4>(A,B,C,D,m_M[13]+0x289B7EC6); HH<11>(D,A,B,C,m_M[ 0]+0xEAA127FA); + HH<16>(C,D,A,B,m_M[ 3]+0xD4EF3085); HH<23>(B,C,D,A,m_M[ 6]+0x04881D05); + HH< 4>(A,B,C,D,m_M[ 9]+0xD9D4D039); HH<11>(D,A,B,C,m_M[12]+0xE6DB99E5); + HH<16>(C,D,A,B,m_M[15]+0x1FA27CF8); HH<23>(B,C,D,A,m_M[ 2]+0xC4AC5665); + + II< 6>(A,B,C,D,m_M[ 0]+0xF4292244); II<10>(D,A,B,C,m_M[ 7]+0x432AFF97); + II<15>(C,D,A,B,m_M[14]+0xAB9423A7); II<21>(B,C,D,A,m_M[ 5]+0xFC93A039); + II< 6>(A,B,C,D,m_M[12]+0x655B59C3); II<10>(D,A,B,C,m_M[ 3]+0x8F0CCC92); + II<15>(C,D,A,B,m_M[10]+0xFFEFF47D); II<21>(B,C,D,A,m_M[ 1]+0x85845DD1); + II< 6>(A,B,C,D,m_M[ 8]+0x6FA87E4F); II<10>(D,A,B,C,m_M[15]+0xFE2CE6E0); + II<15>(C,D,A,B,m_M[ 6]+0xA3014314); II<21>(B,C,D,A,m_M[13]+0x4E0811A1); + II< 6>(A,B,C,D,m_M[ 4]+0xF7537E82); II<10>(D,A,B,C,m_M[11]+0xBD3AF235); + II<15>(C,D,A,B,m_M[ 2]+0x2AD7D2BB); II<21>(B,C,D,A,m_M[ 9]+0xEB86D391); A = (m_digest[0] += A); B = (m_digest[1] += B); diff --git a/src/lib/hash/rmd160/rmd160.cpp b/src/lib/hash/rmd160/rmd160.cpp index 95f96c281..4c84ff422 100644 --- a/src/lib/hash/rmd160/rmd160.cpp +++ b/src/lib/hash/rmd160/rmd160.cpp @@ -19,56 +19,61 @@ namespace { /* * RIPEMD-160 F1 Function */ +template<size_t S> inline void F1(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E, - uint32_t msg, uint32_t shift) + uint32_t M) { - A += (B ^ C ^ D) + msg; - A = rotate_left(A, shift) + E; - C = rotate_left(C, 10); + A += (B ^ C ^ D) + M; + A = rotl<S>(A) + E; + C = rotl<10>(C); } /* * RIPEMD-160 F2 Function */ +template<size_t S> inline void F2(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E, - uint32_t msg, uint32_t shift, uint32_t magic) + uint32_t M) { - A += (D ^ (B & (C ^ D))) + msg + magic; - A = rotate_left(A, shift) + E; - C = rotate_left(C, 10); + A += (D ^ (B & (C ^ D))) + M; + A = rotl<S>(A) + E; + C = rotl<10>(C); } /* * RIPEMD-160 F3 Function */ +template<size_t S> inline void F3(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E, - uint32_t msg, uint32_t shift, uint32_t magic) + uint32_t M) { - A += (D ^ (B | ~C)) + msg + magic; - A = rotate_left(A, shift) + E; - C = rotate_left(C, 10); + A += (D ^ (B | ~C)) + M; + A = rotl<S>(A) + E; + C = rotl<10>(C); } /* * RIPEMD-160 F4 Function */ +template<size_t S> inline void F4(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E, - uint32_t msg, uint32_t shift, uint32_t magic) + uint32_t M) { - A += (C ^ (D & (B ^ C))) + msg + magic; - A = rotate_left(A, shift) + E; - C = rotate_left(C, 10); + A += (C ^ (D & (B ^ C))) + M; + A = rotl<S>(A) + E; + C = rotl<10>(C); } /* * RIPEMD-160 F5 Function */ +template<size_t S> inline void F5(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E, - uint32_t msg, uint32_t shift, uint32_t magic) + uint32_t M) { - A += (B ^ (C | ~D)) + msg + magic; - A = rotate_left(A, shift) + E; - C = rotate_left(C, 10); + A += (B ^ (C | ~D)) + M; + A = rotl<S>(A) + E; + C = rotl<10>(C); } } @@ -79,102 +84,104 @@ inline void F5(uint32_t& A, uint32_t B, uint32_t& C, uint32_t D, uint32_t E, void RIPEMD_160::compress_n(const uint8_t input[], size_t blocks) { const uint32_t MAGIC2 = 0x5A827999, MAGIC3 = 0x6ED9EBA1, - MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0xA953FD4E, - MAGIC6 = 0x50A28BE6, MAGIC7 = 0x5C4DD124, - MAGIC8 = 0x6D703EF3, MAGIC9 = 0x7A6D76E9; + MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0xA953FD4E, + MAGIC6 = 0x50A28BE6, MAGIC7 = 0x5C4DD124, + MAGIC8 = 0x6D703EF3, MAGIC9 = 0x7A6D76E9; for(size_t i = 0; i != blocks; ++i) { load_le(m_M.data(), input, m_M.size()); - uint32_t A1 = m_digest[0], A2 = A1, B1 = m_digest[1], B2 = B1, - C1 = m_digest[2], C2 = C1, D1 = m_digest[3], D2 = D1, - E1 = m_digest[4], E2 = E1; - - F1(A1,B1,C1,D1,E1,m_M[ 0],11 ); F5(A2,B2,C2,D2,E2,m_M[ 5], 8,MAGIC6); - F1(E1,A1,B1,C1,D1,m_M[ 1],14 ); F5(E2,A2,B2,C2,D2,m_M[14], 9,MAGIC6); - F1(D1,E1,A1,B1,C1,m_M[ 2],15 ); F5(D2,E2,A2,B2,C2,m_M[ 7], 9,MAGIC6); - F1(C1,D1,E1,A1,B1,m_M[ 3],12 ); F5(C2,D2,E2,A2,B2,m_M[ 0],11,MAGIC6); - F1(B1,C1,D1,E1,A1,m_M[ 4], 5 ); F5(B2,C2,D2,E2,A2,m_M[ 9],13,MAGIC6); - F1(A1,B1,C1,D1,E1,m_M[ 5], 8 ); F5(A2,B2,C2,D2,E2,m_M[ 2],15,MAGIC6); - F1(E1,A1,B1,C1,D1,m_M[ 6], 7 ); F5(E2,A2,B2,C2,D2,m_M[11],15,MAGIC6); - F1(D1,E1,A1,B1,C1,m_M[ 7], 9 ); F5(D2,E2,A2,B2,C2,m_M[ 4], 5,MAGIC6); - F1(C1,D1,E1,A1,B1,m_M[ 8],11 ); F5(C2,D2,E2,A2,B2,m_M[13], 7,MAGIC6); - F1(B1,C1,D1,E1,A1,m_M[ 9],13 ); F5(B2,C2,D2,E2,A2,m_M[ 6], 7,MAGIC6); - F1(A1,B1,C1,D1,E1,m_M[10],14 ); F5(A2,B2,C2,D2,E2,m_M[15], 8,MAGIC6); - F1(E1,A1,B1,C1,D1,m_M[11],15 ); F5(E2,A2,B2,C2,D2,m_M[ 8],11,MAGIC6); - F1(D1,E1,A1,B1,C1,m_M[12], 6 ); F5(D2,E2,A2,B2,C2,m_M[ 1],14,MAGIC6); - F1(C1,D1,E1,A1,B1,m_M[13], 7 ); F5(C2,D2,E2,A2,B2,m_M[10],14,MAGIC6); - F1(B1,C1,D1,E1,A1,m_M[14], 9 ); F5(B2,C2,D2,E2,A2,m_M[ 3],12,MAGIC6); - F1(A1,B1,C1,D1,E1,m_M[15], 8 ); F5(A2,B2,C2,D2,E2,m_M[12], 6,MAGIC6); - - F2(E1,A1,B1,C1,D1,m_M[ 7], 7,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[ 6], 9,MAGIC7); - F2(D1,E1,A1,B1,C1,m_M[ 4], 6,MAGIC2); F4(D2,E2,A2,B2,C2,m_M[11],13,MAGIC7); - F2(C1,D1,E1,A1,B1,m_M[13], 8,MAGIC2); F4(C2,D2,E2,A2,B2,m_M[ 3],15,MAGIC7); - F2(B1,C1,D1,E1,A1,m_M[ 1],13,MAGIC2); F4(B2,C2,D2,E2,A2,m_M[ 7], 7,MAGIC7); - F2(A1,B1,C1,D1,E1,m_M[10],11,MAGIC2); F4(A2,B2,C2,D2,E2,m_M[ 0],12,MAGIC7); - F2(E1,A1,B1,C1,D1,m_M[ 6], 9,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[13], 8,MAGIC7); - F2(D1,E1,A1,B1,C1,m_M[15], 7,MAGIC2); F4(D2,E2,A2,B2,C2,m_M[ 5], 9,MAGIC7); - F2(C1,D1,E1,A1,B1,m_M[ 3],15,MAGIC2); F4(C2,D2,E2,A2,B2,m_M[10],11,MAGIC7); - F2(B1,C1,D1,E1,A1,m_M[12], 7,MAGIC2); F4(B2,C2,D2,E2,A2,m_M[14], 7,MAGIC7); - F2(A1,B1,C1,D1,E1,m_M[ 0],12,MAGIC2); F4(A2,B2,C2,D2,E2,m_M[15], 7,MAGIC7); - F2(E1,A1,B1,C1,D1,m_M[ 9],15,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[ 8],12,MAGIC7); - F2(D1,E1,A1,B1,C1,m_M[ 5], 9,MAGIC2); F4(D2,E2,A2,B2,C2,m_M[12], 7,MAGIC7); - F2(C1,D1,E1,A1,B1,m_M[ 2],11,MAGIC2); F4(C2,D2,E2,A2,B2,m_M[ 4], 6,MAGIC7); - F2(B1,C1,D1,E1,A1,m_M[14], 7,MAGIC2); F4(B2,C2,D2,E2,A2,m_M[ 9],15,MAGIC7); - F2(A1,B1,C1,D1,E1,m_M[11],13,MAGIC2); F4(A2,B2,C2,D2,E2,m_M[ 1],13,MAGIC7); - F2(E1,A1,B1,C1,D1,m_M[ 8],12,MAGIC2); F4(E2,A2,B2,C2,D2,m_M[ 2],11,MAGIC7); - - F3(D1,E1,A1,B1,C1,m_M[ 3],11,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[15], 9,MAGIC8); - F3(C1,D1,E1,A1,B1,m_M[10],13,MAGIC3); F3(C2,D2,E2,A2,B2,m_M[ 5], 7,MAGIC8); - F3(B1,C1,D1,E1,A1,m_M[14], 6,MAGIC3); F3(B2,C2,D2,E2,A2,m_M[ 1],15,MAGIC8); - F3(A1,B1,C1,D1,E1,m_M[ 4], 7,MAGIC3); F3(A2,B2,C2,D2,E2,m_M[ 3],11,MAGIC8); - F3(E1,A1,B1,C1,D1,m_M[ 9],14,MAGIC3); F3(E2,A2,B2,C2,D2,m_M[ 7], 8,MAGIC8); - F3(D1,E1,A1,B1,C1,m_M[15], 9,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[14], 6,MAGIC8); - F3(C1,D1,E1,A1,B1,m_M[ 8],13,MAGIC3); F3(C2,D2,E2,A2,B2,m_M[ 6], 6,MAGIC8); - F3(B1,C1,D1,E1,A1,m_M[ 1],15,MAGIC3); F3(B2,C2,D2,E2,A2,m_M[ 9],14,MAGIC8); - F3(A1,B1,C1,D1,E1,m_M[ 2],14,MAGIC3); F3(A2,B2,C2,D2,E2,m_M[11],12,MAGIC8); - F3(E1,A1,B1,C1,D1,m_M[ 7], 8,MAGIC3); F3(E2,A2,B2,C2,D2,m_M[ 8],13,MAGIC8); - F3(D1,E1,A1,B1,C1,m_M[ 0],13,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[12], 5,MAGIC8); - F3(C1,D1,E1,A1,B1,m_M[ 6], 6,MAGIC3); F3(C2,D2,E2,A2,B2,m_M[ 2],14,MAGIC8); - F3(B1,C1,D1,E1,A1,m_M[13], 5,MAGIC3); F3(B2,C2,D2,E2,A2,m_M[10],13,MAGIC8); - F3(A1,B1,C1,D1,E1,m_M[11],12,MAGIC3); F3(A2,B2,C2,D2,E2,m_M[ 0],13,MAGIC8); - F3(E1,A1,B1,C1,D1,m_M[ 5], 7,MAGIC3); F3(E2,A2,B2,C2,D2,m_M[ 4], 7,MAGIC8); - F3(D1,E1,A1,B1,C1,m_M[12], 5,MAGIC3); F3(D2,E2,A2,B2,C2,m_M[13], 5,MAGIC8); - - F4(C1,D1,E1,A1,B1,m_M[ 1],11,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[ 8],15,MAGIC9); - F4(B1,C1,D1,E1,A1,m_M[ 9],12,MAGIC4); F2(B2,C2,D2,E2,A2,m_M[ 6], 5,MAGIC9); - F4(A1,B1,C1,D1,E1,m_M[11],14,MAGIC4); F2(A2,B2,C2,D2,E2,m_M[ 4], 8,MAGIC9); - F4(E1,A1,B1,C1,D1,m_M[10],15,MAGIC4); F2(E2,A2,B2,C2,D2,m_M[ 1],11,MAGIC9); - F4(D1,E1,A1,B1,C1,m_M[ 0],14,MAGIC4); F2(D2,E2,A2,B2,C2,m_M[ 3],14,MAGIC9); - F4(C1,D1,E1,A1,B1,m_M[ 8],15,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[11],14,MAGIC9); - F4(B1,C1,D1,E1,A1,m_M[12], 9,MAGIC4); F2(B2,C2,D2,E2,A2,m_M[15], 6,MAGIC9); - F4(A1,B1,C1,D1,E1,m_M[ 4], 8,MAGIC4); F2(A2,B2,C2,D2,E2,m_M[ 0],14,MAGIC9); - F4(E1,A1,B1,C1,D1,m_M[13], 9,MAGIC4); F2(E2,A2,B2,C2,D2,m_M[ 5], 6,MAGIC9); - F4(D1,E1,A1,B1,C1,m_M[ 3],14,MAGIC4); F2(D2,E2,A2,B2,C2,m_M[12], 9,MAGIC9); - F4(C1,D1,E1,A1,B1,m_M[ 7], 5,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[ 2],12,MAGIC9); - F4(B1,C1,D1,E1,A1,m_M[15], 6,MAGIC4); F2(B2,C2,D2,E2,A2,m_M[13], 9,MAGIC9); - F4(A1,B1,C1,D1,E1,m_M[14], 8,MAGIC4); F2(A2,B2,C2,D2,E2,m_M[ 9],12,MAGIC9); - F4(E1,A1,B1,C1,D1,m_M[ 5], 6,MAGIC4); F2(E2,A2,B2,C2,D2,m_M[ 7], 5,MAGIC9); - F4(D1,E1,A1,B1,C1,m_M[ 6], 5,MAGIC4); F2(D2,E2,A2,B2,C2,m_M[10],15,MAGIC9); - F4(C1,D1,E1,A1,B1,m_M[ 2],12,MAGIC4); F2(C2,D2,E2,A2,B2,m_M[14], 8,MAGIC9); - - F5(B1,C1,D1,E1,A1,m_M[ 4], 9,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[12], 8 ); - F5(A1,B1,C1,D1,E1,m_M[ 0],15,MAGIC5); F1(A2,B2,C2,D2,E2,m_M[15], 5 ); - F5(E1,A1,B1,C1,D1,m_M[ 5], 5,MAGIC5); F1(E2,A2,B2,C2,D2,m_M[10],12 ); - F5(D1,E1,A1,B1,C1,m_M[ 9],11,MAGIC5); F1(D2,E2,A2,B2,C2,m_M[ 4], 9 ); - F5(C1,D1,E1,A1,B1,m_M[ 7], 6,MAGIC5); F1(C2,D2,E2,A2,B2,m_M[ 1],12 ); - F5(B1,C1,D1,E1,A1,m_M[12], 8,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[ 5], 5 ); - F5(A1,B1,C1,D1,E1,m_M[ 2],13,MAGIC5); F1(A2,B2,C2,D2,E2,m_M[ 8],14 ); - F5(E1,A1,B1,C1,D1,m_M[10],12,MAGIC5); F1(E2,A2,B2,C2,D2,m_M[ 7], 6 ); - F5(D1,E1,A1,B1,C1,m_M[14], 5,MAGIC5); F1(D2,E2,A2,B2,C2,m_M[ 6], 8 ); - F5(C1,D1,E1,A1,B1,m_M[ 1],12,MAGIC5); F1(C2,D2,E2,A2,B2,m_M[ 2],13 ); - F5(B1,C1,D1,E1,A1,m_M[ 3],13,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[13], 6 ); - F5(A1,B1,C1,D1,E1,m_M[ 8],14,MAGIC5); F1(A2,B2,C2,D2,E2,m_M[14], 5 ); - F5(E1,A1,B1,C1,D1,m_M[11],11,MAGIC5); F1(E2,A2,B2,C2,D2,m_M[ 0],15 ); - F5(D1,E1,A1,B1,C1,m_M[ 6], 8,MAGIC5); F1(D2,E2,A2,B2,C2,m_M[ 3],13 ); - F5(C1,D1,E1,A1,B1,m_M[15], 5,MAGIC5); F1(C2,D2,E2,A2,B2,m_M[ 9],11 ); - F5(B1,C1,D1,E1,A1,m_M[13], 6,MAGIC5); F1(B2,C2,D2,E2,A2,m_M[11],11 ); + uint32_t A1 = m_digest[0], A2 = A1, + B1 = m_digest[1], B2 = B1, + C1 = m_digest[2], C2 = C1, + D1 = m_digest[3], D2 = D1, + E1 = m_digest[4], E2 = E1; + + F1<11>(A1,B1,C1,D1,E1,m_M[ 0] ); F5< 8>(A2,B2,C2,D2,E2,m_M[ 5]+MAGIC6); + F1<14>(E1,A1,B1,C1,D1,m_M[ 1] ); F5< 9>(E2,A2,B2,C2,D2,m_M[14]+MAGIC6); + F1<15>(D1,E1,A1,B1,C1,m_M[ 2] ); F5< 9>(D2,E2,A2,B2,C2,m_M[ 7]+MAGIC6); + F1<12>(C1,D1,E1,A1,B1,m_M[ 3] ); F5<11>(C2,D2,E2,A2,B2,m_M[ 0]+MAGIC6); + F1< 5>(B1,C1,D1,E1,A1,m_M[ 4] ); F5<13>(B2,C2,D2,E2,A2,m_M[ 9]+MAGIC6); + F1< 8>(A1,B1,C1,D1,E1,m_M[ 5] ); F5<15>(A2,B2,C2,D2,E2,m_M[ 2]+MAGIC6); + F1< 7>(E1,A1,B1,C1,D1,m_M[ 6] ); F5<15>(E2,A2,B2,C2,D2,m_M[11]+MAGIC6); + F1< 9>(D1,E1,A1,B1,C1,m_M[ 7] ); F5< 5>(D2,E2,A2,B2,C2,m_M[ 4]+MAGIC6); + F1<11>(C1,D1,E1,A1,B1,m_M[ 8] ); F5< 7>(C2,D2,E2,A2,B2,m_M[13]+MAGIC6); + F1<13>(B1,C1,D1,E1,A1,m_M[ 9] ); F5< 7>(B2,C2,D2,E2,A2,m_M[ 6]+MAGIC6); + F1<14>(A1,B1,C1,D1,E1,m_M[10] ); F5< 8>(A2,B2,C2,D2,E2,m_M[15]+MAGIC6); + F1<15>(E1,A1,B1,C1,D1,m_M[11] ); F5<11>(E2,A2,B2,C2,D2,m_M[ 8]+MAGIC6); + F1< 6>(D1,E1,A1,B1,C1,m_M[12] ); F5<14>(D2,E2,A2,B2,C2,m_M[ 1]+MAGIC6); + F1< 7>(C1,D1,E1,A1,B1,m_M[13] ); F5<14>(C2,D2,E2,A2,B2,m_M[10]+MAGIC6); + F1< 9>(B1,C1,D1,E1,A1,m_M[14] ); F5<12>(B2,C2,D2,E2,A2,m_M[ 3]+MAGIC6); + F1< 8>(A1,B1,C1,D1,E1,m_M[15] ); F5< 6>(A2,B2,C2,D2,E2,m_M[12]+MAGIC6); + + F2< 7>(E1,A1,B1,C1,D1,m_M[ 7]+MAGIC2); F4< 9>(E2,A2,B2,C2,D2,m_M[ 6]+MAGIC7); + F2< 6>(D1,E1,A1,B1,C1,m_M[ 4]+MAGIC2); F4<13>(D2,E2,A2,B2,C2,m_M[11]+MAGIC7); + F2< 8>(C1,D1,E1,A1,B1,m_M[13]+MAGIC2); F4<15>(C2,D2,E2,A2,B2,m_M[ 3]+MAGIC7); + F2<13>(B1,C1,D1,E1,A1,m_M[ 1]+MAGIC2); F4< 7>(B2,C2,D2,E2,A2,m_M[ 7]+MAGIC7); + F2<11>(A1,B1,C1,D1,E1,m_M[10]+MAGIC2); F4<12>(A2,B2,C2,D2,E2,m_M[ 0]+MAGIC7); + F2< 9>(E1,A1,B1,C1,D1,m_M[ 6]+MAGIC2); F4< 8>(E2,A2,B2,C2,D2,m_M[13]+MAGIC7); + F2< 7>(D1,E1,A1,B1,C1,m_M[15]+MAGIC2); F4< 9>(D2,E2,A2,B2,C2,m_M[ 5]+MAGIC7); + F2<15>(C1,D1,E1,A1,B1,m_M[ 3]+MAGIC2); F4<11>(C2,D2,E2,A2,B2,m_M[10]+MAGIC7); + F2< 7>(B1,C1,D1,E1,A1,m_M[12]+MAGIC2); F4< 7>(B2,C2,D2,E2,A2,m_M[14]+MAGIC7); + F2<12>(A1,B1,C1,D1,E1,m_M[ 0]+MAGIC2); F4< 7>(A2,B2,C2,D2,E2,m_M[15]+MAGIC7); + F2<15>(E1,A1,B1,C1,D1,m_M[ 9]+MAGIC2); F4<12>(E2,A2,B2,C2,D2,m_M[ 8]+MAGIC7); + F2< 9>(D1,E1,A1,B1,C1,m_M[ 5]+MAGIC2); F4< 7>(D2,E2,A2,B2,C2,m_M[12]+MAGIC7); + F2<11>(C1,D1,E1,A1,B1,m_M[ 2]+MAGIC2); F4< 6>(C2,D2,E2,A2,B2,m_M[ 4]+MAGIC7); + F2< 7>(B1,C1,D1,E1,A1,m_M[14]+MAGIC2); F4<15>(B2,C2,D2,E2,A2,m_M[ 9]+MAGIC7); + F2<13>(A1,B1,C1,D1,E1,m_M[11]+MAGIC2); F4<13>(A2,B2,C2,D2,E2,m_M[ 1]+MAGIC7); + F2<12>(E1,A1,B1,C1,D1,m_M[ 8]+MAGIC2); F4<11>(E2,A2,B2,C2,D2,m_M[ 2]+MAGIC7); + + F3<11>(D1,E1,A1,B1,C1,m_M[ 3]+MAGIC3); F3< 9>(D2,E2,A2,B2,C2,m_M[15]+MAGIC8); + F3<13>(C1,D1,E1,A1,B1,m_M[10]+MAGIC3); F3< 7>(C2,D2,E2,A2,B2,m_M[ 5]+MAGIC8); + F3< 6>(B1,C1,D1,E1,A1,m_M[14]+MAGIC3); F3<15>(B2,C2,D2,E2,A2,m_M[ 1]+MAGIC8); + F3< 7>(A1,B1,C1,D1,E1,m_M[ 4]+MAGIC3); F3<11>(A2,B2,C2,D2,E2,m_M[ 3]+MAGIC8); + F3<14>(E1,A1,B1,C1,D1,m_M[ 9]+MAGIC3); F3< 8>(E2,A2,B2,C2,D2,m_M[ 7]+MAGIC8); + F3< 9>(D1,E1,A1,B1,C1,m_M[15]+MAGIC3); F3< 6>(D2,E2,A2,B2,C2,m_M[14]+MAGIC8); + F3<13>(C1,D1,E1,A1,B1,m_M[ 8]+MAGIC3); F3< 6>(C2,D2,E2,A2,B2,m_M[ 6]+MAGIC8); + F3<15>(B1,C1,D1,E1,A1,m_M[ 1]+MAGIC3); F3<14>(B2,C2,D2,E2,A2,m_M[ 9]+MAGIC8); + F3<14>(A1,B1,C1,D1,E1,m_M[ 2]+MAGIC3); F3<12>(A2,B2,C2,D2,E2,m_M[11]+MAGIC8); + F3< 8>(E1,A1,B1,C1,D1,m_M[ 7]+MAGIC3); F3<13>(E2,A2,B2,C2,D2,m_M[ 8]+MAGIC8); + F3<13>(D1,E1,A1,B1,C1,m_M[ 0]+MAGIC3); F3< 5>(D2,E2,A2,B2,C2,m_M[12]+MAGIC8); + F3< 6>(C1,D1,E1,A1,B1,m_M[ 6]+MAGIC3); F3<14>(C2,D2,E2,A2,B2,m_M[ 2]+MAGIC8); + F3< 5>(B1,C1,D1,E1,A1,m_M[13]+MAGIC3); F3<13>(B2,C2,D2,E2,A2,m_M[10]+MAGIC8); + F3<12>(A1,B1,C1,D1,E1,m_M[11]+MAGIC3); F3<13>(A2,B2,C2,D2,E2,m_M[ 0]+MAGIC8); + F3< 7>(E1,A1,B1,C1,D1,m_M[ 5]+MAGIC3); F3< 7>(E2,A2,B2,C2,D2,m_M[ 4]+MAGIC8); + F3< 5>(D1,E1,A1,B1,C1,m_M[12]+MAGIC3); F3< 5>(D2,E2,A2,B2,C2,m_M[13]+MAGIC8); + + F4<11>(C1,D1,E1,A1,B1,m_M[ 1]+MAGIC4); F2<15>(C2,D2,E2,A2,B2,m_M[ 8]+MAGIC9); + F4<12>(B1,C1,D1,E1,A1,m_M[ 9]+MAGIC4); F2< 5>(B2,C2,D2,E2,A2,m_M[ 6]+MAGIC9); + F4<14>(A1,B1,C1,D1,E1,m_M[11]+MAGIC4); F2< 8>(A2,B2,C2,D2,E2,m_M[ 4]+MAGIC9); + F4<15>(E1,A1,B1,C1,D1,m_M[10]+MAGIC4); F2<11>(E2,A2,B2,C2,D2,m_M[ 1]+MAGIC9); + F4<14>(D1,E1,A1,B1,C1,m_M[ 0]+MAGIC4); F2<14>(D2,E2,A2,B2,C2,m_M[ 3]+MAGIC9); + F4<15>(C1,D1,E1,A1,B1,m_M[ 8]+MAGIC4); F2<14>(C2,D2,E2,A2,B2,m_M[11]+MAGIC9); + F4< 9>(B1,C1,D1,E1,A1,m_M[12]+MAGIC4); F2< 6>(B2,C2,D2,E2,A2,m_M[15]+MAGIC9); + F4< 8>(A1,B1,C1,D1,E1,m_M[ 4]+MAGIC4); F2<14>(A2,B2,C2,D2,E2,m_M[ 0]+MAGIC9); + F4< 9>(E1,A1,B1,C1,D1,m_M[13]+MAGIC4); F2< 6>(E2,A2,B2,C2,D2,m_M[ 5]+MAGIC9); + F4<14>(D1,E1,A1,B1,C1,m_M[ 3]+MAGIC4); F2< 9>(D2,E2,A2,B2,C2,m_M[12]+MAGIC9); + F4< 5>(C1,D1,E1,A1,B1,m_M[ 7]+MAGIC4); F2<12>(C2,D2,E2,A2,B2,m_M[ 2]+MAGIC9); + F4< 6>(B1,C1,D1,E1,A1,m_M[15]+MAGIC4); F2< 9>(B2,C2,D2,E2,A2,m_M[13]+MAGIC9); + F4< 8>(A1,B1,C1,D1,E1,m_M[14]+MAGIC4); F2<12>(A2,B2,C2,D2,E2,m_M[ 9]+MAGIC9); + F4< 6>(E1,A1,B1,C1,D1,m_M[ 5]+MAGIC4); F2< 5>(E2,A2,B2,C2,D2,m_M[ 7]+MAGIC9); + F4< 5>(D1,E1,A1,B1,C1,m_M[ 6]+MAGIC4); F2<15>(D2,E2,A2,B2,C2,m_M[10]+MAGIC9); + F4<12>(C1,D1,E1,A1,B1,m_M[ 2]+MAGIC4); F2< 8>(C2,D2,E2,A2,B2,m_M[14]+MAGIC9); + + F5< 9>(B1,C1,D1,E1,A1,m_M[ 4]+MAGIC5); F1< 8>(B2,C2,D2,E2,A2,m_M[12] ); + F5<15>(A1,B1,C1,D1,E1,m_M[ 0]+MAGIC5); F1< 5>(A2,B2,C2,D2,E2,m_M[15] ); + F5< 5>(E1,A1,B1,C1,D1,m_M[ 5]+MAGIC5); F1<12>(E2,A2,B2,C2,D2,m_M[10] ); + F5<11>(D1,E1,A1,B1,C1,m_M[ 9]+MAGIC5); F1< 9>(D2,E2,A2,B2,C2,m_M[ 4] ); + F5< 6>(C1,D1,E1,A1,B1,m_M[ 7]+MAGIC5); F1<12>(C2,D2,E2,A2,B2,m_M[ 1] ); + F5< 8>(B1,C1,D1,E1,A1,m_M[12]+MAGIC5); F1< 5>(B2,C2,D2,E2,A2,m_M[ 5] ); + F5<13>(A1,B1,C1,D1,E1,m_M[ 2]+MAGIC5); F1<14>(A2,B2,C2,D2,E2,m_M[ 8] ); + F5<12>(E1,A1,B1,C1,D1,m_M[10]+MAGIC5); F1< 6>(E2,A2,B2,C2,D2,m_M[ 7] ); + F5< 5>(D1,E1,A1,B1,C1,m_M[14]+MAGIC5); F1< 8>(D2,E2,A2,B2,C2,m_M[ 6] ); + F5<12>(C1,D1,E1,A1,B1,m_M[ 1]+MAGIC5); F1<13>(C2,D2,E2,A2,B2,m_M[ 2] ); + F5<13>(B1,C1,D1,E1,A1,m_M[ 3]+MAGIC5); F1< 6>(B2,C2,D2,E2,A2,m_M[13] ); + F5<14>(A1,B1,C1,D1,E1,m_M[ 8]+MAGIC5); F1< 5>(A2,B2,C2,D2,E2,m_M[14] ); + F5<11>(E1,A1,B1,C1,D1,m_M[11]+MAGIC5); F1<15>(E2,A2,B2,C2,D2,m_M[ 0] ); + F5< 8>(D1,E1,A1,B1,C1,m_M[ 6]+MAGIC5); F1<13>(D2,E2,A2,B2,C2,m_M[ 3] ); + F5< 5>(C1,D1,E1,A1,B1,m_M[15]+MAGIC5); F1<11>(C2,D2,E2,A2,B2,m_M[ 9] ); + F5< 6>(B1,C1,D1,E1,A1,m_M[13]+MAGIC5); F1<11>(B2,C2,D2,E2,A2,m_M[11] ); C1 = m_digest[1] + C1 + D2; m_digest[1] = m_digest[2] + D1 + E2; diff --git a/src/lib/hash/sha1/sha160.cpp b/src/lib/hash/sha1/sha160.cpp index fcca67341..8c12a4f04 100644 --- a/src/lib/hash/sha1/sha160.cpp +++ b/src/lib/hash/sha1/sha160.cpp @@ -24,8 +24,8 @@ namespace { */ inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += (D ^ (B & (C ^ D))) + msg + 0x5A827999 + rotate_left(A, 5); - B = rotate_left(B, 30); + E += (D ^ (B & (C ^ D))) + msg + 0x5A827999 + rotl<5>(A); + B = rotl<30>(B); } /* @@ -33,8 +33,8 @@ inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin */ inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += (B ^ C ^ D) + msg + 0x6ED9EBA1 + rotate_left(A, 5); - B = rotate_left(B, 30); + E += (B ^ C ^ D) + msg + 0x6ED9EBA1 + rotl<5>(A); + B = rotl<30>(B); } /* @@ -42,8 +42,8 @@ inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin */ inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += ((B & C) | ((B | C) & D)) + msg + 0x8F1BBCDC + rotate_left(A, 5); - B = rotate_left(B, 30); + E += ((B & C) | ((B | C) & D)) + msg + 0x8F1BBCDC + rotl<5>(A); + B = rotl<30>(B); } /* @@ -51,8 +51,8 @@ inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin */ inline void F4(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += (B ^ C ^ D) + msg + 0xCA62C1D6 + rotate_left(A, 5); - B = rotate_left(B, 30); + E += (B ^ C ^ D) + msg + 0xCA62C1D6 + rotl<5>(A); + B = rotl<30>(B); } } @@ -99,14 +99,14 @@ void SHA_160::compress_n(const uint8_t input[], size_t blocks) for(size_t j = 16; j != 80; j += 8) { - m_W[j ] = rotate_left((m_W[j-3] ^ m_W[j-8] ^ m_W[j-14] ^ m_W[j-16]), 1); - m_W[j+1] = rotate_left((m_W[j-2] ^ m_W[j-7] ^ m_W[j-13] ^ m_W[j-15]), 1); - m_W[j+2] = rotate_left((m_W[j-1] ^ m_W[j-6] ^ m_W[j-12] ^ m_W[j-14]), 1); - m_W[j+3] = rotate_left((m_W[j ] ^ m_W[j-5] ^ m_W[j-11] ^ m_W[j-13]), 1); - m_W[j+4] = rotate_left((m_W[j+1] ^ m_W[j-4] ^ m_W[j-10] ^ m_W[j-12]), 1); - m_W[j+5] = rotate_left((m_W[j+2] ^ m_W[j-3] ^ m_W[j- 9] ^ m_W[j-11]), 1); - m_W[j+6] = rotate_left((m_W[j+3] ^ m_W[j-2] ^ m_W[j- 8] ^ m_W[j-10]), 1); - m_W[j+7] = rotate_left((m_W[j+4] ^ m_W[j-1] ^ m_W[j- 7] ^ m_W[j- 9]), 1); + m_W[j ] = rotl<1>(m_W[j-3] ^ m_W[j-8] ^ m_W[j-14] ^ m_W[j-16]); + m_W[j+1] = rotl<1>(m_W[j-2] ^ m_W[j-7] ^ m_W[j-13] ^ m_W[j-15]); + m_W[j+2] = rotl<1>(m_W[j-1] ^ m_W[j-6] ^ m_W[j-12] ^ m_W[j-14]); + m_W[j+3] = rotl<1>(m_W[j ] ^ m_W[j-5] ^ m_W[j-11] ^ m_W[j-13]); + m_W[j+4] = rotl<1>(m_W[j+1] ^ m_W[j-4] ^ m_W[j-10] ^ m_W[j-12]); + m_W[j+5] = rotl<1>(m_W[j+2] ^ m_W[j-3] ^ m_W[j- 9] ^ m_W[j-11]); + m_W[j+6] = rotl<1>(m_W[j+3] ^ m_W[j-2] ^ m_W[j- 8] ^ m_W[j-10]); + m_W[j+7] = rotl<1>(m_W[j+4] ^ m_W[j-1] ^ m_W[j- 7] ^ m_W[j- 9]); } F1(A, B, C, D, E, m_W[ 0]); F1(E, A, B, C, D, m_W[ 1]); diff --git a/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp b/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp index 8c7785051..0b7f8f837 100644 --- a/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp +++ b/src/lib/hash/sha1/sha1_sse2/sha1_sse2.cpp @@ -113,8 +113,8 @@ W0 = W[t]..W[t+3] */ inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += (D ^ (B & (C ^ D))) + msg + rotate_left(A, 5); - B = rotate_left(B, 30); + E += (D ^ (B & (C ^ D))) + msg + rotl<5>(A); + B = rotl<30>(B); } /* @@ -122,8 +122,8 @@ inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin */ inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += (B ^ C ^ D) + msg + rotate_left(A, 5); - B = rotate_left(B, 30); + E += (B ^ C ^ D) + msg + rotl<5>(A); + B = rotl<30>(B); } /* @@ -131,8 +131,8 @@ inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin */ inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += ((B & C) | ((B | C) & D)) + msg + rotate_left(A, 5); - B = rotate_left(B, 30); + E += ((B & C) | ((B | C) & D)) + msg + rotl<5>(A); + B = rotl<30>(B); } /* @@ -140,8 +140,8 @@ inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uin */ inline void F4(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) { - E += (B ^ C ^ D) + msg + rotate_left(A, 5); - B = rotate_left(B, 30); + E += (B ^ C ^ D) + msg + rotl<5>(A); + B = rotl<30>(B); } } diff --git a/src/lib/hash/sha2_32/sha2_32.cpp b/src/lib/hash/sha2_32/sha2_32.cpp index 281e6ed2b..0710747d0 100644 --- a/src/lib/hash/sha2_32/sha2_32.cpp +++ b/src/lib/hash/sha2_32/sha2_32.cpp @@ -28,10 +28,10 @@ std::unique_ptr<HashFunction> SHA_256::copy_state() const * even though it is much faster if inlined. */ #define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do { \ - uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22); \ - uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25); \ - uint32_t M2_sigma = rotate_right(M2, 17) ^ rotate_right(M2, 19) ^ (M2 >> 10); \ - uint32_t M4_sigma = rotate_right(M4, 7) ^ rotate_right(M4, 18) ^ (M4 >> 3); \ + uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); \ + uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); \ + uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ (M2 >> 10); \ + uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ (M4 >> 3); \ H += magic + E_rho + ((E & F) ^ (~E & G)) + M1; \ D += H; \ H += A_rho + ((A & B) | ((A | B) & C)); \ diff --git a/src/lib/hash/sha2_64/sha2_64.cpp b/src/lib/hash/sha2_64/sha2_64.cpp index 8e01b6b4d..45992e996 100644 --- a/src/lib/hash/sha2_64/sha2_64.cpp +++ b/src/lib/hash/sha2_64/sha2_64.cpp @@ -26,44 +26,29 @@ std::unique_ptr<HashFunction> SHA_512_256::copy_state() const namespace { -namespace SHA2_64 { - -/* -* SHA-{384,512} Rho Function -*/ -inline uint64_t rho(uint64_t X, uint32_t rot1, uint32_t rot2, uint32_t rot3) - { - return (rotate_right(X, rot1) ^ rotate_right(X, rot2) ^ - rotate_right(X, rot3)); - } - -/* -* SHA-{384,512} Sigma Function -*/ -inline uint64_t sigma(uint64_t X, uint32_t rot1, uint32_t rot2, uint32_t shift) - { - return (rotate_right(X, rot1) ^ rotate_right(X, rot2) ^ (X >> shift)); - } - /* * SHA-512 F1 Function * * Use a macro as many compilers won't inline a function this big, * even though it is much faster if inlined. */ -#define SHA2_64_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) \ - do { \ - H += magic + rho(E, 14, 18, 41) + ((E & F) ^ (~E & G)) + M1; \ - D += H; \ - H += rho(A, 28, 34, 39) + ((A & B) | ((A | B) & C)); \ - M1 += sigma(M2, 19, 61, 6) + M3 + sigma(M4, 1, 8, 7); \ +#define SHA2_64_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) \ + do { \ + const uint64_t E_rho = rotr<14>(E) ^ rotr<18>(E) ^ rotr<41>(E); \ + const uint64_t A_rho = rotr<28>(A) ^ rotr<34>(A) ^ rotr<39>(A); \ + const uint64_t M2_sigma = rotr<19>(M2) ^ rotr<61>(M2) ^ (M2 >> 6); \ + const uint64_t M4_sigma = rotr<1>(M4) ^ rotr<8>(M4) ^ (M4 >> 7); \ + H += magic + E_rho + ((E & F) ^ (~E & G)) + M1; \ + D += H; \ + H += A_rho + ((A & B) | ((A | B) & C)); \ + M1 += M2_sigma + M3 + M4_sigma; \ } while(0); /* * SHA-{384,512} Compression Function */ -void compress(secure_vector<uint64_t>& digest, - const uint8_t input[], size_t blocks) +void SHA64_compress(secure_vector<uint64_t>& digest, + const uint8_t input[], size_t blocks) { uint64_t A = digest[0], B = digest[1], C = digest[2], D = digest[3], E = digest[4], F = digest[5], @@ -184,21 +169,19 @@ void compress(secure_vector<uint64_t>& digest, } -} - void SHA_512_256::compress_n(const uint8_t input[], size_t blocks) { - SHA2_64::compress(m_digest, input, blocks); + SHA64_compress(m_digest, input, blocks); } void SHA_384::compress_n(const uint8_t input[], size_t blocks) { - SHA2_64::compress(m_digest, input, blocks); + SHA64_compress(m_digest, input, blocks); } void SHA_512::compress_n(const uint8_t input[], size_t blocks) { - SHA2_64::compress(m_digest, input, blocks); + SHA64_compress(m_digest, input, blocks); } void SHA_512_256::copy_out(uint8_t output[]) diff --git a/src/lib/hash/sha3/sha3.cpp b/src/lib/hash/sha3/sha3.cpp index e829c3f70..1556e5498 100644 --- a/src/lib/hash/sha3/sha3.cpp +++ b/src/lib/hash/sha3/sha3.cpp @@ -37,37 +37,37 @@ void SHA_3::permute(uint64_t A[25]) const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23]; const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]; - const uint64_t D0 = rotate_left(C0, 1) ^ C3; - const uint64_t D1 = rotate_left(C1, 1) ^ C4; - const uint64_t D2 = rotate_left(C2, 1) ^ C0; - const uint64_t D3 = rotate_left(C3, 1) ^ C1; - const uint64_t D4 = rotate_left(C4, 1) ^ C2; + const uint64_t D0 = rotl<1>(C0) ^ C3; + const uint64_t D1 = rotl<1>(C1) ^ C4; + const uint64_t D2 = rotl<1>(C2) ^ C0; + const uint64_t D3 = rotl<1>(C3) ^ C1; + const uint64_t D4 = rotl<1>(C4) ^ C2; const uint64_t B00 = A[ 0] ^ D1; - const uint64_t B10 = rotate_left(A[ 1] ^ D2, 1); - const uint64_t B20 = rotate_left(A[ 2] ^ D3, 62); - const uint64_t B05 = rotate_left(A[ 3] ^ D4, 28); - const uint64_t B15 = rotate_left(A[ 4] ^ D0, 27); - const uint64_t B16 = rotate_left(A[ 5] ^ D1, 36); - const uint64_t B01 = rotate_left(A[ 6] ^ D2, 44); - const uint64_t B11 = rotate_left(A[ 7] ^ D3, 6); - const uint64_t B21 = rotate_left(A[ 8] ^ D4, 55); - const uint64_t B06 = rotate_left(A[ 9] ^ D0, 20); - const uint64_t B07 = rotate_left(A[10] ^ D1, 3); - const uint64_t B17 = rotate_left(A[11] ^ D2, 10); - const uint64_t B02 = rotate_left(A[12] ^ D3, 43); - const uint64_t B12 = rotate_left(A[13] ^ D4, 25); - const uint64_t B22 = rotate_left(A[14] ^ D0, 39); - const uint64_t B23 = rotate_left(A[15] ^ D1, 41); - const uint64_t B08 = rotate_left(A[16] ^ D2, 45); - const uint64_t B18 = rotate_left(A[17] ^ D3, 15); - const uint64_t B03 = rotate_left(A[18] ^ D4, 21); - const uint64_t B13 = rotate_left(A[19] ^ D0, 8); - const uint64_t B14 = rotate_left(A[20] ^ D1, 18); - const uint64_t B24 = rotate_left(A[21] ^ D2, 2); - const uint64_t B09 = rotate_left(A[22] ^ D3, 61); - const uint64_t B19 = rotate_left(A[23] ^ D4, 56); - const uint64_t B04 = rotate_left(A[24] ^ D0, 14); + const uint64_t B10 = rotl<1>(A[ 1] ^ D2); + const uint64_t B20 = rotl<62>(A[ 2] ^ D3); + const uint64_t B05 = rotl<28>(A[ 3] ^ D4); + const uint64_t B15 = rotl<27>(A[ 4] ^ D0); + const uint64_t B16 = rotl<36>(A[ 5] ^ D1); + const uint64_t B01 = rotl<44>(A[ 6] ^ D2); + const uint64_t B11 = rotl<6>(A[ 7] ^ D3); + const uint64_t B21 = rotl<55>(A[ 8] ^ D4); + const uint64_t B06 = rotl<20>(A[ 9] ^ D0); + const uint64_t B07 = rotl<3>(A[10] ^ D1); + const uint64_t B17 = rotl<10>(A[11] ^ D2); + const uint64_t B02 = rotl<43>(A[12] ^ D3); + const uint64_t B12 = rotl<25>(A[13] ^ D4); + const uint64_t B22 = rotl<39>(A[14] ^ D0); + const uint64_t B23 = rotl<41>(A[15] ^ D1); + const uint64_t B08 = rotl<45>(A[16] ^ D2); + const uint64_t B18 = rotl<15>(A[17] ^ D3); + const uint64_t B03 = rotl<21>(A[18] ^ D4); + const uint64_t B13 = rotl<8>(A[19] ^ D0); + const uint64_t B14 = rotl<18>(A[20] ^ D1); + const uint64_t B24 = rotl<2>(A[21] ^ D2); + const uint64_t B09 = rotl<61>(A[22] ^ D3); + const uint64_t B19 = rotl<56>(A[23] ^ D4); + const uint64_t B04 = rotl<14>(A[24] ^ D0); A[ 0] = B00 ^ (~B01 & B02); A[ 1] = B01 ^ (~B02 & B03); diff --git a/src/lib/hash/sm3/sm3.cpp b/src/lib/hash/sm3/sm3.cpp index aeb8f2e47..c3220d243 100644 --- a/src/lib/hash/sm3/sm3.cpp +++ b/src/lib/hash/sm3/sm3.cpp @@ -23,12 +23,12 @@ const uint32_t SM3_IV[] = { inline uint32_t P0(uint32_t X) { - return X ^ rotate_left(X, 9) ^ rotate_left(X, 17); + return X ^ rotl<9>(X) ^ rotl<17>(X); } inline uint32_t P1(uint32_t X) { - return X ^ rotate_left(X, 15) ^ rotate_left(X, 23); + return X ^ rotl<15>(X) ^ rotl<23>(X); } inline uint32_t FF1(uint32_t X, uint32_t Y, uint32_t Z) @@ -47,14 +47,14 @@ inline void R1(uint32_t A, uint32_t& B, uint32_t C, uint32_t& D, uint32_t E, uint32_t& F, uint32_t G, uint32_t& H, uint32_t TJ, uint32_t Wi, uint32_t Wj) { - const uint32_t A12 = rotate_left(A, 12); - const uint32_t SS1 = rotate_left(A12 + E + TJ, 7); + const uint32_t A12 = rotl<12>(A); + const uint32_t SS1 = rotl<7>(A12 + E + TJ); const uint32_t TT1 = (A ^ B ^ C) + D + (SS1 ^ A12) + Wj; const uint32_t TT2 = (E ^ F ^ G) + H + SS1 + Wi; - B = rotate_left(B, 9); + B = rotl<9>(B); D = TT1; - F = rotate_left(F, 19); + F = rotl<19>(F); H = P0(TT2); } @@ -62,14 +62,14 @@ inline void R2(uint32_t A, uint32_t& B, uint32_t C, uint32_t& D, uint32_t E, uint32_t& F, uint32_t G, uint32_t& H, uint32_t TJ, uint32_t Wi, uint32_t Wj) { - const uint32_t A12 = rotate_left(A, 12); - const uint32_t SS1 = rotate_left(A12 + E + TJ, 7); + const uint32_t A12 = rotl<12>(A); + const uint32_t SS1 = rotl<7>(A12 + E + TJ); const uint32_t TT1 = FF1(A, B, C) + D + (SS1 ^ A12) + Wj; const uint32_t TT2 = GG1(E, F, G) + H + SS1 + Wi; - B = rotate_left(B, 9); + B = rotl<9>(B); D = TT1; - F = rotate_left(F, 19); + F = rotl<19>(F); H = P0(TT2); } @@ -105,58 +105,58 @@ void SM3::compress_n(const uint8_t input[], size_t blocks) W[15] = load_be<uint32_t>(input, 15); // Message Extension (b) - W[16] = P1(W[ 0] ^ W[ 7] ^ rotate_left(W[13], 15)) ^ rotate_left(W[ 3], 7) ^ W[10]; - W[17] = P1(W[ 1] ^ W[ 8] ^ rotate_left(W[14], 15)) ^ rotate_left(W[ 4], 7) ^ W[11]; - W[18] = P1(W[ 2] ^ W[ 9] ^ rotate_left(W[15], 15)) ^ rotate_left(W[ 5], 7) ^ W[12]; - W[19] = P1(W[ 3] ^ W[10] ^ rotate_left(W[16], 15)) ^ rotate_left(W[ 6], 7) ^ W[13]; - W[20] = P1(W[ 4] ^ W[11] ^ rotate_left(W[17], 15)) ^ rotate_left(W[ 7], 7) ^ W[14]; - W[21] = P1(W[ 5] ^ W[12] ^ rotate_left(W[18], 15)) ^ rotate_left(W[ 8], 7) ^ W[15]; - W[22] = P1(W[ 6] ^ W[13] ^ rotate_left(W[19], 15)) ^ rotate_left(W[ 9], 7) ^ W[16]; - W[23] = P1(W[ 7] ^ W[14] ^ rotate_left(W[20], 15)) ^ rotate_left(W[10], 7) ^ W[17]; - W[24] = P1(W[ 8] ^ W[15] ^ rotate_left(W[21], 15)) ^ rotate_left(W[11], 7) ^ W[18]; - W[25] = P1(W[ 9] ^ W[16] ^ rotate_left(W[22], 15)) ^ rotate_left(W[12], 7) ^ W[19]; - W[26] = P1(W[10] ^ W[17] ^ rotate_left(W[23], 15)) ^ rotate_left(W[13], 7) ^ W[20]; - W[27] = P1(W[11] ^ W[18] ^ rotate_left(W[24], 15)) ^ rotate_left(W[14], 7) ^ W[21]; - W[28] = P1(W[12] ^ W[19] ^ rotate_left(W[25], 15)) ^ rotate_left(W[15], 7) ^ W[22]; - W[29] = P1(W[13] ^ W[20] ^ rotate_left(W[26], 15)) ^ rotate_left(W[16], 7) ^ W[23]; - W[30] = P1(W[14] ^ W[21] ^ rotate_left(W[27], 15)) ^ rotate_left(W[17], 7) ^ W[24]; - W[31] = P1(W[15] ^ W[22] ^ rotate_left(W[28], 15)) ^ rotate_left(W[18], 7) ^ W[25]; - W[32] = P1(W[16] ^ W[23] ^ rotate_left(W[29], 15)) ^ rotate_left(W[19], 7) ^ W[26]; - W[33] = P1(W[17] ^ W[24] ^ rotate_left(W[30], 15)) ^ rotate_left(W[20], 7) ^ W[27]; - W[34] = P1(W[18] ^ W[25] ^ rotate_left(W[31], 15)) ^ rotate_left(W[21], 7) ^ W[28]; - W[35] = P1(W[19] ^ W[26] ^ rotate_left(W[32], 15)) ^ rotate_left(W[22], 7) ^ W[29]; - W[36] = P1(W[20] ^ W[27] ^ rotate_left(W[33], 15)) ^ rotate_left(W[23], 7) ^ W[30]; - W[37] = P1(W[21] ^ W[28] ^ rotate_left(W[34], 15)) ^ rotate_left(W[24], 7) ^ W[31]; - W[38] = P1(W[22] ^ W[29] ^ rotate_left(W[35], 15)) ^ rotate_left(W[25], 7) ^ W[32]; - W[39] = P1(W[23] ^ W[30] ^ rotate_left(W[36], 15)) ^ rotate_left(W[26], 7) ^ W[33]; - W[40] = P1(W[24] ^ W[31] ^ rotate_left(W[37], 15)) ^ rotate_left(W[27], 7) ^ W[34]; - W[41] = P1(W[25] ^ W[32] ^ rotate_left(W[38], 15)) ^ rotate_left(W[28], 7) ^ W[35]; - W[42] = P1(W[26] ^ W[33] ^ rotate_left(W[39], 15)) ^ rotate_left(W[29], 7) ^ W[36]; - W[43] = P1(W[27] ^ W[34] ^ rotate_left(W[40], 15)) ^ rotate_left(W[30], 7) ^ W[37]; - W[44] = P1(W[28] ^ W[35] ^ rotate_left(W[41], 15)) ^ rotate_left(W[31], 7) ^ W[38]; - W[45] = P1(W[29] ^ W[36] ^ rotate_left(W[42], 15)) ^ rotate_left(W[32], 7) ^ W[39]; - W[46] = P1(W[30] ^ W[37] ^ rotate_left(W[43], 15)) ^ rotate_left(W[33], 7) ^ W[40]; - W[47] = P1(W[31] ^ W[38] ^ rotate_left(W[44], 15)) ^ rotate_left(W[34], 7) ^ W[41]; - W[48] = P1(W[32] ^ W[39] ^ rotate_left(W[45], 15)) ^ rotate_left(W[35], 7) ^ W[42]; - W[49] = P1(W[33] ^ W[40] ^ rotate_left(W[46], 15)) ^ rotate_left(W[36], 7) ^ W[43]; - W[50] = P1(W[34] ^ W[41] ^ rotate_left(W[47], 15)) ^ rotate_left(W[37], 7) ^ W[44]; - W[51] = P1(W[35] ^ W[42] ^ rotate_left(W[48], 15)) ^ rotate_left(W[38], 7) ^ W[45]; - W[52] = P1(W[36] ^ W[43] ^ rotate_left(W[49], 15)) ^ rotate_left(W[39], 7) ^ W[46]; - W[53] = P1(W[37] ^ W[44] ^ rotate_left(W[50], 15)) ^ rotate_left(W[40], 7) ^ W[47]; - W[54] = P1(W[38] ^ W[45] ^ rotate_left(W[51], 15)) ^ rotate_left(W[41], 7) ^ W[48]; - W[55] = P1(W[39] ^ W[46] ^ rotate_left(W[52], 15)) ^ rotate_left(W[42], 7) ^ W[49]; - W[56] = P1(W[40] ^ W[47] ^ rotate_left(W[53], 15)) ^ rotate_left(W[43], 7) ^ W[50]; - W[57] = P1(W[41] ^ W[48] ^ rotate_left(W[54], 15)) ^ rotate_left(W[44], 7) ^ W[51]; - W[58] = P1(W[42] ^ W[49] ^ rotate_left(W[55], 15)) ^ rotate_left(W[45], 7) ^ W[52]; - W[59] = P1(W[43] ^ W[50] ^ rotate_left(W[56], 15)) ^ rotate_left(W[46], 7) ^ W[53]; - W[60] = P1(W[44] ^ W[51] ^ rotate_left(W[57], 15)) ^ rotate_left(W[47], 7) ^ W[54]; - W[61] = P1(W[45] ^ W[52] ^ rotate_left(W[58], 15)) ^ rotate_left(W[48], 7) ^ W[55]; - W[62] = P1(W[46] ^ W[53] ^ rotate_left(W[59], 15)) ^ rotate_left(W[49], 7) ^ W[56]; - W[63] = P1(W[47] ^ W[54] ^ rotate_left(W[60], 15)) ^ rotate_left(W[50], 7) ^ W[57]; - W[64] = P1(W[48] ^ W[55] ^ rotate_left(W[61], 15)) ^ rotate_left(W[51], 7) ^ W[58]; - W[65] = P1(W[49] ^ W[56] ^ rotate_left(W[62], 15)) ^ rotate_left(W[52], 7) ^ W[59]; - W[66] = P1(W[50] ^ W[57] ^ rotate_left(W[63], 15)) ^ rotate_left(W[53], 7) ^ W[60]; - W[67] = P1(W[51] ^ W[58] ^ rotate_left(W[64], 15)) ^ rotate_left(W[54], 7) ^ W[61]; + W[16] = P1(W[ 0] ^ W[ 7] ^ rotl<15>(W[13])) ^ rotl<7>(W[ 3]) ^ W[10]; + W[17] = P1(W[ 1] ^ W[ 8] ^ rotl<15>(W[14])) ^ rotl<7>(W[ 4]) ^ W[11]; + W[18] = P1(W[ 2] ^ W[ 9] ^ rotl<15>(W[15])) ^ rotl<7>(W[ 5]) ^ W[12]; + W[19] = P1(W[ 3] ^ W[10] ^ rotl<15>(W[16])) ^ rotl<7>(W[ 6]) ^ W[13]; + W[20] = P1(W[ 4] ^ W[11] ^ rotl<15>(W[17])) ^ rotl<7>(W[ 7]) ^ W[14]; + W[21] = P1(W[ 5] ^ W[12] ^ rotl<15>(W[18])) ^ rotl<7>(W[ 8]) ^ W[15]; + W[22] = P1(W[ 6] ^ W[13] ^ rotl<15>(W[19])) ^ rotl<7>(W[ 9]) ^ W[16]; + W[23] = P1(W[ 7] ^ W[14] ^ rotl<15>(W[20])) ^ rotl<7>(W[10]) ^ W[17]; + W[24] = P1(W[ 8] ^ W[15] ^ rotl<15>(W[21])) ^ rotl<7>(W[11]) ^ W[18]; + W[25] = P1(W[ 9] ^ W[16] ^ rotl<15>(W[22])) ^ rotl<7>(W[12]) ^ W[19]; + W[26] = P1(W[10] ^ W[17] ^ rotl<15>(W[23])) ^ rotl<7>(W[13]) ^ W[20]; + W[27] = P1(W[11] ^ W[18] ^ rotl<15>(W[24])) ^ rotl<7>(W[14]) ^ W[21]; + W[28] = P1(W[12] ^ W[19] ^ rotl<15>(W[25])) ^ rotl<7>(W[15]) ^ W[22]; + W[29] = P1(W[13] ^ W[20] ^ rotl<15>(W[26])) ^ rotl<7>(W[16]) ^ W[23]; + W[30] = P1(W[14] ^ W[21] ^ rotl<15>(W[27])) ^ rotl<7>(W[17]) ^ W[24]; + W[31] = P1(W[15] ^ W[22] ^ rotl<15>(W[28])) ^ rotl<7>(W[18]) ^ W[25]; + W[32] = P1(W[16] ^ W[23] ^ rotl<15>(W[29])) ^ rotl<7>(W[19]) ^ W[26]; + W[33] = P1(W[17] ^ W[24] ^ rotl<15>(W[30])) ^ rotl<7>(W[20]) ^ W[27]; + W[34] = P1(W[18] ^ W[25] ^ rotl<15>(W[31])) ^ rotl<7>(W[21]) ^ W[28]; + W[35] = P1(W[19] ^ W[26] ^ rotl<15>(W[32])) ^ rotl<7>(W[22]) ^ W[29]; + W[36] = P1(W[20] ^ W[27] ^ rotl<15>(W[33])) ^ rotl<7>(W[23]) ^ W[30]; + W[37] = P1(W[21] ^ W[28] ^ rotl<15>(W[34])) ^ rotl<7>(W[24]) ^ W[31]; + W[38] = P1(W[22] ^ W[29] ^ rotl<15>(W[35])) ^ rotl<7>(W[25]) ^ W[32]; + W[39] = P1(W[23] ^ W[30] ^ rotl<15>(W[36])) ^ rotl<7>(W[26]) ^ W[33]; + W[40] = P1(W[24] ^ W[31] ^ rotl<15>(W[37])) ^ rotl<7>(W[27]) ^ W[34]; + W[41] = P1(W[25] ^ W[32] ^ rotl<15>(W[38])) ^ rotl<7>(W[28]) ^ W[35]; + W[42] = P1(W[26] ^ W[33] ^ rotl<15>(W[39])) ^ rotl<7>(W[29]) ^ W[36]; + W[43] = P1(W[27] ^ W[34] ^ rotl<15>(W[40])) ^ rotl<7>(W[30]) ^ W[37]; + W[44] = P1(W[28] ^ W[35] ^ rotl<15>(W[41])) ^ rotl<7>(W[31]) ^ W[38]; + W[45] = P1(W[29] ^ W[36] ^ rotl<15>(W[42])) ^ rotl<7>(W[32]) ^ W[39]; + W[46] = P1(W[30] ^ W[37] ^ rotl<15>(W[43])) ^ rotl<7>(W[33]) ^ W[40]; + W[47] = P1(W[31] ^ W[38] ^ rotl<15>(W[44])) ^ rotl<7>(W[34]) ^ W[41]; + W[48] = P1(W[32] ^ W[39] ^ rotl<15>(W[45])) ^ rotl<7>(W[35]) ^ W[42]; + W[49] = P1(W[33] ^ W[40] ^ rotl<15>(W[46])) ^ rotl<7>(W[36]) ^ W[43]; + W[50] = P1(W[34] ^ W[41] ^ rotl<15>(W[47])) ^ rotl<7>(W[37]) ^ W[44]; + W[51] = P1(W[35] ^ W[42] ^ rotl<15>(W[48])) ^ rotl<7>(W[38]) ^ W[45]; + W[52] = P1(W[36] ^ W[43] ^ rotl<15>(W[49])) ^ rotl<7>(W[39]) ^ W[46]; + W[53] = P1(W[37] ^ W[44] ^ rotl<15>(W[50])) ^ rotl<7>(W[40]) ^ W[47]; + W[54] = P1(W[38] ^ W[45] ^ rotl<15>(W[51])) ^ rotl<7>(W[41]) ^ W[48]; + W[55] = P1(W[39] ^ W[46] ^ rotl<15>(W[52])) ^ rotl<7>(W[42]) ^ W[49]; + W[56] = P1(W[40] ^ W[47] ^ rotl<15>(W[53])) ^ rotl<7>(W[43]) ^ W[50]; + W[57] = P1(W[41] ^ W[48] ^ rotl<15>(W[54])) ^ rotl<7>(W[44]) ^ W[51]; + W[58] = P1(W[42] ^ W[49] ^ rotl<15>(W[55])) ^ rotl<7>(W[45]) ^ W[52]; + W[59] = P1(W[43] ^ W[50] ^ rotl<15>(W[56])) ^ rotl<7>(W[46]) ^ W[53]; + W[60] = P1(W[44] ^ W[51] ^ rotl<15>(W[57])) ^ rotl<7>(W[47]) ^ W[54]; + W[61] = P1(W[45] ^ W[52] ^ rotl<15>(W[58])) ^ rotl<7>(W[48]) ^ W[55]; + W[62] = P1(W[46] ^ W[53] ^ rotl<15>(W[59])) ^ rotl<7>(W[49]) ^ W[56]; + W[63] = P1(W[47] ^ W[54] ^ rotl<15>(W[60])) ^ rotl<7>(W[50]) ^ W[57]; + W[64] = P1(W[48] ^ W[55] ^ rotl<15>(W[61])) ^ rotl<7>(W[51]) ^ W[58]; + W[65] = P1(W[49] ^ W[56] ^ rotl<15>(W[62])) ^ rotl<7>(W[52]) ^ W[59]; + W[66] = P1(W[50] ^ W[57] ^ rotl<15>(W[63])) ^ rotl<7>(W[53]) ^ W[60]; + W[67] = P1(W[51] ^ W[58] ^ rotl<15>(W[64])) ^ rotl<7>(W[54]) ^ W[61]; R1(A, B, C, D, E, F, G, H, 0x79CC4519, W[ 0], W[ 0] ^ W[ 4]); R1(D, A, B, C, H, E, F, G, 0xF3988A32, W[ 1], W[ 1] ^ W[ 5]); diff --git a/src/lib/mac/siphash/siphash.cpp b/src/lib/mac/siphash/siphash.cpp index 54adcd5a5..255a35493 100644 --- a/src/lib/mac/siphash/siphash.cpp +++ b/src/lib/mac/siphash/siphash.cpp @@ -19,16 +19,16 @@ void SipRounds(uint64_t M, secure_vector<uint64_t>& V, size_t r) for(size_t i = 0; i != r; ++i) { V0 += V1; V2 += V3; - V1 = rotate_left(V1, 13); - V3 = rotate_left(V3, 16); + V1 = rotl<13>(V1); + V3 = rotl<16>(V3); V1 ^= V0; V3 ^= V2; - V0 = rotate_left(V0, 32); + V0 = rotl<32>(V0); V2 += V1; V0 += V3; - V1 = rotate_left(V1, 17); - V3 = rotate_left(V3, 21); + V1 = rotl<17>(V1); + V3 = rotl<21>(V3); V1 ^= V2; V3 ^= V0; - V2 = rotate_left(V2, 32); + V2 = rotl<32>(V2); } V0 ^= M; diff --git a/src/lib/stream/chacha/chacha.cpp b/src/lib/stream/chacha/chacha.cpp index d56f9e60a..0bbb47bcb 100644 --- a/src/lib/stream/chacha/chacha.cpp +++ b/src/lib/stream/chacha/chacha.cpp @@ -49,12 +49,12 @@ void ChaCha::chacha_x4(uint8_t output[64*4], uint32_t input[16], size_t rounds) x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11], x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15]; -#define CHACHA_QUARTER_ROUND(a, b, c, d) \ - do { \ - a += b; d ^= a; d = rotate_left(d, 16); \ - c += d; b ^= c; b = rotate_left(b, 12); \ - a += b; d ^= a; d = rotate_left(d, 8); \ - c += d; b ^= c; b = rotate_left(b, 7); \ +#define CHACHA_QUARTER_ROUND(a, b, c, d) \ + do { \ + a += b; d ^= a; d = rotl<16>(d); \ + c += d; b ^= c; b = rotl<12>(b); \ + a += b; d ^= a; d = rotl<8>(d); \ + c += d; b ^= c; b = rotl<7>(b); \ } while(0) for(size_t r = 0; r != rounds / 2; ++r) diff --git a/src/lib/stream/salsa20/salsa20.cpp b/src/lib/stream/salsa20/salsa20.cpp index 1c8846183..e27b2d2bb 100644 --- a/src/lib/stream/salsa20/salsa20.cpp +++ b/src/lib/stream/salsa20/salsa20.cpp @@ -14,10 +14,10 @@ namespace { #define SALSA20_QUARTER_ROUND(x1, x2, x3, x4) \ do { \ - x2 ^= rotate_left(x1 + x4, 7); \ - x3 ^= rotate_left(x2 + x1, 9); \ - x4 ^= rotate_left(x3 + x2, 13); \ - x1 ^= rotate_left(x4 + x3, 18); \ + x2 ^= rotl<7>(x1 + x4); \ + x3 ^= rotl<9>(x2 + x1); \ + x4 ^= rotl<13>(x3 + x2); \ + x1 ^= rotl<18>(x4 + x3); \ } while(0) /* @@ -26,9 +26,9 @@ namespace { void hsalsa20(uint32_t output[8], const uint32_t input[16]) { uint32_t x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3], - x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7], - x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11], - x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15]; + x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7], + x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11], + x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15]; for(size_t i = 0; i != 10; ++i) { @@ -59,9 +59,9 @@ void hsalsa20(uint32_t output[8], const uint32_t input[16]) void salsa20(uint8_t output[64], const uint32_t input[16]) { uint32_t x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3], - x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7], - x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11], - x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15]; + x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7], + x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11], + x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15]; for(size_t i = 0; i != 10; ++i) { diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h index 23b3113ce..c1aa8b594 100644 --- a/src/lib/utils/bswap.h +++ b/src/lib/utils/bswap.h @@ -19,7 +19,7 @@ namespace Botan { */ inline uint16_t reverse_bytes(uint16_t val) { - return rotate_left(val, 8); + return rotl<8>(val); } /** @@ -64,8 +64,8 @@ inline uint32_t reverse_bytes(uint32_t val) #else // Generic implementation - return (rotate_right(val, 8) & 0xFF00FF00) | - (rotate_left (val, 8) & 0x00FF00FF); + return (rotr<8>(val) & 0xFF00FF00) | + (rotl<8>(val) & 0x00FF00FF); #endif } diff --git a/src/lib/utils/rotate.h b/src/lib/utils/rotate.h index cb92daf96..4bb76c9ed 100644 --- a/src/lib/utils/rotate.h +++ b/src/lib/utils/rotate.h @@ -1,6 +1,6 @@ /* * Word Rotation Operations -* (C) 1999-2008 Jack Lloyd +* (C) 1999-2008,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -13,27 +13,90 @@ namespace Botan { /** -* Bit rotation left +* Bit rotation left by a compile-time constant amount * @param input the input word -* @param rot the number of bits to rotate +* @return input rotated left by ROT bits +*/ +template<size_t ROT, typename T> +inline T rotl(T input) + { + static_assert(ROT > 0 && ROT < 8*sizeof(T), "Invalid rotation constant"); + return static_cast<T>((input << ROT) | (input >> (8*sizeof(T) - ROT))); + } + +/** +* Bit rotation right by a compile-time constant amount +* @param input the input word +* @return input rotated right by ROT bits +*/ +template<size_t ROT, typename T> +inline T rotr(T input) + { + static_assert(ROT > 0 && ROT < 8*sizeof(T), "Invalid rotation constant"); + return static_cast<T>((input >> ROT) | (input << (8*sizeof(T) - ROT))); + } + +/** +* Bit rotation left, variable rotation amount +* @param input the input word +* @param rot the number of bits to rotate, must be between 0 and sizeof(T)*8-1 * @return input rotated left by rot bits */ -template<typename T> inline T rotate_left(T input, size_t rot) +template<typename T> +inline T rotl_var(T input, size_t rot) { - rot %= 8 * sizeof(T); - return (rot == 0) ? input : static_cast<T>((input << rot) | (input >> (8*sizeof(T)-rot)));; + return rot ? static_cast<T>((input << rot) | (input >> (sizeof(T)*8 - rot))) : input; } /** -* Bit rotation right +* Bit rotation right, variable rotation amount * @param input the input word -* @param rot the number of bits to rotate +* @param rot the number of bits to rotate, must be between 0 and sizeof(T)*8-1 * @return input rotated right by rot bits */ -template<typename T> inline T rotate_right(T input, size_t rot) +template<typename T> +inline T rotr_var(T input, size_t rot) + { + return rot ? static_cast<T>((input >> rot) | (input << (sizeof(T)*8 - rot))) : input; + } + +#if BOTAN_USE_GCC_INLINE_ASM + +#if defined(BOTAN_TARGET_ARCH_IS_X86_64) || defined(BOTAN_TARGET_ARCH_IS_X86_32) + +template<> +inline uint32_t rotl_var(uint32_t input, size_t rot) + { + asm("roll %1,%0" : "+r" (input) : "c" (static_cast<uint8_t>(rot))); + return input; + } + +template<> +inline uint32_t rotr_var(uint32_t input, size_t rot) + { + asm("rorl %1,%0" : "+r" (input) : "c" (static_cast<uint8_t>(rot))); + return input; + } + +#endif + +#endif + + +template<typename T> +BOTAN_DEPRECATED("Use rotl<N> or rotl_var") +inline T rotate_left(T input, size_t rot) + { + // rotl_var does not reduce + return rotl_var(input, rot % (8 * sizeof(T))); + } + +template<typename T> +BOTAN_DEPRECATED("Use rotr<N> or rotr_var") +inline T rotate_right(T input, size_t rot) { - rot %= 8 * sizeof(T); - return (rot == 0) ? input : static_cast<T>((input >> rot) | (input << (8*sizeof(T)-rot))); + // rotr_var does not reduce + return rotr_var(input, rot % (8 * sizeof(T))); } } diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h index 2a177b388..aca7a8e9a 100644 --- a/src/lib/utils/simd/simd_32.h +++ b/src/lib/utils/simd/simd_32.h @@ -281,56 +281,57 @@ class SIMD_4x32 final /* - Return rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3) + * This is used for SHA-2/SHACAL2 + * Return rotr(ROT1) ^ rotr(ROT2) ^ rotr(ROT3) */ - SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const + template<size_t ROT1, size_t ROT2, size_t ROT3> + SIMD_4x32 rho() const { SIMD_4x32 res; #if defined(BOTAN_SIMD_USE_SSE2) - res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)), - _mm_srli_epi32(m_sse, static_cast<int>(rot1))); + res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT1)), + _mm_srli_epi32(m_sse, static_cast<int>(ROT1))); res.m_sse = _mm_xor_si128( res.m_sse, - _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)), - _mm_srli_epi32(m_sse, static_cast<int>(rot2)))); + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT2)), + _mm_srli_epi32(m_sse, static_cast<int>(ROT2)))); res.m_sse = _mm_xor_si128( res.m_sse, - _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)), - _mm_srli_epi32(m_sse, static_cast<int>(rot3)))); + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT3)), + _mm_srli_epi32(m_sse, static_cast<int>(ROT3)))); #elif defined(BOTAN_SIMD_USE_ALTIVEC) - const unsigned int r1 = static_cast<unsigned int>(32-rot1); - const unsigned int r2 = static_cast<unsigned int>(32-rot2); - const unsigned int r3 = static_cast<unsigned int>(32-rot3); + const unsigned int r1 = static_cast<unsigned int>(32-ROT1); + const unsigned int r2 = static_cast<unsigned int>(32-ROT2); + const unsigned int r3 = static_cast<unsigned int>(32-ROT3); res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1}); res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2})); res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3})); #elif defined(BOTAN_SIMD_USE_NEON) - res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot1)), - vshrq_n_u32(m_neon, static_cast<int>(rot1))); + res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT1)), + vshrq_n_u32(m_neon, static_cast<int>(ROT1))); res.m_neon = veorq_u32( res.m_neon, - vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot2)), - vshrq_n_u32(m_neon, static_cast<int>(rot2)))); + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT2)), + vshrq_n_u32(m_neon, static_cast<int>(ROT2)))); res.m_neon = veorq_u32( res.m_neon, - vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot3)), - vshrq_n_u32(m_neon, static_cast<int>(rot3)))); + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT3)), + vshrq_n_u32(m_neon, static_cast<int>(ROT3)))); #else for(size_t i = 0; i != 4; ++i) { - res.m_scalar[i] = - Botan::rotate_right(m_scalar[i], rot1) ^ - Botan::rotate_right(m_scalar[i], rot2) ^ - Botan::rotate_right(m_scalar[i], rot3); + res.m_scalar[i] = Botan::rotr<ROT1>(m_scalar[i]) ^ + Botan::rotr<ROT2>(m_scalar[i]) ^ + Botan::rotr<ROT3>(m_scalar[i]); } #endif @@ -338,38 +339,42 @@ class SIMD_4x32 final } /** - * Rotate each element of SIMD register n bits left + * Left rotation by a compile time constant */ - void rotate_left(size_t rot) + template<size_t ROT> + SIMD_4x32 rotl() const { + static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); + #if defined(BOTAN_SIMD_USE_SSE2) - m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(rot)), - _mm_srli_epi32(m_sse, static_cast<int>(32-rot))); + return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(ROT)), + _mm_srli_epi32(m_sse, static_cast<int>(32-ROT)))); #elif defined(BOTAN_SIMD_USE_ALTIVEC) - const unsigned int r = static_cast<unsigned int>(rot); - m_vmx = vec_rl(m_vmx, (__vector unsigned int){r, r, r, r}); + const unsigned int r = static_cast<unsigned int>(ROT); + return SIMD_4x32(vec_rl(m_vmx, (__vector unsigned int){r, r, r, r})); #elif defined(BOTAN_SIMD_USE_NEON) - m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(rot)), - vshrq_n_u32(m_neon, static_cast<int>(32-rot))); + return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(ROT)), + vshrq_n_u32(m_neon, static_cast<int>(32-ROT)))); #else - m_scalar[0] = Botan::rotate_left(m_scalar[0], rot); - m_scalar[1] = Botan::rotate_left(m_scalar[1], rot); - m_scalar[2] = Botan::rotate_left(m_scalar[2], rot); - m_scalar[3] = Botan::rotate_left(m_scalar[3], rot); + return SIMD_4x32(Botan::rotl<ROT>(m_scalar[0]), + Botan::rotl<ROT>(m_scalar[1]), + Botan::rotl<ROT>(m_scalar[2]), + Botan::rotl<ROT>(m_scalar[3])); #endif } /** - * Rotate each element of SIMD register n bits right + * Right rotation by a compile time constant */ - void rotate_right(size_t rot) + template<size_t ROT> + SIMD_4x32 rotr() const { - rotate_left(32 - rot); + return this->rotl<32-ROT>(); } /** @@ -596,13 +601,11 @@ class SIMD_4x32 final //return SIMD_4x32(vrev64q_u32(m_neon)); // FIXME this is really slow - SIMD_4x32 ror8(m_neon); - ror8.rotate_right(8); - SIMD_4x32 rol8(m_neon); - rol8.rotate_left(8); + SIMD_4x32 ror8 = this->rotr<8>(); + SIMD_4x32 rol8 = this->rotl<8>(); - SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00); - SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF); + const SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00); + const SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF); return (ror8 & mask1) | (rol8 & mask2); #else // scalar |