diff options
author | Daniel Wyatt <[email protected]> | 2017-04-03 17:54:08 -0400 |
---|---|---|
committer | Daniel Wyatt <[email protected]> | 2017-04-03 17:54:08 -0400 |
commit | 05c6178a434c4e62e87470dda945be8dbf6b103c (patch) | |
tree | 49f6e2acd47d87c6b4f116b26058a8f99791532c /src | |
parent | 7fcd74ea901948418febdf7f3d8254f1eae17c24 (diff) |
Unroll loops in SM3 hash compress_n.
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/hash/sm3/sm3.cpp | 299 |
-rw-r--r-- | src/lib/hash/sm3/sm3.h | 7 |
2 files changed, 241 insertions, 65 deletions
diff --git a/src/lib/hash/sm3/sm3.cpp b/src/lib/hash/sm3/sm3.cpp index cb4039789..453b59c05 100644 --- a/src/lib/hash/sm3/sm3.cpp +++ b/src/lib/hash/sm3/sm3.cpp @@ -49,6 +49,36 @@ inline uint32_t GG1(uint32_t X, uint32_t Y, uint32_t Z) return (X & Y) | (~X & Z); } +#define SM3_CF0(j) \ + T[(j)] = SM3_TJ_0_15; \ + SS1 = rotate_left(rotate_left(A, 12) + E + rotate_left(T[(j)], (j)), 7); \ + SS2 = SS1 ^ rotate_left(A, 12); \ + TT1 = FF0(A, B, C) + D + SS2 + W1[(j)]; \ + TT2 = GG0(E, F, G) + H + SS1 + W[(j)]; \ + D = C; \ + C = rotate_left(B, 9); \ + B = A; \ + A = TT1; \ + H = G; \ + G = rotate_left(F, 19); \ + F = E; \ + E = P0(TT2); + +#define SM3_CF1(j) \ + T[(j)] = SM3_TJ_16_63; \ + SS1 = rotate_left(rotate_left(A, 12) + E + rotate_left(T[(j)], (j)), 7); \ + SS2 = SS1 ^ rotate_left(A, 12); \ + TT1 = FF1(A, B, C) + D + SS2 + W1[(j)]; \ + TT2 = GG1(E, F, G) + H + SS1 + W[(j)]; \ + D = C; \ + C = rotate_left(B, 9); \ + B = A; \ + A = TT1; \ + H = G; \ + G = rotate_left(F, 19); \ + F = E; \ + E = P0(TT2); + } /* @@ -56,74 +86,226 @@ inline uint32_t GG1(uint32_t X, uint32_t Y, uint32_t Z) */ void SM3::compress_n(const uint8_t input[], size_t blocks) { + uint32_t A = m_digest[0], B = m_digest[1], C = m_digest[2], D = m_digest[3], + E = m_digest[4], F = m_digest[5], G = m_digest[6], H = m_digest[7]; uint32_t W[68], W1[64]; uint32_t SS1, SS2, TT1, TT2, T[64]; for(size_t i = 0; i != blocks; ++i) { - uint32_t A = m_digest[0], B = m_digest[1], C = m_digest[2], D = m_digest[3], - E = m_digest[4], F = m_digest[5], G = m_digest[6], H = m_digest[7]; - - load_be(m_M.data(), input, m_M.size()); // Message Extension (a) - for (size_t j = 0; j < 16; j++) - { - W[j] = m_M[j]; - } + W[ 0] = load_be<uint32_t>(input, 0); + W[ 1] = load_be<uint32_t>(input, 1); + W[ 2] = load_be<uint32_t>(input, 2); + W[ 3] = load_be<uint32_t>(input, 3); + W[ 4] = load_be<uint32_t>(input, 4); + W[ 5] = load_be<uint32_t>(input, 5); + W[ 6] = load_be<uint32_t>(input, 6); + W[ 7] = load_be<uint32_t>(input, 
7); + W[ 8] = load_be<uint32_t>(input, 8); + W[ 9] = load_be<uint32_t>(input, 9); + W[10] = load_be<uint32_t>(input, 10); + W[11] = load_be<uint32_t>(input, 11); + W[12] = load_be<uint32_t>(input, 12); + W[13] = load_be<uint32_t>(input, 13); + W[14] = load_be<uint32_t>(input, 14); + W[15] = load_be<uint32_t>(input, 15); + // Message Extension (b) - for (size_t j = 16; j < 68; j++) - { - W[j] = P1(W[j-16] ^ W[j-9] ^ rotate_left(W[j-3], 15)) ^ rotate_left(W[j-13], 7) ^ W[j-6]; - } + W[16] = P1(W[ 0] ^ W[ 7] ^ rotate_left(W[13], 15)) ^ rotate_left(W[ 3], 7) ^ W[10]; + W[17] = P1(W[ 1] ^ W[ 8] ^ rotate_left(W[14], 15)) ^ rotate_left(W[ 4], 7) ^ W[11]; + W[18] = P1(W[ 2] ^ W[ 9] ^ rotate_left(W[15], 15)) ^ rotate_left(W[ 5], 7) ^ W[12]; + W[19] = P1(W[ 3] ^ W[10] ^ rotate_left(W[16], 15)) ^ rotate_left(W[ 6], 7) ^ W[13]; + W[20] = P1(W[ 4] ^ W[11] ^ rotate_left(W[17], 15)) ^ rotate_left(W[ 7], 7) ^ W[14]; + W[21] = P1(W[ 5] ^ W[12] ^ rotate_left(W[18], 15)) ^ rotate_left(W[ 8], 7) ^ W[15]; + W[22] = P1(W[ 6] ^ W[13] ^ rotate_left(W[19], 15)) ^ rotate_left(W[ 9], 7) ^ W[16]; + W[23] = P1(W[ 7] ^ W[14] ^ rotate_left(W[20], 15)) ^ rotate_left(W[10], 7) ^ W[17]; + W[24] = P1(W[ 8] ^ W[15] ^ rotate_left(W[21], 15)) ^ rotate_left(W[11], 7) ^ W[18]; + W[25] = P1(W[ 9] ^ W[16] ^ rotate_left(W[22], 15)) ^ rotate_left(W[12], 7) ^ W[19]; + W[26] = P1(W[10] ^ W[17] ^ rotate_left(W[23], 15)) ^ rotate_left(W[13], 7) ^ W[20]; + W[27] = P1(W[11] ^ W[18] ^ rotate_left(W[24], 15)) ^ rotate_left(W[14], 7) ^ W[21]; + W[28] = P1(W[12] ^ W[19] ^ rotate_left(W[25], 15)) ^ rotate_left(W[15], 7) ^ W[22]; + W[29] = P1(W[13] ^ W[20] ^ rotate_left(W[26], 15)) ^ rotate_left(W[16], 7) ^ W[23]; + W[30] = P1(W[14] ^ W[21] ^ rotate_left(W[27], 15)) ^ rotate_left(W[17], 7) ^ W[24]; + W[31] = P1(W[15] ^ W[22] ^ rotate_left(W[28], 15)) ^ rotate_left(W[18], 7) ^ W[25]; + W[32] = P1(W[16] ^ W[23] ^ rotate_left(W[29], 15)) ^ rotate_left(W[19], 7) ^ W[26]; + W[33] = P1(W[17] ^ W[24] ^ rotate_left(W[30], 15)) 
^ rotate_left(W[20], 7) ^ W[27]; + W[34] = P1(W[18] ^ W[25] ^ rotate_left(W[31], 15)) ^ rotate_left(W[21], 7) ^ W[28]; + W[35] = P1(W[19] ^ W[26] ^ rotate_left(W[32], 15)) ^ rotate_left(W[22], 7) ^ W[29]; + W[36] = P1(W[20] ^ W[27] ^ rotate_left(W[33], 15)) ^ rotate_left(W[23], 7) ^ W[30]; + W[37] = P1(W[21] ^ W[28] ^ rotate_left(W[34], 15)) ^ rotate_left(W[24], 7) ^ W[31]; + W[38] = P1(W[22] ^ W[29] ^ rotate_left(W[35], 15)) ^ rotate_left(W[25], 7) ^ W[32]; + W[39] = P1(W[23] ^ W[30] ^ rotate_left(W[36], 15)) ^ rotate_left(W[26], 7) ^ W[33]; + W[40] = P1(W[24] ^ W[31] ^ rotate_left(W[37], 15)) ^ rotate_left(W[27], 7) ^ W[34]; + W[41] = P1(W[25] ^ W[32] ^ rotate_left(W[38], 15)) ^ rotate_left(W[28], 7) ^ W[35]; + W[42] = P1(W[26] ^ W[33] ^ rotate_left(W[39], 15)) ^ rotate_left(W[29], 7) ^ W[36]; + W[43] = P1(W[27] ^ W[34] ^ rotate_left(W[40], 15)) ^ rotate_left(W[30], 7) ^ W[37]; + W[44] = P1(W[28] ^ W[35] ^ rotate_left(W[41], 15)) ^ rotate_left(W[31], 7) ^ W[38]; + W[45] = P1(W[29] ^ W[36] ^ rotate_left(W[42], 15)) ^ rotate_left(W[32], 7) ^ W[39]; + W[46] = P1(W[30] ^ W[37] ^ rotate_left(W[43], 15)) ^ rotate_left(W[33], 7) ^ W[40]; + W[47] = P1(W[31] ^ W[38] ^ rotate_left(W[44], 15)) ^ rotate_left(W[34], 7) ^ W[41]; + W[48] = P1(W[32] ^ W[39] ^ rotate_left(W[45], 15)) ^ rotate_left(W[35], 7) ^ W[42]; + W[49] = P1(W[33] ^ W[40] ^ rotate_left(W[46], 15)) ^ rotate_left(W[36], 7) ^ W[43]; + W[50] = P1(W[34] ^ W[41] ^ rotate_left(W[47], 15)) ^ rotate_left(W[37], 7) ^ W[44]; + W[51] = P1(W[35] ^ W[42] ^ rotate_left(W[48], 15)) ^ rotate_left(W[38], 7) ^ W[45]; + W[52] = P1(W[36] ^ W[43] ^ rotate_left(W[49], 15)) ^ rotate_left(W[39], 7) ^ W[46]; + W[53] = P1(W[37] ^ W[44] ^ rotate_left(W[50], 15)) ^ rotate_left(W[40], 7) ^ W[47]; + W[54] = P1(W[38] ^ W[45] ^ rotate_left(W[51], 15)) ^ rotate_left(W[41], 7) ^ W[48]; + W[55] = P1(W[39] ^ W[46] ^ rotate_left(W[52], 15)) ^ rotate_left(W[42], 7) ^ W[49]; + W[56] = P1(W[40] ^ W[47] ^ rotate_left(W[53], 15)) ^ rotate_left(W[43], 
7) ^ W[50]; + W[57] = P1(W[41] ^ W[48] ^ rotate_left(W[54], 15)) ^ rotate_left(W[44], 7) ^ W[51]; + W[58] = P1(W[42] ^ W[49] ^ rotate_left(W[55], 15)) ^ rotate_left(W[45], 7) ^ W[52]; + W[59] = P1(W[43] ^ W[50] ^ rotate_left(W[56], 15)) ^ rotate_left(W[46], 7) ^ W[53]; + W[60] = P1(W[44] ^ W[51] ^ rotate_left(W[57], 15)) ^ rotate_left(W[47], 7) ^ W[54]; + W[61] = P1(W[45] ^ W[52] ^ rotate_left(W[58], 15)) ^ rotate_left(W[48], 7) ^ W[55]; + W[62] = P1(W[46] ^ W[53] ^ rotate_left(W[59], 15)) ^ rotate_left(W[49], 7) ^ W[56]; + W[63] = P1(W[47] ^ W[54] ^ rotate_left(W[60], 15)) ^ rotate_left(W[50], 7) ^ W[57]; + W[64] = P1(W[48] ^ W[55] ^ rotate_left(W[61], 15)) ^ rotate_left(W[51], 7) ^ W[58]; + W[65] = P1(W[49] ^ W[56] ^ rotate_left(W[62], 15)) ^ rotate_left(W[52], 7) ^ W[59]; + W[66] = P1(W[50] ^ W[57] ^ rotate_left(W[63], 15)) ^ rotate_left(W[53], 7) ^ W[60]; + W[67] = P1(W[51] ^ W[58] ^ rotate_left(W[64], 15)) ^ rotate_left(W[54], 7) ^ W[61]; + // Message Extension (c) - for (size_t j = 0; j < 64; j++) - { - W1[j] = W[j] ^ W[j+4]; - } - - for (size_t j = 0; j < 16; j++) - { - T[j] = SM3_TJ_0_15; - SS1 = rotate_left(rotate_left(A, 12) + E + rotate_left(T[j], j), 7); - SS2 = SS1 ^ rotate_left(A, 12); - TT1 = FF0(A, B, C) + D + SS2 + W1[j]; - TT2 = GG0(E, F, G) + H + SS1 + W[j]; - D = C; - C = rotate_left(B, 9); - B = A; - A = TT1; - H = G; - G = rotate_left(F, 19); - F = E; - E = P0(TT2); - } - - for (size_t j = 16; j < 64; j++) - { - T[j] = SM3_TJ_16_63; - SS1 = rotate_left(rotate_left(A, 12) + E + rotate_left(T[j], j), 7); - SS2 = SS1 ^ rotate_left(A, 12); - TT1 = FF1(A, B, C) + D + SS2 + W1[j]; - TT2 = GG1(E, F, G) + H + SS1 + W[j]; - D = C; - C = rotate_left(B, 9); - B = A; - A = TT1; - H = G; - G = rotate_left(F, 19); - F = E; - E = P0(TT2); - } + W1[ 0] = W[ 0] ^ W[ 4]; + W1[ 1] = W[ 1] ^ W[ 5]; + W1[ 2] = W[ 2] ^ W[ 6]; + W1[ 3] = W[ 3] ^ W[ 7]; + W1[ 4] = W[ 4] ^ W[ 8]; + W1[ 5] = W[ 5] ^ W[ 9]; + W1[ 6] = W[ 6] ^ W[10]; + W1[ 7] = W[ 7] ^ W[11]; + W1[ 8] = 
W[ 8] ^ W[12]; + W1[ 9] = W[ 9] ^ W[13]; + W1[10] = W[10] ^ W[14]; + W1[11] = W[11] ^ W[15]; + W1[12] = W[12] ^ W[16]; + W1[13] = W[13] ^ W[17]; + W1[14] = W[14] ^ W[18]; + W1[15] = W[15] ^ W[19]; + W1[16] = W[16] ^ W[20]; + W1[17] = W[17] ^ W[21]; + W1[18] = W[18] ^ W[22]; + W1[19] = W[19] ^ W[23]; + W1[20] = W[20] ^ W[24]; + W1[21] = W[21] ^ W[25]; + W1[22] = W[22] ^ W[26]; + W1[23] = W[23] ^ W[27]; + W1[24] = W[24] ^ W[28]; + W1[25] = W[25] ^ W[29]; + W1[26] = W[26] ^ W[30]; + W1[27] = W[27] ^ W[31]; + W1[28] = W[28] ^ W[32]; + W1[29] = W[29] ^ W[33]; + W1[30] = W[30] ^ W[34]; + W1[31] = W[31] ^ W[35]; + W1[32] = W[32] ^ W[36]; + W1[33] = W[33] ^ W[37]; + W1[34] = W[34] ^ W[38]; + W1[35] = W[35] ^ W[39]; + W1[36] = W[36] ^ W[40]; + W1[37] = W[37] ^ W[41]; + W1[38] = W[38] ^ W[42]; + W1[39] = W[39] ^ W[43]; + W1[40] = W[40] ^ W[44]; + W1[41] = W[41] ^ W[45]; + W1[42] = W[42] ^ W[46]; + W1[43] = W[43] ^ W[47]; + W1[44] = W[44] ^ W[48]; + W1[45] = W[45] ^ W[49]; + W1[46] = W[46] ^ W[50]; + W1[47] = W[47] ^ W[51]; + W1[48] = W[48] ^ W[52]; + W1[49] = W[49] ^ W[53]; + W1[50] = W[50] ^ W[54]; + W1[51] = W[51] ^ W[55]; + W1[52] = W[52] ^ W[56]; + W1[53] = W[53] ^ W[57]; + W1[54] = W[54] ^ W[58]; + W1[55] = W[55] ^ W[59]; + W1[56] = W[56] ^ W[60]; + W1[57] = W[57] ^ W[61]; + W1[58] = W[58] ^ W[62]; + W1[59] = W[59] ^ W[63]; + W1[60] = W[60] ^ W[64]; + W1[61] = W[61] ^ W[65]; + W1[62] = W[62] ^ W[66]; + W1[63] = W[63] ^ W[67]; + + SM3_CF0( 0); + SM3_CF0( 1); + SM3_CF0( 2); + SM3_CF0( 3); + SM3_CF0( 4); + SM3_CF0( 5); + SM3_CF0( 6); + SM3_CF0( 7); + SM3_CF0( 8); + SM3_CF0( 9); + SM3_CF0(10); + SM3_CF0(11); + SM3_CF0(12); + SM3_CF0(13); + SM3_CF0(14); + SM3_CF0(15); + + SM3_CF1(16); + SM3_CF1(17); + SM3_CF1(18); + SM3_CF1(19); + SM3_CF1(20); + SM3_CF1(21); + SM3_CF1(22); + SM3_CF1(23); + SM3_CF1(24); + SM3_CF1(25); + SM3_CF1(26); + SM3_CF1(27); + SM3_CF1(28); + SM3_CF1(29); + SM3_CF1(30); + SM3_CF1(31); + SM3_CF1(32); + SM3_CF1(33); + SM3_CF1(34); + SM3_CF1(35); + 
SM3_CF1(36); + SM3_CF1(37); + SM3_CF1(38); + SM3_CF1(39); + SM3_CF1(40); + SM3_CF1(41); + SM3_CF1(42); + SM3_CF1(43); + SM3_CF1(44); + SM3_CF1(45); + SM3_CF1(46); + SM3_CF1(47); + SM3_CF1(48); + SM3_CF1(49); + SM3_CF1(50); + SM3_CF1(51); + SM3_CF1(52); + SM3_CF1(53); + SM3_CF1(54); + SM3_CF1(55); + SM3_CF1(56); + SM3_CF1(57); + SM3_CF1(58); + SM3_CF1(59); + SM3_CF1(60); + SM3_CF1(61); + SM3_CF1(62); + SM3_CF1(63); - m_digest[0] ^= A; - m_digest[1] ^= B; - m_digest[2] ^= C; - m_digest[3] ^= D; - m_digest[4] ^= E; - m_digest[5] ^= F; - m_digest[6] ^= G; - m_digest[7] ^= H; + A = (m_digest[0] ^= A); + B = (m_digest[1] ^= B); + C = (m_digest[2] ^= C); + D = (m_digest[3] ^= D); + E = (m_digest[4] ^= E); + F = (m_digest[5] ^= F); + G = (m_digest[6] ^= G); + H = (m_digest[7] ^= H); input += hash_block_size(); } @@ -143,7 +325,6 @@ void SM3::copy_out(uint8_t output[]) void SM3::clear() { MDx_HashFunction::clear(); - zeroise(m_M); std::copy(std::begin(SM3_IV), std::end(SM3_IV), m_digest.begin()); } diff --git a/src/lib/hash/sm3/sm3.h b/src/lib/hash/sm3/sm3.h index 896482332..c5dacdf3a 100644 --- a/src/lib/hash/sm3/sm3.h +++ b/src/lib/hash/sm3/sm3.h @@ -29,7 +29,7 @@ class BOTAN_DLL SM3 final : public MDx_HashFunction void clear() override; - SM3() : MDx_HashFunction(SM3_BLOCK_BYTES, true, true), m_M(16), m_digest(SM3_DIGEST_BYTES) + SM3() : MDx_HashFunction(SM3_BLOCK_BYTES, true, true), m_digest(SM3_DIGEST_BYTES) { clear(); } protected: void compress_n(const uint8_t[], size_t blocks) override; @@ -37,11 +37,6 @@ class BOTAN_DLL SM3 final : public MDx_HashFunction private: /** - * The message buffer - */ - secure_vector<uint32_t> m_M; - - /** * The digest value */ secure_vector<uint32_t> m_digest; |