about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jack Lloyd <[email protected]> 2019-02-21 10:04:28 -0500
committer Jack Lloyd <[email protected]> 2019-02-21 10:04:28 -0500
commit 331551087a341f0fdd758045c3f37d8d96a5d049 (patch)
tree 13dffeb134d62fd8d25fe4187ef742974756f451
parent 86126a29a356cfe7d9132ddcfba376ccc460d8d3 (diff)
Unroll SHA-3
Improves performance by about 10-12%
-rw-r--r-- src/lib/hash/sha3/sha3.cpp 145
-rw-r--r-- src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp 145
-rw-r--r-- src/tests/data/hash/sha2_64.vec 2
-rw-r--r-- src/tests/data/hash/sha3.vec 2
4 files changed, 160 insertions, 134 deletions
diff --git a/src/lib/hash/sha3/sha3.cpp b/src/lib/hash/sha3/sha3.cpp
index 837768f85..d01d33635 100644
--- a/src/lib/hash/sha3/sha3.cpp
+++ b/src/lib/hash/sha3/sha3.cpp
@@ -12,6 +12,80 @@
namespace Botan {
+namespace {
+// Computes one SHA-3 round: reads the 25-word state A and writes the next state to T, XORing the round constant RC into T[0].
+inline void SHA3_round(uint64_t T[25], const uint64_t A[25], uint64_t RC)
+ { // T must be a different array from A: T[0..4] are stored before the remaining words of A are read
+ const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20]; // C0..C4: XOR of the five words whose index is k mod 5 (theta column parities)
+ const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
+ const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
+ const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
+ const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
+ // theta, continued: each D combines one parity rotated left by 1 with another parity
+ const uint64_t D0 = rotl<1>(C0) ^ C3;
+ const uint64_t D1 = rotl<1>(C1) ^ C4;
+ const uint64_t D2 = rotl<1>(C2) ^ C0;
+ const uint64_t D3 = rotl<1>(C3) ^ C1;
+ const uint64_t D4 = rotl<1>(C4) ^ C2;
+ // Each group of five below XORs a D into a word of A, rotates it (rho; B00 is left unrotated) and names it by its new position (pi); chi then writes five words of T
+ const uint64_t B00 = A[ 0] ^ D1;
+ const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
+ const uint64_t B02 = rotl<43>(A[12] ^ D3);
+ const uint64_t B03 = rotl<21>(A[18] ^ D4);
+ const uint64_t B04 = rotl<14>(A[24] ^ D0);
+ T[ 0] = B00 ^ (~B01 & B02) ^ RC; // iota: the round constant is applied to T[0] only
+ T[ 1] = B01 ^ (~B02 & B03);
+ T[ 2] = B02 ^ (~B03 & B04);
+ T[ 3] = B03 ^ (~B04 & B00);
+ T[ 4] = B04 ^ (~B00 & B01);
+ // Output words T[5..9]
+ const uint64_t B05 = rotl<28>(A[ 3] ^ D4);
+ const uint64_t B06 = rotl<20>(A[ 9] ^ D0);
+ const uint64_t B07 = rotl< 3>(A[10] ^ D1);
+ const uint64_t B08 = rotl<45>(A[16] ^ D2);
+ const uint64_t B09 = rotl<61>(A[22] ^ D3);
+ T[ 5] = B05 ^ (~B06 & B07);
+ T[ 6] = B06 ^ (~B07 & B08);
+ T[ 7] = B07 ^ (~B08 & B09);
+ T[ 8] = B08 ^ (~B09 & B05);
+ T[ 9] = B09 ^ (~B05 & B06);
+ // Output words T[10..14]
+ const uint64_t B10 = rotl< 1>(A[ 1] ^ D2);
+ const uint64_t B11 = rotl< 6>(A[ 7] ^ D3);
+ const uint64_t B12 = rotl<25>(A[13] ^ D4);
+ const uint64_t B13 = rotl< 8>(A[19] ^ D0);
+ const uint64_t B14 = rotl<18>(A[20] ^ D1);
+ T[10] = B10 ^ (~B11 & B12);
+ T[11] = B11 ^ (~B12 & B13);
+ T[12] = B12 ^ (~B13 & B14);
+ T[13] = B13 ^ (~B14 & B10);
+ T[14] = B14 ^ (~B10 & B11);
+ // Output words T[15..19]
+ const uint64_t B15 = rotl<27>(A[ 4] ^ D0);
+ const uint64_t B16 = rotl<36>(A[ 5] ^ D1);
+ const uint64_t B17 = rotl<10>(A[11] ^ D2);
+ const uint64_t B18 = rotl<15>(A[17] ^ D3);
+ const uint64_t B19 = rotl<56>(A[23] ^ D4);
+ T[15] = B15 ^ (~B16 & B17);
+ T[16] = B16 ^ (~B17 & B18);
+ T[17] = B17 ^ (~B18 & B19);
+ T[18] = B18 ^ (~B19 & B15);
+ T[19] = B19 ^ (~B15 & B16);
+ // Output words T[20..24]
+ const uint64_t B20 = rotl<62>(A[ 2] ^ D3);
+ const uint64_t B21 = rotl<55>(A[ 8] ^ D4);
+ const uint64_t B22 = rotl<39>(A[14] ^ D0);
+ const uint64_t B23 = rotl<41>(A[15] ^ D1);
+ const uint64_t B24 = rotl< 2>(A[21] ^ D2);
+ T[20] = B20 ^ (~B21 & B22);
+ T[21] = B21 ^ (~B22 & B23);
+ T[22] = B22 ^ (~B23 & B24);
+ T[23] = B23 ^ (~B24 & B20);
+ T[24] = B24 ^ (~B20 & B21);
+ }
+
+}
+
//static
void SHA_3::permute(uint64_t A[25])
{
@@ -33,73 +107,12 @@ void SHA_3::permute(uint64_t A[25])
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
- for(size_t i = 0; i != 24; ++i)
+ uint64_t T[25]; // scratch state: successive rounds alternate between A and T
+
+ for(size_t i = 0; i != 24; i += 2) // two rounds per iteration, so 12 iterations cover rounds 0..23
{
- const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
- const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
- const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
- const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
- const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
-
- const uint64_t D0 = rotl<1>(C0) ^ C3;
- const uint64_t D1 = rotl<1>(C1) ^ C4;
- const uint64_t D2 = rotl<1>(C2) ^ C0;
- const uint64_t D3 = rotl<1>(C3) ^ C1;
- const uint64_t D4 = rotl<1>(C4) ^ C2;
-
- const uint64_t B00 = A[ 0] ^ D1;
- const uint64_t B10 = rotl< 1>(A[ 1] ^ D2);
- const uint64_t B20 = rotl<62>(A[ 2] ^ D3);
- const uint64_t B05 = rotl<28>(A[ 3] ^ D4);
- const uint64_t B15 = rotl<27>(A[ 4] ^ D0);
- const uint64_t B16 = rotl<36>(A[ 5] ^ D1);
- const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
- const uint64_t B11 = rotl< 6>(A[ 7] ^ D3);
- const uint64_t B21 = rotl<55>(A[ 8] ^ D4);
- const uint64_t B06 = rotl<20>(A[ 9] ^ D0);
- const uint64_t B07 = rotl< 3>(A[10] ^ D1);
- const uint64_t B17 = rotl<10>(A[11] ^ D2);
- const uint64_t B02 = rotl<43>(A[12] ^ D3);
- const uint64_t B12 = rotl<25>(A[13] ^ D4);
- const uint64_t B22 = rotl<39>(A[14] ^ D0);
- const uint64_t B23 = rotl<41>(A[15] ^ D1);
- const uint64_t B08 = rotl<45>(A[16] ^ D2);
- const uint64_t B18 = rotl<15>(A[17] ^ D3);
- const uint64_t B03 = rotl<21>(A[18] ^ D4);
- const uint64_t B13 = rotl< 8>(A[19] ^ D0);
- const uint64_t B14 = rotl<18>(A[20] ^ D1);
- const uint64_t B24 = rotl< 2>(A[21] ^ D2);
- const uint64_t B09 = rotl<61>(A[22] ^ D3);
- const uint64_t B19 = rotl<56>(A[23] ^ D4);
- const uint64_t B04 = rotl<14>(A[24] ^ D0);
-
- A[ 0] = B00 ^ (~B01 & B02);
- A[ 1] = B01 ^ (~B02 & B03);
- A[ 2] = B02 ^ (~B03 & B04);
- A[ 3] = B03 ^ (~B04 & B00);
- A[ 4] = B04 ^ (~B00 & B01);
- A[ 5] = B05 ^ (~B06 & B07);
- A[ 6] = B06 ^ (~B07 & B08);
- A[ 7] = B07 ^ (~B08 & B09);
- A[ 8] = B08 ^ (~B09 & B05);
- A[ 9] = B09 ^ (~B05 & B06);
- A[10] = B10 ^ (~B11 & B12);
- A[11] = B11 ^ (~B12 & B13);
- A[12] = B12 ^ (~B13 & B14);
- A[13] = B13 ^ (~B14 & B10);
- A[14] = B14 ^ (~B10 & B11);
- A[15] = B15 ^ (~B16 & B17);
- A[16] = B16 ^ (~B17 & B18);
- A[17] = B17 ^ (~B18 & B19);
- A[18] = B18 ^ (~B19 & B15);
- A[19] = B19 ^ (~B15 & B16);
- A[20] = B20 ^ (~B21 & B22);
- A[21] = B21 ^ (~B22 & B23);
- A[22] = B22 ^ (~B23 & B24);
- A[23] = B23 ^ (~B24 & B20);
- A[24] = B24 ^ (~B20 & B21);
-
- A[0] ^= RC[i];
+ SHA3_round(T, A, RC[i+0]); // even-numbered round: A -> T
+ SHA3_round(A, T, RC[i+1]); // odd-numbered round: T -> A, so the result ends up back in A
}
}
diff --git a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
index f2161b9ba..420a90f9f 100644
--- a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
+++ b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
@@ -10,6 +10,80 @@
namespace Botan {
+namespace {
+// Computes one SHA-3 round: reads the 25-word state A and writes the next state to T, XORing the round constant RC into T[0].
+inline void SHA3_round(uint64_t T[25], const uint64_t A[25], uint64_t RC)
+ { // T must be a different array from A: T[0..4] are stored before the remaining words of A are read
+ const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20]; // C0..C4: XOR of the five words whose index is k mod 5 (theta column parities)
+ const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
+ const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
+ const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
+ const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
+ // theta, continued: each D combines one parity rotated left by 1 with another parity
+ const uint64_t D0 = rotl<1>(C0) ^ C3;
+ const uint64_t D1 = rotl<1>(C1) ^ C4;
+ const uint64_t D2 = rotl<1>(C2) ^ C0;
+ const uint64_t D3 = rotl<1>(C3) ^ C1;
+ const uint64_t D4 = rotl<1>(C4) ^ C2;
+ // Each group of five below XORs a D into a word of A, rotates it (rho; B00 is left unrotated) and names it by its new position (pi); chi then writes five words of T
+ const uint64_t B00 = A[ 0] ^ D1;
+ const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
+ const uint64_t B02 = rotl<43>(A[12] ^ D3);
+ const uint64_t B03 = rotl<21>(A[18] ^ D4);
+ const uint64_t B04 = rotl<14>(A[24] ^ D0);
+ T[ 0] = B00 ^ (~B01 & B02) ^ RC; // iota: the round constant is applied to T[0] only
+ T[ 1] = B01 ^ (~B02 & B03);
+ T[ 2] = B02 ^ (~B03 & B04);
+ T[ 3] = B03 ^ (~B04 & B00);
+ T[ 4] = B04 ^ (~B00 & B01);
+ // Output words T[5..9]
+ const uint64_t B05 = rotl<28>(A[ 3] ^ D4);
+ const uint64_t B06 = rotl<20>(A[ 9] ^ D0);
+ const uint64_t B07 = rotl< 3>(A[10] ^ D1);
+ const uint64_t B08 = rotl<45>(A[16] ^ D2);
+ const uint64_t B09 = rotl<61>(A[22] ^ D3);
+ T[ 5] = B05 ^ (~B06 & B07);
+ T[ 6] = B06 ^ (~B07 & B08);
+ T[ 7] = B07 ^ (~B08 & B09);
+ T[ 8] = B08 ^ (~B09 & B05);
+ T[ 9] = B09 ^ (~B05 & B06);
+ // Output words T[10..14]
+ const uint64_t B10 = rotl< 1>(A[ 1] ^ D2);
+ const uint64_t B11 = rotl< 6>(A[ 7] ^ D3);
+ const uint64_t B12 = rotl<25>(A[13] ^ D4);
+ const uint64_t B13 = rotl< 8>(A[19] ^ D0);
+ const uint64_t B14 = rotl<18>(A[20] ^ D1);
+ T[10] = B10 ^ (~B11 & B12);
+ T[11] = B11 ^ (~B12 & B13);
+ T[12] = B12 ^ (~B13 & B14);
+ T[13] = B13 ^ (~B14 & B10);
+ T[14] = B14 ^ (~B10 & B11);
+ // Output words T[15..19]
+ const uint64_t B15 = rotl<27>(A[ 4] ^ D0);
+ const uint64_t B16 = rotl<36>(A[ 5] ^ D1);
+ const uint64_t B17 = rotl<10>(A[11] ^ D2);
+ const uint64_t B18 = rotl<15>(A[17] ^ D3);
+ const uint64_t B19 = rotl<56>(A[23] ^ D4);
+ T[15] = B15 ^ (~B16 & B17);
+ T[16] = B16 ^ (~B17 & B18);
+ T[17] = B17 ^ (~B18 & B19);
+ T[18] = B18 ^ (~B19 & B15);
+ T[19] = B19 ^ (~B15 & B16);
+ // Output words T[20..24]
+ const uint64_t B20 = rotl<62>(A[ 2] ^ D3);
+ const uint64_t B21 = rotl<55>(A[ 8] ^ D4);
+ const uint64_t B22 = rotl<39>(A[14] ^ D0);
+ const uint64_t B23 = rotl<41>(A[15] ^ D1);
+ const uint64_t B24 = rotl< 2>(A[21] ^ D2);
+ T[20] = B20 ^ (~B21 & B22);
+ T[21] = B21 ^ (~B22 & B23);
+ T[22] = B22 ^ (~B23 & B24);
+ T[23] = B23 ^ (~B24 & B20);
+ T[24] = B24 ^ (~B20 & B21);
+ }
+
+}
+
void SHA_3::permute_bmi2(uint64_t A[25])
{
static const uint64_t RC[24] = {
@@ -23,73 +97,12 @@ void SHA_3::permute_bmi2(uint64_t A[25])
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
- for(size_t i = 0; i != 24; ++i)
- {
- const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
- const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
- const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
- const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
- const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
-
- const uint64_t D0 = rotl<1>(C0) ^ C3;
- const uint64_t D1 = rotl<1>(C1) ^ C4;
- const uint64_t D2 = rotl<1>(C2) ^ C0;
- const uint64_t D3 = rotl<1>(C3) ^ C1;
- const uint64_t D4 = rotl<1>(C4) ^ C2;
-
- const uint64_t B00 = A[ 0] ^ D1;
- const uint64_t B10 = rotl< 1>(A[ 1] ^ D2);
- const uint64_t B20 = rotl<62>(A[ 2] ^ D3);
- const uint64_t B05 = rotl<28>(A[ 3] ^ D4);
- const uint64_t B15 = rotl<27>(A[ 4] ^ D0);
- const uint64_t B16 = rotl<36>(A[ 5] ^ D1);
- const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
- const uint64_t B11 = rotl< 6>(A[ 7] ^ D3);
- const uint64_t B21 = rotl<55>(A[ 8] ^ D4);
- const uint64_t B06 = rotl<20>(A[ 9] ^ D0);
- const uint64_t B07 = rotl< 3>(A[10] ^ D1);
- const uint64_t B17 = rotl<10>(A[11] ^ D2);
- const uint64_t B02 = rotl<43>(A[12] ^ D3);
- const uint64_t B12 = rotl<25>(A[13] ^ D4);
- const uint64_t B22 = rotl<39>(A[14] ^ D0);
- const uint64_t B23 = rotl<41>(A[15] ^ D1);
- const uint64_t B08 = rotl<45>(A[16] ^ D2);
- const uint64_t B18 = rotl<15>(A[17] ^ D3);
- const uint64_t B03 = rotl<21>(A[18] ^ D4);
- const uint64_t B13 = rotl< 8>(A[19] ^ D0);
- const uint64_t B14 = rotl<18>(A[20] ^ D1);
- const uint64_t B24 = rotl< 2>(A[21] ^ D2);
- const uint64_t B09 = rotl<61>(A[22] ^ D3);
- const uint64_t B19 = rotl<56>(A[23] ^ D4);
- const uint64_t B04 = rotl<14>(A[24] ^ D0);
+ uint64_t T[25]; // scratch state: successive rounds alternate between A and T
- A[ 0] = B00 ^ (~B01 & B02);
- A[ 1] = B01 ^ (~B02 & B03);
- A[ 2] = B02 ^ (~B03 & B04);
- A[ 3] = B03 ^ (~B04 & B00);
- A[ 4] = B04 ^ (~B00 & B01);
- A[ 5] = B05 ^ (~B06 & B07);
- A[ 6] = B06 ^ (~B07 & B08);
- A[ 7] = B07 ^ (~B08 & B09);
- A[ 8] = B08 ^ (~B09 & B05);
- A[ 9] = B09 ^ (~B05 & B06);
- A[10] = B10 ^ (~B11 & B12);
- A[11] = B11 ^ (~B12 & B13);
- A[12] = B12 ^ (~B13 & B14);
- A[13] = B13 ^ (~B14 & B10);
- A[14] = B14 ^ (~B10 & B11);
- A[15] = B15 ^ (~B16 & B17);
- A[16] = B16 ^ (~B17 & B18);
- A[17] = B17 ^ (~B18 & B19);
- A[18] = B18 ^ (~B19 & B15);
- A[19] = B19 ^ (~B15 & B16);
- A[20] = B20 ^ (~B21 & B22);
- A[21] = B21 ^ (~B22 & B23);
- A[22] = B22 ^ (~B23 & B24);
- A[23] = B23 ^ (~B24 & B20);
- A[24] = B24 ^ (~B20 & B21);
-
- A[0] ^= RC[i];
+ for(size_t i = 0; i != 24; i += 2) // two rounds per iteration, so 12 iterations consume all 24 RC entries
+ {
+ SHA3_round(T, A, RC[i+0]); // even-numbered round: A -> T
+ SHA3_round(A, T, RC[i+1]); // odd-numbered round: T -> A, so the result ends up back in A
}
}
diff --git a/src/tests/data/hash/sha2_64.vec b/src/tests/data/hash/sha2_64.vec
index 85eae0867..f52af36ce 100644
--- a/src/tests/data/hash/sha2_64.vec
+++ b/src/tests/data/hash/sha2_64.vec
@@ -1,4 +1,4 @@
-#cpuid bmi2
+#test cpuid bmi2
[SHA-384]
In =
diff --git a/src/tests/data/hash/sha3.vec b/src/tests/data/hash/sha3.vec
index 0b36b6166..9b423fe71 100644
--- a/src/tests/data/hash/sha3.vec
+++ b/src/tests/data/hash/sha3.vec
@@ -1,4 +1,4 @@
-#cpuid bmi2
+#test cpuid bmi2
[SHA-3(224)]
In =