aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/hash
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2018-05-27 11:38:30 -0400
committerJack Lloyd <[email protected]>2018-05-27 12:01:55 -0400
commit8df48e74987fb2ab3c97adb2b48c2cafc0ea381b (patch)
tree624df6b225783f611e893a16ffc1fe23f730fd2d /src/lib/hash
parentf256e603ab93e2587fe0e38bcf06c437fcd41abf (diff)
Add BMI2-specific SHA-256
Currently just a copy of the baseline compression function, but compiled with BMI2 flags. On Skylake improves performance by about 40%.
Diffstat (limited to 'src/lib/hash')
-rw-r--r--src/lib/hash/sha2_32/sha2_32.cpp14
-rw-r--r--src/lib/hash/sha2_32/sha2_32.h6
-rw-r--r--src/lib/hash/sha2_32/sha2_32_bmi2/info.txt10
-rw-r--r--src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp139
4 files changed, 167 insertions, 2 deletions
diff --git a/src/lib/hash/sha2_32/sha2_32.cpp b/src/lib/hash/sha2_32/sha2_32.cpp
index 0710747d0..99cc2a6ff 100644
--- a/src/lib/hash/sha2_32/sha2_32.cpp
+++ b/src/lib/hash/sha2_32/sha2_32.cpp
@@ -51,6 +51,13 @@ void SHA_256::compress_digest(secure_vector<uint32_t>& digest,
}
#endif
+#if defined(BOTAN_HAS_SHA2_32_X86_BMI2)
+ if(CPUID::has_bmi2())
+ {
+ return SHA_256::compress_digest_x86_bmi2(digest, input, blocks);
+ }
+#endif
+
#if defined(BOTAN_HAS_SHA2_32_ARMV8)
if(CPUID::has_arm_sha2())
{
@@ -59,8 +66,8 @@ void SHA_256::compress_digest(secure_vector<uint32_t>& digest,
#endif
uint32_t A = digest[0], B = digest[1], C = digest[2],
- D = digest[3], E = digest[4], F = digest[5],
- G = digest[6], H = digest[7];
+ D = digest[3], E = digest[4], F = digest[5],
+ G = digest[6], H = digest[7];
for(size_t i = 0; i != blocks; ++i)
{
@@ -97,6 +104,7 @@ void SHA_256::compress_digest(secure_vector<uint32_t>& digest,
SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
+
SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
@@ -113,6 +121,7 @@ void SHA_256::compress_digest(secure_vector<uint32_t>& digest,
SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
+
SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
@@ -129,6 +138,7 @@ void SHA_256::compress_digest(secure_vector<uint32_t>& digest,
SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
+
SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
diff --git a/src/lib/hash/sha2_32/sha2_32.h b/src/lib/hash/sha2_32/sha2_32.h
index 6a0d87ac7..bc883f77a 100644
--- a/src/lib/hash/sha2_32/sha2_32.h
+++ b/src/lib/hash/sha2_32/sha2_32.h
@@ -66,6 +66,12 @@ class BOTAN_PUBLIC_API(2,0) SHA_256 final : public MDx_HashFunction
size_t blocks);
#endif
+#if defined(BOTAN_HAS_SHA2_32_X86_BMI2)
+ static void compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
+ const uint8_t input[],
+ size_t blocks);
+#endif
+
#if defined(BOTAN_HAS_SHA2_32_X86)
static void compress_digest_x86(secure_vector<uint32_t>& digest,
const uint8_t input[],
diff --git a/src/lib/hash/sha2_32/sha2_32_bmi2/info.txt b/src/lib/hash/sha2_32/sha2_32_bmi2/info.txt
new file mode 100644
index 000000000..dc7349716
--- /dev/null
+++ b/src/lib/hash/sha2_32/sha2_32_bmi2/info.txt
@@ -0,0 +1,10 @@
+<defines>
+SHA2_32_X86_BMI2 -> 20180526
+</defines>
+
+need_isa bmi2
+
+<cc>
+gcc
+clang
+</cc>
diff --git a/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp b/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
new file mode 100644
index 000000000..12ceb11c4
--- /dev/null
+++ b/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
@@ -0,0 +1,139 @@
+/*
+* (C) 2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/sha2_32.h>
+#include <botan/rotate.h>
+
+namespace Botan {
+
+/*
+Your eyes do not decieve you; this is currently just a copy of the
+baseline SHA-256 implementation. Because we compile it with BMI2
+flags, GCC and Clang use the BMI2 instructions without further help.
+
+Likely instruction scheduling could be improved by using inline asm.
+*/
+
+#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do { \
+ uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); \
+ uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); \
+ uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ (M2 >> 10); \
+ uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ (M4 >> 3); \
+ H += magic + E_rho + ((E & F) ^ (~E & G)) + M1; \
+ D += H; \
+ H += A_rho + ((A & B) | ((A | B) & C)); \
+ M1 += M2_sigma + M3 + M4_sigma; \
+ } while(0);
+
+void SHA_256::compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
+ const uint8_t input[],
+ size_t blocks)
+ {
+ uint32_t A = digest[0], B = digest[1], C = digest[2],
+ D = digest[3], E = digest[4], F = digest[5],
+ G = digest[6], H = digest[7];
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint32_t W00 = load_be<uint32_t>(input, 0);
+ uint32_t W01 = load_be<uint32_t>(input, 1);
+ uint32_t W02 = load_be<uint32_t>(input, 2);
+ uint32_t W03 = load_be<uint32_t>(input, 3);
+ uint32_t W04 = load_be<uint32_t>(input, 4);
+ uint32_t W05 = load_be<uint32_t>(input, 5);
+ uint32_t W06 = load_be<uint32_t>(input, 6);
+ uint32_t W07 = load_be<uint32_t>(input, 7);
+ uint32_t W08 = load_be<uint32_t>(input, 8);
+ uint32_t W09 = load_be<uint32_t>(input, 9);
+ uint32_t W10 = load_be<uint32_t>(input, 10);
+ uint32_t W11 = load_be<uint32_t>(input, 11);
+ uint32_t W12 = load_be<uint32_t>(input, 12);
+ uint32_t W13 = load_be<uint32_t>(input, 13);
+ uint32_t W14 = load_be<uint32_t>(input, 14);
+ uint32_t W15 = load_be<uint32_t>(input, 15);
+
+ SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x71374491);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCF);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA5);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25B);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5);
+ SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B01);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
+
+ SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DC);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA);
+ SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C8);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF3);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
+
+ SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D13);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A7354);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C85);
+ SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A1);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664B);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A3);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
+
+ SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4A);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3);
+ SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE);
+ SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F);
+ SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814);
+ SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC70208);
+ SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA);
+ SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEB);
+ SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7);
+ SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2);
+
+ A = (digest[0] += A);
+ B = (digest[1] += B);
+ C = (digest[2] += C);
+ D = (digest[3] += D);
+ E = (digest[4] += E);
+ F = (digest[5] += F);
+ G = (digest[6] += G);
+ H = (digest[7] += H);
+
+ input += 64;
+ }
+ }
+
+}