about summary refs log tree commit diff stats
path: root/src/lib
diff options
context:
space:
mode:
author Jack Lloyd <[email protected]> 2019-01-18 11:18:09 -0500
committer Jack Lloyd <[email protected]> 2019-01-18 11:18:09 -0500
commit 4cbd51eb23a4122659a3afcee35e4bb79f4ff803 (patch)
tree 60512b1ee2f1ec9ce7bb1007418b4767017de166 /src/lib
parent 9ca22335edcb9800158f4691de20fa5d0f9cc849 (diff)
Add BMI2 variants for SHA-512 and SHA-3
Both about 33% faster on Skylake
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/hash/sha2_32/sha2_32.cpp 40
-rw-r--r-- src/lib/hash/sha2_32/sha2_32.h 4
-rw-r--r-- src/lib/hash/sha2_64/sha2_64.cpp 58
-rw-r--r-- src/lib/hash/sha2_64/sha2_64.h 18
-rw-r--r-- src/lib/hash/sha2_64/sha2_64_bmi2/info.txt 15
-rw-r--r-- src/lib/hash/sha2_64/sha2_64_bmi2/sha2_64_bmi2.cpp 152
-rw-r--r-- src/lib/hash/sha3/sha3.cpp 20
-rw-r--r-- src/lib/hash/sha3/sha3.h 5
-rw-r--r-- src/lib/hash/sha3/sha3_bmi2/info.txt 15
-rw-r--r-- src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp 96
10 files changed, 413 insertions, 10 deletions
diff --git a/src/lib/hash/sha2_32/sha2_32.cpp b/src/lib/hash/sha2_32/sha2_32.cpp
index 9f1cfd2dc..1f1ab69f4 100644
--- a/src/lib/hash/sha2_32/sha2_32.cpp
+++ b/src/lib/hash/sha2_32/sha2_32.cpp
@@ -12,6 +12,36 @@
namespace Botan {
+namespace {
+
+std::string sha256_provider() // Returns the name of the first provider whose check below succeeds
+ {
+#if defined(BOTAN_HAS_SHA2_32_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return "shani";
+ }
+#endif
+ // Each candidate is compiled in only when its BOTAN_HAS_* macro is defined
+#if defined(BOTAN_HAS_SHA2_32_X86_BMI2)
+ if(CPUID::has_bmi2())
+ {
+ return "bmi2";
+ }
+#endif
+
+#if defined(BOTAN_HAS_SHA2_32_ARMV8)
+ if(CPUID::has_arm_sha2())
+ {
+ return "armv8";
+ }
+#endif
+ // Reached when no compiled-in check above succeeded
+ return "base";
+ }
+
+}
+
std::unique_ptr<HashFunction> SHA_224::copy_state() const
{
return std::unique_ptr<HashFunction>(new SHA_224(*this));
@@ -170,6 +200,16 @@ void SHA_256::compress_digest(secure_vector<uint32_t>& digest,
}
}
+std::string SHA_224::provider() const // Reports the provider name computed by sha256_provider()
+ {
+ return sha256_provider();
+ }
+
+std::string SHA_256::provider() const // Reports the provider name computed by sha256_provider()
+ {
+ return sha256_provider();
+ }
+
/*
* SHA-224 compression function
*/
diff --git a/src/lib/hash/sha2_32/sha2_32.h b/src/lib/hash/sha2_32/sha2_32.h
index bc883f77a..7a3fce9d2 100644
--- a/src/lib/hash/sha2_32/sha2_32.h
+++ b/src/lib/hash/sha2_32/sha2_32.h
@@ -26,6 +26,8 @@ class BOTAN_PUBLIC_API(2,0) SHA_224 final : public MDx_HashFunction
void clear() override;
+ std::string provider() const override;
+
SHA_224() : MDx_HashFunction(64, true, true), m_digest(8)
{ clear(); }
private:
@@ -48,6 +50,8 @@ class BOTAN_PUBLIC_API(2,0) SHA_256 final : public MDx_HashFunction
void clear() override;
+ std::string provider() const override;
+
SHA_256() : MDx_HashFunction(64, true, true), m_digest(8)
{ clear(); }
diff --git a/src/lib/hash/sha2_64/sha2_64.cpp b/src/lib/hash/sha2_64/sha2_64.cpp
index e614e6b28..e554b3aa5 100644
--- a/src/lib/hash/sha2_64/sha2_64.cpp
+++ b/src/lib/hash/sha2_64/sha2_64.cpp
@@ -7,9 +7,26 @@
#include <botan/sha2_64.h>
#include <botan/rotate.h>
+#include <botan/cpuid.h>
namespace Botan {
+namespace {
+
+std::string sha512_provider() // Returns "bmi2" when BOTAN_HAS_SHA2_64_BMI2 is defined and CPUID reports BMI2
+ {
+#if defined(BOTAN_HAS_SHA2_64_BMI2)
+ if(CPUID::has_bmi2())
+ {
+ return "bmi2";
+ }
+#endif
+ // Otherwise report the portable implementation
+ return "base";
+ }
+
+}
+
std::unique_ptr<HashFunction> SHA_384::copy_state() const
{
return std::unique_ptr<HashFunction>(new SHA_384(*this));
@@ -25,8 +42,6 @@ std::unique_ptr<HashFunction> SHA_512_256::copy_state() const
return std::unique_ptr<HashFunction>(new SHA_512_256(*this));
}
-namespace {
-
/*
* SHA-512 F1 Function
*
@@ -48,12 +63,20 @@ namespace {
/*
* SHA-{384,512} Compression Function
*/
-void SHA64_compress(secure_vector<uint64_t>& digest,
- const uint8_t input[], size_t blocks)
+//static
+void SHA_512::compress_digest(secure_vector<uint64_t>& digest,
+ const uint8_t input[], size_t blocks)
{
+#if defined(BOTAN_HAS_SHA2_64_BMI2)
+ if(CPUID::has_bmi2())
+ {
+ return compress_digest_bmi2(digest, input, blocks);
+ }
+#endif
+
uint64_t A = digest[0], B = digest[1], C = digest[2],
- D = digest[3], E = digest[4], F = digest[5],
- G = digest[6], H = digest[7];
+ D = digest[3], E = digest[4], F = digest[5],
+ G = digest[6], H = digest[7];
for(size_t i = 0; i != blocks; ++i)
{
@@ -168,21 +191,36 @@ void SHA64_compress(secure_vector<uint64_t>& digest,
}
}
-}
+#undef SHA2_64_F
+
+std::string SHA_512_256::provider() const // Reports the provider name computed by sha512_provider()
+ {
+ return sha512_provider();
+ }
+
+std::string SHA_384::provider() const // Reports the provider name computed by sha512_provider()
+ {
+ return sha512_provider();
+ }
+
+std::string SHA_512::provider() const // Reports the provider name computed by sha512_provider()
+ {
+ return sha512_provider();
+ }
void SHA_512_256::compress_n(const uint8_t input[], size_t blocks)
{
- SHA64_compress(m_digest, input, blocks);
+ SHA_512::compress_digest(m_digest, input, blocks);
}
void SHA_384::compress_n(const uint8_t input[], size_t blocks)
{
- SHA64_compress(m_digest, input, blocks);
+ SHA_512::compress_digest(m_digest, input, blocks);
}
void SHA_512::compress_n(const uint8_t input[], size_t blocks)
{
- SHA64_compress(m_digest, input, blocks);
+ SHA_512::compress_digest(m_digest, input, blocks);
}
void SHA_512_256::copy_out(uint8_t output[])
diff --git a/src/lib/hash/sha2_64/sha2_64.h b/src/lib/hash/sha2_64/sha2_64.h
index cbe1ad70b..ec3512dfc 100644
--- a/src/lib/hash/sha2_64/sha2_64.h
+++ b/src/lib/hash/sha2_64/sha2_64.h
@@ -22,6 +22,7 @@ class BOTAN_PUBLIC_API(2,0) SHA_384 final : public MDx_HashFunction
size_t output_length() const override { return 48; }
HashFunction* clone() const override { return new SHA_384; }
std::unique_ptr<HashFunction> copy_state() const override;
+ std::string provider() const override;
void clear() override;
@@ -44,15 +45,31 @@ class BOTAN_PUBLIC_API(2,0) SHA_512 final : public MDx_HashFunction
size_t output_length() const override { return 64; }
HashFunction* clone() const override { return new SHA_512; }
std::unique_ptr<HashFunction> copy_state() const override;
+ std::string provider() const override;
void clear() override;
+ /*
+ * Perform a SHA-512 compression. For internal use
+ */
+ static void compress_digest(secure_vector<uint64_t>& digest,
+ const uint8_t input[],
+ size_t blocks);
+
SHA_512() : MDx_HashFunction(128, true, true, 16), m_digest(8)
{ clear(); }
private:
void compress_n(const uint8_t[], size_t blocks) override;
void copy_out(uint8_t[]) override;
+ static const uint64_t K[80];
+
+#if defined(BOTAN_HAS_SHA2_64_BMI2)
+ static void compress_digest_bmi2(secure_vector<uint64_t>& digest,
+ const uint8_t input[],
+ size_t blocks);
+#endif
+
secure_vector<uint64_t> m_digest;
};
@@ -66,6 +83,7 @@ class BOTAN_PUBLIC_API(2,0) SHA_512_256 final : public MDx_HashFunction
size_t output_length() const override { return 32; }
HashFunction* clone() const override { return new SHA_512_256; }
std::unique_ptr<HashFunction> copy_state() const override;
+ std::string provider() const override;
void clear() override;
diff --git a/src/lib/hash/sha2_64/sha2_64_bmi2/info.txt b/src/lib/hash/sha2_64/sha2_64_bmi2/info.txt
new file mode 100644
index 000000000..08e97e172
--- /dev/null
+++ b/src/lib/hash/sha2_64/sha2_64_bmi2/info.txt
@@ -0,0 +1,15 @@
+<defines>
+SHA2_64_BMI2 -> 20190117
+</defines>
+
+need_isa bmi2
+
+# Needs 64-bit registers to be useful
+<arch>
+x86_64
+</arch>
+
+<cc>
+gcc
+clang
+</cc>
diff --git a/src/lib/hash/sha2_64/sha2_64_bmi2/sha2_64_bmi2.cpp b/src/lib/hash/sha2_64/sha2_64_bmi2/sha2_64_bmi2.cpp
new file mode 100644
index 000000000..202dadbbe
--- /dev/null
+++ b/src/lib/hash/sha2_64/sha2_64_bmi2/sha2_64_bmi2.cpp
@@ -0,0 +1,152 @@
+/*
+* (C) 2019 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/sha2_64.h>
+#include <botan/rotate.h>
+
+namespace Botan {
+
+/*
+* SHA-512 round step: updates H and D from A..H, M1 and the constant magic,
+* then advances M1 using M2, M3 and M4. A macro because many compilers won't
+* inline a function this big, even though it is much faster if inlined.
+* Expands to one statement, so each use must be followed by its own ';'.
+*/
+#define SHA2_64_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) \
+ do { \
+ const uint64_t E_rho = rotr<14>(E) ^ rotr<18>(E) ^ rotr<41>(E); \
+ const uint64_t A_rho = rotr<28>(A) ^ rotr<34>(A) ^ rotr<39>(A); \
+ const uint64_t M2_sigma = rotr<19>(M2) ^ rotr<61>(M2) ^ (M2 >> 6); \
+ const uint64_t M4_sigma = rotr<1>(M4) ^ rotr<8>(M4) ^ (M4 >> 7); \
+ H += magic + E_rho + ((E & F) ^ (~E & G)) + M1; \
+ D += H; \
+ H += A_rho + ((A & B) | ((A | B) & C)); \
+ M1 += M2_sigma + M3 + M4_sigma; \
+ } while(0)
+
+void SHA_512::compress_digest_bmi2(secure_vector<uint64_t>& digest, // digest: eight 64-bit state words, updated in place
+ const uint8_t input[], size_t blocks) // input is advanced by 128 bytes for each of the `blocks` iterations
+ { // NOTE(review): no intrinsics are used here; presumably relies on the compiler emitting BMI2 instructions for this module - confirm
+ uint64_t A = digest[0], B = digest[1], C = digest[2],
+ D = digest[3], E = digest[4], F = digest[5],
+ G = digest[6], H = digest[7];
+ // One iteration per block: read sixteen 64-bit message words W00..W15 with load_be
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint64_t W00 = load_be<uint64_t>(input, 0);
+ uint64_t W01 = load_be<uint64_t>(input, 1);
+ uint64_t W02 = load_be<uint64_t>(input, 2);
+ uint64_t W03 = load_be<uint64_t>(input, 3);
+ uint64_t W04 = load_be<uint64_t>(input, 4);
+ uint64_t W05 = load_be<uint64_t>(input, 5);
+ uint64_t W06 = load_be<uint64_t>(input, 6);
+ uint64_t W07 = load_be<uint64_t>(input, 7);
+ uint64_t W08 = load_be<uint64_t>(input, 8);
+ uint64_t W09 = load_be<uint64_t>(input, 9);
+ uint64_t W10 = load_be<uint64_t>(input, 10);
+ uint64_t W11 = load_be<uint64_t>(input, 11);
+ uint64_t W12 = load_be<uint64_t>(input, 12);
+ uint64_t W13 = load_be<uint64_t>(input, 13);
+ uint64_t W14 = load_be<uint64_t>(input, 14);
+ uint64_t W15 = load_be<uint64_t>(input, 15);
+ // 80 SHA2_64_F steps: the A..H arguments shift one position per step, and the W arguments repeat every 16 steps
+ SHA2_64_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98D728AE22);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x7137449123EF65CD);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCFEC4D3B2F);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA58189DBBC);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25BF348B538);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1B605D019);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4AF194F9B);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5DA6D8118);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98A3030242);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B0145706FBE);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE4EE4B28C);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3D5FFB4E2);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74F27B896F);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE3B1696B1);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A725C71235);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174CF692694);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C19EF14AD2);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786384F25E3);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC68B8CD5B5);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC77AC9C65);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F592B0275);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA6EA6E483);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DCBD41FBD4);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA831153B5);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152EE66DFAB);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D2DB43210);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C898FB213F);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7BEEF0EE4);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF33DA88FC2);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147930AA725);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351E003826F);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x142929670A0E6E70);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A8546D22FFC);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B21385C26C926);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC5AC42AED);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D139D95B3DF);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A73548BAF63DE);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB3C77B2A8);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E47EDAEE6);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C851482353B);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A14CF10364);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664BBC423001);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70D0F89791);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A30654BE30);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819D6EF5218);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD69906245565A910);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E35855771202A);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA07032BBD1B8);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116B8D2D0C8);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C085141AB53);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774CDF8EEB99);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5E19B48A8);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3C5C95A63);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4AE3418ACB);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F7763E373);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3D6B2B8A3);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE5DEFB2FC);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F43172F60);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814A1F0AB72);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC702081A6439EC);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA23631E28);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEBDE82BDE9);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7B2C67915);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2E372532B);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xCA273ECEEA26619C);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xD186B8C721C0C207);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xEADA7DD6CDE0EB1E);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xF57D4F7FEE6ED178);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x06F067AA72176FBA);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x0A637DC5A2C898A6);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x113F9804BEF90DAE);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x1B710B35131C471B);
+ SHA2_64_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x28DB77F523047D84);
+ SHA2_64_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x32CAAB7B40C72493);
+ SHA2_64_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x3C9EBE0A15C9BEBC);
+ SHA2_64_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x431D67C49C100D4C);
+ SHA2_64_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x4CC5D4BECB3E42B6);
+ SHA2_64_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x597F299CFC657E2A);
+ SHA2_64_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x5FCB6FAB3AD6FAEC);
+ SHA2_64_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x6C44198C4A475817);
+ // Add the working variables into the digest; the sums also seed the next block
+ A = (digest[0] += A);
+ B = (digest[1] += B);
+ C = (digest[2] += C);
+ D = (digest[3] += D);
+ E = (digest[4] += E);
+ F = (digest[5] += F);
+ G = (digest[6] += G);
+ H = (digest[7] += H);
+ // Step to the next 128-byte block
+ input += 128;
+ }
+ }
+
+#undef SHA2_64_F
+
+}
diff --git a/src/lib/hash/sha3/sha3.cpp b/src/lib/hash/sha3/sha3.cpp
index 09c2d8c1b..837768f85 100644
--- a/src/lib/hash/sha3/sha3.cpp
+++ b/src/lib/hash/sha3/sha3.cpp
@@ -8,12 +8,20 @@
#include <botan/sha3.h>
#include <botan/rotate.h>
#include <botan/exceptn.h>
+#include <botan/cpuid.h>
namespace Botan {
//static
void SHA_3::permute(uint64_t A[25])
{
+#if defined(BOTAN_HAS_SHA3_BMI2)
+ if(CPUID::has_bmi2())
+ {
+ return permute_bmi2(A);
+ }
+#endif
+
static const uint64_t RC[24] = {
0x0000000000000001, 0x0000000000008082, 0x800000000000808A,
0x8000000080008000, 0x000000000000808B, 0x0000000080000001,
@@ -198,6 +206,18 @@ std::string SHA_3::name() const
return "SHA-3(" + std::to_string(m_output_bits) + ")";
}
+std::string SHA_3::provider() const // Returns "bmi2" when BOTAN_HAS_SHA3_BMI2 is defined and CPUID reports BMI2
+ {
+#if defined(BOTAN_HAS_SHA3_BMI2)
+ if(CPUID::has_bmi2())
+ {
+ return "bmi2";
+ }
+#endif
+ // Otherwise report the portable implementation
+ return "base";
+ }
+
std::unique_ptr<HashFunction> SHA_3::copy_state() const
{
return std::unique_ptr<HashFunction>(new SHA_3(*this));
diff --git a/src/lib/hash/sha3/sha3.h b/src/lib/hash/sha3/sha3.h
index a3a666971..310468599 100644
--- a/src/lib/hash/sha3/sha3.h
+++ b/src/lib/hash/sha3/sha3.h
@@ -34,6 +34,7 @@ class BOTAN_PUBLIC_API(2,0) SHA_3 : public HashFunction
std::unique_ptr<HashFunction> copy_state() const override;
std::string name() const override;
void clear() override;
+ std::string provider() const override;
// Static functions for internal usage
@@ -83,6 +84,10 @@ class BOTAN_PUBLIC_API(2,0) SHA_3 : public HashFunction
void add_data(const uint8_t input[], size_t length) override;
void final_result(uint8_t out[]) override;
+#if defined(BOTAN_HAS_SHA3_BMI2)
+ static void permute_bmi2(uint64_t A[25]);
+#endif
+
size_t m_output_bits, m_bitrate;
secure_vector<uint64_t> m_S;
size_t m_S_pos;
diff --git a/src/lib/hash/sha3/sha3_bmi2/info.txt b/src/lib/hash/sha3/sha3_bmi2/info.txt
new file mode 100644
index 000000000..46a5e7234
--- /dev/null
+++ b/src/lib/hash/sha3/sha3_bmi2/info.txt
@@ -0,0 +1,15 @@
+<defines>
+SHA3_BMI2 -> 20190117
+</defines>
+
+need_isa bmi2
+
+# Needs 64-bit registers to be useful
+<arch>
+x86_64
+</arch>
+
+<cc>
+gcc
+clang
+</cc>
diff --git a/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
new file mode 100644
index 000000000..f2161b9ba
--- /dev/null
+++ b/src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
@@ -0,0 +1,96 @@
+/*
+* SHA-3
+* (C) 2019 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/sha3.h>
+#include <botan/rotate.h>
+
+namespace Botan {
+
+void SHA_3::permute_bmi2(uint64_t A[25]) // Transforms the 25-word state A in place over 24 rounds
+ { // NOTE(review): no intrinsics are used here; presumably relies on the compiler emitting BMI2 instructions for this module - confirm
+ static const uint64_t RC[24] = { // one constant per round, XORed into A[0] at the end of that round
+ 0x0000000000000001, 0x0000000000008082, 0x800000000000808A,
+ 0x8000000080008000, 0x000000000000808B, 0x0000000080000001,
+ 0x8000000080008081, 0x8000000000008009, 0x000000000000008A,
+ 0x0000000000000088, 0x0000000080008009, 0x000000008000000A,
+ 0x000000008000808B, 0x800000000000008B, 0x8000000000008089,
+ 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+ 0x000000000000800A, 0x800000008000000A, 0x8000000080008081,
+ 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+ };
+ // Each round starts with the five column parities C0..C4 (Keccak theta)
+ for(size_t i = 0; i != 24; ++i)
+ {
+ const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
+ const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
+ const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
+ const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
+ const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];
+ // Each D is one column parity rotated left by one bit, XORed with another column parity
+ const uint64_t D0 = rotl<1>(C0) ^ C3;
+ const uint64_t D1 = rotl<1>(C1) ^ C4;
+ const uint64_t D2 = rotl<1>(C2) ^ C0;
+ const uint64_t D3 = rotl<1>(C3) ^ C1;
+ const uint64_t D4 = rotl<1>(C4) ^ C2;
+ // XOR each lane with its D value, rotate by a fixed amount, store under a permuted index (Keccak rho and pi)
+ const uint64_t B00 = A[ 0] ^ D1;
+ const uint64_t B10 = rotl< 1>(A[ 1] ^ D2);
+ const uint64_t B20 = rotl<62>(A[ 2] ^ D3);
+ const uint64_t B05 = rotl<28>(A[ 3] ^ D4);
+ const uint64_t B15 = rotl<27>(A[ 4] ^ D0);
+ const uint64_t B16 = rotl<36>(A[ 5] ^ D1);
+ const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
+ const uint64_t B11 = rotl< 6>(A[ 7] ^ D3);
+ const uint64_t B21 = rotl<55>(A[ 8] ^ D4);
+ const uint64_t B06 = rotl<20>(A[ 9] ^ D0);
+ const uint64_t B07 = rotl< 3>(A[10] ^ D1);
+ const uint64_t B17 = rotl<10>(A[11] ^ D2);
+ const uint64_t B02 = rotl<43>(A[12] ^ D3);
+ const uint64_t B12 = rotl<25>(A[13] ^ D4);
+ const uint64_t B22 = rotl<39>(A[14] ^ D0);
+ const uint64_t B23 = rotl<41>(A[15] ^ D1);
+ const uint64_t B08 = rotl<45>(A[16] ^ D2);
+ const uint64_t B18 = rotl<15>(A[17] ^ D3);
+ const uint64_t B03 = rotl<21>(A[18] ^ D4);
+ const uint64_t B13 = rotl< 8>(A[19] ^ D0);
+ const uint64_t B14 = rotl<18>(A[20] ^ D1);
+ const uint64_t B24 = rotl< 2>(A[21] ^ D2);
+ const uint64_t B09 = rotl<61>(A[22] ^ D3);
+ const uint64_t B19 = rotl<56>(A[23] ^ D4);
+ const uint64_t B04 = rotl<14>(A[24] ^ D0);
+ // Within each group of five B words: out = x ^ (~next & next-after), wrapping around the group (Keccak chi)
+ A[ 0] = B00 ^ (~B01 & B02);
+ A[ 1] = B01 ^ (~B02 & B03);
+ A[ 2] = B02 ^ (~B03 & B04);
+ A[ 3] = B03 ^ (~B04 & B00);
+ A[ 4] = B04 ^ (~B00 & B01);
+ A[ 5] = B05 ^ (~B06 & B07);
+ A[ 6] = B06 ^ (~B07 & B08);
+ A[ 7] = B07 ^ (~B08 & B09);
+ A[ 8] = B08 ^ (~B09 & B05);
+ A[ 9] = B09 ^ (~B05 & B06);
+ A[10] = B10 ^ (~B11 & B12);
+ A[11] = B11 ^ (~B12 & B13);
+ A[12] = B12 ^ (~B13 & B14);
+ A[13] = B13 ^ (~B14 & B10);
+ A[14] = B14 ^ (~B10 & B11);
+ A[15] = B15 ^ (~B16 & B17);
+ A[16] = B16 ^ (~B17 & B18);
+ A[17] = B17 ^ (~B18 & B19);
+ A[18] = B18 ^ (~B19 & B15);
+ A[19] = B19 ^ (~B15 & B16);
+ A[20] = B20 ^ (~B21 & B22);
+ A[21] = B21 ^ (~B22 & B23);
+ A[22] = B22 ^ (~B23 & B24);
+ A[23] = B23 ^ (~B24 & B20);
+ A[24] = B24 ^ (~B20 & B21);
+ // Fold this round's constant into lane 0 (Keccak iota)
+ A[0] ^= RC[i];
+ }
+ }
+
+}