aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-08-15 14:40:52 -0400
committerJack Lloyd <[email protected]>2017-08-15 14:40:52 -0400
commit0f530047200524d23e8107dc16dd5a5e84014071 (patch)
treea60da7cf73cf8972eeb58a027a7ea6bbec1bb9d1 /src/lib
parent2266362024009f0364a07dd1bcff5115180f40a7 (diff)
parent9ab8ec3de32cad721b6b52401be67c5219c9f77b (diff)
Merge GH #1151 Add SHACAL2
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/block/aes/aes.cpp16
-rw-r--r--src/lib/block/aes/aes.h6
-rw-r--r--src/lib/block/block_cipher.cpp11
-rw-r--r--src/lib/block/idea/idea.cpp12
-rw-r--r--src/lib/block/idea/idea.h2
-rw-r--r--src/lib/block/noekeon/noekeon.cpp12
-rw-r--r--src/lib/block/noekeon/noekeon.h2
-rw-r--r--src/lib/block/shacal2/info.txt5
-rw-r--r--src/lib/block/shacal2/shacal2.cpp235
-rw-r--r--src/lib/block/shacal2/shacal2.h47
-rw-r--r--src/lib/block/shacal2/shacal2_simd/info.txt8
-rw-r--r--src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp119
-rw-r--r--src/lib/block/shacal2/shacal2_x86/info.txt16
-rw-r--r--src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp137
-rw-r--r--src/lib/block/threefish/threefish.cpp12
-rw-r--r--src/lib/block/threefish/threefish.h2
-rw-r--r--src/lib/utils/simd/simd_32.h58
17 files changed, 700 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp
index 21228e0c1..75591bfd2 100644
--- a/src/lib/block/aes/aes.cpp
+++ b/src/lib/block/aes/aes.cpp
@@ -414,6 +414,18 @@ void aes_key_schedule(const uint8_t key[], size_t length,
copy_mem(DK.data(), XDK.data(), DK.size());
}
+size_t aes_parallelism()
+ {
+#if defined(BOTAN_HAS_AES_NI)
+ if(CPUID::has_aes_ni())
+ {
+ return 4;
+ }
+#endif
+
+ return 1;
+ }
+
const char* aes_provider()
{
#if defined(BOTAN_HAS_AES_NI)
@@ -439,6 +451,10 @@ std::string AES_128::provider() const { return aes_provider(); }
std::string AES_192::provider() const { return aes_provider(); }
std::string AES_256::provider() const { return aes_provider(); }
+size_t AES_128::parallelism() const { return aes_parallelism(); }
+size_t AES_192::parallelism() const { return aes_parallelism(); }
+size_t AES_256::parallelism() const { return aes_parallelism(); }
+
void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
#if defined(BOTAN_HAS_AES_NI)
diff --git a/src/lib/block/aes/aes.h b/src/lib/block/aes/aes.h
index 52f877e36..a74280947 100644
--- a/src/lib/block/aes/aes.h
+++ b/src/lib/block/aes/aes.h
@@ -26,6 +26,8 @@ class BOTAN_DLL AES_128 final : public Block_Cipher_Fixed_Params<16, 16>
std::string provider() const override;
std::string name() const override { return "AES-128"; }
BlockCipher* clone() const override { return new AES_128; }
+ size_t parallelism() const override;
+
private:
void key_schedule(const uint8_t key[], size_t length) override;
@@ -59,6 +61,8 @@ class BOTAN_DLL AES_192 final : public Block_Cipher_Fixed_Params<16, 24>
std::string provider() const override;
std::string name() const override { return "AES-192"; }
BlockCipher* clone() const override { return new AES_192; }
+ size_t parallelism() const override;
+
private:
#if defined(BOTAN_HAS_AES_SSSE3)
void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
@@ -93,6 +97,8 @@ class BOTAN_DLL AES_256 final : public Block_Cipher_Fixed_Params<16, 32>
std::string name() const override { return "AES-256"; }
BlockCipher* clone() const override { return new AES_256; }
+ size_t parallelism() const override;
+
private:
#if defined(BOTAN_HAS_AES_SSSE3)
void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
diff --git a/src/lib/block/block_cipher.cpp b/src/lib/block/block_cipher.cpp
index b56f1c571..a0eafbd11 100644
--- a/src/lib/block/block_cipher.cpp
+++ b/src/lib/block/block_cipher.cpp
@@ -66,6 +66,10 @@
#include <botan/serpent.h>
#endif
+#if defined(BOTAN_HAS_SHACAL2)
+ #include <botan/shacal2.h>
+#endif
+
#if defined(BOTAN_HAS_SM4)
#include <botan/sm4.h>
#endif
@@ -135,6 +139,13 @@ BlockCipher::create(const std::string& algo,
}
#endif
+#if defined(BOTAN_HAS_SHACAL2)
+ if(algo == "SHACAL2")
+ {
+ return std::unique_ptr<BlockCipher>(new SHACAL2);
+ }
+#endif
+
#if defined(BOTAN_HAS_TWOFISH)
if(algo == "Twofish")
{
diff --git a/src/lib/block/idea/idea.cpp b/src/lib/block/idea/idea.cpp
index 4eab6a4f3..2be15be2e 100644
--- a/src/lib/block/idea/idea.cpp
+++ b/src/lib/block/idea/idea.cpp
@@ -107,6 +107,18 @@ void idea_op(const uint8_t in[], uint8_t out[], size_t blocks, const uint16_t K[
}
+size_t IDEA::parallelism() const
+ {
+#if defined(BOTAN_HAS_IDEA_SSE2)
+ if(CPUID::has_sse2())
+ {
+ return 8;
+ }
+#endif
+
+ return 1;
+ }
+
std::string IDEA::provider() const
{
#if defined(BOTAN_HAS_IDEA_SSE2)
diff --git a/src/lib/block/idea/idea.h b/src/lib/block/idea/idea.h
index 5a718867b..eaef5deee 100644
--- a/src/lib/block/idea/idea.h
+++ b/src/lib/block/idea/idea.h
@@ -26,6 +26,8 @@ class BOTAN_DLL IDEA final : public Block_Cipher_Fixed_Params<8, 16>
std::string provider() const override;
std::string name() const override { return "IDEA"; }
BlockCipher* clone() const override { return new IDEA; }
+ size_t parallelism() const override;
+
private:
#if defined(BOTAN_HAS_IDEA_SSE2)
void sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const;
diff --git a/src/lib/block/noekeon/noekeon.cpp b/src/lib/block/noekeon/noekeon.cpp
index e8bd7b308..c82badd4c 100644
--- a/src/lib/block/noekeon/noekeon.cpp
+++ b/src/lib/block/noekeon/noekeon.cpp
@@ -73,6 +73,18 @@ inline void gamma(uint32_t& A0, uint32_t& A1, uint32_t& A2, uint32_t& A3)
}
+size_t Noekeon::parallelism() const
+ {
+#if defined(BOTAN_HAS_NOEKEON_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ return 4;
+ }
+#endif
+
+ return 1;
+ }
+
std::string Noekeon::provider() const
{
#if defined(BOTAN_HAS_NOEKEON_SIMD)
diff --git a/src/lib/block/noekeon/noekeon.h b/src/lib/block/noekeon/noekeon.h
index 83af6d8d7..de49d658f 100644
--- a/src/lib/block/noekeon/noekeon.h
+++ b/src/lib/block/noekeon/noekeon.h
@@ -25,6 +25,8 @@ class BOTAN_DLL Noekeon final : public Block_Cipher_Fixed_Params<16, 16>
void clear() override;
std::string name() const override { return "Noekeon"; }
BlockCipher* clone() const override { return new Noekeon; }
+ size_t parallelism() const override;
+
private:
#if defined(BOTAN_HAS_NOEKEON_SIMD)
void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const;
diff --git a/src/lib/block/shacal2/info.txt b/src/lib/block/shacal2/info.txt
new file mode 100644
index 000000000..62e00503f
--- /dev/null
+++ b/src/lib/block/shacal2/info.txt
@@ -0,0 +1,5 @@
+<defines>
+SHACAL2 -> 20170813
+</defines>
+
+
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp
new file mode 100644
index 000000000..30ad711db
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2.cpp
@@ -0,0 +1,235 @@
+/*
+* SHACAL-2
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/shacal2.h>
+#include <botan/loadstor.h>
+#include <botan/cpuid.h>
+
+namespace Botan {
+
+namespace {
+
+inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
+ uint32_t E, uint32_t F, uint32_t G, uint32_t& H,
+ uint32_t RK)
+ {
+ const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22);
+ const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25);
+
+ H += E_rho + ((E & F) ^ (~E & G)) + RK;
+ D += H;
+ H += A_rho + ((A & B) | ((A | B) & C));
+ }
+
+inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
+ uint32_t E, uint32_t F, uint32_t G, uint32_t& H,
+ uint32_t RK)
+ {
+ const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22);
+ const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25);
+
+ H -= A_rho + ((A & B) | ((A | B) & C));
+ D -= H;
+ H -= E_rho + ((E & F) ^ (~E & G)) + RK;
+ }
+
+}
+
+/*
+* SHACAL2 Encryption
+*/
+void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return x86_encrypt_blocks(in, out, blocks);
+ }
+#endif
+
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ while(blocks >= 4)
+ {
+ simd_encrypt_4(in, out);
+ in += 4*BLOCK_SIZE;
+ out += 4*BLOCK_SIZE;
+ blocks -= 4;
+ }
+ }
+#endif
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint32_t A = load_be<uint32_t>(in, 0);
+ uint32_t B = load_be<uint32_t>(in, 1);
+ uint32_t C = load_be<uint32_t>(in, 2);
+ uint32_t D = load_be<uint32_t>(in, 3);
+ uint32_t E = load_be<uint32_t>(in, 4);
+ uint32_t F = load_be<uint32_t>(in, 5);
+ uint32_t G = load_be<uint32_t>(in, 6);
+ uint32_t H = load_be<uint32_t>(in, 7);
+
+ for(size_t r = 0; r != 64; r += 8)
+ {
+ SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]);
+ SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]);
+ SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]);
+ SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]);
+ SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]);
+ SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]);
+ SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]);
+ SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]);
+ }
+
+ store_be(out, A, B, C, D, E, F, G, H);
+
+ in += BLOCK_SIZE;
+ out += BLOCK_SIZE;
+ }
+ }
+
+/*
+* SHACAL2 Encryption
+*/
+void SHACAL2::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ while(blocks >= 4)
+ {
+ simd_decrypt_4(in, out);
+ in += 4*BLOCK_SIZE;
+ out += 4*BLOCK_SIZE;
+ blocks -= 4;
+ }
+ }
+#endif
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint32_t A = load_be<uint32_t>(in, 0);
+ uint32_t B = load_be<uint32_t>(in, 1);
+ uint32_t C = load_be<uint32_t>(in, 2);
+ uint32_t D = load_be<uint32_t>(in, 3);
+ uint32_t E = load_be<uint32_t>(in, 4);
+ uint32_t F = load_be<uint32_t>(in, 5);
+ uint32_t G = load_be<uint32_t>(in, 6);
+ uint32_t H = load_be<uint32_t>(in, 7);
+
+ for(size_t r = 0; r != 64; r += 8)
+ {
+ SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]);
+ SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]);
+ SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]);
+ SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]);
+ SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]);
+ SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]);
+ SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]);
+ SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]);
+ }
+
+ store_be(out, A, B, C, D, E, F, G, H);
+
+ in += BLOCK_SIZE;
+ out += BLOCK_SIZE;
+ }
+ }
+
+/*
+* SHACAL2 Key Schedule
+*/
+void SHACAL2::key_schedule(const uint8_t key[], size_t len)
+ {
+ const uint32_t RC[64] = {
+ 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
+ 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+ 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
+ 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+ 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
+ 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+ 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
+ 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+ 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
+ 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+ 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
+ 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+ 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
+ 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+ 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
+ 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+ };
+
+ if(m_RK.empty())
+ m_RK.resize(64);
+ else
+ clear_mem(m_RK.data(), m_RK.size());
+
+ load_be(m_RK.data(), key, len/4);
+
+ for(size_t i = 16; i != 64; ++i)
+ {
+ const uint32_t sigma0_15 = rotate_right(m_RK[i-15], 7) ^ rotate_right(m_RK[i-15], 18) ^ (m_RK[i-15] >> 3);
+ const uint32_t sigma1_2 = rotate_right(m_RK[i-2], 17) ^ rotate_right(m_RK[i-2], 19) ^ (m_RK[i-2] >> 10);
+ m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2;
+ }
+
+ for(size_t i = 0; i != 64; ++i)
+ {
+ m_RK[i] += RC[i];
+ }
+ }
+
+size_t SHACAL2::parallelism() const
+ {
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return 4;
+ }
+#endif
+
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ return 4;
+ }
+#endif
+
+ return 1;
+ }
+
+std::string SHACAL2::provider() const
+ {
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return "intel_sha";
+ }
+#endif
+
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ return "simd";
+ }
+#endif
+
+ return "base";
+ }
+
+/*
+* Clear memory of sensitive data
+*/
+void SHACAL2::clear()
+ {
+ zap(m_RK);
+ }
+
+}
diff --git a/src/lib/block/shacal2/shacal2.h b/src/lib/block/shacal2/shacal2.h
new file mode 100644
index 000000000..d63c1453d
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2.h
@@ -0,0 +1,47 @@
+/*
+* SHACAL-2
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SHACAL2_H__
+#define BOTAN_SHACAL2_H__
+
+#include <botan/block_cipher.h>
+
+namespace Botan {
+
+/**
+* SHACAL2
+*/
+class BOTAN_DLL SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4>
+ {
+ public:
+ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+ void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+
+ std::string provider() const override;
+ void clear() override;
+ std::string name() const override { return "SHACAL2"; }
+ BlockCipher* clone() const override { return new SHACAL2; }
+ size_t parallelism() const override;
+
+ private:
+ void key_schedule(const uint8_t[], size_t) override;
+
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const;
+ void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const;
+#endif
+
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ void x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
+ secure_vector<uint32_t> m_RK;
+ };
+
+}
+
+#endif
diff --git a/src/lib/block/shacal2/shacal2_simd/info.txt b/src/lib/block/shacal2/shacal2_simd/info.txt
new file mode 100644
index 000000000..8d715c668
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_simd/info.txt
@@ -0,0 +1,8 @@
+<defines>
+SHACAL2_SIMD -> 20170813
+</defines>
+
+<requires>
+shacal2
+simd
+</requires>
diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
new file mode 100644
index 000000000..a4324c8fb
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
@@ -0,0 +1,119 @@
+/*
+* SHACAL-2 using SIMD
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/shacal2.h>
+#include <botan/internal/simd_32.h>
+
+namespace Botan {
+
+namespace {
+
+inline
+void SHACAL2_Fwd(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32& D,
+ const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H,
+ uint32_t RK)
+ {
+ H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+ D += H;
+ H += A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+ }
+
+inline
+void SHACAL2_Rev(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32& D,
+ const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H,
+ uint32_t RK)
+ {
+ H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+ D -= H;
+ H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+ }
+
+}
+
+void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const
+ {
+ SIMD_4x32 A = SIMD_4x32::load_be(in);
+ SIMD_4x32 E = SIMD_4x32::load_be(in+16);
+ SIMD_4x32 B = SIMD_4x32::load_be(in+32);
+ SIMD_4x32 F = SIMD_4x32::load_be(in+48);
+
+ SIMD_4x32 C = SIMD_4x32::load_be(in+64);
+ SIMD_4x32 G = SIMD_4x32::load_be(in+80);
+ SIMD_4x32 D = SIMD_4x32::load_be(in+96);
+ SIMD_4x32 H = SIMD_4x32::load_be(in+112);
+
+ SIMD_4x32::transpose(A, B, C, D);
+ SIMD_4x32::transpose(E, F, G, H);
+
+ for(size_t r = 0; r != 64; r += 8)
+ {
+ SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]);
+ SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]);
+ SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]);
+ SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]);
+ SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]);
+ SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]);
+ SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]);
+ SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]);
+ }
+
+ SIMD_4x32::transpose(A, B, C, D);
+ SIMD_4x32::transpose(E, F, G, H);
+
+ A.store_be(out);
+ E.store_be(out+16);
+ B.store_be(out+32);
+ F.store_be(out+48);
+
+ C.store_be(out+64);
+ G.store_be(out+80);
+ D.store_be(out+96);
+ H.store_be(out+112);
+ }
+
+void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const
+ {
+ SIMD_4x32 A = SIMD_4x32::load_be(in);
+ SIMD_4x32 E = SIMD_4x32::load_be(in+16);
+ SIMD_4x32 B = SIMD_4x32::load_be(in+32);
+ SIMD_4x32 F = SIMD_4x32::load_be(in+48);
+
+ SIMD_4x32 C = SIMD_4x32::load_be(in+64);
+ SIMD_4x32 G = SIMD_4x32::load_be(in+80);
+ SIMD_4x32 D = SIMD_4x32::load_be(in+96);
+ SIMD_4x32 H = SIMD_4x32::load_be(in+112);
+
+ SIMD_4x32::transpose(A, B, C, D);
+ SIMD_4x32::transpose(E, F, G, H);
+
+ for(size_t r = 0; r != 64; r += 8)
+ {
+ SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]);
+ SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]);
+ SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]);
+ SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]);
+ SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]);
+ SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]);
+ SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]);
+ SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]);
+ }
+
+ SIMD_4x32::transpose(A, B, C, D);
+ SIMD_4x32::transpose(E, F, G, H);
+
+ A.store_be(out);
+ E.store_be(out+16);
+ B.store_be(out+32);
+ F.store_be(out+48);
+
+ C.store_be(out+64);
+ G.store_be(out+80);
+ D.store_be(out+96);
+ H.store_be(out+112);
+ }
+
+}
diff --git a/src/lib/block/shacal2/shacal2_x86/info.txt b/src/lib/block/shacal2/shacal2_x86/info.txt
new file mode 100644
index 000000000..b8d6a50b7
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_x86/info.txt
@@ -0,0 +1,16 @@
+<defines>
+SHACAL2_X86 -> 20170814
+</defines>
+
+<requires>
+shacal2
+</requires>
+
+need_isa sha,sse4.1
+
+<cc>
+gcc:5.0
+clang:3.9
+msvc:19.0 # MSVS 2015
+</cc>
+
diff --git a/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp
new file mode 100644
index 000000000..a917955e3
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp
@@ -0,0 +1,137 @@
+/*
+* SHACAL-2 using x86 SHA extensions
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/shacal2.h>
+#include <immintrin.h>
+
+namespace Botan {
+
+/*
+Only encryption is supported since the inverse round function would
+require a different instruction
+*/
+
+void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ const __m128i BSWAP_MASK = _mm_set_epi64x(0x0C0D0E0F08090A0B, 0x0405060700010203);
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ while(blocks >= 2)
+ {
+ __m128i B0_0 = _mm_loadu_si128(in_mm);
+ __m128i B0_1 = _mm_loadu_si128(in_mm+1);
+ __m128i B1_0 = _mm_loadu_si128(in_mm+2);
+ __m128i B1_1 = _mm_loadu_si128(in_mm+3);
+
+ B0_0 = _mm_shuffle_epi8(B0_0, BSWAP_MASK);
+ B0_1 = _mm_shuffle_epi8(B0_1, BSWAP_MASK);
+ B1_0 = _mm_shuffle_epi8(B1_0, BSWAP_MASK);
+ B1_1 = _mm_shuffle_epi8(B1_1, BSWAP_MASK);
+
+ B0_0 = _mm_shuffle_epi32(B0_0, 0xB1); // CDAB
+ B0_1 = _mm_shuffle_epi32(B0_1, 0x1B); // EFGH
+ B1_0 = _mm_shuffle_epi32(B1_0, 0xB1); // CDAB
+ B1_1 = _mm_shuffle_epi32(B1_1, 0x1B); // EFGH
+
+ __m128i TMP = _mm_alignr_epi8(B0_0, B0_1, 8); // ABEF
+ B0_1 = _mm_blend_epi16(B0_1, B0_0, 0xF0); // CDGH
+ B0_0 = TMP;
+
+ TMP = _mm_alignr_epi8(B1_0, B1_1, 8); // ABEF
+ B1_1 = _mm_blend_epi16(B1_1, B1_0, 0xF0); // CDGH
+ B1_0 = TMP;
+
+ for(size_t i = 0; i != 8; ++i)
+ {
+ const __m128i RK0 = _mm_set_epi32(0,0,m_RK[8*i+1],m_RK[8*i+0]);
+ const __m128i RK1 = _mm_set_epi32(0,0,m_RK[8*i+3],m_RK[8*i+2]);
+ const __m128i RK2 = _mm_set_epi32(0,0,m_RK[8*i+5],m_RK[8*i+4]);
+ const __m128i RK3 = _mm_set_epi32(0,0,m_RK[8*i+7],m_RK[8*i+6]);
+
+ B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK0);
+ B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK0);
+
+ B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK1);
+ B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK1);
+
+ B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK2);
+ B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK2);
+
+ B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK3);
+ B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK3);
+ }
+
+ TMP = _mm_shuffle_epi32(B0_0, 0x1B); // FEBA
+ B0_1 = _mm_shuffle_epi32(B0_1, 0xB1); // DCHG
+ B0_0 = _mm_blend_epi16(TMP, B0_1, 0xF0); // DCBA
+ B0_1 = _mm_alignr_epi8(B0_1, TMP, 8); // ABEF
+
+ TMP = _mm_shuffle_epi32(B1_0, 0x1B); // FEBA
+ B1_1 = _mm_shuffle_epi32(B1_1, 0xB1); // DCHG
+ B1_0 = _mm_blend_epi16(TMP, B1_1, 0xF0); // DCBA
+ B1_1 = _mm_alignr_epi8(B1_1, TMP, 8); // ABEF
+
+ B0_0 = _mm_shuffle_epi8(B0_0, BSWAP_MASK);
+ B0_1 = _mm_shuffle_epi8(B0_1, BSWAP_MASK);
+ B1_0 = _mm_shuffle_epi8(B1_0, BSWAP_MASK);
+ B1_1 = _mm_shuffle_epi8(B1_1, BSWAP_MASK);
+
+ // Save state
+ _mm_storeu_si128(out_mm + 0, B0_0);
+ _mm_storeu_si128(out_mm + 1, B0_1);
+ _mm_storeu_si128(out_mm + 2, B1_0);
+ _mm_storeu_si128(out_mm + 3, B1_1);
+
+ blocks -= 2;
+ in_mm += 4;
+ out_mm += 4;
+ }
+
+ while(blocks)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm);
+ __m128i B1 = _mm_loadu_si128(in_mm+1);
+
+ B0 = _mm_shuffle_epi8(B0, BSWAP_MASK);
+ B1 = _mm_shuffle_epi8(B1, BSWAP_MASK);
+
+ B0 = _mm_shuffle_epi32(B0, 0xB1); // CDAB
+ B1 = _mm_shuffle_epi32(B1, 0x1B); // EFGH
+
+ __m128i TMP = _mm_alignr_epi8(B0, B1, 8); // ABEF
+ B1 = _mm_blend_epi16(B1, B0, 0xF0); // CDGH
+ B0 = TMP;
+
+ for(size_t i = 0; i != 8; ++i)
+ {
+ B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+1],m_RK[8*i+0]));
+ B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+3],m_RK[8*i+2]));
+ B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+5],m_RK[8*i+4]));
+ B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+7],m_RK[8*i+6]));
+ }
+
+ TMP = _mm_shuffle_epi32(B0, 0x1B); // FEBA
+ B1 = _mm_shuffle_epi32(B1, 0xB1); // DCHG
+ B0 = _mm_blend_epi16(TMP, B1, 0xF0); // DCBA
+ B1 = _mm_alignr_epi8(B1, TMP, 8); // ABEF
+
+ B0 = _mm_shuffle_epi8(B0, BSWAP_MASK);
+ B1 = _mm_shuffle_epi8(B1, BSWAP_MASK);
+
+ // Save state
+ _mm_storeu_si128(out_mm, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+
+ blocks--;
+ in_mm += 2;
+ out_mm += 2;
+ }
+ }
+
+}
diff --git a/src/lib/block/threefish/threefish.cpp b/src/lib/block/threefish/threefish.cpp
index 28a144fb6..99ce135d5 100644
--- a/src/lib/block/threefish/threefish.cpp
+++ b/src/lib/block/threefish/threefish.cpp
@@ -98,6 +98,18 @@ void Threefish_512::skein_feedfwd(const secure_vector<uint64_t>& M,
m_K[4] ^ m_K[5] ^ m_K[6] ^ m_K[7] ^ 0x1BD11BDAA9FC1A22;
}
+size_t Threefish_512::parallelism() const
+ {
+#if defined(BOTAN_HAS_THREEFISH_512_AVX2)
+ if(CPUID::has_avx2())
+ {
+ return 2;
+ }
+#endif
+
+ return 1;
+ }
+
std::string Threefish_512::provider() const
{
#if defined(BOTAN_HAS_THREEFISH_512_AVX2)
diff --git a/src/lib/block/threefish/threefish.h b/src/lib/block/threefish/threefish.h
index 8fe690f52..cdd27cb11 100644
--- a/src/lib/block/threefish/threefish.h
+++ b/src/lib/block/threefish/threefish.h
@@ -27,6 +27,8 @@ class BOTAN_DLL Threefish_512 final : public Block_Cipher_Fixed_Params<64, 64>
std::string provider() const override;
std::string name() const override { return "Threefish-512"; }
BlockCipher* clone() const override { return new Threefish_512; }
+ size_t parallelism() const override;
+
protected:
const secure_vector<uint64_t>& get_T() const { return m_T; }
const secure_vector<uint64_t>& get_K() const { return m_K; }
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h
index 01b6a7ef0..def933f4a 100644
--- a/src/lib/utils/simd/simd_32.h
+++ b/src/lib/utils/simd/simd_32.h
@@ -282,6 +282,64 @@ class SIMD_4x32 final
#endif
}
+
+ /*
+ Return rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3)
+ */
+ SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const
+ {
+ SIMD_4x32 res;
+
+#if defined(BOTAN_SIMD_USE_SSE2)
+
+ res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)),
+ _mm_srli_epi32(m_sse, static_cast<int>(rot1)));
+ res.m_sse = _mm_xor_si128(
+ res.m_sse,
+ _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)),
+ _mm_srli_epi32(m_sse, static_cast<int>(rot2))));
+ res.m_sse = _mm_xor_si128(
+ res.m_sse,
+ _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)),
+ _mm_srli_epi32(m_sse, static_cast<int>(rot3))));
+
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+
+ const unsigned int r1 = static_cast<unsigned int>(32-rot1);
+ const unsigned int r2 = static_cast<unsigned int>(32-rot2);
+ const unsigned int r3 = static_cast<unsigned int>(32-rot3);
+ res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1});
+ res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2}));
+ res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3}));
+
+#elif defined(BOTAN_SIMD_USE_NEON)
+ res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot1)),
+ vshrq_n_u32(m_neon, static_cast<int>(rot1)));
+
+ res.m_neon = veorq_u32(
+ res.m_neon,
+ vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot2)),
+ vshrq_n_u32(m_neon, static_cast<int>(rot2))));
+
+ res.m_neon = veorq_u32(
+ res.m_neon,
+ vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot3)),
+ vshrq_n_u32(m_neon, static_cast<int>(rot3))));
+
+#else
+
+ for(size_t i = 0; i != 4; ++i)
+ {
+ res.m_scalar[i] =
+ Botan::rotate_right(m_scalar[i], rot1) ^
+ Botan::rotate_right(m_scalar[i], rot2) ^
+ Botan::rotate_right(m_scalar[i], rot3);
+ }
+#endif
+
+ return res;
+ }
+
/**
* Rotate each element of SIMD register n bits left
*/