aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-08-14 07:40:49 -0400
committerJack Lloyd <[email protected]>2017-08-14 07:40:49 -0400
commitb8a691fb0d44eb67886179810768ac9453963cea (patch)
treeae597cbd061951be669e8feae5001ca94c42b13e /src/lib
parent7e2c92527209d47098c0b7b9712fafcd2455590e (diff)
Add support for SHACAL2 using x86 SHA extensions
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/block/shacal2/shacal2.cpp21
-rw-r--r--src/lib/block/shacal2/shacal2.h4
-rw-r--r--src/lib/block/shacal2/shacal2_x86/info.txt16
-rw-r--r--src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp68
4 files changed, 109 insertions, 0 deletions
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp
index faf0a2d81..30ad711db 100644
--- a/src/lib/block/shacal2/shacal2.cpp
+++ b/src/lib/block/shacal2/shacal2.cpp
@@ -44,6 +44,13 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
*/
void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return x86_encrypt_blocks(in, out, blocks);
+ }
+#endif
+
#if defined(BOTAN_HAS_SHACAL2_SIMD)
if(CPUID::has_simd_32())
{
@@ -181,6 +188,13 @@ void SHACAL2::key_schedule(const uint8_t key[], size_t len)
size_t SHACAL2::parallelism() const
{
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return 4;
+ }
+#endif
+
#if defined(BOTAN_HAS_SHACAL2_SIMD)
if(CPUID::has_simd_32())
{
@@ -193,6 +207,13 @@ size_t SHACAL2::parallelism() const
std::string SHACAL2::provider() const
{
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ if(CPUID::has_intel_sha())
+ {
+ return "intel_sha";
+ }
+#endif
+
#if defined(BOTAN_HAS_SHACAL2_SIMD)
if(CPUID::has_simd_32())
{
diff --git a/src/lib/block/shacal2/shacal2.h b/src/lib/block/shacal2/shacal2.h
index 7e1fa4ac0..d63c1453d 100644
--- a/src/lib/block/shacal2/shacal2.h
+++ b/src/lib/block/shacal2/shacal2.h
@@ -35,6 +35,10 @@ class BOTAN_DLL SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4>
void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const;
#endif
+#if defined(BOTAN_HAS_SHACAL2_X86)
+ void x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
secure_vector<uint32_t> m_RK;
};
diff --git a/src/lib/block/shacal2/shacal2_x86/info.txt b/src/lib/block/shacal2/shacal2_x86/info.txt
new file mode 100644
index 000000000..b8d6a50b7
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_x86/info.txt
@@ -0,0 +1,16 @@
+<defines>
+SHACAL2_X86 -> 20170814
+</defines>
+
+<requires>
+shacal2
+</requires>
+
+need_isa sha,sse4.1
+
+<cc>
+gcc:5.0
+clang:3.9
+msvc:19.0 # MSVS 2015
+</cc>
+
diff --git a/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp
new file mode 100644
index 000000000..5a346698a
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp
@@ -0,0 +1,68 @@
+/*
+* SHACAL-2 using x86 SHA extensions
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/shacal2.h>
+#include <immintrin.h>
+
+namespace Botan {
+
+/*
+Only encryption is supported since the inverse round function would
+require a different instruction
+*/
+
+void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ // TODO x4 unrolling
+
+ const __m128i BSWAP_MASK = _mm_set_epi64x(0x0C0D0E0F08090A0B, 0x0405060700010203);
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ while(blocks)
+ {
+ __m128i B0 = _mm_loadu_si128(in_mm);
+ __m128i B1 = _mm_loadu_si128(in_mm+1);
+
+ B0 = _mm_shuffle_epi8(B0, BSWAP_MASK);
+ B1 = _mm_shuffle_epi8(B1, BSWAP_MASK);
+
+ B0 = _mm_shuffle_epi32(B0, 0xB1); // CDAB
+ B1 = _mm_shuffle_epi32(B1, 0x1B); // EFGH
+
+ __m128i TMP = _mm_alignr_epi8(B0, B1, 8); // ABEF
+ B1 = _mm_blend_epi16(B1, B0, 0xF0); // CDGH
+ B0 = TMP;
+
+ for(size_t i = 0; i != 8; ++i)
+ {
+ B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+1],m_RK[8*i+0]));
+ B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+3],m_RK[8*i+2]));
+ B1 = _mm_sha256rnds2_epu32(B1, B0, _mm_set_epi32(0,0,m_RK[8*i+5],m_RK[8*i+4]));
+ B0 = _mm_sha256rnds2_epu32(B0, B1, _mm_set_epi32(0,0,m_RK[8*i+7],m_RK[8*i+6]));
+ }
+
+ TMP = _mm_shuffle_epi32(B0, 0x1B); // FEBA
+ B1 = _mm_shuffle_epi32(B1, 0xB1); // DCHG
+ B0 = _mm_blend_epi16(TMP, B1, 0xF0); // DCBA
+ B1 = _mm_alignr_epi8(B1, TMP, 8); // ABEF
+
+ B0 = _mm_shuffle_epi8(B0, BSWAP_MASK);
+ B1 = _mm_shuffle_epi8(B1, BSWAP_MASK);
+
+ // Save state
+ _mm_storeu_si128(out_mm, B0);
+ _mm_storeu_si128(out_mm + 1, B1);
+
+ blocks--;
+ in_mm += 2;
+ out_mm += 2;
+ }
+ }
+
+}