aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/block/shacal2
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-08-13 08:38:26 -0400
committerJack Lloyd <[email protected]>2017-08-13 08:38:26 -0400
commit8804eeece5af90b728e337c7e0877549ea74eec9 (patch)
tree558c692afe04770efc3077bba0f3247e99188231 /src/lib/block/shacal2
parent61a7a5757bfe19ff1a686055ab75cae1fc9f485e (diff)
Add SHACAL2 in generic SIMD
Bit over 2x faster on my desktop
Diffstat (limited to 'src/lib/block/shacal2')
-rw-r--r--src/lib/block/shacal2/shacal2.cpp27
-rw-r--r--src/lib/block/shacal2/shacal2.h5
-rw-r--r--src/lib/block/shacal2/shacal2_simd/info.txt8
-rw-r--r--src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp117
4 files changed, 157 insertions, 0 deletions
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp
index 1b24d0cef..ea3cd9319 100644
--- a/src/lib/block/shacal2/shacal2.cpp
+++ b/src/lib/block/shacal2/shacal2.cpp
@@ -7,6 +7,7 @@
#include <botan/shacal2.h>
#include <botan/loadstor.h>
+#include <botan/cpuid.h>
namespace Botan {
@@ -43,6 +44,19 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
*/
void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ while(blocks >= 4)
+ {
+ simd_encrypt_4(in, out);
+ in += 4*BLOCK_SIZE;
+ out += 4*BLOCK_SIZE;
+ blocks -= 4;
+ }
+ }
+#endif
+
for(size_t i = 0; i != blocks; ++i)
{
uint32_t A = load_be<uint32_t>(in, 0);
@@ -78,6 +92,19 @@ void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
*/
void SHACAL2::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ while(blocks >= 4)
+ {
+ simd_decrypt_4(in, out);
+ in += 4*BLOCK_SIZE;
+ out += 4*BLOCK_SIZE;
+ blocks -= 4;
+ }
+ }
+#endif
+
for(size_t i = 0; i != blocks; ++i)
{
uint32_t A = load_be<uint32_t>(in, 0);
diff --git a/src/lib/block/shacal2/shacal2.h b/src/lib/block/shacal2/shacal2.h
index 14fde2fbc..f0ade5e50 100644
--- a/src/lib/block/shacal2/shacal2.h
+++ b/src/lib/block/shacal2/shacal2.h
@@ -28,6 +28,11 @@ class BOTAN_DLL SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4>
private:
void key_schedule(const uint8_t[], size_t) override;
+#if defined(BOTAN_HAS_SHACAL2_SIMD)
+ void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const;
+ void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const;
+#endif
+
secure_vector<uint32_t> m_RK;
};
diff --git a/src/lib/block/shacal2/shacal2_simd/info.txt b/src/lib/block/shacal2/shacal2_simd/info.txt
new file mode 100644
index 000000000..8d715c668
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_simd/info.txt
@@ -0,0 +1,8 @@
+<defines>
+SHACAL2_SIMD -> 20170813
+</defines>
+
+<requires>
+shacal2
+simd
+</requires>
diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
new file mode 100644
index 000000000..349fe5722
--- /dev/null
+++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
@@ -0,0 +1,117 @@
+/*
+* SHACAL-2 using SIMD
+* (C) 2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/shacal2.h>
+#include <botan/internal/simd_32.h>
+
+namespace Botan {
+
+namespace {
+
+/*
+* One forward (encryption) round of SHACAL-2 on SIMD vectors.
+*
+* A, B, C, E, F and G are taken by value and only read; D and H are
+* updated in place. RK is a single round key word which is expanded
+* to a full vector with splat() before being added.
+*
+* The vector type is spelled SIMD_4x32 to agree with the callers in
+* this file, which declare and pass SIMD_4x32 values.
+*/
+inline void SHACAL2_Fwd(SIMD_4x32 A, SIMD_4x32 B, SIMD_4x32 C, SIMD_4x32& D,
+                        SIMD_4x32 E, SIMD_4x32 F, SIMD_4x32 G, SIMD_4x32& H,
+                        uint32_t RK)
+   {
+   // (E & F) ^ (~E & G) selects bits of F or G depending on E.
+   // NOTE(review): rho(6,11,25) is presumably the matching three-rotation
+   // mix of E - confirm against simd_32.h
+   H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK);
+   D += H;
+   // (A & B) | ((A | B) & C) is the bitwise majority of A, B and C
+   H += A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+   }
+
+/*
+* One reverse (decryption) round of SHACAL-2 on SIMD vectors.
+*
+* Performs the same three updates as the forward round in the opposite
+* order and with subtraction instead of addition. A, B, C, E, F and G
+* are taken by value and only read; D and H are updated in place. RK
+* is a single round key word which is expanded to a full vector with
+* splat() before being subtracted.
+*
+* The vector type is spelled SIMD_4x32 to agree with the callers in
+* this file, which declare and pass SIMD_4x32 values.
+*/
+inline void SHACAL2_Rev(SIMD_4x32 A, SIMD_4x32 B, SIMD_4x32 C, SIMD_4x32& D,
+                        SIMD_4x32 E, SIMD_4x32 F, SIMD_4x32 G, SIMD_4x32& H,
+                        uint32_t RK)
+   {
+   // (A & B) | ((A | B) & C) is the bitwise majority of A, B and C
+   H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+   D -= H;
+   // (E & F) ^ (~E & G) selects bits of F or G depending on E
+   H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK);
+   }
+
+}
+
+/*
+* Encrypt four consecutive blocks from in, writing the result to out.
+*
+* Eight vectors are loaded at 16 byte steps (offsets 0 through 112), so
+* both buffers are expected to hold at least 128 bytes. Vectors at
+* offsets 0/32/64/96 form one group and those at 16/48/80/112 the other;
+* each group is transposed before the rounds and again before storing.
+*/
+void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const
+   {
+   // Leading 16 bytes of each 32 byte stride
+   SIMD_4x32 A = SIMD_4x32::load_be(in);
+   SIMD_4x32 B = SIMD_4x32::load_be(in+32);
+   SIMD_4x32 C = SIMD_4x32::load_be(in+64);
+   SIMD_4x32 D = SIMD_4x32::load_be(in+96);
+
+   // Trailing 16 bytes of each 32 byte stride
+   SIMD_4x32 E = SIMD_4x32::load_be(in+16);
+   SIMD_4x32 F = SIMD_4x32::load_be(in+48);
+   SIMD_4x32 G = SIMD_4x32::load_be(in+80);
+   SIMD_4x32 H = SIMD_4x32::load_be(in+112);
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   // 8 groups of 8 rounds; the argument order rotates by one position
+   // per round so no values have to be moved between variables.
+   for(size_t group = 0; group != 8; ++group)
+      {
+      const size_t r = 8*group;
+
+      SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]);
+      SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]);
+      SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]);
+      SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]);
+      SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]);
+      SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]);
+      SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]);
+      SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]);
+      }
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   // Write back using the same offsets the vectors were loaded from
+   A.store_be(out);
+   B.store_be(out+32);
+   C.store_be(out+64);
+   D.store_be(out+96);
+
+   E.store_be(out+16);
+   F.store_be(out+48);
+   G.store_be(out+80);
+   H.store_be(out+112);
+   }
+
+/*
+* Decrypt four consecutive blocks from in, writing the result to out.
+*
+* Eight vectors are loaded at 16 byte steps (offsets 0 through 112), so
+* both buffers are expected to hold at least 128 bytes. The round keys
+* are consumed from m_RK[63] down to m_RK[0], with the argument rotation
+* running opposite to the encryption direction.
+*/
+void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const
+   {
+   // Leading 16 bytes of each 32 byte stride
+   SIMD_4x32 A = SIMD_4x32::load_be(in);
+   SIMD_4x32 B = SIMD_4x32::load_be(in+32);
+   SIMD_4x32 C = SIMD_4x32::load_be(in+64);
+   SIMD_4x32 D = SIMD_4x32::load_be(in+96);
+
+   // Trailing 16 bytes of each 32 byte stride
+   SIMD_4x32 E = SIMD_4x32::load_be(in+16);
+   SIMD_4x32 F = SIMD_4x32::load_be(in+48);
+   SIMD_4x32 G = SIMD_4x32::load_be(in+80);
+   SIMD_4x32 H = SIMD_4x32::load_be(in+112);
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   // 'top' is one past the highest round key index used by this group
+   // of 8 rounds; it steps 64, 56, ..., 8.
+   for(size_t top = 64; top != 0; top -= 8)
+      {
+      SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[top-1]);
+      SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[top-2]);
+      SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[top-3]);
+      SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[top-4]);
+      SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[top-5]);
+      SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[top-6]);
+      SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[top-7]);
+      SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[top-8]);
+      }
+
+   SIMD_4x32::transpose(A, B, C, D);
+   SIMD_4x32::transpose(E, F, G, H);
+
+   // Write back using the same offsets the vectors were loaded from
+   A.store_be(out);
+   B.store_be(out+32);
+   C.store_be(out+64);
+   D.store_be(out+96);
+
+   E.store_be(out+16);
+   F.store_be(out+48);
+   G.store_be(out+80);
+   H.store_be(out+112);
+   }
+
+}