aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/stream/chacha
diff options
context:
space:
mode:
author Jack Lloyd <[email protected]> 2016-08-31 17:09:23 -0400
committer Jack Lloyd <[email protected]> 2016-09-01 13:20:05 -0400
commit 858e3be10396e082901b612ee8c5e18cd3e47286 (patch)
tree 6df7679900e75f1fee8c44340ac29fc742a92831 /src/lib/stream/chacha
parent e4656be6a8e601b64c759906bacf543388b3cf22 (diff)
SSE2 ChaCha
Diffstat (limited to 'src/lib/stream/chacha')
-rw-r--r-- src/lib/stream/chacha/chacha.cpp 18
-rw-r--r-- src/lib/stream/chacha/chacha.h 6
-rw-r--r-- src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp 90
-rw-r--r-- src/lib/stream/chacha/chacha_sse2/info.txt 3
4 files changed, 111 insertions, 6 deletions
diff --git a/src/lib/stream/chacha/chacha.cpp b/src/lib/stream/chacha/chacha.cpp
index 40da93029..97b6465f9 100644
--- a/src/lib/stream/chacha/chacha.cpp
+++ b/src/lib/stream/chacha/chacha.cpp
@@ -7,6 +7,7 @@
#include <botan/chacha.h>
#include <botan/loadstor.h>
+#include <botan/cpuid.h>
namespace Botan {
@@ -16,12 +17,18 @@ ChaCha::ChaCha(size_t rounds) : m_rounds(rounds)
throw Invalid_Argument("ChaCha only supports 8, 12 or 20 rounds");
}
-namespace {
-
-void chacha(byte output[64], const u32bit input[16], size_t rounds)
+//static
+void ChaCha::chacha(byte output[64], const u32bit input[16], size_t rounds)
{
BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
+ #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
+ if(CPUID::has_sse2())
+ {
+ return ChaCha::chacha_sse2(output, input, rounds);
+ }
+ #endif
+
u32bit x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3],
x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
@@ -67,7 +74,6 @@ void chacha(byte output[64], const u32bit input[16], size_t rounds)
store_le(x14 + input[14], output + 4 * 14);
store_le(x15 + input[15], output + 4 * 15);
}
-}
/*
* Combine cipher stream with message
@@ -80,7 +86,7 @@ void ChaCha::cipher(const byte in[], byte out[], size_t length)
length -= (m_buffer.size() - m_position);
in += (m_buffer.size() - m_position);
out += (m_buffer.size() - m_position);
- chacha(m_buffer.data(), m_state.data(), m_rounds);
+ chacha_sse2(m_buffer.data(), m_state.data(), m_rounds);
++m_state[12];
m_state[13] += (m_state[12] == 0);
@@ -176,7 +182,7 @@ void ChaCha::seek(u64bit offset)
{
if (m_state.size() == 0 && m_buffer.size() == 0)
{
- throw Invalid_State("You have to setup the stream cipher (key and iv)");
+ throw Invalid_State("You have to setup the stream cipher (key and iv)");
}
m_position = offset % m_buffer.size();
diff --git a/src/lib/stream/chacha/chacha.h b/src/lib/stream/chacha/chacha.h
index f8f42e41d..ab28f9563 100644
--- a/src/lib/stream/chacha/chacha.h
+++ b/src/lib/stream/chacha/chacha.h
@@ -47,6 +47,12 @@ class BOTAN_DLL ChaCha final : public StreamCipher
private:
void key_schedule(const byte key[], size_t key_len) override;
+ void chacha(byte output[64], const u32bit input[16], size_t rounds);
+
+#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
+ void chacha_sse2(byte output[64], const u32bit input[16], size_t rounds);
+#endif
+
size_t m_rounds;
secure_vector<u32bit> m_state;
secure_vector<byte> m_buffer;
diff --git a/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp b/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp
new file mode 100644
index 000000000..aa1ca45ff
--- /dev/null
+++ b/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp
@@ -0,0 +1,90 @@
+/*
+* SSE2 ChaCha
+* (C) 2016 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/chacha.h>
+#include <emmintrin.h>
+
+namespace Botan {
+
+/*
+* Produce one 64-byte ChaCha output block using SSE2 intrinsics.
+*
+* The 16-word input state is held in four 128-bit registers (r0..r3),
+* one per row of four 32-bit words, so each add/xor/rotate step below
+* acts on four words at once.
+*
+* @param output receives the 64 bytes of output (unaligned stores are used)
+* @param input the 16 words of input state (unaligned loads are used)
+* @param rounds number of rounds; must be even, since every loop
+*        iteration performs two passes of the add/xor/rotate sequence
+*
+* This function uses SSE2 instructions unconditionally. ChaCha::chacha
+* dispatches here only after CPUID::has_sse2() returns true; any other
+* caller presumably needs the same runtime check - TODO confirm.
+*
+* NOTE(review): the "static" marker below is only a comment; the
+* declaration in chacha.h as shown in this change carries no 'static'
+* keyword - confirm which is intended.
+*/
+//static
+void ChaCha::chacha_sse2(byte output[64], const u32bit input[16], size_t rounds)
+ {
+ BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
+
+ const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
+
+ // Keep the original state rows; they are added back in after the rounds
+ const __m128i input0 = _mm_loadu_si128(input_mm);
+ const __m128i input1 = _mm_loadu_si128(input_mm + 1);
+ const __m128i input2 = _mm_loadu_si128(input_mm + 2);
+ const __m128i input3 = _mm_loadu_si128(input_mm + 3);
+
+ // Working copies that the rounds mutate
+ __m128i r0 = input0;
+ __m128i r1 = input1;
+ __m128i r2 = input2;
+ __m128i r3 = input3;
+
+ // Rotate each 32-bit lane of r left by n bits: the left shift by n is
+ // OR-ed with the logical right shift by 32-n. Only used with the
+ // literal constants 16, 12, 8 and 7 below.
+#define mm_rotl(r, n) \
+ _mm_or_si128(_mm_slli_epi32(r, n), _mm_srli_epi32(r, 32-n))
+
+ // Each iteration runs the add/xor/rotate sequence twice, hence rounds / 2
+ for(size_t i = 0; i != rounds / 2; ++i)
+ {
+ // First pass: lane k of r0..r3 is mixed together, for all four lanes
+ r0 = _mm_add_epi32(r0, r1);
+ r3 = _mm_xor_si128(r3, r0);
+ r3 = mm_rotl(r3, 16);
+
+ r2 = _mm_add_epi32(r2, r3);
+ r1 = _mm_xor_si128(r1, r2);
+ r1 = mm_rotl(r1, 12);
+
+ r0 = _mm_add_epi32(r0, r1);
+ r3 = _mm_xor_si128(r3, r0);
+ r3 = mm_rotl(r3, 8);
+
+ r2 = _mm_add_epi32(r2, r3);
+ r1 = _mm_xor_si128(r1, r2);
+ r1 = mm_rotl(r1, 7);
+
+ // Rotate the lanes of r1, r2, r3 by one, two and three positions so
+ // that the second pass mixes the diagonals of the 4x4 state
+ r1 = _mm_shuffle_epi32(r1, _MM_SHUFFLE(0, 3, 2, 1));
+ r2 = _mm_shuffle_epi32(r2, _MM_SHUFFLE(1, 0, 3, 2));
+ r3 = _mm_shuffle_epi32(r3, _MM_SHUFFLE(2, 1, 0, 3));
+
+ // Second pass: same sequence, now applied to the rotated rows
+ r0 = _mm_add_epi32(r0, r1);
+ r3 = _mm_xor_si128(r3, r0);
+ r3 = mm_rotl(r3, 16);
+
+ r2 = _mm_add_epi32(r2, r3);
+ r1 = _mm_xor_si128(r1, r2);
+ r1 = mm_rotl(r1, 12);
+
+ r0 = _mm_add_epi32(r0, r1);
+ r3 = _mm_xor_si128(r3, r0);
+ r3 = mm_rotl(r3, 8);
+
+ r2 = _mm_add_epi32(r2, r3);
+ r1 = _mm_xor_si128(r1, r2);
+ r1 = mm_rotl(r1, 7);
+
+ // Inverse lane rotations: put r1, r2, r3 back in their original order
+ r1 = _mm_shuffle_epi32(r1, _MM_SHUFFLE(2, 1, 0, 3));
+ r2 = _mm_shuffle_epi32(r2, _MM_SHUFFLE(1, 0, 3, 2));
+ r3 = _mm_shuffle_epi32(r3, _MM_SHUFFLE(0, 3, 2, 1));
+ }
+
+#undef mm_rotl
+
+ // Add the original input state to the permuted state, word by word
+ r0 = _mm_add_epi32(r0, input0);
+ r1 = _mm_add_epi32(r1, input1);
+ r2 = _mm_add_epi32(r2, input2);
+ r3 = _mm_add_epi32(r3, input3);
+
+ // Write the four rows out as 64 bytes. The lanes are stored in native
+ // memory order, whereas the scalar path in chacha.cpp uses store_le;
+ // presumably equivalent because SSE2 targets are little-endian.
+ __m128i* output_mm = reinterpret_cast<__m128i*>(output);
+ _mm_storeu_si128(output_mm , r0);
+ _mm_storeu_si128(output_mm + 1, r1);
+ _mm_storeu_si128(output_mm + 2, r2);
+ _mm_storeu_si128(output_mm + 3, r3);
+ }
+
+}
diff --git a/src/lib/stream/chacha/chacha_sse2/info.txt b/src/lib/stream/chacha/chacha_sse2/info.txt
new file mode 100644
index 000000000..965479746
--- /dev/null
+++ b/src/lib/stream/chacha/chacha_sse2/info.txt
@@ -0,0 +1,3 @@
+define CHACHA_SSE2 20160831
+
+need_isa sse2