aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/utils/simd
diff options
context:
space:
mode:
author lloyd <[email protected]> 2015-02-04 04:03:38 +0000
committer lloyd <[email protected]> 2015-02-04 04:03:38 +0000
commit 0dd060fed07b0060f94e3bae62e125a85c1bb877 (patch)
tree ed4bc7a961e2b30f17ed5e80769c84b0c313c8b7 /src/lib/utils/simd
parent f9a7c85b74be0f4a7273e8e0591703af83036e81 (diff)
Remove algo factory, engines, global RNG, global state, etc.
Convert all uses of Algorithm_Factory and the engines to use Algo_Registry. The shared pool of entropy sources remains but is moved to EntropySource. With that and the few remaining initializations (default OIDs and aliases) moved elsewhere, the global state is empty and init and shutdown are no-ops. Remove almost all of the headers and code for handling the global state, except LibraryInitializer, which remains as a compatibility stub. Update seeding for blinding so that only one hacky almost-global RNG instance needs to be set up, instead of one across all pubkey uses (it uses either the system RNG or an AutoSeeded_RNG if the system RNG is not available).
Diffstat (limited to 'src/lib/utils/simd')
-rw-r--r-- src/lib/utils/simd/info.txt 9
-rw-r--r-- src/lib/utils/simd/simd_32.h 30
-rw-r--r-- src/lib/utils/simd/simd_altivec/info.txt 9
-rw-r--r-- src/lib/utils/simd/simd_altivec/simd_altivec.h 215
-rw-r--r-- src/lib/utils/simd/simd_scalar/info.txt 7
-rw-r--r-- src/lib/utils/simd/simd_scalar/simd_scalar.h 215
-rw-r--r-- src/lib/utils/simd/simd_sse2/info.txt 9
-rw-r--r-- src/lib/utils/simd/simd_sse2/simd_sse2.h 169
8 files changed, 663 insertions, 0 deletions
diff --git a/src/lib/utils/simd/info.txt b/src/lib/utils/simd/info.txt
new file mode 100644
index 000000000..35620c940
--- /dev/null
+++ b/src/lib/utils/simd/info.txt
@@ -0,0 +1,9 @@
+define SIMD_32 20131128
+
+<header:internal>
+simd_32.h
+</header:internal>
+
+<requires>
+simd_sse2|simd_altivec|simd_scalar
+</requires>
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h
new file mode 100644
index 000000000..265e347a9
--- /dev/null
+++ b/src/lib/utils/simd/simd_32.h
@@ -0,0 +1,30 @@
+/*
+* Lightweight wrappers for SIMD operations
+* (C) 2009,2011 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SIMD_32_H__
+#define BOTAN_SIMD_32_H__
+
+#include <botan/types.h>
+
+#if defined(BOTAN_HAS_SIMD_SSE2)
+  // First choice: the SSE2 wrapper
+  #include <botan/internal/simd_sse2.h>
+  namespace Botan { using SIMD_32 = SIMD_SSE2; }
+#elif defined(BOTAN_HAS_SIMD_ALTIVEC)
+  // Second choice: the AltiVec wrapper
+  #include <botan/internal/simd_altivec.h>
+  namespace Botan { using SIMD_32 = SIMD_Altivec; }
+#elif defined(BOTAN_HAS_SIMD_SCALAR)
+  // Last choice: four u32bit elements emulated with scalar code
+  #include <botan/internal/simd_scalar.h>
+  namespace Botan { using SIMD_32 = SIMD_Scalar<u32bit,4>; }
+#else
+  // None of the three backend macros is defined: refuse to compile
+  #error "No SIMD module defined"
+#endif
+
+#endif
diff --git a/src/lib/utils/simd/simd_altivec/info.txt b/src/lib/utils/simd/simd_altivec/info.txt
new file mode 100644
index 000000000..19168a928
--- /dev/null
+++ b/src/lib/utils/simd/simd_altivec/info.txt
@@ -0,0 +1,9 @@
+define SIMD_ALTIVEC 20131128
+
+need_isa altivec
+
+load_on dep
+
+<header:internal>
+simd_altivec.h
+</header:internal>
diff --git a/src/lib/utils/simd/simd_altivec/simd_altivec.h b/src/lib/utils/simd/simd_altivec/simd_altivec.h
new file mode 100644
index 000000000..32533aafb
--- /dev/null
+++ b/src/lib/utils/simd/simd_altivec/simd_altivec.h
@@ -0,0 +1,215 @@
+/*
+* Lightweight wrappers around AltiVec for 32-bit operations
+* (C) 2009 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SIMD_ALTIVEC_H__
+#define BOTAN_SIMD_ALTIVEC_H__
+
+#if defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
+
+#include <botan/loadstor.h>
+#include <botan/cpuid.h>
+
+#include <altivec.h>
+#undef vector
+#undef bool
+
+namespace Botan {
+
+/**
+* Four 32-bit words in one AltiVec register (one of the SIMD_32 backends)
+*/
+class SIMD_Altivec
+   {
+   public:
+      /** True when CPUID reports AltiVec support at run time */
+      static bool enabled() { return CPUID::has_altivec(); }
+
+      /** Element i of the vector is B[i] */
+      SIMD_Altivec(const u32bit B[4])
+         {
+         m_reg = (__vector unsigned int){B[0], B[1], B[2], B[3]};
+         }
+
+      /** Elements are B0, B1, B2, B3 in that order */
+      SIMD_Altivec(u32bit B0, u32bit B1, u32bit B2, u32bit B3)
+         {
+         m_reg = (__vector unsigned int){B0, B1, B2, B3};
+         }
+
+      /** All four elements are set to B */
+      SIMD_Altivec(u32bit B)
+         {
+         m_reg = (__vector unsigned int){B, B, B, B};
+         }
+
+      /** Load 16 bytes from in, reversing the bytes of each 32-bit word */
+      static SIMD_Altivec load_le(const void* in)
+         {
+         const u32bit* in_32 = static_cast<const u32bit*>(in);
+         __vector unsigned int R0 = vec_ld(0, in_32);
+         __vector unsigned int R1 = vec_ld(12, in_32);
+         __vector unsigned char perm = vec_lvsl(0, in_32);
+         // XOR with 3 reverses the byte indices inside each 4-byte group
+         perm = vec_xor(perm, vec_splat_u8(3));
+         R0 = vec_perm(R0, R1, perm);
+         return SIMD_Altivec(R0);
+         }
+
+      /** Load 16 bytes from in, keeping the byte order found in memory */
+      static SIMD_Altivec load_be(const void* in)
+         {
+         const u32bit* in_32 = static_cast<const u32bit*>(in);
+         __vector unsigned int R0 = vec_ld(0, in_32);
+         __vector unsigned int R1 = vec_ld(12, in_32);
+         __vector unsigned char perm = vec_lvsl(0, in_32);
+         R0 = vec_perm(R0, R1, perm);
+         return SIMD_Altivec(R0);
+         }
+
+      /** Store 16 bytes to out: byte-swap each word, then Botan::store_be */
+      void store_le(byte out[]) const
+         {
+         __vector unsigned char perm = vec_lvsl(0, (u32bit*)0);
+         perm = vec_xor(perm, vec_splat_u8(3));
+
+         union {
+            __vector unsigned int V;
+            u32bit R[4];
+            } vec;
+
+         vec.V = vec_perm(m_reg, m_reg, perm);
+         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
+         }
+
+      /** Store the four words to out through Botan::store_be, unswapped */
+      void store_be(byte out[]) const
+         {
+         union {
+            __vector unsigned int V;
+            u32bit R[4];
+            } vec;
+
+         vec.V = m_reg;
+         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
+         }
+      /** Rotate every word left by rot bits */
+      void rotate_left(size_t rot)
+         {
+         const unsigned int r = static_cast<unsigned int>(rot);
+         m_reg = vec_rl(m_reg, (__vector unsigned int){r, r, r, r});
+         }
+      /** Rotate every word right by rot bits (left rotate by 32 - rot) */
+      void rotate_right(size_t rot)
+         {
+         rotate_left(32 - rot);
+         }
+      /** Add other to this vector, element by element */
+      void operator+=(const SIMD_Altivec& other)
+         {
+         m_reg = vec_add(m_reg, other.m_reg);
+         }
+      /** Element-wise sum, returned as a new value */
+      SIMD_Altivec operator+(const SIMD_Altivec& other) const
+         {
+         return vec_add(m_reg, other.m_reg);
+         }
+      /** Subtract other from this vector, element by element */
+      void operator-=(const SIMD_Altivec& other)
+         {
+         m_reg = vec_sub(m_reg, other.m_reg);
+         }
+      /** Element-wise difference, returned as a new value */
+      SIMD_Altivec operator-(const SIMD_Altivec& other) const
+         {
+         return vec_sub(m_reg, other.m_reg);
+         }
+      /** XOR other into this vector */
+      void operator^=(const SIMD_Altivec& other)
+         {
+         m_reg = vec_xor(m_reg, other.m_reg);
+         }
+      /** Bitwise XOR, returned as a new value */
+      SIMD_Altivec operator^(const SIMD_Altivec& other) const
+         {
+         return vec_xor(m_reg, other.m_reg);
+         }
+      /** OR other into this vector */
+      void operator|=(const SIMD_Altivec& other)
+         {
+         m_reg = vec_or(m_reg, other.m_reg);
+         }
+      /** Bitwise AND, returned as a new value; const like the other operators */
+      SIMD_Altivec operator&(const SIMD_Altivec& other) const
+         {
+         return vec_and(m_reg, other.m_reg);
+         }
+      /** AND other into this vector */
+      void operator&=(const SIMD_Altivec& other)
+         {
+         m_reg = vec_and(m_reg, other.m_reg);
+         }
+      /** Shift every word left by shift bits */
+      SIMD_Altivec operator<<(size_t shift) const
+         {
+         const unsigned int s = static_cast<unsigned int>(shift);
+         return vec_sl(m_reg, (__vector unsigned int){s, s, s, s});
+         }
+      /** Shift every word right by shift bits (vec_sr on unsigned words) */
+      SIMD_Altivec operator>>(size_t shift) const
+         {
+         const unsigned int s = static_cast<unsigned int>(shift);
+         return vec_sr(m_reg, (__vector unsigned int){s, s, s, s});
+         }
+      /** Bitwise complement, computed as NOR of the register with itself */
+      SIMD_Altivec operator~() const
+         {
+         return vec_nor(m_reg, m_reg);
+         }
+      /**
+      * Returns (~*this) & other. AltiVec does arg1 & ~arg2 rather than
+      * SSE's ~arg1 & arg2, so the arguments are swapped.
+      */
+      SIMD_Altivec andc(const SIMD_Altivec& other) const
+         {
+         return vec_andc(other.m_reg, m_reg);
+         }
+
+      /** Reverse the bytes within each 32-bit word */
+      SIMD_Altivec bswap() const
+         {
+         __vector unsigned char perm = vec_lvsl(0, (u32bit*)0);
+         perm = vec_xor(perm, vec_splat_u8(3));
+         return SIMD_Altivec(vec_perm(m_reg, m_reg, perm));
+         }
+
+      /** Transpose, in place, the 4x4 word matrix whose rows are B0..B3 */
+      static void transpose(SIMD_Altivec& B0, SIMD_Altivec& B1,
+                            SIMD_Altivec& B2, SIMD_Altivec& B3)
+         {
+         __vector unsigned int T0 = vec_mergeh(B0.m_reg, B2.m_reg);
+         __vector unsigned int T1 = vec_mergel(B0.m_reg, B2.m_reg);
+         __vector unsigned int T2 = vec_mergeh(B1.m_reg, B3.m_reg);
+         __vector unsigned int T3 = vec_mergel(B1.m_reg, B3.m_reg);
+
+         B0.m_reg = vec_mergeh(T0, T2);
+         B1.m_reg = vec_mergel(T0, T2);
+         B2.m_reg = vec_mergeh(T1, T3);
+         B3.m_reg = vec_mergel(T1, T3);
+         }
+
+   private:
+      /** Wrap a raw vector; lets the operators return intrinsic results */
+      SIMD_Altivec(__vector unsigned int input) { m_reg = input; }
+
+      __vector unsigned int m_reg;
+   };
+
+}
+
+#endif
+
+#endif
diff --git a/src/lib/utils/simd/simd_scalar/info.txt b/src/lib/utils/simd/simd_scalar/info.txt
new file mode 100644
index 000000000..26a9fbfee
--- /dev/null
+++ b/src/lib/utils/simd/simd_scalar/info.txt
@@ -0,0 +1,7 @@
+define SIMD_SCALAR 20131128
+
+load_on dep
+
+<header:internal>
+simd_scalar.h
+</header:internal>
diff --git a/src/lib/utils/simd/simd_scalar/simd_scalar.h b/src/lib/utils/simd/simd_scalar/simd_scalar.h
new file mode 100644
index 000000000..379e2d6a8
--- /dev/null
+++ b/src/lib/utils/simd/simd_scalar/simd_scalar.h
@@ -0,0 +1,215 @@
+/*
+* Scalar emulation of SIMD
+* (C) 2009,2013 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SIMD_SCALAR_H__
+#define BOTAN_SIMD_SCALAR_H__
+
+#include <botan/loadstor.h>
+#include <botan/bswap.h>
+
+namespace Botan {
+
+/**
+* Fake SIMD, using plain scalar operations
+* Often still faster than iterative on superscalar machines
+*/
+template<typename T, size_t N>
+class SIMD_Scalar
+   {
+   public:
+      /** The scalar emulation is always available */
+      static bool enabled() { return true; }
+      /** Number of elements held (the template parameter N) */
+      static size_t size() { return N; }
+      SIMD_Scalar() { /* uninitialized */ }
+      /** Copy B[0] .. B[N-1] into the vector */
+      SIMD_Scalar(const T B[N])
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] = B[i];
+         }
+      /** Set every element to B */
+      SIMD_Scalar(T B)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] = B;
+         }
+      /** Element i is Botan::load_le<T>(in, i) */
+      static SIMD_Scalar<T,N> load_le(const void* in)
+         {
+         SIMD_Scalar<T,N> out;
+         const byte* in_b = static_cast<const byte*>(in);
+
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] = Botan::load_le<T>(in_b, i);
+
+         return out;
+         }
+      /** Element i is Botan::load_be<T>(in, i) */
+      static SIMD_Scalar<T,N> load_be(const void* in)
+         {
+         SIMD_Scalar<T,N> out;
+         const byte* in_b = static_cast<const byte*>(in);
+
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] = Botan::load_be<T>(in_b, i);
+
+         return out;
+         }
+      /** Write element i with Botan::store_le at byte offset i*sizeof(T) */
+      void store_le(byte out[]) const
+         {
+         for(size_t i = 0; i != size(); ++i)
+            Botan::store_le(m_v[i], out + i*sizeof(T));
+         }
+      /** Write element i with Botan::store_be at byte offset i*sizeof(T) */
+      void store_be(byte out[]) const
+         {
+         for(size_t i = 0; i != size(); ++i)
+            Botan::store_be(m_v[i], out + i*sizeof(T));
+         }
+      /** Apply Botan::rotate_left(rot) to every element */
+      void rotate_left(size_t rot)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] = Botan::rotate_left(m_v[i], rot);
+         }
+      /** Apply Botan::rotate_right(rot) to every element */
+      void rotate_right(size_t rot)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] = Botan::rotate_right(m_v[i], rot);
+         }
+      /** Add other to this vector, element by element */
+      void operator+=(const SIMD_Scalar<T,N>& other)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] += other.m_v[i];
+         }
+      /** Subtract other from this vector, element by element */
+      void operator-=(const SIMD_Scalar<T,N>& other)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] -= other.m_v[i];
+         }
+      /** Element-wise sum, returned as a new value */
+      SIMD_Scalar<T,N> operator+(const SIMD_Scalar<T,N>& other) const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         out += other;
+         return out;
+         }
+      /** Element-wise difference, returned as a new value */
+      SIMD_Scalar<T,N> operator-(const SIMD_Scalar<T,N>& other) const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         out -= other;
+         return out;
+         }
+      /** XOR other into this vector */
+      void operator^=(const SIMD_Scalar<T,N>& other)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] ^= other.m_v[i];
+         }
+      /** Bitwise XOR, returned as a new value */
+      SIMD_Scalar<T,N> operator^(const SIMD_Scalar<T,N>& other) const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         out ^= other;
+         return out;
+         }
+      /** OR other into this vector */
+      void operator|=(const SIMD_Scalar<T,N>& other)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] |= other.m_v[i];
+         }
+      /** AND other into this vector */
+      void operator&=(const SIMD_Scalar<T,N>& other)
+         {
+         for(size_t i = 0; i != size(); ++i)
+            m_v[i] &= other.m_v[i];
+         }
+      /** Bitwise AND, returned as a new value; const like the other operators */
+      SIMD_Scalar<T,N> operator&(const SIMD_Scalar<T,N>& other) const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         out &= other;
+         return out;
+         }
+      /** Shift every element left by shift bits */
+      SIMD_Scalar<T,N> operator<<(size_t shift) const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] <<= shift;
+         return out;
+         }
+      /** Shift every element right by shift bits */
+      SIMD_Scalar<T,N> operator>>(size_t shift) const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] >>= shift;
+         return out;
+         }
+      /** Bitwise complement of every element */
+      SIMD_Scalar<T,N> operator~() const
+         {
+         SIMD_Scalar<T,N> out = *this;
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] = ~out.m_v[i];
+         return out;
+         }
+      /** Returns (~*this) & other, element by element; does not modify *this */
+      SIMD_Scalar<T,N> andc(const SIMD_Scalar<T,N>& other) const
+         {
+         SIMD_Scalar<T,N> out;
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] = (~m_v[i]) & other.m_v[i];
+         return out;
+         }
+      /** Apply reverse_bytes to every element */
+      SIMD_Scalar<T,N> bswap() const
+         {
+         SIMD_Scalar<T,N> out;
+         for(size_t i = 0; i != size(); ++i)
+            out.m_v[i] = reverse_bytes(m_v[i]);
+         return out;
+         }
+      /** Transpose, in place, the 4x4 matrix whose rows are B0..B3 (N == 4) */
+      static void transpose(SIMD_Scalar<T,N>& B0, SIMD_Scalar<T,N>& B1,
+                            SIMD_Scalar<T,N>& B2, SIMD_Scalar<T,N>& B3)
+         {
+         static_assert(N == 4, "4x4 transpose");
+         SIMD_Scalar<T,N> T0({B0.m_v[0], B1.m_v[0], B2.m_v[0], B3.m_v[0]});
+         SIMD_Scalar<T,N> T1({B0.m_v[1], B1.m_v[1], B2.m_v[1], B3.m_v[1]});
+         SIMD_Scalar<T,N> T2({B0.m_v[2], B1.m_v[2], B2.m_v[2], B3.m_v[2]});
+         SIMD_Scalar<T,N> T3({B0.m_v[3], B1.m_v[3], B2.m_v[3], B3.m_v[3]});
+
+         B0 = T0;
+         B1 = T1;
+         B2 = T2;
+         B3 = T3;
+         }
+
+   private:
+      /** Copy at most N values from B; extra entries are ignored, not written */
+      SIMD_Scalar(std::initializer_list<T> B)
+         {
+         size_t i = 0;
+         for(auto v = B.begin(); v != B.end() && i != N; ++v)
+            m_v[i++] = *v;
+         }
+
+      T m_v[N];
+   };
+
+}
+
+#endif
diff --git a/src/lib/utils/simd/simd_sse2/info.txt b/src/lib/utils/simd/simd_sse2/info.txt
new file mode 100644
index 000000000..bd9e430cb
--- /dev/null
+++ b/src/lib/utils/simd/simd_sse2/info.txt
@@ -0,0 +1,9 @@
+define SIMD_SSE2 20131128
+
+need_isa sse2
+
+load_on dep
+
+<header:internal>
+simd_sse2.h
+</header:internal>
diff --git a/src/lib/utils/simd/simd_sse2/simd_sse2.h b/src/lib/utils/simd/simd_sse2/simd_sse2.h
new file mode 100644
index 000000000..61989eb8e
--- /dev/null
+++ b/src/lib/utils/simd/simd_sse2/simd_sse2.h
@@ -0,0 +1,169 @@
+/*
+* Lightweight wrappers for SSE2 intrinsics for 32-bit operations
+* (C) 2009 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SIMD_SSE_H__
+#define BOTAN_SIMD_SSE_H__
+
+#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
+
+#include <botan/cpuid.h>
+#include <emmintrin.h>
+
+namespace Botan {
+
+/** Four 32-bit words in one SSE2 register (one of the SIMD_32 backends) */
+class SIMD_SSE2
+   {
+   public:
+      /** True when CPUID reports SSE2 support at run time */
+      static bool enabled() { return CPUID::has_sse2(); }
+      /** Unaligned load of B[0..3]; B[0] ends up in the lowest lane */
+      SIMD_SSE2(const u32bit B[4])
+         {
+         reg = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
+         }
+      /** B0 is the lowest lane, consistent with the array constructor */
+      SIMD_SSE2(u32bit B0, u32bit B1, u32bit B2, u32bit B3)
+         {
+         // _mm_set_epi32 takes (e3, e2, e1, e0), i.e. highest lane first
+         reg = _mm_set_epi32(B3, B2, B1, B0);
+         }
+      /** All four lanes are set to B */
+      SIMD_SSE2(u32bit B)
+         {
+         reg = _mm_set1_epi32(B);
+         }
+      /** Unaligned 16-byte load from in, bytes taken as found in memory */
+      static SIMD_SSE2 load_le(const void* in)
+         {
+         return _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
+         }
+      /** load_le followed by a byte swap of each word */
+      static SIMD_SSE2 load_be(const void* in)
+         {
+         return load_le(in).bswap();
+         }
+      /** Unaligned 16-byte store of the register to out */
+      void store_le(byte out[]) const
+         {
+         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), reg);
+         }
+      /** Byte-swap each word, then store as store_le does */
+      void store_be(byte out[]) const
+         {
+         bswap().store_le(out);
+         }
+      /** Rotate every word left by rot bits: (x << rot) | (x >> (32-rot)) */
+      void rotate_left(size_t rot)
+         {
+         reg = _mm_or_si128(_mm_slli_epi32(reg, static_cast<int>(rot)),
+                            _mm_srli_epi32(reg, static_cast<int>(32-rot)));
+         }
+      /** Rotate every word right by rot bits (left rotate by 32 - rot) */
+      void rotate_right(size_t rot)
+         {
+         rotate_left(32 - rot);
+         }
+      /** Add other to this vector, lane by lane */
+      void operator+=(const SIMD_SSE2& other)
+         {
+         reg = _mm_add_epi32(reg, other.reg);
+         }
+      /** Lane-wise sum, returned as a new value */
+      SIMD_SSE2 operator+(const SIMD_SSE2& other) const
+         {
+         return _mm_add_epi32(reg, other.reg);
+         }
+      /** Subtract other from this vector, lane by lane */
+      void operator-=(const SIMD_SSE2& other)
+         {
+         reg = _mm_sub_epi32(reg, other.reg);
+         }
+      /** Lane-wise difference, returned as a new value */
+      SIMD_SSE2 operator-(const SIMD_SSE2& other) const
+         {
+         return _mm_sub_epi32(reg, other.reg);
+         }
+      /** XOR other into this vector */
+      void operator^=(const SIMD_SSE2& other)
+         {
+         reg = _mm_xor_si128(reg, other.reg);
+         }
+      /** Bitwise XOR, returned as a new value */
+      SIMD_SSE2 operator^(const SIMD_SSE2& other) const
+         {
+         return _mm_xor_si128(reg, other.reg);
+         }
+      /** OR other into this vector */
+      void operator|=(const SIMD_SSE2& other)
+         {
+         reg = _mm_or_si128(reg, other.reg);
+         }
+      /** Bitwise AND, returned as a new value; const like the other operators */
+      SIMD_SSE2 operator&(const SIMD_SSE2& other) const
+         {
+         return _mm_and_si128(reg, other.reg);
+         }
+      /** AND other into this vector */
+      void operator&=(const SIMD_SSE2& other)
+         {
+         reg = _mm_and_si128(reg, other.reg);
+         }
+      /** Shift every word left by shift bits */
+      SIMD_SSE2 operator<<(size_t shift) const
+         {
+         return _mm_slli_epi32(reg, static_cast<int>(shift));
+         }
+      /** Logical right shift of every word by shift bits */
+      SIMD_SSE2 operator>>(size_t shift) const
+         {
+         return _mm_srli_epi32(reg, static_cast<int>(shift));
+         }
+      /** Bitwise complement, computed as XOR with an all-ones register */
+      SIMD_SSE2 operator~() const
+         {
+         return _mm_xor_si128(reg, _mm_set1_epi32(0xFFFFFFFF));
+         }
+      /** Returns (~*this) & other; does not modify *this */
+      SIMD_SSE2 andc(const SIMD_SSE2& other) const
+         {
+         return _mm_andnot_si128(reg, other.reg);
+         }
+      /** Reverse the bytes within each 32-bit word */
+      SIMD_SSE2 bswap() const
+         {
+         __m128i T = reg;
+         // Swap the 16-bit halves of every word, then the bytes of each half
+         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
+         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
+         return _mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8));
+         }
+      /** Transpose, in place, the 4x4 word matrix whose rows are B0..B3 */
+      static void transpose(SIMD_SSE2& B0, SIMD_SSE2& B1,
+                            SIMD_SSE2& B2, SIMD_SSE2& B3)
+         {
+         __m128i T0 = _mm_unpacklo_epi32(B0.reg, B1.reg);
+         __m128i T1 = _mm_unpacklo_epi32(B2.reg, B3.reg);
+         __m128i T2 = _mm_unpackhi_epi32(B0.reg, B1.reg);
+         __m128i T3 = _mm_unpackhi_epi32(B2.reg, B3.reg);
+         B0.reg = _mm_unpacklo_epi64(T0, T1);
+         B1.reg = _mm_unpackhi_epi64(T0, T1);
+         B2.reg = _mm_unpacklo_epi64(T2, T3);
+         B3.reg = _mm_unpackhi_epi64(T2, T3);
+         }
+
+   private:
+      /** Wrap a raw register; lets the operators return intrinsic results */
+      SIMD_SSE2(__m128i in) { reg = in; }
+      __m128i reg;
+   };
+
+}
+
+#endif
+
+#endif