Diffstat (limited to 'src')
-rw-r--r--   src/simd/simd_32.h                     2
-rw-r--r--   src/simd/simd_scalar/simd_scalar.h   217
2 files changed, 105 insertions, 114 deletions
diff --git a/src/simd/simd_32.h b/src/simd/simd_32.h
index 17cf5b91e..15f882fea 100644
--- a/src/simd/simd_32.h
+++ b/src/simd/simd_32.h
@@ -20,7 +20,7 @@
#elif defined(BOTAN_HAS_SIMD_SCALAR)
#include <botan/internal/simd_scalar.h>
- namespace Botan { typedef SIMD_4_Scalar<u32bit> SIMD_32; }
+ namespace Botan { typedef SIMD_Scalar<u32bit,4> SIMD_32; }
#else
#error "No SIMD module defined"
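For context, a minimal sketch (not part of this commit) of how code built on the SIMD_32 typedef keeps working after the rename: the scalar backend exposes the same load/rotate/store interface, and only the class name and lane-count template parameter change. The round_sketch function, the key word k, and the 16-byte block are purely illustrative; the include path assumes simd_32.h is reachable as <botan/internal/simd_32.h>, matching the internal includes shown above.

```cpp
// Hypothetical caller; unaffected by this patch since only the typedef
// target behind SIMD_32 was renamed from SIMD_4_Scalar<u32bit> to
// SIMD_Scalar<u32bit,4>.
#include <botan/internal/simd_32.h>

void round_sketch(Botan::byte block[16], Botan::u32bit k)
   {
   Botan::SIMD_32 x = Botan::SIMD_32::load_le(block); // four 32-bit lanes
   x ^= Botan::SIMD_32(k);                            // broadcast constructor
   x.rotate_left(7);                                  // per-lane rotate
   x.store_le(block);
   }
```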
diff --git a/src/simd/simd_scalar/simd_scalar.h b/src/simd/simd_scalar/simd_scalar.h
index cf1145794..55dd26d6f 100644
--- a/src/simd/simd_scalar/simd_scalar.h
+++ b/src/simd/simd_scalar/simd_scalar.h
@@ -17,197 +17,181 @@ namespace Botan {
* Fake SIMD, using plain scalar operations
* Often still faster than iterative on superscalar machines
*/
-template<typename T>
-class SIMD_4_Scalar
+template<typename T, size_t N>
+class SIMD_Scalar
{
public:
static bool enabled() { return true; }
- SIMD_4_Scalar(const T B[4])
- {
- R0 = B[0];
- R1 = B[1];
- R2 = B[2];
- R3 = B[3];
- }
+ static size_t size() { return N; }
+
+ SIMD_Scalar() { /* uninitialized */ }
- SIMD_4_Scalar(T B0, T B1, T B2, T B3)
+ SIMD_Scalar(const T B[N])
{
- R0 = B0;
- R1 = B1;
- R2 = B2;
- R3 = B3;
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] = B[i];
}
- SIMD_4_Scalar(T B)
+ SIMD_Scalar(T B)
{
- R0 = B;
- R1 = B;
- R2 = B;
- R3 = B;
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] = B;
}
- static SIMD_4_Scalar<T> load_le(const void* in)
+ static SIMD_Scalar<T,N> load_le(const void* in)
{
+ SIMD_Scalar<T,N> out;
const byte* in_b = static_cast<const byte*>(in);
- return SIMD_4_Scalar<T>(Botan::load_le<T>(in_b, 0),
- Botan::load_le<T>(in_b, 1),
- Botan::load_le<T>(in_b, 2),
- Botan::load_le<T>(in_b, 3));
+
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] = Botan::load_le<T>(in_b, i);
+
+ return out;
}
- static SIMD_4_Scalar<T> load_be(const void* in)
+ static SIMD_Scalar<T,N> load_be(const void* in)
{
+ SIMD_Scalar<T,N> out;
const byte* in_b = static_cast<const byte*>(in);
- return SIMD_4_Scalar<T>(Botan::load_be<T>(in_b, 0),
- Botan::load_be<T>(in_b, 1),
- Botan::load_be<T>(in_b, 2),
- Botan::load_be<T>(in_b, 3));
+
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] = Botan::load_be<T>(in_b, i);
+
+ return out;
}
void store_le(byte out[]) const
{
- Botan::store_le(out, R0, R1, R2, R3);
+ for(size_t i = 0; i != size(); ++i)
+ Botan::store_le(m_v[i], out + i*sizeof(T));
}
void store_be(byte out[]) const
{
- Botan::store_be(out, R0, R1, R2, R3);
+ for(size_t i = 0; i != size(); ++i)
+ Botan::store_be(m_v[i], out + i*sizeof(T));
}
void rotate_left(size_t rot)
{
- R0 = Botan::rotate_left(R0, rot);
- R1 = Botan::rotate_left(R1, rot);
- R2 = Botan::rotate_left(R2, rot);
- R3 = Botan::rotate_left(R3, rot);
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] = Botan::rotate_left(m_v[i], rot);
}
void rotate_right(size_t rot)
{
- R0 = Botan::rotate_right(R0, rot);
- R1 = Botan::rotate_right(R1, rot);
- R2 = Botan::rotate_right(R2, rot);
- R3 = Botan::rotate_right(R3, rot);
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] = Botan::rotate_right(m_v[i], rot);
}
- void operator+=(const SIMD_4_Scalar<T>& other)
+ void operator+=(const SIMD_Scalar<T,N>& other)
{
- R0 += other.R0;
- R1 += other.R1;
- R2 += other.R2;
- R3 += other.R3;
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] += other.m_v[i];
}
- SIMD_4_Scalar<T> operator+(const SIMD_4_Scalar<T>& other) const
+ void operator-=(const SIMD_Scalar<T,N>& other)
{
- return SIMD_4_Scalar<T>(R0 + other.R0,
- R1 + other.R1,
- R2 + other.R2,
- R3 + other.R3);
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] -= other.m_v[i];
}
- void operator-=(const SIMD_4_Scalar<T>& other)
+ SIMD_Scalar<T,N> operator+(const SIMD_Scalar<T,N>& other) const
{
- R0 -= other.R0;
- R1 -= other.R1;
- R2 -= other.R2;
- R3 -= other.R3;
+ SIMD_Scalar<T,N> out = *this;
+ out += other;
+ return out;
}
- SIMD_4_Scalar<T> operator-(const SIMD_4_Scalar<T>& other) const
+ SIMD_Scalar<T,N> operator-(const SIMD_Scalar<T,N>& other) const
{
- return SIMD_4_Scalar<T>(R0 - other.R0,
- R1 - other.R1,
- R2 - other.R2,
- R3 - other.R3);
+ SIMD_Scalar<T,N> out = *this;
+ out -= other;
+ return out;
}
- void operator^=(const SIMD_4_Scalar<T>& other)
+ void operator^=(const SIMD_Scalar<T,N>& other)
{
- R0 ^= other.R0;
- R1 ^= other.R1;
- R2 ^= other.R2;
- R3 ^= other.R3;
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] ^= other.m_v[i];
}
- SIMD_4_Scalar<T> operator^(const SIMD_4_Scalar<T>& other) const
+ SIMD_Scalar<T,N> operator^(const SIMD_Scalar<T,N>& other) const
{
- return SIMD_4_Scalar<T>(R0 ^ other.R0,
- R1 ^ other.R1,
- R2 ^ other.R2,
- R3 ^ other.R3);
+ SIMD_Scalar<T,N> out = *this;
+ out ^= other;
+ return out;
}
- void operator|=(const SIMD_4_Scalar<T>& other)
+ void operator|=(const SIMD_Scalar<T,N>& other)
{
- R0 |= other.R0;
- R1 |= other.R1;
- R2 |= other.R2;
- R3 |= other.R3;
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] |= other.m_v[i];
}
- SIMD_4_Scalar<T> operator&(const SIMD_4_Scalar<T>& other)
+ void operator&=(const SIMD_Scalar<T,N>& other)
{
- return SIMD_4_Scalar<T>(R0 & other.R0,
- R1 & other.R1,
- R2 & other.R2,
- R3 & other.R3);
+ for(size_t i = 0; i != size(); ++i)
+ m_v[i] &= other.m_v[i];
}
- void operator&=(const SIMD_4_Scalar<T>& other)
+ SIMD_Scalar<T,N> operator&(const SIMD_Scalar<T,N>& other)
{
- R0 &= other.R0;
- R1 &= other.R1;
- R2 &= other.R2;
- R3 &= other.R3;
+ SIMD_Scalar<T,N> out = *this;
+ out &= other;
+ return out;
}
- SIMD_4_Scalar<T> operator<<(size_t shift) const
+ SIMD_Scalar<T,N> operator<<(size_t shift) const
{
- return SIMD_4_Scalar<T>(R0 << shift,
- R1 << shift,
- R2 << shift,
- R3 << shift);
+ SIMD_Scalar<T,N> out = *this;
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] <<= shift;
+ return out;
}
- SIMD_4_Scalar<T> operator>>(size_t shift) const
+ SIMD_Scalar<T,N> operator>>(size_t shift) const
{
- return SIMD_4_Scalar<T>(R0 >> shift,
- R1 >> shift,
- R2 >> shift,
- R3 >> shift);
+ SIMD_Scalar<T,N> out = *this;
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] >>= shift;
+ return out;
}
- SIMD_4_Scalar<T> operator~() const
+ SIMD_Scalar<T,N> operator~() const
{
- return SIMD_4_Scalar<T>(~R0, ~R1, ~R2, ~R3);
+ SIMD_Scalar<T,N> out = *this;
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] = ~out.m_v[i];
+ return out;
}
// (~reg) & other
- SIMD_4_Scalar<T> andc(const SIMD_4_Scalar<T>& other)
+ SIMD_Scalar<T,N> andc(const SIMD_Scalar<T,N>& other)
{
- return SIMD_4_Scalar<T>(~R0 & other.R0,
- ~R1 & other.R1,
- ~R2 & other.R2,
- ~R3 & other.R3);
+ SIMD_Scalar<T,N> out;
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] = (~m_v[i]) & other.m_v[i];
+ return out;
}
- SIMD_4_Scalar<T> bswap() const
+ SIMD_Scalar<T,N> bswap() const
{
- return SIMD_4_Scalar<T>(reverse_bytes(R0),
- reverse_bytes(R1),
- reverse_bytes(R2),
- reverse_bytes(R3));
+ SIMD_Scalar<T,N> out;
+ for(size_t i = 0; i != size(); ++i)
+ out.m_v[i] = reverse_bytes(m_v[i]);
+ return out;
}
- static void transpose(SIMD_4_Scalar<T>& B0, SIMD_4_Scalar<T>& B1,
- SIMD_4_Scalar<T>& B2, SIMD_4_Scalar<T>& B3)
+ static void transpose(SIMD_Scalar<T,N>& B0, SIMD_Scalar<T,N>& B1,
+ SIMD_Scalar<T,N>& B2, SIMD_Scalar<T,N>& B3)
{
- SIMD_4_Scalar<T> T0(B0.R0, B1.R0, B2.R0, B3.R0);
- SIMD_4_Scalar<T> T1(B0.R1, B1.R1, B2.R1, B3.R1);
- SIMD_4_Scalar<T> T2(B0.R2, B1.R2, B2.R2, B3.R2);
- SIMD_4_Scalar<T> T3(B0.R3, B1.R3, B2.R3, B3.R3);
+ static_assert(N == 4, "4x4 transpose");
+ SIMD_Scalar<T,N> T0({B0.m_v[0], B1.m_v[0], B2.m_v[0], B3.m_v[0]});
+ SIMD_Scalar<T,N> T1({B0.m_v[1], B1.m_v[1], B2.m_v[1], B3.m_v[1]});
+ SIMD_Scalar<T,N> T2({B0.m_v[2], B1.m_v[2], B2.m_v[2], B3.m_v[2]});
+ SIMD_Scalar<T,N> T3({B0.m_v[3], B1.m_v[3], B2.m_v[3], B3.m_v[3]});
B0 = T0;
B1 = T1;
@@ -216,7 +200,14 @@ class SIMD_4_Scalar
}
private:
- T R0, R1, R2, R3;
+ SIMD_Scalar(std::initializer_list<T> B)
+ {
+ size_t i = 0;
+ for(auto v = B.begin(); v != B.end(); ++v)
+ m_v[i++] = *v;
+ }
+
+ T m_v[N];
};
}
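As a quick sanity check of the generalized template, here is a hedged usage sketch (not from the patch) that instantiates SIMD_Scalar<u32bit,4> directly, exercises the element-wise operators, and calls the 4x4 transpose, which stays restricted to N == 4 by the static_assert above. The function name and the 64-byte buffers are illustrative only.

```cpp
// Illustrative only; assumes <botan/internal/simd_scalar.h> as patched above.
#include <botan/internal/simd_scalar.h>

void transpose_sketch(const Botan::byte in[64], Botan::byte out[64])
   {
   typedef Botan::SIMD_Scalar<Botan::u32bit, 4> V;

   // Load four rows of four big-endian 32-bit words each
   V r0 = V::load_be(in);
   V r1 = V::load_be(in + 16);
   V r2 = V::load_be(in + 32);
   V r3 = V::load_be(in + 48);

   r0 ^= r1;                     // element-wise ops work for any lane count N
   V::transpose(r0, r1, r2, r3); // rows become columns (N == 4 only)

   r0.store_be(out);
   r1.store_be(out + 16);
   r2.store_be(out + 32);
   r3.store_be(out + 48);
   }
```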