diff options
author | lloyd <[email protected]> | 2013-12-21 00:42:11 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2013-12-21 00:42:11 +0000 |
commit | 95751e32df0fc5a90c03f1caf9ea1962e501797d (patch) | |
tree | 998a7d47c866bb6f35009375e60c0fbc47fdd7ea | |
parent | 9d6c7524f5a6421530adee17d7843cb358180779 (diff) |
Make Simd_Scalar of generic size
-rw-r--r-- | src/simd/simd_32.h | 2 |
-rw-r--r-- | src/simd/simd_scalar/simd_scalar.h | 217 |
2 files changed, 105 insertions, 114 deletions
diff --git a/src/simd/simd_32.h b/src/simd/simd_32.h index 17cf5b91e..15f882fea 100644 --- a/src/simd/simd_32.h +++ b/src/simd/simd_32.h @@ -20,7 +20,7 @@ #elif defined(BOTAN_HAS_SIMD_SCALAR) #include <botan/internal/simd_scalar.h> - namespace Botan { typedef SIMD_4_Scalar<u32bit> SIMD_32; } + namespace Botan { typedef SIMD_Scalar<u32bit,4> SIMD_32; } #else #error "No SIMD module defined" diff --git a/src/simd/simd_scalar/simd_scalar.h b/src/simd/simd_scalar/simd_scalar.h index cf1145794..55dd26d6f 100644 --- a/src/simd/simd_scalar/simd_scalar.h +++ b/src/simd/simd_scalar/simd_scalar.h @@ -17,197 +17,181 @@ namespace Botan { * Fake SIMD, using plain scalar operations * Often still faster than iterative on superscalar machines */ -template<typename T> -class SIMD_4_Scalar +template<typename T, size_t N> +class SIMD_Scalar { public: static bool enabled() { return true; } - SIMD_4_Scalar(const T B[4]) - { - R0 = B[0]; - R1 = B[1]; - R2 = B[2]; - R3 = B[3]; - } + static size_t size() { return N; } + + SIMD_Scalar() { /* uninitialized */ } - SIMD_4_Scalar(T B0, T B1, T B2, T B3) + SIMD_Scalar(const T B[N]) { - R0 = B0; - R1 = B1; - R2 = B2; - R3 = B3; + for(size_t i = 0; i != size(); ++i) + m_v[i] = B[i]; } - SIMD_4_Scalar(T B) + SIMD_Scalar(T B) { - R0 = B; - R1 = B; - R2 = B; - R3 = B; + for(size_t i = 0; i != size(); ++i) + m_v[i] = B; } - static SIMD_4_Scalar<T> load_le(const void* in) + static SIMD_Scalar<T,N> load_le(const void* in) { + SIMD_Scalar<T,N> out; const byte* in_b = static_cast<const byte*>(in); - return SIMD_4_Scalar<T>(Botan::load_le<T>(in_b, 0), - Botan::load_le<T>(in_b, 1), - Botan::load_le<T>(in_b, 2), - Botan::load_le<T>(in_b, 3)); + + for(size_t i = 0; i != size(); ++i) + out.m_v[i] = Botan::load_le<T>(in_b, i); + + return out; } - static SIMD_4_Scalar<T> load_be(const void* in) + static SIMD_Scalar<T,N> load_be(const void* in) { + SIMD_Scalar<T,N> out; const byte* in_b = static_cast<const byte*>(in); - return 
SIMD_4_Scalar<T>(Botan::load_be<T>(in_b, 0), - Botan::load_be<T>(in_b, 1), - Botan::load_be<T>(in_b, 2), - Botan::load_be<T>(in_b, 3)); + + for(size_t i = 0; i != size(); ++i) + out.m_v[i] = Botan::load_be<T>(in_b, i); + + return out; } void store_le(byte out[]) const { - Botan::store_le(out, R0, R1, R2, R3); + for(size_t i = 0; i != size(); ++i) + Botan::store_le(m_v[i], out + i*sizeof(T)); } void store_be(byte out[]) const { - Botan::store_be(out, R0, R1, R2, R3); + for(size_t i = 0; i != size(); ++i) + Botan::store_be(m_v[i], out + i*sizeof(T)); } void rotate_left(size_t rot) { - R0 = Botan::rotate_left(R0, rot); - R1 = Botan::rotate_left(R1, rot); - R2 = Botan::rotate_left(R2, rot); - R3 = Botan::rotate_left(R3, rot); + for(size_t i = 0; i != size(); ++i) + m_v[i] = Botan::rotate_left(m_v[i], rot); } void rotate_right(size_t rot) { - R0 = Botan::rotate_right(R0, rot); - R1 = Botan::rotate_right(R1, rot); - R2 = Botan::rotate_right(R2, rot); - R3 = Botan::rotate_right(R3, rot); + for(size_t i = 0; i != size(); ++i) + m_v[i] = Botan::rotate_right(m_v[i], rot); } - void operator+=(const SIMD_4_Scalar<T>& other) + void operator+=(const SIMD_Scalar<T,N>& other) { - R0 += other.R0; - R1 += other.R1; - R2 += other.R2; - R3 += other.R3; + for(size_t i = 0; i != size(); ++i) + m_v[i] += other.m_v[i]; } - SIMD_4_Scalar<T> operator+(const SIMD_4_Scalar<T>& other) const + void operator-=(const SIMD_Scalar<T,N>& other) { - return SIMD_4_Scalar<T>(R0 + other.R0, - R1 + other.R1, - R2 + other.R2, - R3 + other.R3); + for(size_t i = 0; i != size(); ++i) + m_v[i] -= other.m_v[i]; } - void operator-=(const SIMD_4_Scalar<T>& other) + SIMD_Scalar<T,N> operator+(const SIMD_Scalar<T,N>& other) const { - R0 -= other.R0; - R1 -= other.R1; - R2 -= other.R2; - R3 -= other.R3; + SIMD_Scalar<T,N> out = *this; + out += other; + return out; } - SIMD_4_Scalar<T> operator-(const SIMD_4_Scalar<T>& other) const + SIMD_Scalar<T,N> operator-(const SIMD_Scalar<T,N>& other) const { - return 
SIMD_4_Scalar<T>(R0 - other.R0, - R1 - other.R1, - R2 - other.R2, - R3 - other.R3); + SIMD_Scalar<T,N> out = *this; + out -= other; + return out; } - void operator^=(const SIMD_4_Scalar<T>& other) + void operator^=(const SIMD_Scalar<T,N>& other) { - R0 ^= other.R0; - R1 ^= other.R1; - R2 ^= other.R2; - R3 ^= other.R3; + for(size_t i = 0; i != size(); ++i) + m_v[i] ^= other.m_v[i]; } - SIMD_4_Scalar<T> operator^(const SIMD_4_Scalar<T>& other) const + SIMD_Scalar<T,N> operator^(const SIMD_Scalar<T,N>& other) const { - return SIMD_4_Scalar<T>(R0 ^ other.R0, - R1 ^ other.R1, - R2 ^ other.R2, - R3 ^ other.R3); + SIMD_Scalar<T,N> out = *this; + out ^= other; + return out; } - void operator|=(const SIMD_4_Scalar<T>& other) + void operator|=(const SIMD_Scalar<T,N>& other) { - R0 |= other.R0; - R1 |= other.R1; - R2 |= other.R2; - R3 |= other.R3; + for(size_t i = 0; i != size(); ++i) + m_v[i] |= other.m_v[i]; } - SIMD_4_Scalar<T> operator&(const SIMD_4_Scalar<T>& other) + void operator&=(const SIMD_Scalar<T,N>& other) { - return SIMD_4_Scalar<T>(R0 & other.R0, - R1 & other.R1, - R2 & other.R2, - R3 & other.R3); + for(size_t i = 0; i != size(); ++i) + m_v[i] &= other.m_v[i]; } - void operator&=(const SIMD_4_Scalar<T>& other) + SIMD_Scalar<T,N> operator&(const SIMD_Scalar<T,N>& other) { - R0 &= other.R0; - R1 &= other.R1; - R2 &= other.R2; - R3 &= other.R3; + SIMD_Scalar<T,N> out = *this; + out &= other; + return out; } - SIMD_4_Scalar<T> operator<<(size_t shift) const + SIMD_Scalar<T,N> operator<<(size_t shift) const { - return SIMD_4_Scalar<T>(R0 << shift, - R1 << shift, - R2 << shift, - R3 << shift); + SIMD_Scalar<T,N> out = *this; + for(size_t i = 0; i != size(); ++i) + out.m_v[i] <<= shift; + return out; } - SIMD_4_Scalar<T> operator>>(size_t shift) const + SIMD_Scalar<T,N> operator>>(size_t shift) const { - return SIMD_4_Scalar<T>(R0 >> shift, - R1 >> shift, - R2 >> shift, - R3 >> shift); + SIMD_Scalar<T,N> out = *this; + for(size_t i = 0; i != size(); ++i) + out.m_v[i] 
>>= shift; + return out; } - SIMD_4_Scalar<T> operator~() const + SIMD_Scalar<T,N> operator~() const { - return SIMD_4_Scalar<T>(~R0, ~R1, ~R2, ~R3); + SIMD_Scalar<T,N> out = *this; + for(size_t i = 0; i != size(); ++i) + out.m_v[i] = ~out.m_v[i]; + return out; } // (~reg) & other - SIMD_4_Scalar<T> andc(const SIMD_4_Scalar<T>& other) + SIMD_Scalar<T,N> andc(const SIMD_Scalar<T,N>& other) { - return SIMD_4_Scalar<T>(~R0 & other.R0, - ~R1 & other.R1, - ~R2 & other.R2, - ~R3 & other.R3); + SIMD_Scalar<T,N> out; + for(size_t i = 0; i != size(); ++i) + out.m_v[i] = (~m_v[i]) & other.m_v[i]; + return out; } - SIMD_4_Scalar<T> bswap() const + SIMD_Scalar<T,N> bswap() const { - return SIMD_4_Scalar<T>(reverse_bytes(R0), - reverse_bytes(R1), - reverse_bytes(R2), - reverse_bytes(R3)); + SIMD_Scalar<T,N> out; + for(size_t i = 0; i != size(); ++i) + out.m_v[i] = reverse_bytes(m_v[i]); + return out; } - static void transpose(SIMD_4_Scalar<T>& B0, SIMD_4_Scalar<T>& B1, - SIMD_4_Scalar<T>& B2, SIMD_4_Scalar<T>& B3) + static void transpose(SIMD_Scalar<T,N>& B0, SIMD_Scalar<T,N>& B1, + SIMD_Scalar<T,N>& B2, SIMD_Scalar<T,N>& B3) { - SIMD_4_Scalar<T> T0(B0.R0, B1.R0, B2.R0, B3.R0); - SIMD_4_Scalar<T> T1(B0.R1, B1.R1, B2.R1, B3.R1); - SIMD_4_Scalar<T> T2(B0.R2, B1.R2, B2.R2, B3.R2); - SIMD_4_Scalar<T> T3(B0.R3, B1.R3, B2.R3, B3.R3); + static_assert(N == 4, "4x4 transpose"); + SIMD_Scalar<T,N> T0({B0.m_v[0], B1.m_v[0], B2.m_v[0], B3.m_v[0]}); + SIMD_Scalar<T,N> T1({B0.m_v[1], B1.m_v[1], B2.m_v[1], B3.m_v[1]}); + SIMD_Scalar<T,N> T2({B0.m_v[2], B1.m_v[2], B2.m_v[2], B3.m_v[2]}); + SIMD_Scalar<T,N> T3({B0.m_v[3], B1.m_v[3], B2.m_v[3], B3.m_v[3]}); B0 = T0; B1 = T1; @@ -216,7 +200,14 @@ class SIMD_4_Scalar } private: - T R0, R1, R2, R3; + SIMD_Scalar(std::initializer_list<T> B) + { + size_t i = 0; + for(auto v = B.begin(); v != B.end(); ++v) + m_v[i++] = *v; + } + + T m_v[N]; }; } |