aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/utils
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2019-09-06 06:59:29 -0400
committerJack Lloyd <[email protected]>2019-09-06 06:59:29 -0400
commitcc6b42d71b0cdc2ebfa2b33b5039991f2d79f606 (patch)
tree5cbf2e2735856a02089db02cc2ff30257906543e /src/lib/utils
parent81896920ca384e3d12d9261e2fd1343a5b825441 (diff)
parent22a9e8de4762cf53264d898faea63a25dd72325f (diff)
Merge GH #2097 Remove scalar SIMD_4x32 implementation
Diffstat (limited to 'src/lib/utils')
-rw-r--r--src/lib/utils/simd/info.txt20
-rw-r--r--src/lib/utils/simd/simd_32.h174
2 files changed, 34 insertions, 160 deletions
diff --git a/src/lib/utils/simd/info.txt b/src/lib/utils/simd/info.txt
index 7784902a6..4a7044afc 100644
--- a/src/lib/utils/simd/info.txt
+++ b/src/lib/utils/simd/info.txt
@@ -5,3 +5,23 @@ SIMD_32 -> 20131128
<header:internal>
simd_32.h
</header:internal>
+
+<isa>
+x86_32:sse2
+x86_64:sse2
+x32:sse2
+arm32:neon
+arm64:neon
+ppc32:altivec
+ppc64:altivec
+</isa>
+
+<arch>
+x86_32
+x86_64
+x32
+arm32
+arm64
+ppc32
+ppc64
+</arch>
diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h
index de02e84f1..4c1599842 100644
--- a/src/lib/utils/simd/simd_32.h
+++ b/src/lib/utils/simd/simd_32.h
@@ -1,6 +1,6 @@
/*
* Lightweight wrappers for SIMD operations
-* (C) 2009,2011,2016,2017 Jack Lloyd
+* (C) 2009,2011,2016,2017,2019 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -9,26 +9,26 @@
#define BOTAN_SIMD_32_H_
#include <botan/types.h>
-#include <botan/loadstor.h>
-#include <botan/bswap.h>
-#include <botan/cpuid.h>
#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
#include <emmintrin.h>
#define BOTAN_SIMD_USE_SSE2
#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
+ #include <botan/bswap.h>
+ #include <botan/loadstor.h>
#include <altivec.h>
#undef vector
#undef bool
#define BOTAN_SIMD_USE_ALTIVEC
#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
+ #include <botan/cpuid.h>
#include <arm_neon.h>
#define BOTAN_SIMD_USE_NEON
#else
- #include <botan/rotate.h>
+ #error "No SIMD instruction set enabled"
#endif
namespace Botan {
@@ -39,8 +39,6 @@ namespace Botan {
typedef __vector unsigned int native_simd_type;
#elif defined(BOTAN_SIMD_USE_NEON)
typedef uint32x4_t native_simd_type;
-#else
- typedef struct { uint32_t val[4]; } native_simd_type;
#endif
/**
@@ -74,11 +72,6 @@ class SIMD_4x32 final
m_simd = vec_splat_u32(0);
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = vdupq_n_u32(0);
-#else
- m_simd.val[0] = 0;
- m_simd.val[1] = 0;
- m_simd.val[2] = 0;
- m_simd.val[3] = 0;
#endif
}
@@ -94,11 +87,6 @@ class SIMD_4x32 final
m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = vld1q_u32(B);
-#else
- m_simd.val[0] = B[0];
- m_simd.val[1] = B[1];
- m_simd.val[2] = B[2];
- m_simd.val[3] = B[3];
#endif
}
@@ -116,11 +104,6 @@ class SIMD_4x32 final
// Better way to do this?
const uint32_t B[4] = { B0, B1, B2, B3 };
m_simd = vld1q_u32(B);
-#else
- m_simd.val[0] = B0;
- m_simd.val[1] = B1;
- m_simd.val[2] = B2;
- m_simd.val[3] = B3;
#endif
}
@@ -131,7 +114,7 @@ class SIMD_4x32 final
{
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_set1_epi32(B));
-#elif defined(BOTAN_SIMD_USE_ARM)
+#elif defined(BOTAN_SIMD_USE_NEON)
return SIMD_4x32(vdupq_n_u32(B));
#else
return SIMD_4x32(B, B, B, B);
@@ -145,8 +128,8 @@ class SIMD_4x32 final
{
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_set1_epi8(B));
-#elif defined(BOTAN_SIMD_USE_ARM)
- return SIMD_4x32(vdupq_n_u8(B));
+#elif defined(BOTAN_SIMD_USE_NEON)
+ return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
#else
const uint32_t B4 = make_uint32(B, B, B, B);
return SIMD_4x32(B4, B4, B4, B4);
@@ -161,28 +144,13 @@ class SIMD_4x32 final
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
-
uint32_t R[4];
Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
return SIMD_4x32(R);
-
#elif defined(BOTAN_SIMD_USE_NEON)
-
SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
-
-#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- return l.bswap();
-#elif defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- return l;
-#else
return CPUID::is_big_endian() ? l.bswap() : l;
#endif
-
-#else
- SIMD_4x32 out;
- Botan::load_le(out.m_simd.val, static_cast<const uint8_t*>(in), 4);
- return out;
-#endif
}
/**
@@ -199,22 +167,9 @@ class SIMD_4x32 final
return SIMD_4x32(R);
#elif defined(BOTAN_SIMD_USE_NEON)
-
SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
-
-#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- return l.bswap();
-#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- return l;
-#else
return CPUID::is_little_endian() ? l.bswap() : l;
#endif
-
-#else
- SIMD_4x32 out;
- Botan::load_be(out.m_simd.val, static_cast<const uint8_t*>(in), 4);
- return out;
-#endif
}
void store_le(uint32_t out[]) const
@@ -241,12 +196,6 @@ class SIMD_4x32 final
Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
#elif defined(BOTAN_SIMD_USE_NEON)
-
-#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
-#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
-#else
if(CPUID::is_little_endian())
{
vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
@@ -256,10 +205,6 @@ class SIMD_4x32 final
vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
}
#endif
-
-#else
- Botan::store_le(out, m_simd.val[0], m_simd.val[1], m_simd.val[2], m_simd.val[3]);
-#endif
}
/**
@@ -281,12 +226,6 @@ class SIMD_4x32 final
Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
#elif defined(BOTAN_SIMD_USE_NEON)
-
-#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- vst1q_u8(out, vreinterpretq_u8_u32(m_simd);
-#elif defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
-#else
if(CPUID::is_little_endian())
{
vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
@@ -296,10 +235,6 @@ class SIMD_4x32 final
vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
}
#endif
-
-#else
- Botan::store_be(out, m_simd.val[0], m_simd.val[1], m_simd.val[2], m_simd.val[3]);
-#endif
}
/*
@@ -336,12 +271,7 @@ class SIMD_4x32 final
#elif defined(BOTAN_SIMD_USE_NEON)
- #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
-
- return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
- vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
-
- #else
+#if defined(BOTAN_TARGET_ARCH_IS_ARM64)
BOTAN_IF_CONSTEXPR(ROT == 8)
{
@@ -353,19 +283,9 @@ class SIMD_4x32 final
{
return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
}
- else
- {
- return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
- vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
- }
-
- #endif
-
-#else
- return SIMD_4x32(Botan::rotl<ROT>(m_simd.val[0]),
- Botan::rotl<ROT>(m_simd.val[1]),
- Botan::rotl<ROT>(m_simd.val[2]),
- Botan::rotl<ROT>(m_simd.val[3]));
+#endif
+ return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
+ vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
#endif
}
@@ -436,11 +356,6 @@ class SIMD_4x32 final
m_simd = vec_add(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = vaddq_u32(m_simd, other.m_simd);
-#else
- m_simd.val[0] += other.m_simd.val[0];
- m_simd.val[1] += other.m_simd.val[1];
- m_simd.val[2] += other.m_simd.val[2];
- m_simd.val[3] += other.m_simd.val[3];
#endif
}
@@ -452,11 +367,6 @@ class SIMD_4x32 final
m_simd = vec_sub(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = vsubq_u32(m_simd, other.m_simd);
-#else
- m_simd.val[0] -= other.m_simd.val[0];
- m_simd.val[1] -= other.m_simd.val[1];
- m_simd.val[2] -= other.m_simd.val[2];
- m_simd.val[3] -= other.m_simd.val[3];
#endif
}
@@ -469,11 +379,6 @@ class SIMD_4x32 final
m_simd = vec_xor(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = veorq_u32(m_simd, other.m_simd);
-#else
- m_simd.val[0] ^= other.m_simd.val[0];
- m_simd.val[1] ^= other.m_simd.val[1];
- m_simd.val[2] ^= other.m_simd.val[2];
- m_simd.val[3] ^= other.m_simd.val[3];
#endif
}
@@ -485,11 +390,6 @@ class SIMD_4x32 final
m_simd = vec_or(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = vorrq_u32(m_simd, other.m_simd);
-#else
- m_simd.val[0] |= other.m_simd.val[0];
- m_simd.val[1] |= other.m_simd.val[1];
- m_simd.val[2] |= other.m_simd.val[2];
- m_simd.val[3] |= other.m_simd.val[3];
#endif
}
@@ -501,11 +401,6 @@ class SIMD_4x32 final
m_simd = vec_and(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
m_simd = vandq_u32(m_simd, other.m_simd);
-#else
- m_simd.val[0] &= other.m_simd.val[0];
- m_simd.val[1] &= other.m_simd.val[1];
- m_simd.val[2] &= other.m_simd.val[2];
- m_simd.val[3] &= other.m_simd.val[3];
#endif
}
@@ -521,11 +416,6 @@ class SIMD_4x32 final
return SIMD_4x32(vec_sl(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
-#else
- return SIMD_4x32(m_simd.val[0] << SHIFT,
- m_simd.val[1] << SHIFT,
- m_simd.val[2] << SHIFT,
- m_simd.val[3] << SHIFT);
#endif
}
@@ -540,10 +430,6 @@ class SIMD_4x32 final
return SIMD_4x32(vec_sr(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
-#else
- return SIMD_4x32(m_simd.val[0] >> SHIFT, m_simd.val[1] >> SHIFT,
- m_simd.val[2] >> SHIFT, m_simd.val[3] >> SHIFT);
-
#endif
}
@@ -555,8 +441,6 @@ class SIMD_4x32 final
return SIMD_4x32(vec_nor(m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
return SIMD_4x32(vmvnq_u32(m_simd));
-#else
- return SIMD_4x32(~m_simd.val[0], ~m_simd.val[1], ~m_simd.val[2], ~m_simd.val[3]);
#endif
}
@@ -574,11 +458,6 @@ class SIMD_4x32 final
#elif defined(BOTAN_SIMD_USE_NEON)
// NEON is also a & ~b
return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
-#else
- return SIMD_4x32((~m_simd.val[0]) & other.m_simd.val[0],
- (~m_simd.val[1]) & other.m_simd.val[1],
- (~m_simd.val[2]) & other.m_simd.val[2],
- (~m_simd.val[3]) & other.m_simd.val[3]);
#endif
}
@@ -606,15 +485,7 @@ class SIMD_4x32 final
return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
#elif defined(BOTAN_SIMD_USE_NEON)
-
return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
-
-#else
- // scalar
- return SIMD_4x32(reverse_bytes(m_simd.val[0]),
- reverse_bytes(m_simd.val[1]),
- reverse_bytes(m_simd.val[2]),
- reverse_bytes(m_simd.val[3]));
#endif
}
@@ -644,10 +515,8 @@ class SIMD_4x32 final
B1.m_simd = vec_mergel(T0, T1);
B2.m_simd = vec_mergeh(T2, T3);
B3.m_simd = vec_mergel(T2, T3);
-#elif defined(BOTAN_SIMD_USE_NEON)
-
-#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
+#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
@@ -658,32 +527,17 @@ class SIMD_4x32 final
B2.m_simd = O1.val[0];
B3.m_simd = O1.val[1];
-#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
+#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
-
const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
B0.m_simd = vzip1q_u32(T0, T1);
B1.m_simd = vzip2q_u32(T0, T1);
-
B2.m_simd = vzip1q_u32(T2, T3);
B3.m_simd = vzip2q_u32(T2, T3);
#endif
-
-#else
- // scalar
- SIMD_4x32 T0(B0.m_simd.val[0], B1.m_simd.val[0], B2.m_simd.val[0], B3.m_simd.val[0]);
- SIMD_4x32 T1(B0.m_simd.val[1], B1.m_simd.val[1], B2.m_simd.val[1], B3.m_simd.val[1]);
- SIMD_4x32 T2(B0.m_simd.val[2], B1.m_simd.val[2], B2.m_simd.val[2], B3.m_simd.val[2]);
- SIMD_4x32 T3(B0.m_simd.val[3], B1.m_simd.val[3], B2.m_simd.val[3], B3.m_simd.val[3]);
-
- B0 = T0;
- B1 = T1;
- B2 = T2;
- B3 = T3;
-#endif
}
native_simd_type raw() const { return m_simd; }