aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
author Jack Lloyd <[email protected]> 2019-09-07 10:37:51 -0400
committer Jack Lloyd <[email protected]> 2019-09-07 10:37:51 -0400
commit a2845235b3721526da2b0a949fe6053e1320eec3 (patch)
tree 90c12ea63e8bdcfc263fe1e9ea681986e53f2926 /src/lib
parent 0c40885129a3bc0a683636bba0c5ff47575735d7 (diff)
Add support for vector permute AES using AltiVec
Slower than T-tables on the machines I've tried, but constant time.
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/block/aes/aes_vperm/aes_vperm.cpp 22
-rw-r--r-- src/lib/block/aes/aes_vperm/info.txt 4
-rw-r--r-- src/lib/utils/cpuid/cpuid.h 2
3 files changed, 28 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes_vperm/aes_vperm.cpp b/src/lib/block/aes/aes_vperm/aes_vperm.cpp
index 10e1e5c26..811dc10b5 100644
--- a/src/lib/block/aes/aes_vperm/aes_vperm.cpp
+++ b/src/lib/block/aes/aes_vperm/aes_vperm.cpp
@@ -41,6 +41,15 @@ inline SIMD_4x32 shuffle(SIMD_4x32 a, SIMD_4x32 b)
return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(tbl, idx)));
#endif
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+ __vector unsigned char bv = (__vector unsigned char)b.raw();
+
+ const auto high_bit = vec_sl(vec_sr(bv, vec_splat_u8(7)), vec_splat_u8(4));
+ bv = vec_and(bv, vec_splat_u8(0x0F));
+ bv = vec_add(bv, high_bit);
+
+ const __vector unsigned int zero = vec_splat_u32(0);
+ return SIMD_4x32(vec_perm(a.raw(), zero, bv));
#else
#error "No shuffle implementation available"
#endif
@@ -53,6 +62,16 @@ inline SIMD_4x32 shift_elems_left(SIMD_4x32 x)
return SIMD_4x32(_mm_slli_si128(x.raw(), 4*I));
#elif defined(BOTAN_SIMD_USE_NEON)
return SIMD_4x32(vreinterpretq_u32_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_u32(x.raw()), 16 - 4*I)));
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+ const __vector unsigned int zero = vec_splat_u32(0);
+
+ const __vector unsigned char shuf[3] = {
+ { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
+ { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
+ { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
+ };
+
+ return SIMD_4x32(vec_perm(x.raw(), zero, shuf[I-1]));
#else
#error "No shift_elems_left implementation available"
#endif
@@ -64,6 +83,9 @@ inline SIMD_4x32 alignr8(SIMD_4x32 a, SIMD_4x32 b)
return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 8));
#elif defined(BOTAN_SIMD_USE_NEON)
return SIMD_4x32(vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(b.raw()), vreinterpretq_u8_u32(a.raw()), 8)));
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+ const __vector unsigned char mask = {8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23};
+ return SIMD_4x32(vec_perm(b.raw(), a.raw(), mask));
#else
#error "No alignr8 implementation available"
#endif
diff --git a/src/lib/block/aes/aes_vperm/info.txt b/src/lib/block/aes/aes_vperm/info.txt
index f771ca2c3..b92cc21b3 100644
--- a/src/lib/block/aes/aes_vperm/info.txt
+++ b/src/lib/block/aes/aes_vperm/info.txt
@@ -9,6 +9,8 @@ x86_32:ssse3
x86_64:ssse3
arm32:neon
arm64:neon
+ppc32:altivec
+ppc64:altivec
</isa>
<arch>
@@ -16,6 +18,8 @@ x86_32
x86_64
arm32
arm64
+ppc32
+ppc64
</arch>
<requires>
diff --git a/src/lib/utils/cpuid/cpuid.h b/src/lib/utils/cpuid/cpuid.h
index d998d5364..3a8f54d6b 100644
--- a/src/lib/utils/cpuid/cpuid.h
+++ b/src/lib/utils/cpuid/cpuid.h
@@ -325,6 +325,8 @@ class BOTAN_PUBLIC_API(2,1) CPUID final
return has_ssse3();
#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
return has_neon();
+#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+ return has_altivec();
#else
return false;
#endif