Add support for vector permute AES using AltiVec

This is slower than the T-table implementation on the machines I've tried, but it runs in constant time.
author: Jack Lloyd <[email protected]> 2019-09-07 10:37:51 -0400
committer: Jack Lloyd <[email protected]> 2019-09-07 10:37:51 -0400
commit: a2845235b3721526da2b0a949fe6053e1320eec3 (patch)
tree: 90c12ea63e8bdcfc263fe1e9ea681986e53f2926 /src/lib
parent: 0c40885129a3bc0a683636bba0c5ff47575735d7 (diff)
3 files changed, 28 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes_vperm/aes_vperm.cpp b/src/lib/block/aes/aes_vperm/aes_vperm.cpp
index 10e1e5c26..811dc10b5 100644
--- a/src/lib/block/aes/aes_vperm/aes_vperm.cpp
+++ b/src/lib/block/aes/aes_vperm/aes_vperm.cpp
@@ -41,6 +41,15 @@ inline SIMD_4x32 shuffle(SIMD_4x32 a, SIMD_4x32 b)
    return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(tbl, idx)));
 #endif
 
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+   __vector unsigned char bv = (__vector unsigned char)b.raw();
+
+   const auto high_bit = vec_sl(vec_sr(bv, vec_splat_u8(7)), vec_splat_u8(4));
+   bv = vec_and(bv, vec_splat_u8(0x0F));
+   bv = vec_add(bv, high_bit);
+
+   const __vector unsigned int zero = vec_splat_u32(0);
+   return SIMD_4x32(vec_perm(a.raw(), zero, bv));
 #else
    #error "No shuffle implementation available"
 #endif
@@ -53,6 +62,16 @@ inline SIMD_4x32 shift_elems_left(SIMD_4x32 x)
    return SIMD_4x32(_mm_slli_si128(x.raw(), 4*I));
 #elif defined(BOTAN_SIMD_USE_NEON)
    return SIMD_4x32(vreinterpretq_u32_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_u32(x.raw()), 16 - 4*I)));
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+   const __vector unsigned int zero = vec_splat_u32(0);
+
+   const __vector unsigned char shuf[3] = {
+     { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
+     { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
+     { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
+   };
+
+   return SIMD_4x32(vec_perm(x.raw(), zero, shuf[I-1]));
 #else
    #error "No shift_elems_left implementation available"
 #endif
@@ -64,6 +83,9 @@ inline SIMD_4x32 alignr8(SIMD_4x32 a, SIMD_4x32 b)
    return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 8));
 #elif defined(BOTAN_SIMD_USE_NEON)
    return SIMD_4x32(vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(b.raw()), vreinterpretq_u8_u32(a.raw()), 8)));
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+   const __vector unsigned char mask = {8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23};
+   return SIMD_4x32(vec_perm(b.raw(), a.raw(), mask));
 #else
    #error "No alignr8 implementation available"
 #endif
diff --git a/src/lib/block/aes/aes_vperm/info.txt b/src/lib/block/aes/aes_vperm/info.txt
index f771ca2c3..b92cc21b3 100644
--- a/src/lib/block/aes/aes_vperm/info.txt
+++ b/src/lib/block/aes/aes_vperm/info.txt
@@ -9,6 +9,8 @@ x86_32:ssse3
 x86_64:ssse3
 arm32:neon
 arm64:neon
+ppc32:altivec
+ppc64:altivec
 </isa>
 
 <arch>
@@ -16,6 +18,8 @@ x86_32
 x86_64
 arm32
 arm64
+ppc32
+ppc64
 </arch>
 
 <requires>
diff --git a/src/lib/utils/cpuid/cpuid.h b/src/lib/utils/cpuid/cpuid.h
index d998d5364..3a8f54d6b 100644
--- a/src/lib/utils/cpuid/cpuid.h
+++ b/src/lib/utils/cpuid/cpuid.h
@@ -325,6 +325,8 @@ class BOTAN_PUBLIC_API(2,1) CPUID final
          return has_ssse3();
 #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
          return has_neon();
+#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+         return has_altivec();
 #else
          return false;
 #endif
author	Jack Lloyd <[email protected]>	2019-09-07 10:37:51 -0400
committer	Jack Lloyd <[email protected]>	2019-09-07 10:37:51 -0400
commit	a2845235b3721526da2b0a949fe6053e1320eec3 (patch)
tree	90c12ea63e8bdcfc263fe1e9ea681986e53f2926 /src/lib
parent	0c40885129a3bc0a683636bba0c5ff47575735d7 (diff)