aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/modes
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2020-01-23 11:29:28 -0500
committerJack Lloyd <[email protected]>2020-01-23 11:29:28 -0500
commit901d89a3ffa113b06fa0e600d622d5d088a2fa9d (patch)
treef5e5183fbe2f2bf0b4945b1498ff0094ae2e5d41 /src/lib/modes
parent206c18114e7e7336a6f41251979b979bb74223e7 (diff)
Add support for POWER VPSUM instruction for GCM
On POWER8, improves GMAC performance by 5-14x and AES-128/GCM performance by 3-7x, depending on the buffer size used. We are not using the instruction optimally here, because VPSUM can compute 2 distinct carryless multiplies and then add the products together. Instead we emulate clmul/pmull behavior using masks and shifts.
Diffstat (limited to 'src/lib/modes')
-rw-r--r--src/lib/modes/aead/gcm/clmul_cpu/clmul_cpu.cpp36
-rw-r--r--src/lib/modes/aead/gcm/clmul_cpu/info.txt2
2 files changed, 36 insertions, 2 deletions
diff --git a/src/lib/modes/aead/gcm/clmul_cpu/clmul_cpu.cpp b/src/lib/modes/aead/gcm/clmul_cpu/clmul_cpu.cpp
index 2a41121d1..fb482afe7 100644
--- a/src/lib/modes/aead/gcm/clmul_cpu/clmul_cpu.cpp
+++ b/src/lib/modes/aead/gcm/clmul_cpu/clmul_cpu.cpp
@@ -1,6 +1,6 @@
/*
-* Hook for CLMUL/PMULL
-* (C) 2013,2017,2019 Jack Lloyd
+* Hook for CLMUL/PMULL/VPMSUM
+* (C) 2013,2017,2019,2020 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -26,6 +26,9 @@ BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) reverse_vector(cons
const uint8_t maskb[16] = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
const uint8x16_t mask = vld1q_u8(maskb);
return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(in.raw()), mask)));
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+ const __vector unsigned char mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
+ return SIMD_4x32(vec_perm(in.raw(), in.raw(), mask));
#endif
}
@@ -40,6 +43,35 @@ BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_CLMUL_ISA) clmul(const SIMD_4x
const uint64_t a = vgetq_lane_u64(vreinterpretq_u64_u32(x.raw()), M & 0x01);
const uint64_t b = vgetq_lane_u64(vreinterpretq_u64_u32(H.raw()), (M & 0x10) >> 4);
return SIMD_4x32(reinterpret_cast<uint32x4_t>(vmull_p64(a, b)));
+#elif defined(BOTAN_SIMD_USE_ALTIVEC)
+ const SIMD_4x32 mask_lo = SIMD_4x32(0, 0, 0xFFFFFFFF, 0xFFFFFFFF);
+
+ SIMD_4x32 i1 = x;
+ SIMD_4x32 i2 = H;
+
+ if(M == 0x11)
+ {
+ i1 &= mask_lo;
+ i2 &= mask_lo;
+ }
+ else if(M == 0x10)
+ {
+ i1 = i1.shift_elems_left<2>();
+ }
+ else if(M == 0x01)
+ {
+ i2 = i2.shift_elems_left<2>();
+ }
+ else if(M == 0x00)
+ {
+ i1 = mask_lo.andc(i1);
+ i2 = mask_lo.andc(i2);
+ }
+
+ return SIMD_4x32((__vector unsigned int)__builtin_crypto_vpmsumd(
+ (__vector unsigned long)i1.raw(),
+ (__vector unsigned long)i2.raw())
+ );
#endif
}
diff --git a/src/lib/modes/aead/gcm/clmul_cpu/info.txt b/src/lib/modes/aead/gcm/clmul_cpu/info.txt
index bc018c2ed..938a8c61d 100644
--- a/src/lib/modes/aead/gcm/clmul_cpu/info.txt
+++ b/src/lib/modes/aead/gcm/clmul_cpu/info.txt
@@ -19,12 +19,14 @@ x86_64:ssse3
x86_64:aesni
arm64:neon
arm64:armv8crypto
+ppc64:powercrypto
</isa>
<arch>
x86_32
x86_64
arm64
+ppc64
</arch>
<cc>