aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/block/aes
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2019-09-01 07:16:58 -0400
committerJack Lloyd <[email protected]>2019-09-01 09:03:54 -0400
commit1456e825379a8621fbb8cd8c5c7e7e3201ddd2fe (patch)
tree24349d15adbd75466557d4a929703e0633cbf2f7 /src/lib/block/aes
parent3a3a7b38160dbfd76fc0e073b23e7f35e480cbd9 (diff)
Support NEON for AES vector permutes
Rename aes_ssse3 -> aes_vperm
Diffstat (limited to 'src/lib/block/aes')
-rw-r--r--src/lib/block/aes/aes.cpp158
-rw-r--r--src/lib/block/aes/aes.h24
-rw-r--r--src/lib/block/aes/aes_vperm/aes_vperm.cpp (renamed from src/lib/block/aes/aes_ssse3/aes_ssse3.cpp)78
-rw-r--r--src/lib/block/aes/aes_vperm/info.txt (renamed from src/lib/block/aes/aes_ssse3/info.txt)4
4 files changed, 154 insertions, 110 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp
index 568dfb1b3..2813a5f5a 100644
--- a/src/lib/block/aes/aes.cpp
+++ b/src/lib/block/aes/aes.cpp
@@ -456,13 +456,6 @@ const char* aes_provider()
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return "ssse3";
- }
-#endif
-
#if defined(BOTAN_HAS_AES_POWER8)
if(CPUID::has_ppc_crypto())
{
@@ -477,6 +470,13 @@ const char* aes_provider()
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return "vperm";
+ }
+#endif
+
return "base";
}
@@ -501,13 +501,6 @@ void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return ssse3_encrypt_n(in, out, blocks);
- }
-#endif
-
#if defined(BOTAN_HAS_AES_ARMV8)
if(CPUID::has_arm_aes())
{
@@ -522,6 +515,13 @@ void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_encrypt_n(in, out, blocks);
+ }
+#endif
+
aes_encrypt_n(in, out, blocks, m_EK, m_ME);
}
@@ -536,13 +536,6 @@ void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return ssse3_decrypt_n(in, out, blocks);
- }
-#endif
-
#if defined(BOTAN_HAS_AES_ARMV8)
if(CPUID::has_arm_aes())
{
@@ -557,6 +550,13 @@ void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_decrypt_n(in, out, blocks);
+ }
+#endif
+
aes_decrypt_n(in, out, blocks, m_DK, m_MD);
}
@@ -569,10 +569,24 @@ void AES_128::key_schedule(const uint8_t key[], size_t length)
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
{
- return ssse3_key_schedule(key, length);
+ return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_key_schedule(key, length);
}
#endif
@@ -598,13 +612,6 @@ void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return ssse3_encrypt_n(in, out, blocks);
- }
-#endif
-
#if defined(BOTAN_HAS_AES_ARMV8)
if(CPUID::has_arm_aes())
{
@@ -619,6 +626,13 @@ void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_encrypt_n(in, out, blocks);
+ }
+#endif
+
aes_encrypt_n(in, out, blocks, m_EK, m_ME);
}
@@ -633,13 +647,6 @@ void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return ssse3_decrypt_n(in, out, blocks);
- }
-#endif
-
#if defined(BOTAN_HAS_AES_ARMV8)
if(CPUID::has_arm_aes())
{
@@ -654,6 +661,13 @@ void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_decrypt_n(in, out, blocks);
+ }
+#endif
+
aes_decrypt_n(in, out, blocks, m_DK, m_MD);
}
@@ -666,10 +680,24 @@ void AES_192::key_schedule(const uint8_t key[], size_t length)
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
{
- return ssse3_key_schedule(key, length);
+ return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_key_schedule(key, length);
}
#endif
@@ -695,13 +723,6 @@ void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return ssse3_encrypt_n(in, out, blocks);
- }
-#endif
-
#if defined(BOTAN_HAS_AES_ARMV8)
if(CPUID::has_arm_aes())
{
@@ -716,6 +737,13 @@ void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_encrypt_n(in, out, blocks);
+ }
+#endif
+
aes_encrypt_n(in, out, blocks, m_EK, m_ME);
}
@@ -730,13 +758,6 @@ void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
- {
- return ssse3_decrypt_n(in, out, blocks);
- }
-#endif
-
#if defined(BOTAN_HAS_AES_ARMV8)
if(CPUID::has_arm_aes())
{
@@ -751,6 +772,13 @@ void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
+ {
+ return vperm_decrypt_n(in, out, blocks);
+ }
+#endif
+
aes_decrypt_n(in, out, blocks, m_DK, m_MD);
}
@@ -763,10 +791,24 @@ void AES_256::key_schedule(const uint8_t key[], size_t length)
}
#endif
-#if defined(BOTAN_HAS_AES_SSSE3)
- if(CPUID::has_ssse3())
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_POWER8)
+ if(CPUID::has_ppc_crypto())
+ {
+ return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
{
- return ssse3_key_schedule(key, length);
+ return vperm_key_schedule(key, length);
}
#endif
diff --git a/src/lib/block/aes/aes.h b/src/lib/block/aes/aes.h
index 294cdcad3..6083467b6 100644
--- a/src/lib/block/aes/aes.h
+++ b/src/lib/block/aes/aes.h
@@ -31,10 +31,10 @@ class BOTAN_PUBLIC_API(2,0) AES_128 final : public Block_Cipher_Fixed_Params<16,
private:
void key_schedule(const uint8_t key[], size_t length) override;
-#if defined(BOTAN_HAS_AES_SSSE3)
- void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void ssse3_key_schedule(const uint8_t key[], size_t length);
+#if defined(BOTAN_HAS_AES_VPERM)
+ void vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void vperm_key_schedule(const uint8_t key[], size_t length);
#endif
#if defined(BOTAN_HAS_AES_NI)
@@ -74,10 +74,10 @@ class BOTAN_PUBLIC_API(2,0) AES_192 final : public Block_Cipher_Fixed_Params<16,
size_t parallelism() const override;
private:
-#if defined(BOTAN_HAS_AES_SSSE3)
- void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void ssse3_key_schedule(const uint8_t key[], size_t length);
+#if defined(BOTAN_HAS_AES_VPERM)
+ void vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void vperm_key_schedule(const uint8_t key[], size_t length);
#endif
#if defined(BOTAN_HAS_AES_NI)
@@ -120,10 +120,10 @@ class BOTAN_PUBLIC_API(2,0) AES_256 final : public Block_Cipher_Fixed_Params<16,
size_t parallelism() const override;
private:
-#if defined(BOTAN_HAS_AES_SSSE3)
- void ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void ssse3_key_schedule(const uint8_t key[], size_t length);
+#if defined(BOTAN_HAS_AES_VPERM)
+ void vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void vperm_key_schedule(const uint8_t key[], size_t length);
#endif
#if defined(BOTAN_HAS_AES_NI)
diff --git a/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp b/src/lib/block/aes/aes_vperm/aes_vperm.cpp
index fa8bf4faa..a36118cbd 100644
--- a/src/lib/block/aes/aes_ssse3/aes_ssse3.cpp
+++ b/src/lib/block/aes/aes_vperm/aes_vperm.cpp
@@ -1,5 +1,5 @@
/*
-* AES using SSSE3
+* AES using vector permutes (SSSE3, NEON)
* (C) 2010,2016,2019 Jack Lloyd
*
* This is more or less a direct translation of public domain x86-64
@@ -28,39 +28,33 @@ inline SIMD_4x32 shuffle(SIMD_4x32 a, SIMD_4x32 b)
{
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_shuffle_epi8(a.raw(), b.raw()));
-#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
+#elif defined(BOTAN_SIMD_USE_NEON)
+ const uint8x16_t tbl = vreinterpretq_u8_u32(a.raw());
+ const uint8x16_t idx = vreinterpretq_u8_u32(b.raw());
- const int8x16_t tbl = vreinterpretq_s8_m128i(a.raw());
- const uint8x16_t idx = vreinterpretq_u8_m128i(b.raw());
+#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
+ uint8x8x2_t tbl2 = { vget_low_u8(tbl), vget_high_u8(tbl) };
- // fixme use vdupq_n_s8
- const uint8_t alignas(16) mask[16] = {
- 0x8F, 0x8F, 0x8F, 0x8F, 0x8F, 0x8F, 0x8F, 0x8F,
- 0x8F, 0x8F, 0x8F, 0x8F, 0x8F, 0x8F, 0x8F, 0x8F
- };
+ return SIMD_4x32(vreinterpretq_u32_u8(
+ vcombine_u8(vtbl2_u8(tbl2, vget_low_u8(idx)),
+ vtbl2_u8(tbl2, vget_high_u8(idx)))));
- const uint8x16_t idx_masked =
- vandq_u8(idx, vld1q_u8(mask)); // avoid using meaningless bits
+#else
+ return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(tbl, idx)));
+#endif
- return vreinterpretq_m128i_s8(vqtbl1q_s8(tbl, idx_masked));
#else
#error "No shuffle implementation available"
#endif
}
-template<size_t I1, size_t I2, size_t I3, size_t I4>
-inline SIMD_4x32 shuffle32(SIMD_4x32 x)
- {
- return SIMD_4x32(_mm_shuffle_epi32(x.raw(), _MM_SHUFFLE(I1, I2, I3, I4)));
- }
-
template<size_t I>
inline SIMD_4x32 slli(SIMD_4x32 x)
{
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_slli_si128(x.raw(), 4*I));
-#else
- #error "No ssli implementation available"
+#elif defined(BOTAN_SIMD_USE_NEON)
+ return SIMD_4x32(vreinterpretq_u32_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_u32(x.raw()), 16 - 4*I)));
#endif
}
@@ -68,8 +62,10 @@ inline SIMD_4x32 zero_top_half(SIMD_4x32 x)
{
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_slli_si128(_mm_srli_si128(x.raw(), 8), 8));
-#else
- #error "No zero_top_half implementation available"
+#elif defined(BOTAN_SIMD_USE_NEON)
+ // fixme do better ?
+ SIMD_4x32 mask(0, 0, ~0, ~0);
+ return x & mask;
#endif
}
@@ -78,8 +74,8 @@ inline SIMD_4x32 alignr(SIMD_4x32 a, SIMD_4x32 b)
{
#if defined(BOTAN_SIMD_USE_SSE2)
return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), C));
-#else
- #error "No alignr implementation available"
+#elif defined(BOTAN_SIMD_USE_NEON)
+ return SIMD_4x32(vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(b.raw()), vreinterpretq_u8_u32(a.raw()), C)));
#endif
}
@@ -109,6 +105,8 @@ const SIMD_4x32 sr[4] = {
const SIMD_4x32 lo_nibs_mask = SIMD_4x32::splat_u8(0x0F);
const SIMD_4x32 hi_nibs_mask = SIMD_4x32::splat_u8(0xF0);
+const SIMD_4x32 shuffle3333 = SIMD_4x32::splat(0x0F0E0D0C);
+
inline SIMD_4x32 low_nibs(SIMD_4x32 x)
{
return lo_nibs_mask & x;
@@ -257,32 +255,32 @@ void vperm_decrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks,
}
-void AES_128::ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
vperm_encrypt_blocks(in, out, blocks, m_EK.data(), 10);
}
-void AES_128::ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
vperm_decrypt_blocks(in, out, blocks, m_DK.data(), 10);
}
-void AES_192::ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
vperm_encrypt_blocks(in, out, blocks, m_EK.data(), 12);
}
-void AES_192::ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
vperm_decrypt_blocks(in, out, blocks, m_DK.data(), 12);
}
-void AES_256::ssse3_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
vperm_encrypt_blocks(in, out, blocks, m_EK.data(), 14);
}
-void AES_256::ssse3_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
vperm_decrypt_blocks(in, out, blocks, m_DK.data(), 14);
}
@@ -335,7 +333,7 @@ SIMD_4x32 aes_schedule_mangle_dec(SIMD_4x32 k, uint8_t round_no)
t = aes_schedule_transform(t, dsk[6], dsk[7]);
output = shuffle(t ^ output, mc_forward0);
- return shuffle(output, SIMD_4x32(sr[round_no % 4]));
+ return shuffle(output, sr[round_no % 4]);
}
SIMD_4x32 aes_schedule_mangle_last(SIMD_4x32 k, uint8_t round_no)
@@ -343,7 +341,7 @@ SIMD_4x32 aes_schedule_mangle_last(SIMD_4x32 k, uint8_t round_no)
const SIMD_4x32 out_tr1(0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121);
const SIMD_4x32 out_tr2(0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1);
- k = shuffle(k, SIMD_4x32(sr[round_no % 4]));
+ k = shuffle(k, sr[round_no % 4]);
k ^= SIMD_4x32::splat_u8(0x5B);
return aes_schedule_transform(k, out_tr1, out_tr2);
}
@@ -383,7 +381,7 @@ SIMD_4x32 aes_schedule_round(SIMD_4x32& rcon, SIMD_4x32 input1, SIMD_4x32 input2
{
input2 ^= alignr<15>(SIMD_4x32(), rcon);
rcon = alignr<15>(rcon, rcon);
- input1 = shuffle32<3,3,3,3>(input1);
+ input1 = shuffle(input1, shuffle3333);
input1 = alignr<1>(input1, input1);
return aes_schedule_round(input1, input2);
@@ -391,12 +389,16 @@ SIMD_4x32 aes_schedule_round(SIMD_4x32& rcon, SIMD_4x32 input1, SIMD_4x32 input2
SIMD_4x32 aes_schedule_192_smear(SIMD_4x32 x, SIMD_4x32 y)
{
- return y ^ shuffle32<3,3,3,2>(x) ^ shuffle32<2,0,0,0>(y);
+ const SIMD_4x32 shuffle3332 =
+ SIMD_4x32(0x0B0A0908, 0x0F0E0D0C, 0x0F0E0D0C, 0x0F0E0D0C);
+ const SIMD_4x32 shuffle2000 =
+ SIMD_4x32(0x03020100, 0x03020100, 0x03020100, 0x0B0A0908);
+ return y ^ shuffle(x, shuffle3332) ^ shuffle(y, shuffle2000);
}
}
-void AES_128::ssse3_key_schedule(const uint8_t keyb[], size_t)
+void AES_128::vperm_key_schedule(const uint8_t keyb[], size_t)
{
m_EK.resize(11*4);
m_DK.resize(11*4);
@@ -424,7 +426,7 @@ void AES_128::ssse3_key_schedule(const uint8_t keyb[], size_t)
aes_schedule_mangle_last_dec(key).store_le(&m_DK[0]);
}
-void AES_192::ssse3_key_schedule(const uint8_t keyb[], size_t)
+void AES_192::vperm_key_schedule(const uint8_t keyb[], size_t)
{
m_EK.resize(13*4);
m_DK.resize(13*4);
@@ -474,7 +476,7 @@ void AES_192::ssse3_key_schedule(const uint8_t keyb[], size_t)
}
}
-void AES_256::ssse3_key_schedule(const uint8_t keyb[], size_t)
+void AES_256::vperm_key_schedule(const uint8_t keyb[], size_t)
{
m_EK.resize(15*4);
m_DK.resize(15*4);
@@ -502,7 +504,7 @@ void AES_256::ssse3_key_schedule(const uint8_t keyb[], size_t)
aes_schedule_mangle(key2, i % 4).store_le(&m_EK[4*i]);
aes_schedule_mangle_dec(key2, (i+2)%4).store_le(&m_DK[4*(14-i)]);
- key2 = aes_schedule_round(shuffle32<3,3,3,3>(key2), k_t);
+ key2 = aes_schedule_round(shuffle(key2, shuffle3333), k_t);
aes_schedule_mangle(key2, (i-1)%4).store_le(&m_EK[4*(i+1)]);
aes_schedule_mangle_dec(key2, (i+1)%4).store_le(&m_DK[4*(13-i)]);
diff --git a/src/lib/block/aes/aes_ssse3/info.txt b/src/lib/block/aes/aes_vperm/info.txt
index 49d9a9214..5ff0c2aa2 100644
--- a/src/lib/block/aes/aes_ssse3/info.txt
+++ b/src/lib/block/aes/aes_vperm/info.txt
@@ -1,9 +1,9 @@
<defines>
-AES_SSSE3 -> 20131128
+AES_VPERM -> 20190901
</defines>
<isa>
-ssse3
+neon
</isa>
<requires>