aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2020-05-08 08:15:27 -0400
committerJack Lloyd <[email protected]>2020-05-08 08:15:27 -0400
commit5332eae53fecb49ec661369e21cd3fd06b51ada8 (patch)
treeddfa80de28e72aadbcd2ac9db57227dc8ebca0bd /src/lib
parent1f89b360f7ddfb97dcc12ec428dc3d00f5b411b9 (diff)
parent7b6d3eeaf0495d16454888402df301f21979c8d1 (diff)
Merge GH #2348 Add constant time bitsliced AES
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/block/aes/aes.cpp972
-rw-r--r--src/lib/block/aes/aes.h42
-rw-r--r--src/lib/block/aes/aes_armv8/aes_armv8.cpp186
-rw-r--r--src/lib/block/aes/aes_ni/aes_ni.cpp12
-rw-r--r--src/lib/block/aes/aes_power8/aes_power8.cpp45
-rw-r--r--src/lib/utils/cpuid/cpuid.h16
6 files changed, 613 insertions, 660 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp
index 64205504f..e1a50a2a3 100644
--- a/src/lib/block/aes/aes.cpp
+++ b/src/lib/block/aes/aes.cpp
@@ -1,9 +1,6 @@
/*
-* AES
* (C) 1999-2010,2015,2017,2018,2020 Jack Lloyd
*
-* Based on the public domain reference implementation by Paulo Baretto
-*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -13,113 +10,18 @@
#include <botan/rotate.h>
#include <botan/internal/bit_ops.h>
#include <botan/internal/ct_utils.h>
-#include <type_traits>
-
-/*
-* This implementation is based on table lookups which are known to be
-* vulnerable to timing and cache based side channel attacks. Some
-* countermeasures are used which may be helpful in some situations:
-*
-* - Only a single 256-word T-table is used, with rotations applied.
-* Most implementations use 4 (or sometimes 5) T-tables, which leaks
-* much more information via cache usage.
-*
-* - The TE and TD tables are computed at runtime to avoid flush+reload
-* attacks using clflush. As different processes will not share the
-* same underlying table data, an attacker can't manipulate another
-* processes cache lines via their shared reference to the library
-* read only segment. (However, prime+probe attacks are still possible.)
-*
-* - Each cache line of the lookup tables is accessed at the beginning
-* of each call to encrypt or decrypt. (See the Z variable below)
-*
-* If available SSSE3 or AES-NI are used instead of this version, as both
-* are faster and immune to side channel attacks.
-*
-* Some AES cache timing papers for reference:
-*
-* "Software mitigations to hedge AES against cache-based software side
-* channel vulnerabilities" https://eprint.iacr.org/2006/052.pdf
-*
-* "Cache Games - Bringing Access-Based Cache Attacks on AES to Practice"
-* http://www.ieee-security.org/TC/SP2011/PAPERS/2011/paper031.pdf
-*
-* "Cache-Collision Timing Attacks Against AES" Bonneau, Mironov
-* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.88.4753
-*/
namespace Botan {
-namespace {
-
-alignas(64)
-const uint8_t SE[256] = {
- 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
- 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
- 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
- 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
- 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
- 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
- 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
- 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
- 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
- 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
- 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
- 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
- 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
- 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
- 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
- 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
- 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
- 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
- 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
- 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
- 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
- 0xB0, 0x54, 0xBB, 0x16 };
-
-alignas(64)
-const uint8_t SD[256] = {
- 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
- 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
- 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
- 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
- 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
- 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
- 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
- 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
- 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
- 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
- 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
- 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
- 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
- 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
- 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
- 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
- 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
- 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
- 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
- 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
- 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
- 0x55, 0x21, 0x0C, 0x7D };
-
-inline constexpr uint8_t xtime(uint8_t s) { return static_cast<uint8_t>(s << 1) ^ ((s >> 7) * 0x1B); }
-inline constexpr uint8_t xtime3(uint8_t s) { return xtime(s) ^ s; }
+#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI)
+ #define BOTAN_HAS_HW_AES_SUPPORT
+#endif
-inline uint32_t InvMixColumn(uint8_t s1)
- {
- const uint8_t s2 = xtime(s1);
- const uint8_t s4 = xtime(s2);
- const uint8_t s8 = xtime(s4);
- const uint8_t s9 = s8 ^ s1;
- const uint8_t s11 = s9 ^ s2;
- const uint8_t s13 = s9 ^ s4;
- const uint8_t s14 = s8 ^ s4 ^ s2;
- return make_uint32(s14, s9, s13, s11);
- }
+namespace {
/*
-This is a bitsliced AES sbox computation which can execute up
-to 32 parallel sbox computations.
+This is an AES sbox circuit which can execute in bitsliced mode up to 32x in
+parallel.
The circuit is from "A depth-16 circuit for the AES S-box" by Boyar
and Peralta (https://eprint.iacr.org/2011/332.pdf)
@@ -289,165 +191,377 @@ void AES_SBOX(uint32_t V[8])
V[7] = S7;
}
-inline uint32_t SE_word(uint32_t x)
+void AES_INV_SBOX(uint32_t V[8])
{
- uint32_t I[8] = { 0 };
+ const uint32_t I0 = V[0];
+ const uint32_t I1 = V[1];
+ const uint32_t I2 = V[2];
+ const uint32_t I3 = V[3];
+ const uint32_t I4 = V[4];
+ const uint32_t I5 = V[5];
+ const uint32_t I6 = V[6];
+ const uint32_t I7 = V[7];
- // 0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27 4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31
- x = bit_permute_step<uint32_t>(x, 0x00aa00aa, 7); // Bit index swap 0,3
- x = bit_permute_step<uint32_t>(x, 0x0000cccc, 14); // Bit index swap 1,4
- x = bit_permute_step<uint32_t>(x, 0x00f000f0, 4); // Bit index swap 2,3
- x = bit_permute_step<uint32_t>(x, 0x0000ff00, 8); // Bit index swap 3,4
+ // Figure 6: Top linear transform in reverse direction
+ const uint32_t T23 = I0 ^ I3;
+ const uint32_t T22 = ~(I1 ^ I3);
+ const uint32_t T2 = ~(I0 ^ I1);
+ const uint32_t T1 = I3 ^ I4;
+ const uint32_t T24 = ~(I4 ^ I7);
+ const uint32_t R5 = I6 ^ I7;
+ const uint32_t T8 = ~(I1 ^ T23);
+ const uint32_t T19 = T22 ^ R5;
+ const uint32_t T9 = ~(I7 ^ T1);
+ const uint32_t T10 = T2 ^ T24;
+ const uint32_t T13 = T2 ^ R5;
+ const uint32_t T3 = T1 ^ R5;
+ const uint32_t T25 = ~(I2 ^ T1);
+ const uint32_t R13 = I1 ^ I6;
+ const uint32_t T17 = ~(I2 ^ T19);
+ const uint32_t T20 = T24 ^ R13;
+ const uint32_t T4 = I4 ^ T8;
+ const uint32_t R17 = ~(I2 ^ I5);
+ const uint32_t R18 = ~(I5 ^ I6);
+ const uint32_t R19 = ~(I2 ^ I4);
+ const uint32_t Y5 = I0 ^ R17;
+ const uint32_t T6 = T22 ^ R17;
+ const uint32_t T16 = R13 ^ R19;
+ const uint32_t T27 = T1 ^ R18;
+ const uint32_t T15 = T10 ^ T27;
+ const uint32_t T14 = T10 ^ R18;
+ const uint32_t T26 = T3 ^ T16;
- for(size_t i = 0; i != 8; ++i)
- I[i] = (x >> (28-4*i)) & 0xF;
+ const uint32_t D = Y5;
- AES_SBOX(I);
+ // Figure 7: Shared part of AES S-box circuit
+ const uint32_t M1 = T13 & T6;
+ const uint32_t M2 = T23 & T8;
+ const uint32_t M3 = T14 ^ M1;
+ const uint32_t M4 = T19 & D;
+ const uint32_t M5 = M4 ^ M1;
+ const uint32_t M6 = T3 & T16;
+ const uint32_t M7 = T22 & T9;
+ const uint32_t M8 = T26 ^ M6;
+ const uint32_t M9 = T20 & T17;
+ const uint32_t M10 = M9 ^ M6;
+ const uint32_t M11 = T1 & T15;
+ const uint32_t M12 = T4 & T27;
+ const uint32_t M13 = M12 ^ M11;
+ const uint32_t M14 = T2 & T10;
+ const uint32_t M15 = M14 ^ M11;
+ const uint32_t M16 = M3 ^ M2;
- x = 0;
+ const uint32_t M17 = M5 ^ T24;
+ const uint32_t M18 = M8 ^ M7;
+ const uint32_t M19 = M10 ^ M15;
+ const uint32_t M20 = M16 ^ M13;
+ const uint32_t M21 = M17 ^ M15;
+ const uint32_t M22 = M18 ^ M13;
+ const uint32_t M23 = M19 ^ T25;
+ const uint32_t M24 = M22 ^ M23;
+ const uint32_t M25 = M22 & M20;
+ const uint32_t M26 = M21 ^ M25;
+ const uint32_t M27 = M20 ^ M21;
+ const uint32_t M28 = M23 ^ M25;
+ const uint32_t M29 = M28 & M27;
+ const uint32_t M30 = M26 & M24;
+ const uint32_t M31 = M20 & M23;
+ const uint32_t M32 = M27 & M31;
- for(size_t i = 0; i != 8; ++i)
- x = (x << 4) + (I[i] & 0xF);
+ const uint32_t M33 = M27 ^ M25;
+ const uint32_t M34 = M21 & M22;
+ const uint32_t M35 = M24 & M34;
+ const uint32_t M36 = M24 ^ M25;
+ const uint32_t M37 = M21 ^ M29;
+ const uint32_t M38 = M32 ^ M33;
+ const uint32_t M39 = M23 ^ M30;
+ const uint32_t M40 = M35 ^ M36;
+ const uint32_t M41 = M38 ^ M40;
+ const uint32_t M42 = M37 ^ M39;
+ const uint32_t M43 = M37 ^ M38;
+ const uint32_t M44 = M39 ^ M40;
+ const uint32_t M45 = M42 ^ M41;
+ const uint32_t M46 = M44 & T6;
+ const uint32_t M47 = M40 & T8;
+ const uint32_t M48 = M39 & D;
- // 0 4 8 12 16 20 24 28 1 5 9 13 17 21 25 29 2 6 10 14 18 22 26 30 3 7 11 15 19 23 27 31
- x = bit_permute_step<uint32_t>(x, 0x0a0a0a0a, 3); // Bit index swap 0,2
- x = bit_permute_step<uint32_t>(x, 0x00cc00cc, 6); // Bit index swap 1,3
- x = bit_permute_step<uint32_t>(x, 0x0000f0f0, 12); // Bit index swap 2,4
- x = bit_permute_step<uint32_t>(x, 0x0000ff00, 8); // Bit index swap 3,4
+ const uint32_t M49 = M43 & T16;
+ const uint32_t M50 = M38 & T9;
+ const uint32_t M51 = M37 & T17;
+ const uint32_t M52 = M42 & T15;
+ const uint32_t M53 = M45 & T27;
+ const uint32_t M54 = M41 & T10;
+ const uint32_t M55 = M44 & T13;
+ const uint32_t M56 = M40 & T23;
+ const uint32_t M57 = M39 & T19;
+ const uint32_t M58 = M43 & T3;
+ const uint32_t M59 = M38 & T22;
+ const uint32_t M60 = M37 & T20;
+ const uint32_t M61 = M42 & T1;
+ const uint32_t M62 = M45 & T4;
+ const uint32_t M63 = M41 & T2;
- return x;
+ // Figure 9 Bottom linear transform in reverse direction
+ const uint32_t P0 = M52 ^ M61;
+ const uint32_t P1 = M58 ^ M59;
+ const uint32_t P2 = M54 ^ M62;
+ const uint32_t P3 = M47 ^ M50;
+ const uint32_t P4 = M48 ^ M56;
+ const uint32_t P5 = M46 ^ M51;
+ const uint32_t P6 = M49 ^ M60;
+ const uint32_t P7 = P0 ^ P1;
+ const uint32_t P8 = M50 ^ M53;
+ const uint32_t P9 = M55 ^ M63;
+ const uint32_t P10 = M57 ^ P4;
+ const uint32_t P11 = P0 ^ P3;
+ const uint32_t P12 = M46 ^ M48;
+ const uint32_t P13 = M49 ^ M51;
+ const uint32_t P14 = M49 ^ M62;
+ const uint32_t P15 = M54 ^ M59;
+ const uint32_t P16 = M57 ^ M61;
+ const uint32_t P17 = M58 ^ P2;
+ const uint32_t P18 = M63 ^ P5;
+ const uint32_t P19 = P2 ^ P3;
+ const uint32_t P20 = P4 ^ P6;
+ const uint32_t P22 = P2 ^ P7;
+ const uint32_t P23 = P7 ^ P8;
+ const uint32_t P24 = P5 ^ P7;
+ const uint32_t P25 = P6 ^ P10;
+ const uint32_t P26 = P9 ^ P11;
+ const uint32_t P27 = P10 ^ P18;
+ const uint32_t P28 = P11 ^ P25;
+ const uint32_t P29 = P15 ^ P20;
+ const uint32_t W0 = P13 ^ P22;
+ const uint32_t W1 = P26 ^ P29;
+ const uint32_t W2 = P17 ^ P28;
+ const uint32_t W3 = P12 ^ P22;
+ const uint32_t W4 = P23 ^ P27;
+ const uint32_t W5 = P19 ^ P24;
+ const uint32_t W6 = P14 ^ P23;
+ const uint32_t W7 = P9 ^ P16;
+
+ V[0] = W0;
+ V[1] = W1;
+ V[2] = W2;
+ V[3] = W3;
+ V[4] = W4;
+ V[5] = W5;
+ V[6] = W6;
+ V[7] = W7;
}
-const uint32_t* AES_TE()
+inline void bit_transpose(uint32_t B[8])
{
- class TE_Table final
+ swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
+ swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);
+ swap_bits<uint32_t>(B[5], B[4], 0x55555555, 1);
+ swap_bits<uint32_t>(B[7], B[6], 0x55555555, 1);
+
+ swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
+ swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);
+ swap_bits<uint32_t>(B[6], B[4], 0x33333333, 2);
+ swap_bits<uint32_t>(B[7], B[5], 0x33333333, 2);
+
+ swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
+ swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
+ swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
+ swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
+ }
+
+inline void ks_expand(uint32_t B[8], const uint32_t K[], size_t r)
+ {
+ /*
+ This is bit_transpose of K[r..r+4] || K[r..r+4], we can save some computation
+ due to knowing the first and second halves are the same data.
+ */
+ for(size_t i = 0; i != 4; ++i)
+ B[i] = K[r + i];
+
+ swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
+ swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);
+
+ swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
+ swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);
+
+ B[4] = B[0];
+ B[5] = B[1];
+ B[6] = B[2];
+ B[7] = B[3];
+
+ swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
+ swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
+ swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
+ swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
+ }
+
+inline void shift_rows(uint32_t B[8])
+ {
+ // 3 0 1 2 7 4 5 6 10 11 8 9 14 15 12 13 17 18 19 16 21 22 23 20 24 25 26 27 28 29 30 31
+#if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
+ for(size_t i = 0; i != 8; i += 2)
+ {
+ uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i+1];
+ x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
+ x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
+ B[i] = static_cast<uint32_t>(x >> 32);
+ B[i+1] = static_cast<uint32_t>(x);
+ }
+#else
+ for(size_t i = 0; i != 8; ++i)
{
- public:
- TE_Table()
- {
- uint32_t* p = reinterpret_cast<uint32_t*>(&data);
- for(size_t i = 0; i != 256; ++i)
- {
- const uint8_t s = SE[i];
- p[i] = make_uint32(xtime(s), s, s, xtime3(s));
- }
- }
-
- const uint32_t* ptr() const
- {
- return reinterpret_cast<const uint32_t*>(&data);
- }
- private:
- std::aligned_storage<256*sizeof(uint32_t), 64>::type data;
- };
-
- static TE_Table table;
- return table.ptr();
+ uint32_t x = B[i];
+ x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
+ x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
+ B[i] = x;
+ }
+#endif
}
-const uint32_t* AES_TD()
+inline void inv_shift_rows(uint32_t B[8])
{
- class TD_Table final
+ // Inverse of shift_rows, just inverting the steps
+
+#if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
+ for(size_t i = 0; i != 8; i += 2)
{
- public:
- TD_Table()
- {
- uint32_t* p = reinterpret_cast<uint32_t*>(&data);
- for(size_t i = 0; i != 256; ++i)
- {
- p[i] = InvMixColumn(SD[i]);
- }
- }
-
- const uint32_t* ptr() const
- {
- return reinterpret_cast<const uint32_t*>(&data);
- }
- private:
- std::aligned_storage<256*sizeof(uint32_t), 64>::type data;
- };
-
- static TD_Table table;
- return table.ptr();
+ uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i+1];
+ x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
+ x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
+ B[i] = static_cast<uint32_t>(x >> 32);
+ B[i+1] = static_cast<uint32_t>(x);
+ }
+#else
+ for(size_t i = 0; i != 8; ++i)
+ {
+ uint32_t x = B[i];
+ x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
+ x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
+ B[i] = x;
+ }
+#endif
}
-#define AES_T(T, K, V0, V1, V2, V3) \
- (K ^ T[get_byte(0, V0)] ^ \
- rotr< 8>(T[get_byte(1, V1)]) ^ \
- rotr<16>(T[get_byte(2, V2)]) ^ \
- rotr<24>(T[get_byte(3, V3)]))
+inline void mix_columns(uint32_t B[8])
+ {
+ // carry high bits in B[0] to positions in 0x1b == 0b11011
+ const uint32_t X2[8] = {
+ B[1],
+ B[2],
+ B[3],
+ B[4] ^ B[0],
+ B[5] ^ B[0],
+ B[6],
+ B[7] ^ B[0],
+ B[0],
+ };
+
+ for(size_t i = 0; i != 8; i++)
+ {
+ const uint32_t X3 = B[i] ^ X2[i];
+ B[i] = X2[i] ^ rotr<8>(B[i]) ^ rotr<16>(B[i]) ^ rotr<24>(X3);
+ }
+ }
+
+void inv_mix_columns(uint32_t B[8])
+ {
+ const uint32_t X2[8] = {
+ B[1],
+ B[2],
+ B[3],
+ B[4] ^ B[0],
+ B[5] ^ B[0],
+ B[6],
+ B[7] ^ B[0],
+ B[0],
+ };
+ const uint32_t X4[8] = {
+ X2[1],
+ X2[2],
+ X2[3],
+ X2[4] ^ X2[0],
+ X2[5] ^ X2[0],
+ X2[6],
+ X2[7] ^ X2[0],
+ X2[0],
+ };
+ const uint32_t X8[8] = {
+ X4[1],
+ X4[2],
+ X4[3],
+ X4[4] ^ X4[0],
+ X4[5] ^ X4[0],
+ X4[6],
+ X4[7] ^ X4[0],
+ X4[0],
+ };
+
+ for(size_t i = 0; i != 8; i++)
+ {
+ const uint32_t X9 = X8[i] ^ B[i];
+ const uint32_t X11 = X9 ^ X2[i];
+ const uint32_t X13 = X9 ^ X4[i];
+ const uint32_t X14 = X8[i] ^ X4[i] ^ X2[i];
+
+ B[i] = X14 ^ rotr<8>(X9) ^ rotr<16>(X13) ^ rotr<24>(X11);
+ }
+ }
/*
* AES Encryption
*/
void aes_encrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks,
- const secure_vector<uint32_t>& EK,
- const secure_vector<uint8_t>& ME)
+ const secure_vector<uint32_t>& EK)
{
- BOTAN_ASSERT(EK.size() && ME.size() == 16, "Key was set");
+ BOTAN_ASSERT(EK.size() == 44 || EK.size() == 52 || EK.size() == 60, "Key was set");
- const size_t cache_line_size = CPUID::cache_line_size();
- const uint32_t* TE = AES_TE();
+ const size_t rounds = (EK.size() - 4) / 4;
- // Hit every cache line of TE
- volatile uint32_t Z = 0;
- for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t))
+ uint32_t KS[13*8] = { 0 }; // actual maximum is (rounds - 1) * 8
+ for(size_t i = 0; i < rounds - 1; i += 1)
{
- Z |= TE[i];
+ ks_expand(&KS[8*i], EK.data(), 4*i + 4);
}
- Z &= TE[82]; // this is zero, which hopefully the compiler cannot deduce
- for(size_t i = 0; i < blocks; ++i)
+ const size_t BLOCK_SIZE = 16;
+ const size_t BITSLICED_BLOCKS = 8*sizeof(uint32_t) / BLOCK_SIZE;
+
+ while(blocks > 0)
{
- uint32_t T0, T1, T2, T3;
- load_be(in + 16*i, T0, T1, T2, T3);
+ const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);
+
+ uint32_t B[8] = { 0 };
- T0 ^= EK[0];
- T1 ^= EK[1];
- T2 ^= EK[2];
- T3 ^= EK[3];
+ load_be(B, in, this_loop*4);
- T0 ^= Z;
+ for(size_t i = 0; i != 8; ++i)
+ B[i] ^= EK[i % 4];
- uint32_t B0 = AES_T(TE, EK[4], T0, T1, T2, T3);
- uint32_t B1 = AES_T(TE, EK[5], T1, T2, T3, T0);
- uint32_t B2 = AES_T(TE, EK[6], T2, T3, T0, T1);
- uint32_t B3 = AES_T(TE, EK[7], T3, T0, T1, T2);
+ bit_transpose(B);
- for(size_t r = 2*4; r < EK.size(); r += 2*4)
+ for(size_t r = 0; r != rounds - 1; ++r)
{
- T0 = AES_T(TE, EK[r ], B0, B1, B2, B3);
- T1 = AES_T(TE, EK[r+1], B1, B2, B3, B0);
- T2 = AES_T(TE, EK[r+2], B2, B3, B0, B1);
- T3 = AES_T(TE, EK[r+3], B3, B0, B1, B2);
-
- B0 = AES_T(TE, EK[r+4], T0, T1, T2, T3);
- B1 = AES_T(TE, EK[r+5], T1, T2, T3, T0);
- B2 = AES_T(TE, EK[r+6], T2, T3, T0, T1);
- B3 = AES_T(TE, EK[r+7], T3, T0, T1, T2);
+ AES_SBOX(B);
+ shift_rows(B);
+ mix_columns(B);
+
+ for(size_t i = 0; i != 8; ++i)
+ B[i] ^= KS[8*r + i];
}
- /*
- * Use TE[x] >> 8 instead of SE[] so encryption only references a single
- * lookup table.
- */
- out[16*i+ 0] = static_cast<uint8_t>(TE[get_byte(0, B0)] >> 8) ^ ME[0];
- out[16*i+ 1] = static_cast<uint8_t>(TE[get_byte(1, B1)] >> 8) ^ ME[1];
- out[16*i+ 2] = static_cast<uint8_t>(TE[get_byte(2, B2)] >> 8) ^ ME[2];
- out[16*i+ 3] = static_cast<uint8_t>(TE[get_byte(3, B3)] >> 8) ^ ME[3];
- out[16*i+ 4] = static_cast<uint8_t>(TE[get_byte(0, B1)] >> 8) ^ ME[4];
- out[16*i+ 5] = static_cast<uint8_t>(TE[get_byte(1, B2)] >> 8) ^ ME[5];
- out[16*i+ 6] = static_cast<uint8_t>(TE[get_byte(2, B3)] >> 8) ^ ME[6];
- out[16*i+ 7] = static_cast<uint8_t>(TE[get_byte(3, B0)] >> 8) ^ ME[7];
- out[16*i+ 8] = static_cast<uint8_t>(TE[get_byte(0, B2)] >> 8) ^ ME[8];
- out[16*i+ 9] = static_cast<uint8_t>(TE[get_byte(1, B3)] >> 8) ^ ME[9];
- out[16*i+10] = static_cast<uint8_t>(TE[get_byte(2, B0)] >> 8) ^ ME[10];
- out[16*i+11] = static_cast<uint8_t>(TE[get_byte(3, B1)] >> 8) ^ ME[11];
- out[16*i+12] = static_cast<uint8_t>(TE[get_byte(0, B3)] >> 8) ^ ME[12];
- out[16*i+13] = static_cast<uint8_t>(TE[get_byte(1, B0)] >> 8) ^ ME[13];
- out[16*i+14] = static_cast<uint8_t>(TE[get_byte(2, B1)] >> 8) ^ ME[14];
- out[16*i+15] = static_cast<uint8_t>(TE[get_byte(3, B2)] >> 8) ^ ME[15];
+ // Final round:
+ AES_SBOX(B);
+ shift_rows(B);
+ bit_transpose(B);
+
+ for(size_t i = 0; i != 8; ++i)
+ B[i] ^= EK[4*rounds + i % 4];
+
+ copy_out_be(out, this_loop*4*sizeof(uint32_t), B);
+
+ in += this_loop * BLOCK_SIZE;
+ out += this_loop * BLOCK_SIZE;
+ blocks -= this_loop;
}
}
@@ -455,81 +569,95 @@ void aes_encrypt_n(const uint8_t in[], uint8_t out[],
* AES Decryption
*/
void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks,
- const secure_vector<uint32_t>& DK,
- const secure_vector<uint8_t>& MD)
+ const secure_vector<uint32_t>& DK)
{
- BOTAN_ASSERT(DK.size() && MD.size() == 16, "Key was set");
+ BOTAN_ASSERT(DK.size() == 44 || DK.size() == 52 || DK.size() == 60, "Key was set");
- const size_t cache_line_size = CPUID::cache_line_size();
- const uint32_t* TD = AES_TD();
+ const size_t rounds = (DK.size() - 4) / 4;
- volatile uint32_t Z = 0;
- for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t))
+ uint32_t KS[13*8] = { 0 }; // actual maximum is (rounds - 1) * 8
+ for(size_t i = 0; i < rounds - 1; i += 1)
{
- Z |= TD[i];
+ ks_expand(&KS[8*i], DK.data(), 4*i + 4);
}
- for(size_t i = 0; i < 256; i += cache_line_size)
- {
- Z |= SD[i];
- }
- Z &= TD[99]; // this is zero, which hopefully the compiler cannot deduce
- for(size_t i = 0; i != blocks; ++i)
+ const size_t BLOCK_SIZE = 16;
+ const size_t BITSLICED_BLOCKS = 8*sizeof(uint32_t) / BLOCK_SIZE;
+
+ while(blocks > 0)
{
- uint32_t T0 = load_be<uint32_t>(in, 0) ^ DK[0];
- uint32_t T1 = load_be<uint32_t>(in, 1) ^ DK[1];
- uint32_t T2 = load_be<uint32_t>(in, 2) ^ DK[2];
- uint32_t T3 = load_be<uint32_t>(in, 3) ^ DK[3];
+ const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);
+
+ uint32_t B[8] = { 0 };
+
+ load_be(B, in, this_loop*4);
- T0 ^= Z;
+ for(size_t i = 0; i != 8; ++i)
+ B[i] ^= DK[i % 4];
- uint32_t B0 = AES_T(TD, DK[4], T0, T3, T2, T1);
- uint32_t B1 = AES_T(TD, DK[5], T1, T0, T3, T2);
- uint32_t B2 = AES_T(TD, DK[6], T2, T1, T0, T3);
- uint32_t B3 = AES_T(TD, DK[7], T3, T2, T1, T0);
+ bit_transpose(B);
- for(size_t r = 2*4; r < DK.size(); r += 2*4)
+ for(size_t r = 0; r != rounds - 1; ++r)
{
- T0 = AES_T(TD, DK[r ], B0, B3, B2, B1);
- T1 = AES_T(TD, DK[r+1], B1, B0, B3, B2);
- T2 = AES_T(TD, DK[r+2], B2, B1, B0, B3);
- T3 = AES_T(TD, DK[r+3], B3, B2, B1, B0);
-
- B0 = AES_T(TD, DK[r+4], T0, T3, T2, T1);
- B1 = AES_T(TD, DK[r+5], T1, T0, T3, T2);
- B2 = AES_T(TD, DK[r+6], T2, T1, T0, T3);
- B3 = AES_T(TD, DK[r+7], T3, T2, T1, T0);
+ AES_INV_SBOX(B);
+ inv_shift_rows(B);
+ inv_mix_columns(B);
+
+ for(size_t i = 0; i != 8; ++i)
+ B[i] ^= KS[8*r + i];
}
- out[ 0] = SD[get_byte(0, B0)] ^ MD[0];
- out[ 1] = SD[get_byte(1, B3)] ^ MD[1];
- out[ 2] = SD[get_byte(2, B2)] ^ MD[2];
- out[ 3] = SD[get_byte(3, B1)] ^ MD[3];
- out[ 4] = SD[get_byte(0, B1)] ^ MD[4];
- out[ 5] = SD[get_byte(1, B0)] ^ MD[5];
- out[ 6] = SD[get_byte(2, B3)] ^ MD[6];
- out[ 7] = SD[get_byte(3, B2)] ^ MD[7];
- out[ 8] = SD[get_byte(0, B2)] ^ MD[8];
- out[ 9] = SD[get_byte(1, B1)] ^ MD[9];
- out[10] = SD[get_byte(2, B0)] ^ MD[10];
- out[11] = SD[get_byte(3, B3)] ^ MD[11];
- out[12] = SD[get_byte(0, B3)] ^ MD[12];
- out[13] = SD[get_byte(1, B2)] ^ MD[13];
- out[14] = SD[get_byte(2, B1)] ^ MD[14];
- out[15] = SD[get_byte(3, B0)] ^ MD[15];
-
- in += 16;
- out += 16;
+ // Final round:
+ AES_INV_SBOX(B);
+ inv_shift_rows(B);
+ bit_transpose(B);
+
+ for(size_t i = 0; i != 8; ++i)
+ B[i] ^= DK[4*rounds + i % 4];
+
+ copy_out_be(out, this_loop*4*sizeof(uint32_t), B);
+
+ in += this_loop * BLOCK_SIZE;
+ out += this_loop * BLOCK_SIZE;
+ blocks -= this_loop;
}
}
-#undef AES_T
+inline constexpr uint8_t xtime(uint8_t s) { return static_cast<uint8_t>(s << 1) ^ ((s >> 7) * 0x1B); }
+
+inline uint32_t InvMixColumn(uint8_t s1)
+ {
+ const uint8_t s2 = xtime(s1);
+ const uint8_t s4 = xtime(s2);
+ const uint8_t s8 = xtime(s4);
+ const uint8_t s9 = s8 ^ s1;
+ const uint8_t s11 = s9 ^ s2;
+ const uint8_t s13 = s9 ^ s4;
+ const uint8_t s14 = s8 ^ s4 ^ s2;
+ return make_uint32(s14, s9, s13, s11);
+ }
+
+uint32_t SE_word(uint32_t x)
+ {
+ uint32_t I[8] = { 0 };
+
+ for(size_t i = 0; i != 8; ++i)
+ I[i] = (x >> (7-i)) & 0x01010101;
+
+ AES_SBOX(I);
+
+ x = 0;
+
+ for(size_t i = 0; i != 8; ++i)
+ x |= ((I[i] & 0x01010101) << (7-i));
+
+ return x;
+ }
void aes_key_schedule(const uint8_t key[], size_t length,
secure_vector<uint32_t>& EK,
secure_vector<uint32_t>& DK,
- secure_vector<uint8_t>& ME,
- secure_vector<uint8_t>& MD)
+ bool bswap_keys = false)
{
static const uint32_t RC[10] = {
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
@@ -544,127 +672,88 @@ void aes_key_schedule(const uint8_t key[], size_t length,
CT::poison(key, length);
- secure_vector<uint32_t> XEK(length + 32);
- secure_vector<uint32_t> XDK(length + 32);
+ EK.resize(length + 28);
+ DK.resize(length + 28);
for(size_t i = 0; i != X; ++i)
- XEK[i] = load_be<uint32_t>(key, i);
+ EK[i] = load_be<uint32_t>(key, i);
for(size_t i = X; i < 4*(rounds+1); i += X)
{
- XEK[i] = XEK[i-X] ^ RC[(i-X)/X] ^ rotl<8>(SE_word(XEK[i-1]));
+ EK[i] = EK[i-X] ^ RC[(i-X)/X] ^ rotl<8>(SE_word(EK[i-1]));
- for(size_t j = 1; j != X; ++j)
+ for(size_t j = 1; j != X && (i+j) < EK.size(); ++j)
{
- XEK[i+j] = XEK[i+j-X];
+ EK[i+j] = EK[i+j-X];
if(X == 8 && j == 4)
- XEK[i+j] ^= SE_word(XEK[i+j-1]);
+ EK[i+j] ^= SE_word(EK[i+j-1]);
else
- XEK[i+j] ^= XEK[i+j-1];
+ EK[i+j] ^= EK[i+j-1];
}
}
for(size_t i = 0; i != 4*(rounds+1); i += 4)
{
- XDK[i ] = XEK[4*rounds-i ];
- XDK[i+1] = XEK[4*rounds-i+1];
- XDK[i+2] = XEK[4*rounds-i+2];
- XDK[i+3] = XEK[4*rounds-i+3];
+ DK[i ] = EK[4*rounds-i ];
+ DK[i+1] = EK[4*rounds-i+1];
+ DK[i+2] = EK[4*rounds-i+2];
+ DK[i+3] = EK[4*rounds-i+3];
}
- for(size_t i = 4; i != length + 24; ++i)
+ for(size_t i = 4; i != DK.size() - 4; ++i)
{
- const uint8_t s0 = get_byte(0, XDK[i]);
- const uint8_t s1 = get_byte(1, XDK[i]);
- const uint8_t s2 = get_byte(2, XDK[i]);
- const uint8_t s3 = get_byte(3, XDK[i]);
+ const uint8_t s0 = get_byte(0, DK[i]);
+ const uint8_t s1 = get_byte(1, DK[i]);
+ const uint8_t s2 = get_byte(2, DK[i]);
+ const uint8_t s3 = get_byte(3, DK[i]);
- XDK[i] = InvMixColumn(s0) ^
+ DK[i] = InvMixColumn(s0) ^
rotr<8>(InvMixColumn(s1)) ^
rotr<16>(InvMixColumn(s2)) ^
rotr<24>(InvMixColumn(s3));
}
- ME.resize(16);
- MD.resize(16);
-
- for(size_t i = 0; i != 4; ++i)
+ if(bswap_keys)
{
- store_be(XEK[i+4*rounds], &ME[4*i]);
- store_be(XEK[i], &MD[4*i]);
- }
-
- EK.resize(length + 24);
- DK.resize(length + 24);
- copy_mem(EK.data(), XEK.data(), EK.size());
- copy_mem(DK.data(), XDK.data(), DK.size());
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
- {
- // ARM needs the subkeys to be byte reversed
-
+ // HW AES on little endian needs the subkeys to be byte reversed
for(size_t i = 0; i != EK.size(); ++i)
EK[i] = reverse_bytes(EK[i]);
for(size_t i = 0; i != DK.size(); ++i)
DK[i] = reverse_bytes(DK[i]);
}
-#endif
CT::unpoison(EK.data(), EK.size());
CT::unpoison(DK.data(), DK.size());
- CT::unpoison(ME.data(), ME.size());
- CT::unpoison(MD.data(), MD.size());
CT::unpoison(key, length);
}
size_t aes_parallelism()
{
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
- {
- return 4;
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return 4;
+ return 4; // pipelined
}
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_AES_VPERM)
+ if(CPUID::has_vperm())
{
- return 4;
+ return 2; // pipelined
}
#endif
- return 1;
+ // bitsliced:
+ return 2;
}
const char* aes_provider()
{
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return "aesni";
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return "power8";
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
- {
- return "armv8";
+ return "cpu";
}
#endif
@@ -692,24 +781,10 @@ void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_EK.empty() == false);
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
- {
- return aesni_encrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return armv8_encrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return power8_encrypt_n(in, out, blocks);
+ return hw_aes_encrypt_n(in, out, blocks);
}
#endif
@@ -720,31 +795,17 @@ void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
- aes_encrypt_n(in, out, blocks, m_EK, m_ME);
+ aes_encrypt_n(in, out, blocks, m_EK);
}
void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_DK.empty() == false);
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
- {
- return aesni_decrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return armv8_decrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return power8_decrypt_n(in, out, blocks);
+ return hw_aes_decrypt_n(in, out, blocks);
}
#endif
@@ -755,7 +816,7 @@ void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
- aes_decrypt_n(in, out, blocks, m_DK, m_MD);
+ aes_decrypt_n(in, out, blocks, m_DK);
}
void AES_128::key_schedule(const uint8_t key[], size_t length)
@@ -767,17 +828,10 @@ void AES_128::key_schedule(const uint8_t key[], size_t length)
}
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian());
}
#endif
@@ -788,39 +842,23 @@ void AES_128::key_schedule(const uint8_t key[], size_t length)
}
#endif
- aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ aes_key_schedule(key, length, m_EK, m_DK);
}
void AES_128::clear()
{
zap(m_EK);
zap(m_DK);
- zap(m_ME);
- zap(m_MD);
}
void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_EK.empty() == false);
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
- {
- return aesni_encrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return armv8_encrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return power8_encrypt_n(in, out, blocks);
+ return hw_aes_encrypt_n(in, out, blocks);
}
#endif
@@ -831,31 +869,17 @@ void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
- aes_encrypt_n(in, out, blocks, m_EK, m_ME);
+ aes_encrypt_n(in, out, blocks, m_EK);
}
void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_DK.empty() == false);
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return aesni_decrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
- {
- return armv8_decrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return power8_decrypt_n(in, out, blocks);
+ return hw_aes_decrypt_n(in, out, blocks);
}
#endif
@@ -866,7 +890,7 @@ void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
- aes_decrypt_n(in, out, blocks, m_DK, m_MD);
+ aes_decrypt_n(in, out, blocks, m_DK);
}
void AES_192::key_schedule(const uint8_t key[], size_t length)
@@ -878,17 +902,10 @@ void AES_192::key_schedule(const uint8_t key[], size_t length)
}
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian());
}
#endif
@@ -899,39 +916,23 @@ void AES_192::key_schedule(const uint8_t key[], size_t length)
}
#endif
- aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ aes_key_schedule(key, length, m_EK, m_DK);
}
void AES_192::clear()
{
zap(m_EK);
zap(m_DK);
- zap(m_ME);
- zap(m_MD);
}
void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_EK.empty() == false);
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
- {
- return aesni_encrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
- {
- return armv8_encrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return power8_encrypt_n(in, out, blocks);
+ return hw_aes_encrypt_n(in, out, blocks);
}
#endif
@@ -942,31 +943,17 @@ void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
- aes_encrypt_n(in, out, blocks, m_EK, m_ME);
+ aes_encrypt_n(in, out, blocks, m_EK);
}
void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_DK.empty() == false);
-#if defined(BOTAN_HAS_AES_NI)
- if(CPUID::has_aes_ni())
- {
- return aesni_decrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return armv8_decrypt_n(in, out, blocks);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
- {
- return power8_decrypt_n(in, out, blocks);
+ return hw_aes_decrypt_n(in, out, blocks);
}
#endif
@@ -977,7 +964,7 @@ void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
- aes_decrypt_n(in, out, blocks, m_DK, m_MD);
+ aes_decrypt_n(in, out, blocks, m_DK);
}
void AES_256::key_schedule(const uint8_t key[], size_t length)
@@ -989,17 +976,10 @@ void AES_256::key_schedule(const uint8_t key[], size_t length)
}
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- if(CPUID::has_arm_aes())
- {
- return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
- }
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- if(CPUID::has_power_crypto())
+#if defined(BOTAN_HAS_HW_AES_SUPPORT)
+ if(CPUID::has_hw_aes())
{
- return aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian());
}
#endif
@@ -1010,15 +990,13 @@ void AES_256::key_schedule(const uint8_t key[], size_t length)
}
#endif
- aes_key_schedule(key, length, m_EK, m_DK, m_ME, m_MD);
+ aes_key_schedule(key, length, m_EK, m_DK);
}
void AES_256::clear()
{
zap(m_EK);
zap(m_DK);
- zap(m_ME);
- zap(m_MD);
}
}
diff --git a/src/lib/block/aes/aes.h b/src/lib/block/aes/aes.h
index 84d997d05..76248200d 100644
--- a/src/lib/block/aes/aes.h
+++ b/src/lib/block/aes/aes.h
@@ -40,23 +40,15 @@ class BOTAN_PUBLIC_API(2,0) AES_128 final : public Block_Cipher_Fixed_Params<16,
#endif
#if defined(BOTAN_HAS_AES_NI)
- void aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
void aesni_key_schedule(const uint8_t key[], size_t length);
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- void power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI)
+ void hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
#endif
secure_vector<uint32_t> m_EK, m_DK;
- secure_vector<uint8_t> m_ME, m_MD;
};
/**
@@ -83,25 +75,17 @@ class BOTAN_PUBLIC_API(2,0) AES_192 final : public Block_Cipher_Fixed_Params<16,
#endif
#if defined(BOTAN_HAS_AES_NI)
- void aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
void aesni_key_schedule(const uint8_t key[], size_t length);
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- void power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI)
+ void hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
#endif
void key_schedule(const uint8_t key[], size_t length) override;
secure_vector<uint32_t> m_EK, m_DK;
- secure_vector<uint8_t> m_ME, m_MD;
};
/**
@@ -129,25 +113,17 @@ class BOTAN_PUBLIC_API(2,0) AES_256 final : public Block_Cipher_Fixed_Params<16,
#endif
#if defined(BOTAN_HAS_AES_NI)
- void aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
void aesni_key_schedule(const uint8_t key[], size_t length);
#endif
-#if defined(BOTAN_HAS_AES_ARMV8)
- void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
-#endif
-
-#if defined(BOTAN_HAS_AES_POWER8)
- void power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
- void power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI)
+ void hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
#endif
void key_schedule(const uint8_t key[], size_t length) override;
secure_vector<uint32_t> m_EK, m_DK;
- secure_vector<uint8_t> m_ME, m_MD;
};
}
diff --git a/src/lib/block/aes/aes_armv8/aes_armv8.cpp b/src/lib/block/aes/aes_armv8/aes_armv8.cpp
index f4261954b..9766bf88c 100644
--- a/src/lib/block/aes/aes_armv8/aes_armv8.cpp
+++ b/src/lib/block/aes/aes_armv8/aes_armv8.cpp
@@ -54,22 +54,21 @@ namespace Botan {
* AES-128 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
-void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
- const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
-
- const uint8x16_t K0 = vld1q_u8(skey + 0);
- const uint8x16_t K1 = vld1q_u8(skey + 16);
- const uint8x16_t K2 = vld1q_u8(skey + 32);
- const uint8x16_t K3 = vld1q_u8(skey + 48);
- const uint8x16_t K4 = vld1q_u8(skey + 64);
- const uint8x16_t K5 = vld1q_u8(skey + 80);
- const uint8x16_t K6 = vld1q_u8(skey + 96);
- const uint8x16_t K7 = vld1q_u8(skey + 112);
- const uint8x16_t K8 = vld1q_u8(skey + 128);
- const uint8x16_t K9 = vld1q_u8(skey + 144);
- const uint8x16_t K10 = vld1q_u8(mkey);
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0*16);
+ const uint8x16_t K1 = vld1q_u8(skey + 1*16);
+ const uint8x16_t K2 = vld1q_u8(skey + 2*16);
+ const uint8x16_t K3 = vld1q_u8(skey + 3*16);
+ const uint8x16_t K4 = vld1q_u8(skey + 4*16);
+ const uint8x16_t K5 = vld1q_u8(skey + 5*16);
+ const uint8x16_t K6 = vld1q_u8(skey + 6*16);
+ const uint8x16_t K7 = vld1q_u8(skey + 7*16);
+ const uint8x16_t K8 = vld1q_u8(skey + 8*16);
+ const uint8x16_t K9 = vld1q_u8(skey + 9*16);
+ const uint8x16_t K10 = vld1q_u8(skey + 10*16);
while(blocks >= 4)
{
@@ -120,22 +119,21 @@ void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-128 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
-void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
- const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
-
- const uint8x16_t K0 = vld1q_u8(skey + 0);
- const uint8x16_t K1 = vld1q_u8(skey + 16);
- const uint8x16_t K2 = vld1q_u8(skey + 32);
- const uint8x16_t K3 = vld1q_u8(skey + 48);
- const uint8x16_t K4 = vld1q_u8(skey + 64);
- const uint8x16_t K5 = vld1q_u8(skey + 80);
- const uint8x16_t K6 = vld1q_u8(skey + 96);
- const uint8x16_t K7 = vld1q_u8(skey + 112);
- const uint8x16_t K8 = vld1q_u8(skey + 128);
- const uint8x16_t K9 = vld1q_u8(skey + 144);
- const uint8x16_t K10 = vld1q_u8(mkey);
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0*16);
+ const uint8x16_t K1 = vld1q_u8(skey + 1*16);
+ const uint8x16_t K2 = vld1q_u8(skey + 2*16);
+ const uint8x16_t K3 = vld1q_u8(skey + 3*16);
+ const uint8x16_t K4 = vld1q_u8(skey + 4*16);
+ const uint8x16_t K5 = vld1q_u8(skey + 5*16);
+ const uint8x16_t K6 = vld1q_u8(skey + 6*16);
+ const uint8x16_t K7 = vld1q_u8(skey + 7*16);
+ const uint8x16_t K8 = vld1q_u8(skey + 8*16);
+ const uint8x16_t K9 = vld1q_u8(skey + 9*16);
+ const uint8x16_t K10 = vld1q_u8(skey + 10*16);
while(blocks >= 4)
{
@@ -186,24 +184,23 @@ void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-192 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
-void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
- const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
-
- const uint8x16_t K0 = vld1q_u8(skey + 0);
- const uint8x16_t K1 = vld1q_u8(skey + 16);
- const uint8x16_t K2 = vld1q_u8(skey + 32);
- const uint8x16_t K3 = vld1q_u8(skey + 48);
- const uint8x16_t K4 = vld1q_u8(skey + 64);
- const uint8x16_t K5 = vld1q_u8(skey + 80);
- const uint8x16_t K6 = vld1q_u8(skey + 96);
- const uint8x16_t K7 = vld1q_u8(skey + 112);
- const uint8x16_t K8 = vld1q_u8(skey + 128);
- const uint8x16_t K9 = vld1q_u8(skey + 144);
- const uint8x16_t K10 = vld1q_u8(skey + 160);
- const uint8x16_t K11 = vld1q_u8(skey + 176);
- const uint8x16_t K12 = vld1q_u8(mkey);
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0*16);
+ const uint8x16_t K1 = vld1q_u8(skey + 1*16);
+ const uint8x16_t K2 = vld1q_u8(skey + 2*16);
+ const uint8x16_t K3 = vld1q_u8(skey + 3*16);
+ const uint8x16_t K4 = vld1q_u8(skey + 4*16);
+ const uint8x16_t K5 = vld1q_u8(skey + 5*16);
+ const uint8x16_t K6 = vld1q_u8(skey + 6*16);
+ const uint8x16_t K7 = vld1q_u8(skey + 7*16);
+ const uint8x16_t K8 = vld1q_u8(skey + 8*16);
+ const uint8x16_t K9 = vld1q_u8(skey + 9*16);
+ const uint8x16_t K10 = vld1q_u8(skey + 10*16);
+ const uint8x16_t K11 = vld1q_u8(skey + 11*16);
+ const uint8x16_t K12 = vld1q_u8(skey + 12*16);
while(blocks >= 4)
{
@@ -258,24 +255,23 @@ void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-192 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
-void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
- const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
-
- const uint8x16_t K0 = vld1q_u8(skey + 0);
- const uint8x16_t K1 = vld1q_u8(skey + 16);
- const uint8x16_t K2 = vld1q_u8(skey + 32);
- const uint8x16_t K3 = vld1q_u8(skey + 48);
- const uint8x16_t K4 = vld1q_u8(skey + 64);
- const uint8x16_t K5 = vld1q_u8(skey + 80);
- const uint8x16_t K6 = vld1q_u8(skey + 96);
- const uint8x16_t K7 = vld1q_u8(skey + 112);
- const uint8x16_t K8 = vld1q_u8(skey + 128);
- const uint8x16_t K9 = vld1q_u8(skey + 144);
- const uint8x16_t K10 = vld1q_u8(skey + 160);
- const uint8x16_t K11 = vld1q_u8(skey + 176);
- const uint8x16_t K12 = vld1q_u8(mkey);
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0*16);
+ const uint8x16_t K1 = vld1q_u8(skey + 1*16);
+ const uint8x16_t K2 = vld1q_u8(skey + 2*16);
+ const uint8x16_t K3 = vld1q_u8(skey + 3*16);
+ const uint8x16_t K4 = vld1q_u8(skey + 4*16);
+ const uint8x16_t K5 = vld1q_u8(skey + 5*16);
+ const uint8x16_t K6 = vld1q_u8(skey + 6*16);
+ const uint8x16_t K7 = vld1q_u8(skey + 7*16);
+ const uint8x16_t K8 = vld1q_u8(skey + 8*16);
+ const uint8x16_t K9 = vld1q_u8(skey + 9*16);
+ const uint8x16_t K10 = vld1q_u8(skey + 10*16);
+ const uint8x16_t K11 = vld1q_u8(skey + 11*16);
+ const uint8x16_t K12 = vld1q_u8(skey + 12*16);
while(blocks >= 4)
{
@@ -330,26 +326,25 @@ void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-256 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
-void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
- const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
-
- const uint8x16_t K0 = vld1q_u8(skey + 0);
- const uint8x16_t K1 = vld1q_u8(skey + 16);
- const uint8x16_t K2 = vld1q_u8(skey + 32);
- const uint8x16_t K3 = vld1q_u8(skey + 48);
- const uint8x16_t K4 = vld1q_u8(skey + 64);
- const uint8x16_t K5 = vld1q_u8(skey + 80);
- const uint8x16_t K6 = vld1q_u8(skey + 96);
- const uint8x16_t K7 = vld1q_u8(skey + 112);
- const uint8x16_t K8 = vld1q_u8(skey + 128);
- const uint8x16_t K9 = vld1q_u8(skey + 144);
- const uint8x16_t K10 = vld1q_u8(skey + 160);
- const uint8x16_t K11 = vld1q_u8(skey + 176);
- const uint8x16_t K12 = vld1q_u8(skey + 192);
- const uint8x16_t K13 = vld1q_u8(skey + 208);
- const uint8x16_t K14 = vld1q_u8(mkey);
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0*16);
+ const uint8x16_t K1 = vld1q_u8(skey + 1*16);
+ const uint8x16_t K2 = vld1q_u8(skey + 2*16);
+ const uint8x16_t K3 = vld1q_u8(skey + 3*16);
+ const uint8x16_t K4 = vld1q_u8(skey + 4*16);
+ const uint8x16_t K5 = vld1q_u8(skey + 5*16);
+ const uint8x16_t K6 = vld1q_u8(skey + 6*16);
+ const uint8x16_t K7 = vld1q_u8(skey + 7*16);
+ const uint8x16_t K8 = vld1q_u8(skey + 8*16);
+ const uint8x16_t K9 = vld1q_u8(skey + 9*16);
+ const uint8x16_t K10 = vld1q_u8(skey + 10*16);
+ const uint8x16_t K11 = vld1q_u8(skey + 11*16);
+ const uint8x16_t K12 = vld1q_u8(skey + 12*16);
+ const uint8x16_t K13 = vld1q_u8(skey + 13*16);
+ const uint8x16_t K14 = vld1q_u8(skey + 14*16);
while(blocks >= 4)
{
@@ -408,26 +403,25 @@ void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-256 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
-void AES_256::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
- const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
-
- const uint8x16_t K0 = vld1q_u8(skey + 0);
- const uint8x16_t K1 = vld1q_u8(skey + 16);
- const uint8x16_t K2 = vld1q_u8(skey + 32);
- const uint8x16_t K3 = vld1q_u8(skey + 48);
- const uint8x16_t K4 = vld1q_u8(skey + 64);
- const uint8x16_t K5 = vld1q_u8(skey + 80);
- const uint8x16_t K6 = vld1q_u8(skey + 96);
- const uint8x16_t K7 = vld1q_u8(skey + 112);
- const uint8x16_t K8 = vld1q_u8(skey + 128);
- const uint8x16_t K9 = vld1q_u8(skey + 144);
- const uint8x16_t K10 = vld1q_u8(skey + 160);
- const uint8x16_t K11 = vld1q_u8(skey + 176);
- const uint8x16_t K12 = vld1q_u8(skey + 192);
- const uint8x16_t K13 = vld1q_u8(skey + 208);
- const uint8x16_t K14 = vld1q_u8(mkey);
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0*16);
+ const uint8x16_t K1 = vld1q_u8(skey + 1*16);
+ const uint8x16_t K2 = vld1q_u8(skey + 2*16);
+ const uint8x16_t K3 = vld1q_u8(skey + 3*16);
+ const uint8x16_t K4 = vld1q_u8(skey + 4*16);
+ const uint8x16_t K5 = vld1q_u8(skey + 5*16);
+ const uint8x16_t K6 = vld1q_u8(skey + 6*16);
+ const uint8x16_t K7 = vld1q_u8(skey + 7*16);
+ const uint8x16_t K8 = vld1q_u8(skey + 8*16);
+ const uint8x16_t K9 = vld1q_u8(skey + 9*16);
+ const uint8x16_t K10 = vld1q_u8(skey + 10*16);
+ const uint8x16_t K11 = vld1q_u8(skey + 11*16);
+ const uint8x16_t K12 = vld1q_u8(skey + 12*16);
+ const uint8x16_t K13 = vld1q_u8(skey + 13*16);
+ const uint8x16_t K14 = vld1q_u8(skey + 14*16);
while(blocks >= 4)
{
diff --git a/src/lib/block/aes/aes_ni/aes_ni.cpp b/src/lib/block/aes/aes_ni/aes_ni.cpp
index 0160bc1ee..76c695f32 100644
--- a/src/lib/block/aes/aes_ni/aes_ni.cpp
+++ b/src/lib/block/aes/aes_ni/aes_ni.cpp
@@ -107,7 +107,7 @@ __m128i aes_256_key_expansion(__m128i key, __m128i key2)
* AES-128 Encryption
*/
BOTAN_FUNC_ISA("ssse3,aes")
-void AES_128::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
__m128i* out_mm = reinterpret_cast<__m128i*>(out);
@@ -184,7 +184,7 @@ void AES_128::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-128 Decryption
*/
BOTAN_FUNC_ISA("ssse3,aes")
-void AES_128::aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
__m128i* out_mm = reinterpret_cast<__m128i*>(out);
@@ -314,7 +314,7 @@ void AES_128::aesni_key_schedule(const uint8_t key[], size_t)
* AES-192 Encryption
*/
BOTAN_FUNC_ISA("ssse3,aes")
-void AES_192::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
__m128i* out_mm = reinterpret_cast<__m128i*>(out);
@@ -397,7 +397,7 @@ void AES_192::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-192 Decryption
*/
BOTAN_FUNC_ISA("ssse3,aes")
-void AES_192::aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
__m128i* out_mm = reinterpret_cast<__m128i*>(out);
@@ -530,7 +530,7 @@ void AES_192::aesni_key_schedule(const uint8_t key[], size_t)
* AES-256 Encryption
*/
BOTAN_FUNC_ISA("ssse3,aes")
-void AES_256::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
__m128i* out_mm = reinterpret_cast<__m128i*>(out);
@@ -619,7 +619,7 @@ void AES_256::aesni_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
* AES-256 Decryption
*/
BOTAN_FUNC_ISA("ssse3,aes")
-void AES_256::aesni_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
__m128i* out_mm = reinterpret_cast<__m128i*>(out);
diff --git a/src/lib/block/aes/aes_power8/aes_power8.cpp b/src/lib/block/aes/aes_power8/aes_power8.cpp
index b9af23ef2..18bc85933 100644
--- a/src/lib/block/aes/aes_power8/aes_power8.cpp
+++ b/src/lib/block/aes/aes_power8/aes_power8.cpp
@@ -24,22 +24,6 @@ typedef __vector unsigned char Altivec8x16;
namespace {
-inline Altivec64x2 load_key(const uint32_t key[])
- {
- Altivec32x4 vec = vec_vsx_ld(0, key);
-
- if(CPUID::is_little_endian())
- {
- const Altivec8x16 mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
- const Altivec8x16 zero = {0};
- return (Altivec64x2)vec_perm((Altivec8x16)vec, zero, mask);
- }
- else
- {
- return (Altivec64x2)vec;
- }
- }
-
inline Altivec8x16 reverse_vec(Altivec8x16 src)
{
if(CPUID::is_little_endian())
@@ -54,6 +38,11 @@ inline Altivec8x16 reverse_vec(Altivec8x16 src)
}
}
+inline Altivec64x2 load_key(const uint32_t key[])
+ {
+ return (Altivec64x2)reverse_vec((Altivec8x16)vec_vsx_ld(0, key));
+ }
+
inline Altivec64x2 load_block(const uint8_t src[])
{
return (Altivec64x2)reverse_vec(vec_vsx_ld(0, src));
@@ -112,7 +101,7 @@ inline void store_blocks(Altivec64x2 B0, Altivec64x2 B1,
}
BOTAN_FUNC_ISA("crypto")
-void AES_128::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[0]);
const Altivec64x2 K1 = load_key(&m_EK[4]);
@@ -124,7 +113,7 @@ void AES_128::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
const Altivec64x2 K7 = load_key(&m_EK[28]);
const Altivec64x2 K8 = load_key(&m_EK[32]);
const Altivec64x2 K9 = load_key(&m_EK[36]);
- const Altivec64x2 K10 = load_block(m_ME.data());
+ const Altivec64x2 K10 = load_key(&m_EK[40]);
while(blocks >= 4)
{
@@ -176,9 +165,9 @@ void AES_128::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
}
BOTAN_FUNC_ISA("crypto")
-void AES_128::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
- const Altivec64x2 K0 = load_block(m_ME.data());
+ const Altivec64x2 K0 = load_key(&m_EK[40]);
const Altivec64x2 K1 = load_key(&m_EK[36]);
const Altivec64x2 K2 = load_key(&m_EK[32]);
const Altivec64x2 K3 = load_key(&m_EK[28]);
@@ -240,7 +229,7 @@ void AES_128::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
}
BOTAN_FUNC_ISA("crypto")
-void AES_192::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[0]);
const Altivec64x2 K1 = load_key(&m_EK[4]);
@@ -254,7 +243,7 @@ void AES_192::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
const Altivec64x2 K9 = load_key(&m_EK[36]);
const Altivec64x2 K10 = load_key(&m_EK[40]);
const Altivec64x2 K11 = load_key(&m_EK[44]);
- const Altivec64x2 K12 = load_block(m_ME.data());
+ const Altivec64x2 K12 = load_key(&m_EK[48]);
while(blocks >= 4)
{
@@ -310,9 +299,9 @@ void AES_192::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
}
BOTAN_FUNC_ISA("crypto")
-void AES_192::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
- const Altivec64x2 K0 = load_block(m_ME.data());
+ const Altivec64x2 K0 = load_key(&m_EK[48]);
const Altivec64x2 K1 = load_key(&m_EK[44]);
const Altivec64x2 K2 = load_key(&m_EK[40]);
const Altivec64x2 K3 = load_key(&m_EK[36]);
@@ -380,7 +369,7 @@ void AES_192::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
}
BOTAN_FUNC_ISA("crypto")
-void AES_256::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[0]);
const Altivec64x2 K1 = load_key(&m_EK[4]);
@@ -396,7 +385,7 @@ void AES_256::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
const Altivec64x2 K11 = load_key(&m_EK[44]);
const Altivec64x2 K12 = load_key(&m_EK[48]);
const Altivec64x2 K13 = load_key(&m_EK[52]);
- const Altivec64x2 K14 = load_block(m_ME.data());
+ const Altivec64x2 K14 = load_key(&m_EK[56]);
while(blocks >= 4)
{
@@ -456,9 +445,9 @@ void AES_256::power8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks)
}
BOTAN_FUNC_ISA("crypto")
-void AES_256::power8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
- const Altivec64x2 K0 = load_block(m_ME.data());
+ const Altivec64x2 K0 = load_key(&m_EK[56]);
const Altivec64x2 K1 = load_key(&m_EK[52]);
const Altivec64x2 K2 = load_key(&m_EK[48]);
const Altivec64x2 K3 = load_key(&m_EK[44]);
diff --git a/src/lib/utils/cpuid/cpuid.h b/src/lib/utils/cpuid/cpuid.h
index d9e6b97b3..d9e0a74a6 100644
--- a/src/lib/utils/cpuid/cpuid.h
+++ b/src/lib/utils/cpuid/cpuid.h
@@ -335,6 +335,22 @@ class BOTAN_PUBLIC_API(2,1) CPUID final
}
/**
+ * Check if the processor supports hardware AES instructions
+ */
+ static bool has_hw_aes()
+ {
+#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+ return has_aes_ni();
+#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ return has_arm_aes();
+#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+ return has_power_crypto();
+#else
+ return false;
+#endif
+ }
+
+ /**
* Check if the processor supports carryless multiply
* (CLMUL, PMULL)
*/