diff options
author | lloyd <[email protected]> | 2009-11-06 18:57:58 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-11-06 18:57:58 +0000 |
commit | 07412401c927e01da3504f0c2b7e94d4ac13ee33 (patch) | |
tree | 8a0a221d4b962db645d4a34ef9a97ccfd1cfcf3d /src | |
parent | 89da502ff80a9c63038b8b02a5062e460dff4649 (diff) |
Add a complete but untested AES-128 using the AES-NI intrinsics.
From looking at how key gen works in particular, it seems easiest to provide
only AES-128, AES-192, and AES-256 and not a general AES class that can
accept any key length. This also has the bonus of allowing full loop unrolling
which may be a win (how much so will depend on the latency/throughput of
the AES instructions, which are currently unknown).
There is no block interleaving yet, though of course it would work very nicely
here; this is simply due to the desire to keep things simple until what is
currently here can actually be tested. (Intel has an emulator that is supposed
to work, but it just crashes on my machine...)
I'm not entirely sure if byte swapping is required. Intel has a white paper
out that suggests it isn't (and really it would have been stupid of them to
not build this into the aes instructions), but who knows. If it turns
out to be necessary there is a pretty fast bswap instruction for SSE anyway.
Diffstat (limited to 'src')
-rw-r--r-- | src/block/aes_intel/aes_intel.cpp | 147 | ||||
-rw-r--r-- | src/block/aes_intel/aes_intel.h | 50 | ||||
-rw-r--r-- | src/engine/aes_isa_eng/aes_isa_engine.cpp | 18 |
3 files changed, 147 insertions, 68 deletions
diff --git a/src/block/aes_intel/aes_intel.cpp b/src/block/aes_intel/aes_intel.cpp index bd814e6c8..8a8d0331a 100644 --- a/src/block/aes_intel/aes_intel.cpp +++ b/src/block/aes_intel/aes_intel.cpp @@ -6,16 +6,70 @@ */ #include <botan/aes_intel.h> +#include <wmmintrin.h> namespace Botan { +namespace { + +__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon) + { + key_with_rcon = _mm_shuffle_epi32(key_with_rcon, 0xff); + + __m128i T = _mm_slli_si128 (key, 0x4); + key = _mm_xor_si128 (key, T); + T = _mm_slli_si128 (T, 0x4); + key = _mm_xor_si128 (key, T); + T = _mm_slli_si128 (T, 0x4); + + key = _mm_xor_si128 (key, T); + key = _mm_xor_si128 (key, key_with_rcon); + return key; + } + +} + /** * AES Encryption */ -void AES_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const +void AES_128_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const { + const __m128i* in_mm = (const __m128i*)in; + __m128i* out_mm = (__m128i*)out; + + const __m128i* key_mm = (const __m128i*)&EK[0]; + + __m128i K0 = _mm_loadu_si128(key_mm); + __m128i K1 = _mm_loadu_si128(key_mm + 1); + __m128i K2 = _mm_loadu_si128(key_mm + 2); + __m128i K3 = _mm_loadu_si128(key_mm + 3); + __m128i K4 = _mm_loadu_si128(key_mm + 4); + __m128i K5 = _mm_loadu_si128(key_mm + 5); + __m128i K6 = _mm_loadu_si128(key_mm + 6); + __m128i K7 = _mm_loadu_si128(key_mm + 7); + __m128i K8 = _mm_loadu_si128(key_mm + 8); + __m128i K9 = _mm_loadu_si128(key_mm + 9); + __m128i K10 = _mm_loadu_si128(key_mm + 10); + for(u32bit i = 0; i != blocks; ++i) { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesenc_si128(B, K1); + B = _mm_aesenc_si128(B, K2); + B = _mm_aesenc_si128(B, K3); + B = _mm_aesenc_si128(B, K4); + B = _mm_aesenc_si128(B, K5); + B = _mm_aesenc_si128(B, K6); + B = _mm_aesenc_si128(B, K7); + B = _mm_aesenc_si128(B, K8); + B = _mm_aesenc_si128(B, K9); + B = _mm_aesenclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); + in += BLOCK_SIZE; out += 
BLOCK_SIZE; } @@ -24,11 +78,43 @@ void AES_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const /** * AES Decryption */ -void AES_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const +void AES_128_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const { + const __m128i* in_mm = (const __m128i*)in; + __m128i* out_mm = (__m128i*)out; + + const __m128i* key_mm = (const __m128i*)&DK[0]; + + __m128i K0 = _mm_loadu_si128(key_mm); + __m128i K1 = _mm_loadu_si128(key_mm + 1); + __m128i K2 = _mm_loadu_si128(key_mm + 2); + __m128i K3 = _mm_loadu_si128(key_mm + 3); + __m128i K4 = _mm_loadu_si128(key_mm + 4); + __m128i K5 = _mm_loadu_si128(key_mm + 5); + __m128i K6 = _mm_loadu_si128(key_mm + 6); + __m128i K7 = _mm_loadu_si128(key_mm + 7); + __m128i K8 = _mm_loadu_si128(key_mm + 8); + __m128i K9 = _mm_loadu_si128(key_mm + 9); + __m128i K10 = _mm_loadu_si128(key_mm + 10); for(u32bit i = 0; i != blocks; ++i) { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesdec_si128(B, K1); + B = _mm_aesdec_si128(B, K2); + B = _mm_aesdec_si128(B, K3); + B = _mm_aesdec_si128(B, K4); + B = _mm_aesdec_si128(B, K5); + B = _mm_aesdec_si128(B, K6); + B = _mm_aesdec_si128(B, K7); + B = _mm_aesdec_si128(B, K8); + B = _mm_aesdec_si128(B, K9); + B = _mm_aesdeclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); in += BLOCK_SIZE; out += BLOCK_SIZE; @@ -38,25 +124,60 @@ void AES_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const /** * AES Key Schedule */ -void AES_Intel::key_schedule(const byte key[], u32bit length) +void AES_128_Intel::key_schedule(const byte key[], u32bit length) { - } -/** -* AES Constructor -*/ -AES_Intel::AES_Intel(u32bit key_size) : BlockCipher(16, key_size) - { - if(key_size != 16 && key_size != 24 && key_size != 32) - throw Invalid_Key_Length(name(), key_size); - ROUNDS = (key_size / 4) + 6; +#define AES_128_key_exp_with_rcon(K, RCON) \ + aes_128_key_expansion(K, 
_mm_aeskeygenassist_si128(K, RCON)); + + __m128i K0 = _mm_loadu_si128((const __m128i*)key); + __m128i K1 = AES_128_key_exp_with_rcon(K0, 0x01); + __m128i K2 = AES_128_key_exp_with_rcon(K1, 0x02); + __m128i K3 = AES_128_key_exp_with_rcon(K2, 0x04); + __m128i K4 = AES_128_key_exp_with_rcon(K3, 0x08); + __m128i K5 = AES_128_key_exp_with_rcon(K4, 0x10); + __m128i K6 = AES_128_key_exp_with_rcon(K5, 0x20); + __m128i K7 = AES_128_key_exp_with_rcon(K6, 0x40); + __m128i K8 = AES_128_key_exp_with_rcon(K7, 0x80); + __m128i K9 = AES_128_key_exp_with_rcon(K8, 0x1B); + __m128i K10 = AES_128_key_exp_with_rcon(K9, 0x36); + + __m128i* EK_mm = (__m128i*)&EK[0]; + _mm_storeu_si128(EK_mm , K0); + _mm_storeu_si128(EK_mm + 1, K1); + _mm_storeu_si128(EK_mm + 2, K2); + _mm_storeu_si128(EK_mm + 3, K3); + _mm_storeu_si128(EK_mm + 4, K4); + _mm_storeu_si128(EK_mm + 5, K5); + _mm_storeu_si128(EK_mm + 6, K6); + _mm_storeu_si128(EK_mm + 7, K7); + _mm_storeu_si128(EK_mm + 8, K8); + _mm_storeu_si128(EK_mm + 9, K9); + _mm_storeu_si128(EK_mm + 10, K10); + + // Now generate decryption keys + + __m128i* DK_mm = (__m128i*)&DK[0]; + _mm_storeu_si128(DK_mm , K10); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9)); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8)); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7)); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6)); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5)); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4)); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3)); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2)); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1)); + _mm_storeu_si128(DK_mm + 10, K0); } /** * Clear memory of sensitive data */ -void AES_Intel::clear() +void AES_128_Intel::clear() { + EK.clear(); + DK.clear(); } } diff --git a/src/block/aes_intel/aes_intel.h b/src/block/aes_intel/aes_intel.h index b40c2d3f6..90270939c 100644 --- a/src/block/aes_intel/aes_intel.h +++ b/src/block/aes_intel/aes_intel.h @@ -12,61 +12,21 @@ 
namespace Botan { -class BOTAN_DLL AES_Intel : public BlockCipher +class BOTAN_DLL AES_128_Intel : public BlockCipher { public: void encrypt_n(const byte in[], byte out[], u32bit blocks) const; void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void clear(); - std::string name() const { return "AES"; } - BlockCipher* clone() const { return new AES_Intel; } + std::string name() const { return "AES-128"; } + BlockCipher* clone() const { return new AES_128_Intel; } - AES_Intel() : BlockCipher(16, 16, 32, 8) { ROUNDS = 14; } - AES_Intel(u32bit); + AES_128_Intel() : BlockCipher(16, 16) { } private: void key_schedule(const byte[], u32bit); - u32bit ROUNDS; - - SecureBuffer<u32bit, 56> EK; - SecureBuffer<byte, 16> ME; - - SecureBuffer<u32bit, 56> DK; - SecureBuffer<byte, 16> MD; - }; - -/** -* AES-128 -*/ -class BOTAN_DLL AES_Intel_128 : public AES_Intel - { - public: - std::string name() const { return "AES-128"; } - BlockCipher* clone() const { return new AES_Intel_128; } - AES_Intel_128() : AES_Intel(16) {} - }; - -/** -* AES-192 -*/ -class BOTAN_DLL AES_Intel_192 : public AES_Intel - { - public: - std::string name() const { return "AES-192"; } - BlockCipher* clone() const { return new AES_Intel_192; } - AES_Intel_192() : AES_Intel(24) {} - }; - -/** -* AES-256 -*/ -class BOTAN_DLL AES_Intel_256 : public AES_Intel - { - public: - std::string name() const { return "AES-256"; } - BlockCipher* clone() const { return new AES_Intel_256; } - AES_Intel_256() : AES_Intel(32) {} + SecureBuffer<u32bit, 44> EK, DK; }; } diff --git a/src/engine/aes_isa_eng/aes_isa_engine.cpp b/src/engine/aes_isa_eng/aes_isa_engine.cpp index c74f8bddc..122ec0f6e 100644 --- a/src/engine/aes_isa_eng/aes_isa_engine.cpp +++ b/src/engine/aes_isa_eng/aes_isa_engine.cpp @@ -25,28 +25,26 @@ AES_ISA_Engine::find_block_cipher(const SCAN_Name& request, #if defined(BOTAN_HAS_AES_INTEL) if(CPUID::has_intel_aes()) { - if(request.algo_name() == "AES") - return new AES_Intel; if(request.algo_name() 
== "AES-128") - return new AES_Intel_128; + return new AES_128_Intel; +#if 0 if(request.algo_name() == "AES-192") - return new AES_Intel_192; + return new AES_192_Intel; if(request.algo_name() == "AES-256") - return new AES_Intel_256; + return new AES_256_Intel; +#endif } #endif #if defined(BOTAN_HAS_AES_VIA) if(CPUID::has_via_aes()) { - if(request.algo_name() == "AES") - return new AES_Via; if(request.algo_name() == "AES-128") - return new AES_Via_128; + return new AES_128_Via; if(request.algo_name() == "AES-192") - return new AES_Via_192; + return new AES_192_Via; if(request.algo_name() == "AES-256") - return new AES_Via_256; + return new AES_256_Via; } #endif |