diff options
-rw-r--r-- | src/block/aes_intel/aes_intel.cpp | 147 | ||||
-rw-r--r-- | src/block/aes_intel/aes_intel.h | 50 | ||||
-rw-r--r-- | src/engine/aes_isa_eng/aes_isa_engine.cpp | 18 |
3 files changed, 147 insertions, 68 deletions
diff --git a/src/block/aes_intel/aes_intel.cpp b/src/block/aes_intel/aes_intel.cpp index bd814e6c8..8a8d0331a 100644 --- a/src/block/aes_intel/aes_intel.cpp +++ b/src/block/aes_intel/aes_intel.cpp @@ -6,16 +6,70 @@ */ #include <botan/aes_intel.h> +#include <wmmintrin.h> namespace Botan { +namespace { + +__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon) + { + key_with_rcon = _mm_shuffle_epi32(key_with_rcon, 0xff); + + __m128i T = _mm_slli_si128 (key, 0x4); + key = _mm_xor_si128 (key, T); + T = _mm_slli_si128 (T, 0x4); + key = _mm_xor_si128 (key, T); + T = _mm_slli_si128 (T, 0x4); + + key = _mm_xor_si128 (key, T); + key = _mm_xor_si128 (key, key_with_rcon); + return key; + } + +} + /** * AES Encryption */ -void AES_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const +void AES_128_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const { + const __m128i* in_mm = (const __m128i*)in; + __m128i* out_mm = (__m128i*)out; + + const __m128i* key_mm = (const __m128i*)&EK[0]; + + __m128i K0 = _mm_loadu_si128(key_mm); + __m128i K1 = _mm_loadu_si128(key_mm + 1); + __m128i K2 = _mm_loadu_si128(key_mm + 2); + __m128i K3 = _mm_loadu_si128(key_mm + 3); + __m128i K4 = _mm_loadu_si128(key_mm + 4); + __m128i K5 = _mm_loadu_si128(key_mm + 5); + __m128i K6 = _mm_loadu_si128(key_mm + 6); + __m128i K7 = _mm_loadu_si128(key_mm + 7); + __m128i K8 = _mm_loadu_si128(key_mm + 8); + __m128i K9 = _mm_loadu_si128(key_mm + 9); + __m128i K10 = _mm_loadu_si128(key_mm + 10); + for(u32bit i = 0; i != blocks; ++i) { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesenc_si128(B, K1); + B = _mm_aesenc_si128(B, K2); + B = _mm_aesenc_si128(B, K3); + B = _mm_aesenc_si128(B, K4); + B = _mm_aesenc_si128(B, K5); + B = _mm_aesenc_si128(B, K6); + B = _mm_aesenc_si128(B, K7); + B = _mm_aesenc_si128(B, K8); + B = _mm_aesenc_si128(B, K9); + B = _mm_aesenclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); + in += BLOCK_SIZE; out += BLOCK_SIZE; } @@ -24,11 +78,43 @@ void AES_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const /** * AES Decryption */ -void AES_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const +void AES_128_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const { + const __m128i* in_mm = (const __m128i*)in; + __m128i* out_mm = (__m128i*)out; + + const __m128i* key_mm = (const __m128i*)&DK[0]; + + __m128i K0 = _mm_loadu_si128(key_mm); + __m128i K1 = _mm_loadu_si128(key_mm + 1); + __m128i K2 = _mm_loadu_si128(key_mm + 2); + __m128i K3 = _mm_loadu_si128(key_mm + 3); + __m128i K4 = _mm_loadu_si128(key_mm + 4); + __m128i K5 = _mm_loadu_si128(key_mm + 5); + __m128i K6 = _mm_loadu_si128(key_mm + 6); + __m128i K7 = _mm_loadu_si128(key_mm + 7); + __m128i K8 = _mm_loadu_si128(key_mm + 8); + __m128i K9 = _mm_loadu_si128(key_mm + 9); + __m128i K10 = _mm_loadu_si128(key_mm + 10); for(u32bit i = 0; i != blocks; ++i) { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesdec_si128(B, K1); + B = _mm_aesdec_si128(B, K2); + B = _mm_aesdec_si128(B, K3); + B = _mm_aesdec_si128(B, K4); + B = _mm_aesdec_si128(B, K5); + B = _mm_aesdec_si128(B, K6); + B = _mm_aesdec_si128(B, K7); + B = _mm_aesdec_si128(B, K8); + B = _mm_aesdec_si128(B, K9); + B = _mm_aesdeclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); in += BLOCK_SIZE; out += BLOCK_SIZE; @@ -38,25 +124,60 @@ void AES_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const /** * AES Key Schedule */ -void AES_Intel::key_schedule(const byte key[], u32bit length) +void AES_128_Intel::key_schedule(const byte key[], u32bit length) { - } -/** -* AES Constructor -*/ -AES_Intel::AES_Intel(u32bit key_size) : BlockCipher(16, key_size) - { - if(key_size != 16 && key_size != 24 && key_size != 32) - throw Invalid_Key_Length(name(), key_size); - ROUNDS = (key_size / 4) + 6; +#define AES_128_key_exp_with_rcon(K, RCON) \ + aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON)); + + __m128i K0 = _mm_loadu_si128((const __m128i*)key); + __m128i K1 = AES_128_key_exp_with_rcon(K0, 0x01); + __m128i K2 = AES_128_key_exp_with_rcon(K1, 0x02); + __m128i K3 = AES_128_key_exp_with_rcon(K2, 0x04); + __m128i K4 = AES_128_key_exp_with_rcon(K3, 0x08); + __m128i K5 = AES_128_key_exp_with_rcon(K4, 0x10); + __m128i K6 = AES_128_key_exp_with_rcon(K5, 0x20); + __m128i K7 = AES_128_key_exp_with_rcon(K6, 0x40); + __m128i K8 = AES_128_key_exp_with_rcon(K7, 0x80); + __m128i K9 = AES_128_key_exp_with_rcon(K8, 0x1B); + __m128i K10 = AES_128_key_exp_with_rcon(K9, 0x36); + + __m128i* EK_mm = (__m128i*)&EK[0]; + _mm_storeu_si128(EK_mm , K0); + _mm_storeu_si128(EK_mm + 1, K1); + _mm_storeu_si128(EK_mm + 2, K2); + _mm_storeu_si128(EK_mm + 3, K3); + _mm_storeu_si128(EK_mm + 4, K4); + _mm_storeu_si128(EK_mm + 5, K5); + _mm_storeu_si128(EK_mm + 6, K6); + _mm_storeu_si128(EK_mm + 7, K7); + _mm_storeu_si128(EK_mm + 8, K8); + _mm_storeu_si128(EK_mm + 9, K9); + _mm_storeu_si128(EK_mm + 10, K10); + + // Now generate decryption keys + + __m128i* DK_mm = (__m128i*)&DK[0]; + _mm_storeu_si128(DK_mm , K10); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9)); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8)); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7)); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6)); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5)); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4)); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3)); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2)); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1)); + _mm_storeu_si128(DK_mm + 10, K0); } /** * Clear memory of sensitive data */ -void AES_Intel::clear() +void AES_128_Intel::clear() { + EK.clear(); + DK.clear(); } } diff --git a/src/block/aes_intel/aes_intel.h b/src/block/aes_intel/aes_intel.h index b40c2d3f6..90270939c 100644 --- a/src/block/aes_intel/aes_intel.h +++ b/src/block/aes_intel/aes_intel.h @@ -12,61 +12,21 @@ namespace Botan { -class BOTAN_DLL AES_Intel : public BlockCipher +class BOTAN_DLL AES_128_Intel : public BlockCipher { public: void encrypt_n(const byte in[], byte out[], u32bit blocks) const; void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void clear(); - std::string name() const { return "AES"; } - BlockCipher* clone() const { return new AES_Intel; } + std::string name() const { return "AES-128"; } + BlockCipher* clone() const { return new AES_128_Intel; } - AES_Intel() : BlockCipher(16, 16, 32, 8) { ROUNDS = 14; } - AES_Intel(u32bit); + AES_128_Intel() : BlockCipher(16, 16) { } private: void key_schedule(const byte[], u32bit); - u32bit ROUNDS; - - SecureBuffer<u32bit, 56> EK; - SecureBuffer<byte, 16> ME; - - SecureBuffer<u32bit, 56> DK; - SecureBuffer<byte, 16> MD; - }; - -/** -* AES-128 -*/ -class BOTAN_DLL AES_Intel_128 : public AES_Intel - { - public: - std::string name() const { return "AES-128"; } - BlockCipher* clone() const { return new AES_Intel_128; } - AES_Intel_128() : AES_Intel(16) {} - }; - -/** -* AES-192 -*/ -class BOTAN_DLL AES_Intel_192 : public AES_Intel - { - public: - std::string name() const { return "AES-192"; } - BlockCipher* clone() const { return new AES_Intel_192; } - AES_Intel_192() : AES_Intel(24) {} - }; - -/** -* AES-256 -*/ -class BOTAN_DLL AES_Intel_256 : public AES_Intel - { - public: - std::string name() const { return "AES-256"; } - BlockCipher* clone() const { return new AES_Intel_256; } - AES_Intel_256() : AES_Intel(32) {} + SecureBuffer<u32bit, 44> EK, DK; }; } diff --git a/src/engine/aes_isa_eng/aes_isa_engine.cpp b/src/engine/aes_isa_eng/aes_isa_engine.cpp index c74f8bddc..122ec0f6e 100644 --- a/src/engine/aes_isa_eng/aes_isa_engine.cpp +++ b/src/engine/aes_isa_eng/aes_isa_engine.cpp @@ -25,28 +25,26 @@ AES_ISA_Engine::find_block_cipher(const SCAN_Name& request, #if defined(BOTAN_HAS_AES_INTEL) if(CPUID::has_intel_aes()) { - if(request.algo_name() == "AES") - return new AES_Intel; if(request.algo_name() == "AES-128") - return new AES_Intel_128; + return new AES_128_Intel; +#if 0 if(request.algo_name() == "AES-192") - return new AES_Intel_192; + return new AES_192_Intel; if(request.algo_name() == "AES-256") - return new AES_Intel_256; + return new AES_256_Intel; +#endif } #endif #if defined(BOTAN_HAS_AES_VIA) if(CPUID::has_via_aes()) { - if(request.algo_name() == "AES") - return new AES_Via; if(request.algo_name() == "AES-128") - return new AES_Via_128; + return new AES_128_Via; if(request.algo_name() == "AES-192") - return new AES_Via_192; + return new AES_192_Via; if(request.algo_name() == "AES-256") - return new AES_Via_256; + return new AES_256_Via; } #endif |