diff options
-rw-r--r-- | doc/relnotes/1_11_1.rst | 7 | ||||
-rw-r--r-- | src/block/aes_ni/aes_ni.cpp | 44 |
2 files changed, 33 insertions, 18 deletions
diff --git a/doc/relnotes/1_11_1.rst b/doc/relnotes/1_11_1.rst index 8d46e04f3..0619e07ac 100644 --- a/doc/relnotes/1_11_1.rst +++ b/doc/relnotes/1_11_1.rst @@ -78,3 +78,10 @@ of ``data`` that returns a mutable pointer has been renamed The constructor ``BigInt(NumberType type, size_t n)`` has been removed, replaced by ``BigInt::power_of_2``. + +AES-NI Crash Fixed +"""""""""""""""""""""""""""""""""""""""" + +In 1.11.0, when compiled by GCC, the AES-NI implementation of AES-192 +would crash if the mlock-based allocator was used due to an alignment +issue. diff --git a/src/block/aes_ni/aes_ni.cpp b/src/block/aes_ni/aes_ni.cpp index c320fe82c..4dca6c7f2 100644 --- a/src/block/aes_ni/aes_ni.cpp +++ b/src/block/aes_ni/aes_ni.cpp @@ -1,6 +1,6 @@ /* * AES using AES-NI instructions -* (C) 2009 Jack Lloyd +* (C) 2009,2012 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -485,10 +485,10 @@ void AES_192_NI::key_schedule(const byte key[], size_t) load_le(&EK[0], key, 6); -#define AES_192_key_exp(RCON, EK_OFF) \ - aes_192_key_expansion(&K0, &K1, \ - _mm_aeskeygenassist_si128(K1, RCON), \ - &EK[EK_OFF], EK_OFF == 48) + #define AES_192_key_exp(RCON, EK_OFF) \ + aes_192_key_expansion(&K0, &K1, \ + _mm_aeskeygenassist_si128(K1, RCON), \ + &EK[EK_OFF], EK_OFF == 48) AES_192_key_exp(0x01, 6); AES_192_key_exp(0x02, 12); @@ -499,22 +499,25 @@ void AES_192_NI::key_schedule(const byte key[], size_t) AES_192_key_exp(0x40, 42); AES_192_key_exp(0x80, 48); + #undef AES_192_key_exp + // Now generate decryption keys const __m128i* EK_mm = (const __m128i*)&EK[0]; + __m128i* DK_mm = (__m128i*)&DK[0]; - _mm_storeu_si128(DK_mm , EK_mm[12]); - _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(EK_mm[11])); - _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(EK_mm[10])); - _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(EK_mm[9])); - _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(EK_mm[8])); - _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(EK_mm[7])); - _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(EK_mm[6])); - _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(EK_mm[5])); - _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(EK_mm[4])); - _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(EK_mm[3])); - _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(EK_mm[2])); - _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(EK_mm[1])); - _mm_storeu_si128(DK_mm + 12, EK_mm[0]); + _mm_storeu_si128(DK_mm , _mm_loadu_si128(EK_mm + 12)); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 11))); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 10))); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 9))); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 8))); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 7))); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 6))); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 5))); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 4))); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 3))); + _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 2))); + _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 1))); + _mm_storeu_si128(DK_mm + 12, _mm_loadu_si128(EK_mm + 0)); } /* @@ -776,4 +779,9 @@ void AES_256_NI::clear() zeroise(DK); } +#undef AES_ENC_4_ROUNDS +#undef AES_ENC_4_LAST_ROUNDS +#undef AES_DEC_4_ROUNDS +#undef AES_DEC_4_LAST_ROUNDS + } |