diff options
4 files changed, 32 insertions, 0 deletions
diff --git a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp index 3438440da..c5bda524e 100644 --- a/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp +++ b/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp @@ -53,6 +53,8 @@ namespace Botan { BOTAN_FUNC_ISA("avx2") void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const { + SIMD_8x32::reset_registers(); + SIMD_8x32 B0 = SIMD_8x32::load_le(in); SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32); SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64); @@ -98,11 +100,15 @@ void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const B1.store_le(out + 32); B2.store_le(out + 64); B3.store_le(out + 96); + + SIMD_8x32::zero_registers(); } BOTAN_FUNC_ISA("avx2") void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const { + SIMD_8x32::reset_registers(); + SIMD_8x32 B0 = SIMD_8x32::load_le(in); SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32); SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64); @@ -152,6 +158,8 @@ void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const B1.store_le(out + 32); B2.store_le(out + 64); B3.store_le(out + 96); + + SIMD_8x32::zero_registers(); } #undef key_xor diff --git a/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp b/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp index 7aabf120a..cbdd09c20 100644 --- a/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp +++ b/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp @@ -77,6 +77,8 @@ inline void rotate_keys(__m256i& R0, __m256i& R1, __m256i R2) BOTAN_FUNC_ISA("avx2") void Threefish_512::avx2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { + _mm256_zeroupper(); + const uint64_t* K = m_K.data(); const uint64_t* T_64 = m_T.data(); @@ -241,6 +243,8 @@ void Threefish_512::avx2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blo _mm256_storeu_si256(out_mm++, X1); } + _mm256_zeroall(); + #undef THREEFISH_ENC_8_ROUNDS #undef THREEFISH_ROUND #undef THREEFISH_INJECT_KEY @@ -252,6 +256,8 @@ void Threefish_512::avx2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blo BOTAN_FUNC_ISA("avx2") void Threefish_512::avx2_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { + _mm256_zeroupper(); + const uint64_t* K = m_K.data(); const uint64_t* T_64 = m_T.data(); @@ -431,6 +437,8 @@ void Threefish_512::avx2_decrypt_n(const uint8_t in[], uint8_t out[], size_t blo #undef THREEFISH_DEC_2_8_ROUNDS #undef THREEFISH_ROUND_2 #undef THREEFISH_INJECT_KEY_2 + + _mm256_zeroall(); } } diff --git a/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp b/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp index fd2504754..e9c3aeb75 100644 --- a/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp +++ b/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp @@ -13,6 +13,8 @@ namespace Botan { BOTAN_FUNC_ISA("avx2") void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rounds) { + SIMD_8x32::reset_registers(); + BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds"); const SIMD_8x32 CTR0 = SIMD_8x32(0, 1, 2, 3, 4, 5, 6, 7); @@ -202,6 +204,8 @@ void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rou _mm256_storeu_si256(output_mm + 14, _mm256_permute2x128_si256(R03.handle(), R07.handle(), 1 + (3 << 4))); _mm256_storeu_si256(output_mm + 15, _mm256_permute2x128_si256(R11.handle(), R15.handle(), 1 + (3 << 4))); + SIMD_8x32::zero_registers(); + state[12] += 8; if(state[12] < 8) state[13]++; diff --git a/src/lib/utils/simd/simd_avx2/simd_avx2.h b/src/lib/utils/simd/simd_avx2/simd_avx2.h index 6e8c04f4b..fde086e53 100644 --- a/src/lib/utils/simd/simd_avx2/simd_avx2.h +++ b/src/lib/utils/simd/simd_avx2/simd_avx2.h @@ -227,6 +227,18 @@ class SIMD_8x32 final B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3); } + BOTAN_FUNC_ISA("avx2") + static void reset_registers() + { + _mm256_zeroupper(); + } + + BOTAN_FUNC_ISA("avx2") + static void zero_registers() + { + _mm256_zeroall(); + } + __m256i handle() const { return m_avx2; } private: |