diff options
author | Jack Lloyd <[email protected]> | 2018-11-10 13:42:31 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2018-11-10 13:43:34 -0500 |
commit | 82324d3993aa40ac7a24db6b2d40d7edf129276e (patch) | |
tree | 78a34d1454b67415f1aee1f75996e1ea6c8e3c4d /src/lib/stream | |
parent | b1e1e618eac3ca317414a57269b1b4b28cc10098 (diff) |
Use vzeroupper/vzeroall to transition between AVX and SSE states.
Otherwise some CPUs suffer serious stalls. Using vzeroall on exit
also has the nice effect that we don't have to worry about register
contents leaking.
HT to @noloader for doing the background research on this.
Diffstat (limited to 'src/lib/stream')
-rw-r--r-- | src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp b/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp index fd2504754..e9c3aeb75 100644 --- a/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp +++ b/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp @@ -13,6 +13,8 @@ namespace Botan { BOTAN_FUNC_ISA("avx2") void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rounds) { + SIMD_8x32::reset_registers(); + BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds"); const SIMD_8x32 CTR0 = SIMD_8x32(0, 1, 2, 3, 4, 5, 6, 7); @@ -202,6 +204,8 @@ void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rou _mm256_storeu_si256(output_mm + 14, _mm256_permute2x128_si256(R03.handle(), R07.handle(), 1 + (3 << 4))); _mm256_storeu_si256(output_mm + 15, _mm256_permute2x128_si256(R11.handle(), R15.handle(), 1 + (3 << 4))); + SIMD_8x32::zero_registers(); + state[12] += 8; if(state[12] < 8) state[13]++; |