aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/stream
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2018-11-10 13:42:31 -0500
committerJack Lloyd <[email protected]>2018-11-10 13:43:34 -0500
commit82324d3993aa40ac7a24db6b2d40d7edf129276e (patch)
tree78a34d1454b67415f1aee1f75996e1ea6c8e3c4d /src/lib/stream
parentb1e1e618eac3ca317414a57269b1b4b28cc10098 (diff)
Use vzeroupper/vzeroall to transition between AVX and SSE states.
Otherwise some CPUs suffer serious stalls. Using vzeroall on exit also has the nice effect that we don't have to worry about register contents leaking. HT to @noloader for doing the background research on this.
Diffstat (limited to 'src/lib/stream')
-rw-r--r--src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp4
1 files changed, 4 insertions, 0 deletions
diff --git a/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp b/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp
index fd2504754..e9c3aeb75 100644
--- a/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp
+++ b/src/lib/stream/chacha/chacha_avx2/chacha_avx2.cpp
@@ -13,6 +13,8 @@ namespace Botan {
BOTAN_FUNC_ISA("avx2")
void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rounds)
{
+ SIMD_8x32::reset_registers();
+
BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
const SIMD_8x32 CTR0 = SIMD_8x32(0, 1, 2, 3, 4, 5, 6, 7);
@@ -202,6 +204,8 @@ void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rou
_mm256_storeu_si256(output_mm + 14, _mm256_permute2x128_si256(R03.handle(), R07.handle(), 1 + (3 << 4)));
_mm256_storeu_si256(output_mm + 15, _mm256_permute2x128_si256(R11.handle(), R15.handle(), 1 + (3 << 4)));
+ SIMD_8x32::zero_registers();
+
state[12] += 8;
if(state[12] < 8)
state[13]++;