diff options
author | Jack Lloyd <[email protected]> | 2017-10-17 18:30:40 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-10-18 11:13:42 -0400 |
commit | eab327defc290e21b36591a09d93609d6deca940 (patch) | |
tree | 75d8c372dfbd90e37203a7600ef513654d895fd1 /src/lib/stream/ctr | |
parent | f01f37d142ef230b03ca6af46f1e1a0615e4879a (diff) |
GCM and CTR optimizations
In CTR, add special-case fast paths for the counter widths of particular interest (4, 8 and 16 bytes).
In GHASH, use a 4x reduction technique suggested by Intel.
Split out GHASH to its own source file and header.
With these changes GCM is over twice as fast on Skylake and about
50% faster on Westmere.
Diffstat (limited to 'src/lib/stream/ctr')
-rw-r--r-- | src/lib/stream/ctr/ctr.cpp | 138 | ||||
-rw-r--r-- | src/lib/stream/ctr/ctr.h | 8 |
2 files changed, 99 insertions, 47 deletions
diff --git a/src/lib/stream/ctr/ctr.cpp b/src/lib/stream/ctr/ctr.cpp index e81373a82..cc2825ee6 100644 --- a/src/lib/stream/ctr/ctr.cpp +++ b/src/lib/stream/ctr/ctr.cpp @@ -6,27 +6,30 @@ */ #include <botan/ctr.h> +#include <botan/loadstor.h> namespace Botan { CTR_BE::CTR_BE(BlockCipher* ciph) : m_cipher(ciph), + m_block_size(m_cipher->block_size()), + m_ctr_size(m_block_size), + m_ctr_blocks(m_cipher->parallel_bytes() / m_block_size), m_counter(m_cipher->parallel_bytes()), m_pad(m_counter.size()), m_iv(m_cipher->block_size()), - m_block_size(m_cipher->block_size()), - m_ctr_size(m_block_size), m_pad_pos(0) { } CTR_BE::CTR_BE(BlockCipher* cipher, size_t ctr_size) : m_cipher(cipher), + m_block_size(m_cipher->block_size()), + m_ctr_size(ctr_size), + m_ctr_blocks(m_cipher->parallel_bytes() / m_block_size), m_counter(m_cipher->parallel_bytes()), m_pad(m_counter.size()), m_iv(m_cipher->block_size()), - m_block_size(m_cipher->block_size()), - m_ctr_size(ctr_size), m_pad_pos(0) { if(m_ctr_size == 0 || m_ctr_size > m_block_size) @@ -57,15 +60,36 @@ std::string CTR_BE::name() const void CTR_BE::cipher(const uint8_t in[], uint8_t out[], size_t length) { - while(length >= m_pad.size() - m_pad_pos) + if(m_pad_pos > 0) + { + const size_t avail = m_pad.size() - m_pad_pos; + const size_t take = std::min(length, avail); + xor_buf(out, in, &m_pad[m_pad_pos], take); + length -= take; + in += take; + out += take; + m_pad_pos += take; + + if(take == avail) + { + add_counter(m_ctr_blocks); + m_cipher->encrypt_n(m_counter.data(), m_pad.data(), m_ctr_blocks); + m_pad_pos = 0; + } + } + + while(length >= m_pad.size()) { - xor_buf(out, in, &m_pad[m_pad_pos], m_pad.size() - m_pad_pos); - length -= (m_pad.size() - m_pad_pos); - in += (m_pad.size() - m_pad_pos); - out += (m_pad.size() - m_pad_pos); - increment_counter(); + xor_buf(out, in, &m_pad[0], m_pad.size()); + length -= m_pad.size(); + in += m_pad.size(); + out += m_pad.size(); + + add_counter(m_ctr_blocks); + 
m_cipher->encrypt_n(m_counter.data(), m_pad.data(), m_ctr_blocks); } - xor_buf(out, in, &m_pad[m_pad_pos], length); + + xor_buf(out, in, &m_pad[0], length); m_pad_pos += length; } @@ -80,63 +104,89 @@ void CTR_BE::set_iv(const uint8_t iv[], size_t iv_len) seek(0); } -/* -* Increment the counter and update the buffer -*/ -void CTR_BE::increment_counter() - { - const size_t n_wide = m_counter.size() / m_block_size; - - add_counter(n_wide); - - m_cipher->encrypt_n(m_counter.data(), m_pad.data(), n_wide); - m_pad_pos = 0; - } - void CTR_BE::add_counter(const uint64_t counter) { - const size_t n_wide = m_counter.size() / m_block_size; + const size_t ctr_size = m_ctr_size; + const size_t ctr_blocks = m_ctr_blocks; + const size_t BS = m_block_size; - for(size_t i = 0; i != n_wide; ++i) + if(ctr_size == 4) + { + size_t off = (BS - 4); + for(size_t i = 0; i != ctr_blocks; ++i) + { + uint32_t low32 = load_be<uint32_t>(&m_counter[off], 0); + low32 += counter; + store_be(low32, &m_counter[off]); + off += BS; + } + } + else if(ctr_size == 8) { - uint64_t local_counter = counter; - uint16_t carry = static_cast<uint8_t>(local_counter); - for(size_t j = 0; (carry || local_counter) && j != m_ctr_size; ++j) + size_t off = (BS - 8); + for(size_t i = 0; i != ctr_blocks; ++i) { - const size_t off = i*m_block_size + (m_block_size-1-j); - const uint16_t cnt = static_cast<uint16_t>(m_counter[off]) + carry; - m_counter[off] = static_cast<uint8_t>(cnt); - local_counter = (local_counter >> 8); - carry = (cnt >> 8) + static_cast<uint8_t>(local_counter); + uint64_t low64 = load_be<uint64_t>(&m_counter[off], 0); + low64 += counter; + store_be(low64, &m_counter[off]); + off += BS; + } + } + else if(ctr_size == 16) + { + size_t off = (BS - 16); + for(size_t i = 0; i != ctr_blocks; ++i) + { + uint64_t b0 = load_be<uint64_t>(&m_counter[off], 0); + uint64_t b1 = load_be<uint64_t>(&m_counter[off], 1); + b1 += counter; + b1 += (b1 < counter); // carry + store_be(b0, &m_counter[off]); + store_be(b1, 
&m_counter[off+8]); + off += BS; + } + } + else + { + for(size_t i = 0; i != ctr_blocks; ++i) + { + uint64_t local_counter = counter; + uint16_t carry = static_cast<uint8_t>(local_counter); + for(size_t j = 0; (carry || local_counter) && j != ctr_size; ++j) + { + const size_t off = i*BS + (BS-1-j); + const uint16_t cnt = static_cast<uint16_t>(m_counter[off]) + carry; + m_counter[off] = static_cast<uint8_t>(cnt); + local_counter = (local_counter >> 8); + carry = (cnt >> 8) + static_cast<uint8_t>(local_counter); + } } } } void CTR_BE::seek(uint64_t offset) { - const size_t n_wide = m_counter.size() / m_block_size; - const uint64_t base_counter = n_wide * (offset / m_counter.size()); + const uint64_t base_counter = m_ctr_blocks * (offset / m_counter.size()); zeroise(m_counter); buffer_insert(m_counter, 0, m_iv); + const size_t BS = m_block_size; + // Set m_counter blocks to IV, IV + 1, ... IV + n - for(size_t i = 1; i != n_wide; ++i) + for(size_t i = 1; i != m_ctr_blocks; ++i) { - buffer_insert(m_counter, - i*m_block_size, - &m_counter[(i-1)*m_block_size], - m_block_size); + buffer_insert(m_counter, i*BS, &m_counter[(i-1)*BS], BS); for(size_t j = 0; j != m_ctr_size; ++j) - if(++m_counter[i*m_block_size + (m_block_size - 1 - j)]) + if(++m_counter[i*BS + (BS - 1 - j)]) break; } - if (base_counter > 0) + if(base_counter > 0) add_counter(base_counter); - m_cipher->encrypt_n(m_counter.data(), m_pad.data(), n_wide); + m_cipher->encrypt_n(m_counter.data(), m_pad.data(), m_ctr_blocks); m_pad_pos = offset % m_counter.size(); } } diff --git a/src/lib/stream/ctr/ctr.h b/src/lib/stream/ctr/ctr.h index e174848b8..3ff63b8e5 100644 --- a/src/lib/stream/ctr/ctr.h +++ b/src/lib/stream/ctr/ctr.h @@ -48,14 +48,16 @@ class BOTAN_PUBLIC_API(2,0) CTR_BE final : public StreamCipher void seek(uint64_t offset) override; private: void key_schedule(const uint8_t key[], size_t key_len) override; - void increment_counter(); void add_counter(const uint64_t counter); std::unique_ptr<BlockCipher> 
m_cipher; + + const size_t m_block_size; + const size_t m_ctr_size; + const size_t m_ctr_blocks; + secure_vector<uint8_t> m_counter, m_pad; std::vector<uint8_t> m_iv; - const size_t m_block_size; - size_t m_ctr_size; size_t m_pad_pos; }; |