author     Jack Lloyd <[email protected]>    2017-10-17 18:30:40 -0400
committer  Jack Lloyd <[email protected]>    2017-10-18 11:13:42 -0400
commit     eab327defc290e21b36591a09d93609d6deca940 (patch)
tree       75d8c372dfbd90e37203a7600ef513654d895fd1 /src/lib/stream/ctr
parent     f01f37d142ef230b03ca6af46f1e1a0615e4879a (diff)
GCM and CTR optimizations
In CTR, add special cases for counter widths of particular interest. In GHASH, use a 4x reduction technique suggested by Intel. Split GHASH out into its own source file and header. With these changes GCM is over twice as fast on Skylake and about 50% faster on Westmere.
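The counter-width special cases replace the generic byte-at-a-time carry loop with direct big-endian word arithmetic on the low bytes of each counter block. Below is a minimal standalone sketch of that idea for the 8-byte case, using hypothetical load_be64/store_be64 helpers rather than Botan's loadstor.h; it illustrates the technique used by the ctr_size == 8 fast path in the diff that follows, and is not code from the patch itself.

// Standalone sketch with hypothetical helpers (not Botan's API): advance the
// trailing 8-byte big-endian counter of each block by n with one 64-bit add.
#include <cstddef>
#include <cstdint>
#include <vector>

static uint64_t load_be64(const uint8_t* p)
   {
   uint64_t x = 0;
   for(size_t i = 0; i != 8; ++i)
      x = (x << 8) | p[i];
   return x;
   }

static void store_be64(uint64_t x, uint8_t* p)
   {
   for(size_t i = 0; i != 8; ++i)
      p[i] = static_cast<uint8_t>(x >> (56 - 8*i));
   }

void add_counter_64(std::vector<uint8_t>& counters, size_t block_size, uint64_t n)
   {
   // counters holds consecutive counter blocks; only the low 8 bytes of each
   // block hold the counter, so a single 64-bit add replaces the byte-wise loop.
   for(size_t off = block_size - 8; off < counters.size(); off += block_size)
      {
      store_be64(load_be64(&counters[off]) + n, &counters[off]);
      }
   }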
Diffstat (limited to 'src/lib/stream/ctr')
-rw-r--r--  src/lib/stream/ctr/ctr.cpp  138
-rw-r--r--  src/lib/stream/ctr/ctr.h      8
2 files changed, 99 insertions, 47 deletions
diff --git a/src/lib/stream/ctr/ctr.cpp b/src/lib/stream/ctr/ctr.cpp
index e81373a82..cc2825ee6 100644
--- a/src/lib/stream/ctr/ctr.cpp
+++ b/src/lib/stream/ctr/ctr.cpp
@@ -6,27 +6,30 @@
*/
#include <botan/ctr.h>
+#include <botan/loadstor.h>
namespace Botan {
CTR_BE::CTR_BE(BlockCipher* ciph) :
m_cipher(ciph),
+ m_block_size(m_cipher->block_size()),
+ m_ctr_size(m_block_size),
+ m_ctr_blocks(m_cipher->parallel_bytes() / m_block_size),
m_counter(m_cipher->parallel_bytes()),
m_pad(m_counter.size()),
m_iv(m_cipher->block_size()),
- m_block_size(m_cipher->block_size()),
- m_ctr_size(m_block_size),
m_pad_pos(0)
{
}
CTR_BE::CTR_BE(BlockCipher* cipher, size_t ctr_size) :
m_cipher(cipher),
+ m_block_size(m_cipher->block_size()),
+ m_ctr_size(ctr_size),
+ m_ctr_blocks(m_cipher->parallel_bytes() / m_block_size),
m_counter(m_cipher->parallel_bytes()),
m_pad(m_counter.size()),
m_iv(m_cipher->block_size()),
- m_block_size(m_cipher->block_size()),
- m_ctr_size(ctr_size),
m_pad_pos(0)
{
if(m_ctr_size == 0 || m_ctr_size > m_block_size)
@@ -57,15 +60,36 @@ std::string CTR_BE::name() const
void CTR_BE::cipher(const uint8_t in[], uint8_t out[], size_t length)
{
- while(length >= m_pad.size() - m_pad_pos)
+ if(m_pad_pos > 0)
+ {
+ const size_t avail = m_pad.size() - m_pad_pos;
+ const size_t take = std::min(length, avail);
+ xor_buf(out, in, &m_pad[m_pad_pos], take);
+ length -= take;
+ in += take;
+ out += take;
+ m_pad_pos += take;
+
+ if(take == avail)
+ {
+ add_counter(m_ctr_blocks);
+ m_cipher->encrypt_n(m_counter.data(), m_pad.data(), m_ctr_blocks);
+ m_pad_pos = 0;
+ }
+ }
+
+ while(length >= m_pad.size())
{
- xor_buf(out, in, &m_pad[m_pad_pos], m_pad.size() - m_pad_pos);
- length -= (m_pad.size() - m_pad_pos);
- in += (m_pad.size() - m_pad_pos);
- out += (m_pad.size() - m_pad_pos);
- increment_counter();
+ xor_buf(out, in, &m_pad[0], m_pad.size());
+ length -= m_pad.size();
+ in += m_pad.size();
+ out += m_pad.size();
+
+ add_counter(m_ctr_blocks);
+ m_cipher->encrypt_n(m_counter.data(), m_pad.data(), m_ctr_blocks);
}
- xor_buf(out, in, &m_pad[m_pad_pos], length);
+
+ xor_buf(out, in, &m_pad[0], length);
m_pad_pos += length;
}
@@ -80,63 +104,89 @@ void CTR_BE::set_iv(const uint8_t iv[], size_t iv_len)
seek(0);
}
-/*
-* Increment the counter and update the buffer
-*/
-void CTR_BE::increment_counter()
- {
- const size_t n_wide = m_counter.size() / m_block_size;
-
- add_counter(n_wide);
-
- m_cipher->encrypt_n(m_counter.data(), m_pad.data(), n_wide);
- m_pad_pos = 0;
- }
-
void CTR_BE::add_counter(const uint64_t counter)
{
- const size_t n_wide = m_counter.size() / m_block_size;
+ const size_t ctr_size = m_ctr_size;
+ const size_t ctr_blocks = m_ctr_blocks;
+ const size_t BS = m_block_size;
- for(size_t i = 0; i != n_wide; ++i)
+ if(ctr_size == 4)
+ {
+ size_t off = (BS - 4);
+ for(size_t i = 0; i != ctr_blocks; ++i)
+ {
+ uint32_t low32 = load_be<uint32_t>(&m_counter[off], 0);
+ low32 += counter;
+ store_be(low32, &m_counter[off]);
+ off += BS;
+ }
+ }
+ else if(ctr_size == 8)
{
- uint64_t local_counter = counter;
- uint16_t carry = static_cast<uint8_t>(local_counter);
- for(size_t j = 0; (carry || local_counter) && j != m_ctr_size; ++j)
+ size_t off = (BS - 8);
+ for(size_t i = 0; i != ctr_blocks; ++i)
{
- const size_t off = i*m_block_size + (m_block_size-1-j);
- const uint16_t cnt = static_cast<uint16_t>(m_counter[off]) + carry;
- m_counter[off] = static_cast<uint8_t>(cnt);
- local_counter = (local_counter >> 8);
- carry = (cnt >> 8) + static_cast<uint8_t>(local_counter);
+ uint64_t low64 = load_be<uint64_t>(&m_counter[off], 0);
+ low64 += counter;
+ store_be(low64, &m_counter[off]);
+ off += BS;
+ }
+ }
+ else if(ctr_size == 16)
+ {
+ size_t off = (BS - 16);
+ for(size_t i = 0; i != ctr_blocks; ++i)
+ {
+ uint64_t b0 = load_be<uint64_t>(&m_counter[off], 0);
+ uint64_t b1 = load_be<uint64_t>(&m_counter[off], 1);
+ b1 += counter;
+ b0 += (b1 < counter) ? 1 : 0; // carry into the high 64 bits
+ store_be(b0, &m_counter[off]);
+ store_be(b1, &m_counter[off+8]);
+ off += BS;
+ }
+ }
+ else
+ {
+ for(size_t i = 0; i != ctr_blocks; ++i)
+ {
+ uint64_t local_counter = counter;
+ uint16_t carry = static_cast<uint8_t>(local_counter);
+ for(size_t j = 0; (carry || local_counter) && j != ctr_size; ++j)
+ {
+ const size_t off = i*BS + (BS-1-j);
+ const uint16_t cnt = static_cast<uint16_t>(m_counter[off]) + carry;
+ m_counter[off] = static_cast<uint8_t>(cnt);
+ local_counter = (local_counter >> 8);
+ carry = (cnt >> 8) + static_cast<uint8_t>(local_counter);
+ }
}
}
}
void CTR_BE::seek(uint64_t offset)
{
- const size_t n_wide = m_counter.size() / m_block_size;
- const uint64_t base_counter = n_wide * (offset / m_counter.size());
+ const uint64_t base_counter = m_ctr_blocks * (offset / m_counter.size());
zeroise(m_counter);
buffer_insert(m_counter, 0, m_iv);
+ const size_t BS = m_block_size;
+
// Set m_counter blocks to IV, IV + 1, ... IV + n
- for(size_t i = 1; i != n_wide; ++i)
+ for(size_t i = 1; i != m_ctr_blocks; ++i)
{
- buffer_insert(m_counter,
- i*m_block_size,
- &m_counter[(i-1)*m_block_size],
- m_block_size);
+ buffer_insert(m_counter, i*BS, &m_counter[(i-1)*BS], BS);
for(size_t j = 0; j != m_ctr_size; ++j)
- if(++m_counter[i*m_block_size + (m_block_size - 1 - j)])
+ if(++m_counter[i*BS + (BS - 1 - j)])
break;
}
- if (base_counter > 0)
+ if(base_counter > 0)
add_counter(base_counter);
- m_cipher->encrypt_n(m_counter.data(), m_pad.data(), n_wide);
+ m_cipher->encrypt_n(m_counter.data(), m_pad.data(), m_ctr_blocks);
m_pad_pos = offset % m_counter.size();
}
}
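As a side note on the reworked seek() above: the base counter and the pad position fall out of one division and one modulus on the byte offset. A small worked example, with the block size and parallel width chosen purely for illustration (they are not taken from the patch):

#include <cstdint>
#include <iostream>

int main()
   {
   const uint64_t block_size = 16;                       // e.g. AES
   const uint64_t ctr_blocks = 4;                        // assumed parallel width
   const uint64_t buf_size   = block_size * ctr_blocks;  // 64-byte keystream buffer

   const uint64_t offset = 1000;                         // arbitrary keystream offset

   const uint64_t base_counter = ctr_blocks * (offset / buf_size); // 4 * 15 = 60
   const uint64_t pad_pos      = offset % buf_size;                // 1000 % 64 = 40

   // The counter blocks become IV + 60 .. IV + 63 and the first 40 bytes of the
   // freshly generated pad are treated as already consumed.
   std::cout << base_counter << " " << pad_pos << "\n";
   return 0;
   }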
diff --git a/src/lib/stream/ctr/ctr.h b/src/lib/stream/ctr/ctr.h
index e174848b8..3ff63b8e5 100644
--- a/src/lib/stream/ctr/ctr.h
+++ b/src/lib/stream/ctr/ctr.h
@@ -48,14 +48,16 @@ class BOTAN_PUBLIC_API(2,0) CTR_BE final : public StreamCipher
void seek(uint64_t offset) override;
private:
void key_schedule(const uint8_t key[], size_t key_len) override;
- void increment_counter();
void add_counter(const uint64_t counter);
std::unique_ptr<BlockCipher> m_cipher;
+
+ const size_t m_block_size;
+ const size_t m_ctr_size;
+ const size_t m_ctr_blocks;
+
secure_vector<uint8_t> m_counter, m_pad;
std::vector<uint8_t> m_iv;
- const size_t m_block_size;
- size_t m_ctr_size;
size_t m_pad_pos;
};
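For context, a minimal usage sketch of the class as it stands after this patch, assuming Botan 2's public API (BlockCipher::create_or_throw plus set_key/set_iv/cipher/seek from the StreamCipher interface); the exact factory name and ownership convention should be checked against the target Botan version:

#include <botan/block_cipher.h>
#include <botan/ctr.h>
#include <cstdint>
#include <vector>

int main()
   {
   // CTR_BE takes ownership of the raw BlockCipher pointer (m_cipher is a unique_ptr).
   auto aes = Botan::BlockCipher::create_or_throw("AES-128");
   Botan::CTR_BE ctr(aes.release());

   const std::vector<uint8_t> key(16, 0x42);   // placeholder key
   const std::vector<uint8_t> iv(16, 0x00);    // placeholder IV

   ctr.set_key(key.data(), key.size());
   ctr.set_iv(iv.data(), iv.size());           // set_iv() seeks to offset 0

   std::vector<uint8_t> msg(100, 0x00);
   ctr.cipher(msg.data(), msg.data(), msg.size()); // in-place keystream XOR

   ctr.seek(50);                               // reposition within the keystream
   return 0;
   }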