From 4c1129afb9c712f3de01d47992c9f52edfb7eee0 Mon Sep 17 00:00:00 2001 From: Jack Lloyd Date: Fri, 10 Aug 2018 20:14:36 -0400 Subject: Optimize computation of CTR input blocks We don't need to read each block since we know what is there Improves CTR perf with AES-NI by 5-6%, also helps GCM GH #969 --- src/lib/stream/ctr/ctr.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/lib/stream/ctr') diff --git a/src/lib/stream/ctr/ctr.cpp b/src/lib/stream/ctr/ctr.cpp index 3608eedf9..22cfade9b 100644 --- a/src/lib/stream/ctr/ctr.cpp +++ b/src/lib/stream/ctr/ctr.cpp @@ -142,37 +142,42 @@ void CTR_BE::add_counter(const uint64_t counter) if(ctr_size == 4) { size_t off = (BS - 4); + uint32_t low32 = counter + load_be<uint32_t>(&m_counter[off], 0); + for(size_t i = 0; i != ctr_blocks; ++i) { - uint32_t low32 = load_be<uint32_t>(&m_counter[off], 0); - low32 += counter; store_be(low32, &m_counter[off]); off += BS; + low32 += 1; } } else if(ctr_size == 8) { size_t off = (BS - 8); + uint64_t low64 = counter + load_be<uint64_t>(&m_counter[off], 0); + for(size_t i = 0; i != ctr_blocks; ++i) { - uint64_t low64 = load_be<uint64_t>(&m_counter[off], 0); - low64 += counter; store_be(low64, &m_counter[off]); off += BS; + low64 += 1; } } else if(ctr_size == 16) { size_t off = (BS - 16); + uint64_t b0 = load_be<uint64_t>(&m_counter[off], 0); + uint64_t b1 = load_be<uint64_t>(&m_counter[off], 1); + b1 += counter; + b0 += (b1 < counter) ? 1 : 0; // carry + for(size_t i = 0; i != ctr_blocks; ++i) { - uint64_t b0 = load_be<uint64_t>(&m_counter[off], 0); - uint64_t b1 = load_be<uint64_t>(&m_counter[off], 1); - b1 += counter; - b0 += (b1 < counter) ? 1 : 0; // carry store_be(b0, &m_counter[off]); store_be(b1, &m_counter[off+8]); off += BS; + b1 += 1; + b0 += (b1 == 0); // carry } } else -- cgit v1.2.3