author     Jack Lloyd <[email protected]>    2018-12-27 21:04:28 -0500
committer  Jack Lloyd <[email protected]>    2019-01-04 21:23:25 -0500
commit     e2ee85c5de7df54953559d80e9889dd550550699
tree       7d2d7daa45833cc6722ed3191e6c79374b141d6d
parent     4b1252bc1410d9c6af61ca20f5a1e72fa2d440d4
New Memory_Pool implementation
Quite a bit faster than the old version, and with better properties
with respect to alignment
 src/build-data/buildh.in                               |   7
 src/lib/utils/locking_allocator/locking_allocator.cpp  |  11
 src/lib/utils/mem_pool/mem_pool.cpp                    | 427
 src/lib/utils/mem_pool/mem_pool.h                      |  28
 4 files changed, 335 insertions, 138 deletions
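The patch replaces the old best-fit free-list allocator with a page-and-bucket design; the theory-of-operation comment added to mem_pool.cpp below explains its assumptions. For orientation, here is a rough sketch of how the new three-argument constructor is meant to be driven. The backing region, page size and page count are illustrative stand-ins (in the library the locking allocator obtains them from OS::allocate_locked_pages() and OS::system_page_size()); this is not code from the commit.

// Sketch only: assumes the internal header is available (Memory_Pool is
// exported as BOTAN_TEST_API) and uses C++17 std::aligned_alloc as a
// stand-in for an mlock'd, page-aligned region.
#include <botan/internal/mem_pool.h>

#include <cstdint>
#include <cstdlib>

int main()
   {
   const size_t page_size = 4096;   // stand-in for OS::system_page_size()
   const size_t num_pages = 4;

   // The pool does not own this memory; it only carves it up.
   uint8_t* region = static_cast<uint8_t*>(std::aligned_alloc(page_size, num_pages * page_size));

   Botan::Memory_Pool pool(region, num_pages, page_size);

   // The pool may decline any request (too small, too large, out of pages);
   // callers then fall back to the regular heap, as locking_allocator.cpp does.
   void* p = pool.allocate(32);
   if(p == nullptr)
      p = std::calloc(32, 1);

   // Deallocation mirrors allocation: if the pool does not own the pointer,
   // release it with free() instead.
   if(!pool.deallocate(p, 32))
      std::free(p);

   std::free(region);
   return 0;
   }

Note that the min/max allocation limits and the align_bit parameter of the old constructor are gone: the served sizes are now fixed inside choose_bucket(), and the alignment property described in the theory-of-operation comment comes from carving each page into equal item_size slots.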
diff --git a/src/build-data/buildh.in b/src/build-data/buildh.in
index d37c183cf..6cf73200e 100644
--- a/src/build-data/buildh.in
+++ b/src/build-data/buildh.in
@@ -120,13 +120,6 @@
 /* How much to allocate for a buffer of no particular size */
 #define BOTAN_DEFAULT_BUFFER_SIZE 1024

-/* Minimum and maximum sizes to allocate out of the mlock pool (bytes)
-   Default min is 16 as smaller values are easily bruteforceable and thus
-   likely not cryptographic keys.
-*/
-#define BOTAN_MLOCK_ALLOCATOR_MIN_ALLOCATION 16
-#define BOTAN_MLOCK_ALLOCATOR_MAX_ALLOCATION 128
-
 /*
 * Total maximum amount of RAM (in KiB) we will lock into memory, even
 * if the OS would let us lock more
diff --git a/src/lib/utils/locking_allocator/locking_allocator.cpp b/src/lib/utils/locking_allocator/locking_allocator.cpp
index 9d05cfbff..401f00848 100644
--- a/src/lib/utils/locking_allocator/locking_allocator.cpp
+++ b/src/lib/utils/locking_allocator/locking_allocator.cpp
@@ -43,20 +43,19 @@ bool mlock_allocator::deallocate(void* p, size_t num_elems, size_t elem_size) no
 mlock_allocator::mlock_allocator()
    {
    const size_t mem_to_lock = OS::get_memory_locking_limit();
+   const size_t page_size = OS::system_page_size();

-   if(mem_to_lock)
+   if(mem_to_lock > 0 && mem_to_lock % page_size == 0)
       {
       m_locked_pages = static_cast<uint8_t*>(OS::allocate_locked_pages(mem_to_lock));

       if(m_locked_pages)
          {
          m_locked_pages_size = mem_to_lock;
+
          m_pool.reset(new Memory_Pool(m_locked_pages,
-                                      m_locked_pages_size,
-                                      OS::system_page_size(),
-                                      BOTAN_MLOCK_ALLOCATOR_MIN_ALLOCATION,
-                                      BOTAN_MLOCK_ALLOCATOR_MAX_ALLOCATION,
-                                      4));
+                                      m_locked_pages_size / page_size,
+                                      page_size));
          }
       }
    }
diff --git a/src/lib/utils/mem_pool/mem_pool.cpp b/src/lib/utils/mem_pool/mem_pool.cpp
index 115dbbac0..d89ed6631 100644
--- a/src/lib/utils/mem_pool/mem_pool.cpp
+++ b/src/lib/utils/mem_pool/mem_pool.cpp
@@ -6,15 +6,91 @@
 #include <botan/internal/mem_pool.h>
 #include <botan/mem_ops.h>
-#include <botan/exceptn.h>
-#include <algorithm>
-#include <cstdlib>
-#include <string>

 namespace Botan {

+/*
+* Memory pool theory of operation
+*
+* This allocator is not useful for general purpose but works well within the
+* context of allocating cryptographic keys. It makes several assumptions which
+* don't work for a malloc but simplify and speed up the implementation:
+*
+* - There is a single fixed block of memory, which cannot be expanded. This is
+*   the block that was allocated, mlocked and passed to the Memory_Pool
+*   constructor. It is assumed to be page-aligned.
+*
+* - The allocator is allowed to return null anytime it feels like not servicing
+*   a request, in which case the request will be sent to calloc instead. In
+*   particular values which are too small or too large are given to calloc.
+*
+* - Most allocations are powers of 2, the remainder are usually a multiple of 4
+*   or 8.
+*
+* - Free requests include the size of the allocation, so there is no need to
+*   track this within the pool.
+*
+* - Alignment is important to the caller. For this allocator, any allocation of
+*   size N is aligned evenly at N bytes.
+*
+* The block of memory is split up into pages. Initially each page is in the free
+* page list. Each page is used for just one size of allocation, with requests
+* bucketed into a small number of common sizes. If the allocation would be too
+* big, too small, or with too much slack, it is rejected by the pool.
+*
+* The free list is maintained by a bitmap, one per page/Bucket. Since each
+* Bucket only maintains objects of a single size, each bit set or clear
+* indicates the status of one object.
+*
+* An allocation walks the list of buckets and asks each in turn if there is
+* space. If a Bucket does not have any space, it sets a boolean flag m_is_full
+* so that it does not need to rescan when asked again. The flag is cleared on
+* first free from that bucket. If no bucket has space, but there are some free
+* pages left, a free page is claimed as a new Bucket for that size. In this case
+* it is pushed to the front of the list so it is first in line to service new
+* requests.
+*
+* A deallocation also walks the list of buckets for the size and asks each
+* Bucket in turn if it recognizes the pointer. When a Bucket becomes empty as a
+* result of a deallocation, it is recycled back into the free pool. When this
+* happens, the Buckets pointer goes to the end of the free list. This will delay
+* slightly the reuse of this page, which may offer some slight help wrt use
+* after free issues.
+*
+* It may be worthwhile to optimize deallocation by storing the Buckets in order
+* (by pointer value) which would allow binary search to find the owning bucket.
+*/
+
 namespace {

+size_t choose_bucket(size_t n)
+   {
+   const size_t MINIMUM_ALLOCATION = 16;
+   const size_t MAXIMUM_ALLOCATION = 512;
+   const size_t MAXIMUM_SLACK = 31;
+
+   if(n < MINIMUM_ALLOCATION || n > MAXIMUM_ALLOCATION)
+      return 0;
+
+   // Need to tune these
+   const size_t buckets[] = {
+      16, 24, 32, 48, 64, 80, 96, 112, 128, 160, 192, 256, 320, 384, 448, 512, 0
+   };
+
+   for(size_t i = 0; buckets[i]; ++i)
+      {
+      if(n <= buckets[i])
+         {
+         const size_t slack = buckets[i] - n;
+         if(slack > MAXIMUM_SLACK)
+            return 0;
+         return buckets[i];
+         }
+      }
+
+   return 0;
+   }
+
 inline bool ptr_in_pool(const void* pool_ptr, size_t poolsize,
                         const void* buf_ptr, size_t bufsize)
    {
@@ -23,168 +99,297 @@ inline bool ptr_in_pool(const void* pool_ptr, size_t poolsize,
    return (buf >= pool) && (buf + bufsize <= pool + poolsize);
    }

-inline size_t padding_for_alignment(size_t n, size_t alignment)
+// return index of first set bit
+template<typename T>
+size_t find_set_bit(T b)
    {
-   const size_t mod = n % alignment;
-   if(mod == 0)
-      return 0;
-   return alignment - mod;
-   }
+   size_t s = 8*sizeof(T) / 2;
+   size_t bit = 0;

-}
+   // In this context we don't need to be const-time
+   while(s > 0)
+      {
+      const T mask = (static_cast<T>(1) << s) - 1;
+      if((b & mask) == 0)
+         {
+         bit += s;
+         b >>= s;
+         }
+      s /= 2;
+      }
+
+   return bit;
+   }

-Memory_Pool::Memory_Pool(uint8_t* pool,
-                         size_t pool_size,
-                         size_t page_size,
-                         size_t min_alloc,
-                         size_t max_alloc,
-                         uint8_t align_bit) :
-   m_page_size(page_size),
-   m_min_alloc(min_alloc),
-   m_max_alloc(max_alloc),
-   m_align_bit(align_bit)
+class BitMap final
    {
-   if(pool == nullptr)
-      throw Invalid_Argument("Memory_Pool pool was null");
+   public:
+      BitMap(size_t bits) : m_len(bits)
+         {
+         m_bits.resize((bits + BITMASK_BITS - 1) / BITMASK_BITS);
+         m_main_mask = ~static_cast<bitmask_type>(0);
+         m_last_mask = m_main_mask;

-   if(m_min_alloc > m_max_alloc)
-      throw Invalid_Argument("Memory_Pool min_alloc > max_alloc");
+         if(bits % BITMASK_BITS != 0)
+            m_last_mask = (static_cast<bitmask_type>(1) << (bits % BITMASK_BITS)) - 1;
+         }

-   if(m_align_bit > 6)
-      throw Invalid_Argument("Memory_Pool invalid align_bit");
+      bool find_free(size_t* bit);

-   // This is basically just to verify that the range is valid
-   clear_mem(pool, pool_size);
+      void free(size_t bit);

-   m_pool = pool;
-   m_pool_size = pool_size;
-
-   m_freelist.push_back(std::make_pair(0, m_pool_size));
+      bool empty() const;
+
+   private:
+#if defined(BOTAN_ENABLE_DEBUG_ASSERTS)
+      typedef uint8_t bitmask_type;
+      enum { BITMASK_BITS = 8 };
+#else
+      typedef word bitmask_type;
+      enum { BITMASK_BITS = BOTAN_MP_WORD_BITS };
+#endif
+
+      size_t m_len;
+      bitmask_type m_main_mask;
+      bitmask_type m_last_mask;
+      std::vector<bitmask_type> m_bits;
+   };
+
+bool BitMap::find_free(size_t* bit)
+   {
+   for(size_t i = 0; i != m_bits.size(); ++i)
+      {
+      const bitmask_type mask = (i == m_bits.size() - 1) ? m_last_mask : m_main_mask;
+      if((m_bits[i] & mask) != mask)
+         {
+         size_t free_bit = find_set_bit(~m_bits[i]);
+         const size_t bmask = static_cast<bitmask_type>(1) << (free_bit % BITMASK_BITS);
+         BOTAN_ASSERT_NOMSG((m_bits[i] & bmask) == 0);
+         m_bits[i] |= bmask;
+         *bit = BITMASK_BITS*i + free_bit;
+         return true;
+         }
+      }
+
+   return false;
    }

-void* Memory_Pool::allocate(size_t req)
+void BitMap::free(size_t bit)
    {
-   const size_t alignment = (static_cast<size_t>(1) << m_align_bit);
+   BOTAN_ASSERT_NOMSG(bit <= m_len);
+   const size_t w = bit / BITMASK_BITS;
+   BOTAN_ASSERT_NOMSG(w < m_bits.size());
+   const size_t mask = static_cast<bitmask_type>(1) << (bit % BITMASK_BITS);
+   m_bits[w] = m_bits[w] & (~mask);
+   }

-   if(req > m_pool_size)
-      return nullptr;
-   if(req < m_min_alloc || req > m_max_alloc)
-      return nullptr;
+bool BitMap::empty() const
+   {
+   for(size_t i = 0; i != m_bits.size(); ++i)
+      {
+      if(m_bits[i] != 0)
+         {
+         return false;
+         }
+      }

-   lock_guard_type<mutex_type> lock(m_mutex);
+   return true;
+   }

-   auto best_fit = m_freelist.end();
+}

-   for(auto i = m_freelist.begin(); i != m_freelist.end(); ++i)
-      {
-      // If we have a perfect fit, use it immediately
-      if(i->second == req && (i->first % alignment) == 0)
+class Bucket final
+   {
+   public:
+      Bucket(uint8_t* mem, size_t mem_size, size_t item_size) :
+         m_item_size(item_size),
+         m_page_size(mem_size),
+         m_range(mem),
+         m_bitmap(mem_size / item_size),
+         m_is_full(false)
         {
-         const size_t offset = i->first;
-         m_freelist.erase(i);
-         clear_mem(m_pool + offset, req);
+         }

-         BOTAN_ASSERT((reinterpret_cast<uintptr_t>(m_pool) + offset) % alignment == 0,
-                      "Returning correctly aligned pointer");
+      uint8_t* alloc();

-         return m_pool + offset;
+      bool free(void* p);
+
+      bool in_this_bucket(void* p) const
+         {
+         return ptr_in_pool(m_range, m_page_size, p, m_item_size);
         }

+      bool empty() const
+         {
+         return m_bitmap.empty();
+         }

-      if(((best_fit == m_freelist.end()) || (best_fit->second > i->second)) &&
-         (i->second >= (req + padding_for_alignment(i->first, alignment))))
+      uint8_t* ptr() const
         {
-         best_fit = i;
+         return m_range;
         }
+
+
+   private:
+      size_t m_item_size;
+      size_t m_page_size;
+      uint8_t* m_range;
+      BitMap m_bitmap;
+      bool m_is_full;
+   };
+
+
+uint8_t* Bucket::alloc()
+   {
+   if(m_is_full)
+      {
+      // I know I am full
+      return nullptr;
      }

-   if(best_fit != m_freelist.end())
+   size_t offset;
+   if(!m_bitmap.find_free(&offset))
      {
-      const size_t offset = best_fit->first;
+      // I just found out I am full
+      m_is_full = true;
+      return nullptr;
+      }

-      const size_t alignment_padding = padding_for_alignment(offset, alignment);
+   BOTAN_ASSERT(offset * m_item_size < m_page_size, "Offset is in range");
+   return m_range + m_item_size*offset;
+   }

-      best_fit->first += req + alignment_padding;
-      best_fit->second -= req + alignment_padding;
+bool Bucket::free(void* p)
+   {
+   if(!in_this_bucket(p))
+      return false;

-      // Need to realign, split the block
-      if(alignment_padding)
-         {
-         /*
-         If we used the entire block except for small piece used for
-         alignment at the beginning, so just update the entry already
-         in place (as it is in the correct location), rather than
-         deleting the empty range and inserting the new one in the
-         same location.
-         */
-         if(best_fit->second == 0)
-            {
-            best_fit->first = offset;
-            best_fit->second = alignment_padding;
-            }
-         else
-            m_freelist.insert(best_fit, std::make_pair(offset, alignment_padding));
-         }
+   const size_t offset = (reinterpret_cast<uintptr_t>(p) - reinterpret_cast<uintptr_t>(m_range)) / m_item_size;
+
+   m_bitmap.free(offset);
+   m_is_full = false;
+
+   return true;
+   }
+
+Memory_Pool::Memory_Pool(uint8_t* pool, size_t num_pages, size_t page_size) :
+   m_page_size(page_size)
+   {
+   BOTAN_ARG_CHECK(pool != nullptr, "Memory_Pool pool was null");

-      clear_mem(m_pool + offset + alignment_padding, req);
+   // This is basically just to verify that the range is valid
+   clear_mem(pool, num_pages * page_size);

-      BOTAN_ASSERT((reinterpret_cast<uintptr_t>(m_pool) + offset + alignment_padding) % alignment == 0,
-                   "Returning correctly aligned pointer");
+   m_pool = pool;
+   m_pool_size = num_pages * page_size;

-      return m_pool + offset + alignment_padding;
+   for(size_t i = 0; i != num_pages; ++i)
+      {
+      m_free_pages.push_back(pool + page_size*i);
      }
+   }

-   return nullptr;
+Memory_Pool::~Memory_Pool()
+   {
    }

-bool Memory_Pool::deallocate(void* p, size_t n) noexcept
+void* Memory_Pool::allocate(size_t n)
    {
-   if(!ptr_in_pool(m_pool, m_pool_size, p, n))
-      return false;
+   if(n > m_page_size)
+      return nullptr;
+
+   const size_t n_bucket = choose_bucket(n);

-   std::memset(p, 0, n);
+   if(n_bucket == 0)
+      return nullptr;

    lock_guard_type<mutex_type> lock(m_mutex);

-   const size_t start = static_cast<uint8_t*>(p) - m_pool;
+   std::deque<Bucket>& buckets = m_buckets_for[n_bucket];

-   auto comp = [](std::pair<size_t, size_t> x, std::pair<size_t, size_t> y){ return x.first < y.first; };
+   for(auto& bucket : buckets)
+      {
+      if(uint8_t* p = bucket.alloc())
+         return p;

-   auto i = std::lower_bound(m_freelist.begin(), m_freelist.end(),
-                             std::make_pair(start, 0), comp);
+      // If the bucket is full, maybe move it to the end of the list?
+      // Otoh bucket search should be very fast
+      }

-   // try to merge with later block
-   if(i != m_freelist.end() && start + n == i->first)
+   if(m_free_pages.size() > 0)
      {
-      i->first = start;
-      i->second += n;
-      n = 0;
+      uint8_t* ptr = m_free_pages[0];
+      m_free_pages.pop_front();
+      buckets.push_front(Bucket(ptr, m_page_size, n_bucket));
+      void* p = buckets[0].alloc();
+      BOTAN_ASSERT_NOMSG(p != nullptr);
+      return p;
      }

-   // try to merge with previous block
-   if(i != m_freelist.begin())
+   // out of room
+   return nullptr;
+   }
+
+bool Memory_Pool::deallocate(void* p, size_t len) noexcept
+   {
+   if(!ptr_in_pool(m_pool, m_pool_size, p, len))
+      return false;
+
+   const size_t n_bucket = choose_bucket(len);
+
+   if(n_bucket != 0)
      {
-      auto prev = std::prev(i);
+      /*
+      Zero also any trailing bytes, which should not have been written to,
+      but maybe the user was bad and wrote past the end.
+      */
+      std::memset(p, 0, n_bucket);
+
+      lock_guard_type<mutex_type> lock(m_mutex);
+
+      std::deque<Bucket>& buckets = m_buckets_for[n_bucket];

-      if(prev->first + prev->second == start)
+      for(size_t i = 0; i != buckets.size(); ++i)
         {
-         if(n)
-            {
-            prev->second += n;
-            n = 0;
-            }
-         else
+         Bucket& bucket = buckets[i];
+         if(bucket.free(p) == false)
+            continue;
+
+         if(bucket.empty())
            {
-            // merge adjoining
-            prev->second += i->second;
-            m_freelist.erase(i);
+            m_free_pages.push_back(bucket.ptr());
+
+            if(i != buckets.size() - 1)
+               std::swap(buckets.back(), buckets[i]);
+            buckets.pop_back();
            }
+
+         return true;
         }
      }

-   if(n != 0) // no merge possible?
-      m_freelist.insert(i, std::make_pair(start, n));
-
-   return true;
+   /*
+   * If we reach this point, something bad has occurred. We know the pointer
+   * passed in is inside the range of the pool, but no bucket recognized it,
+   * either because n_bucket was zero or no Bucket::free call returned true. Our
+   * options (since this function is noexcept) are to either ignore it and
+   * return false, ignore it and return true, or to crash.
+   *
+   * Returning false means the pointer will be considered a standard heap
+   * pointer and passed on to free, which will almost certainly cause a heap
+   * corruption.
+   *
+   * There is some robustness argument for just memseting the pointer and
+   * returning true. In this case it will be assumed to be freed. But, since
+   * this pointer *is* within the range of the pool, but no bucket claimed it,
+   * that seems to indicate some existing allocator corruption.
+   *
+   * Crashing is bad, heap corruption is worse. So we crash, in this case by
+   * calling BOTAN_ASSERT and letting the exception handling mechanism
+   * terminate the process.
+   */
+   BOTAN_ASSERT(false, "Pointer from pool, but no bucket recognized it");
+   return false;
    }
 }
diff --git a/src/lib/utils/mem_pool/mem_pool.h b/src/lib/utils/mem_pool/mem_pool.h
index c9c836a9c..8f497310c 100644
--- a/src/lib/utils/mem_pool/mem_pool.h
+++ b/src/lib/utils/mem_pool/mem_pool.h
@@ -9,10 +9,13 @@
 #include <botan/types.h>
 #include <botan/mutex.h>
-#include <vector>
+#include <deque>
+#include <map>

 namespace Botan {

+class Bucket;
+
 class BOTAN_TEST_API Memory_Pool final
    {
    public:
@@ -20,37 +23,34 @@ class BOTAN_TEST_API Memory_Pool final
       * Initialize a memory pool. The memory is not owned by *this,
       * it must be freed by the caller.
       * @param pool the pool
-      * @param pool_size size of pool
+      * @param page_count size of pool in page_size chunks
       * @param page_size some nominal page size (does not need to match
       *        the system page size)
-      * @param min_allocation return null for allocs for smaller amounts
-      * @param max_allocation return null for allocs of larger amounts
-      * @param align_bit align all returned memory to (1<<align_bit) bytes
       */
      Memory_Pool(uint8_t* pool,
-                 size_t pool_size,
-                 size_t page_size,
-                 size_t min_allocation,
-                 size_t max_allocation,
-                 uint8_t align_bit);
+                 size_t page_count,
+                 size_t page_size);
+
+      ~Memory_Pool();

      void* allocate(size_t size);

      bool deallocate(void* p, size_t size) noexcept;

      Memory_Pool(const Memory_Pool&) = delete;
+     Memory_Pool(Memory_Pool&&) = delete;

      Memory_Pool& operator=(const Memory_Pool&) = delete;
+     Memory_Pool& operator=(Memory_Pool&&) = delete;

   private:
      const size_t m_page_size = 0;
-     const size_t m_min_alloc = 0;
-     const size_t m_max_alloc = 0;
-     const uint8_t m_align_bit = 0;

      mutex_type m_mutex;
-     std::vector<std::pair<size_t, size_t>> m_freelist;
+
+     std::deque<uint8_t*> m_free_pages;
+     std::map<size_t, std::deque<Bucket>> m_buckets_for;
+
      uint8_t* m_pool = nullptr;
      size_t m_pool_size = 0;
   };
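One detail worth pulling out of the patch: BitMap::find_free locates the first free slot in a page by applying find_set_bit to the complement of an occupancy word, and find_set_bit itself is a small halving search rather than a hardware intrinsic. The standalone sketch below copies that search to show the idea; the test harness around it is illustrative and not part of the commit.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Copy of the halving search from the patch: returns the index of the
// lowest set bit by shrinking the window by half each iteration.
template<typename T>
size_t find_set_bit(T b)
   {
   size_t s = 8 * sizeof(T) / 2;
   size_t bit = 0;

   while(s > 0)
      {
      const T mask = (static_cast<T>(1) << s) - 1;
      if((b & mask) == 0)
         {
         bit += s;
         b >>= s;
         }
      s /= 2;
      }

   return bit;
   }

int main()
   {
   // Toy occupancy word: slots 0..2 are taken, so slot 3 is the first free one.
   const uint64_t occupied = 0x7;
   const size_t first_free = find_set_bit(~occupied);
   assert(first_free == 3);

   // Cross-check the halving search against a naive linear scan.
   for(uint64_t w : {0x1ULL, 0x8ULL, 0xF0ULL, 0x8000000000000000ULL})
      {
      size_t naive = 0;
      while(((w >> naive) & 1) == 0)
         naive++;
      assert(find_set_bit(w) == naive);
      }

   std::printf("first free slot: %zu\n", first_free);
   return 0;
   }

Once BitMap::find_free has produced a slot index, Bucket::alloc turns it into a pointer with m_range + m_item_size*offset, which is why each page only ever serves a single allocation size and is the source of the alignment property claimed in the theory-of-operation comment.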