diff options
author | Matthias Gierlings <[email protected]> | 2017-11-21 19:34:41 +0100 |
---|---|---|
committer | Matthias Gierlings <[email protected]> | 2017-11-23 00:19:31 +0100 |
commit | 0e28426ca870e1e560b0f89baad92071f6813c4e (patch) | |
tree | 24c1cef78e5f1235f52d591e03e2b615c736963c /src/lib | |
parent | f0af55db4e0f3b4424a56f36e2d1885445ce9535 (diff) |
Adds runtime benchmark to guess phys. core count.
Adds a small runtime benchmark to prevent performance degradation by
overprovisioning SMT CPUs with too many threads. This is a temporary
workaround until a hardware and OS independent detection of the
physical core count through Botan::CPUID is in place.
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/pubkey/xmss/xmss_privatekey.cpp | 6 | ||||
-rw-r--r-- | src/lib/pubkey/xmss/xmss_tools.cpp | 79 | ||||
-rw-r--r-- | src/lib/pubkey/xmss/xmss_tools.h | 40 |
3 files changed, 122 insertions, 3 deletions
diff --git a/src/lib/pubkey/xmss/xmss_privatekey.cpp b/src/lib/pubkey/xmss/xmss_privatekey.cpp
index 9ac89a571..8cfab7f75 100644
--- a/src/lib/pubkey/xmss/xmss_privatekey.cpp
+++ b/src/lib/pubkey/xmss/xmss_privatekey.cpp
@@ -100,7 +100,7 @@ XMSS_PrivateKey::tree_hash(size_t start_idx,
       {
       target_node_height,
       static_cast<size_t>(
-         std::ceil(std::log2(std::thread::hardware_concurrency())))
+         std::ceil(std::log2(XMSS_Tools::max_threads())))
       });
 
    // skip parallelization overhead for leaf nodes.
@@ -171,7 +171,7 @@ XMSS_PrivateKey::tree_hash(size_t start_idx,
             node_addresses[i].set_tree_index(
                (node_addresses[2 * i + 1].get_tree_index() - 1) >> 1);
             using rnd_tree_hash_fn_t =
-               void (XMSS_Common_Ops::*)(secure_vector<uint8_t>&,
+               void (XMSS_PrivateKey::*)(secure_vector<uint8_t>&,
                                          const secure_vector<uint8_t>&,
                                          const secure_vector<uint8_t>&,
                                          XMSS_Address& adrs,
@@ -181,7 +181,7 @@ XMSS_PrivateKey::tree_hash(size_t start_idx,
             threads.emplace_back(
                std::thread(
                   static_cast<rnd_tree_hash_fn_t>(
-                     &XMSS_Common_Ops::randomize_tree_hash),
+                     &XMSS_PrivateKey::randomize_tree_hash),
                   this,
                   std::ref(nodes[i]),
                   std::ref(ro_nodes[2 * i]),
diff --git a/src/lib/pubkey/xmss/xmss_tools.cpp b/src/lib/pubkey/xmss/xmss_tools.cpp
new file mode 100644
index 000000000..24553b144
--- /dev/null
+++ b/src/lib/pubkey/xmss/xmss_tools.cpp
@@ -0,0 +1,79 @@
+/*
+ * XMSS Tools
+ * (C) 2017 Matthias Gierlings
+ *
+ * Botan is released under the Simplified BSD License (see license.txt)
+ **/
+
+#include <botan/xmss_tools.h>
+
+namespace Botan {
+
+#if defined(BOTAN_TARGET_OS_HAS_THREADS)
+
+size_t XMSS_Tools::max_threads()
+   {
+   // Benchmark only once; the function-local static caches the result.
+   static const size_t threads { bench_threads() };
+   return threads;
+   }
+
+size_t XMSS_Tools::bench_threads()
+   {
+   const size_t hw_threads = std::thread::hardware_concurrency();
+   if(hw_threads <= 1)
+      {
+      return 1;
+      }
+   const size_t BENCH_ITERATIONS = 1000;
+   std::vector<std::thread> threads;
+   threads.reserve(hw_threads);
+   std::vector<std::chrono::nanoseconds> durations;
+
+   // Candidates: all reported hardware threads vs. half of them.
+   const std::vector<size_t> concurrency { hw_threads, hw_threads / 2 };
+
+   for(const auto& cc : concurrency)
+      {
+      AutoSeeded_RNG rng;
+      std::vector<XMSS_Hash> hash(hw_threads, XMSS_Hash("SHA-256"));
+      std::vector<secure_vector<uint8_t>> data(
+         hw_threads, rng.random_vec(hash[0].output_length()));
+      // Keep the total number of hashes roughly equal for both runs.
+      const size_t iterations = (BENCH_ITERATIONS * hw_threads) / cc;
+      const auto start = std::chrono::high_resolution_clock::now();
+      for(size_t i = 0; i < cc; ++i)
+         {
+         threads.emplace_back(
+            std::thread([iterations, &hs = hash[i], &d = data[i]]()
+               {
+               for(size_t n = 0; n < iterations; ++n)
+                  {
+                  hs.h(d, d, d);
+                  }
+               }
+            ));
+         }
+      for(auto& t : threads)
+         {
+         t.join();
+         }
+      // Stop the clock only after every worker has finished its hashes.
+      durations.emplace_back(
+         std::chrono::duration_cast<std::chrono::nanoseconds>(
+            std::chrono::high_resolution_clock::now() - start));
+      threads.clear();
+      }
+
+   // Use all hardware threads only if that run was strictly faster.
+   if(durations[0].count() < durations[1].count())
+      {
+      return concurrency[0];
+      }
+   return concurrency[1];
+   }
+
+#endif
+
+}
+
diff --git a/src/lib/pubkey/xmss/xmss_tools.h b/src/lib/pubkey/xmss/xmss_tools.h
index bbd31fd9f..6e45e882d 100644
--- a/src/lib/pubkey/xmss/xmss_tools.h
+++ b/src/lib/pubkey/xmss/xmss_tools.h
@@ -12,6 +12,12 @@
 #include <botan/secmem.h>
 #include <iterator>
 #include <type_traits>
+#if defined(BOTAN_TARGET_OS_HAS_THREADS)
+   #include <thread>
+   #include <chrono>
+   #include <botan/xmss_hash.h>
+   #include <botan/auto_rng.h>
+#endif
 
 namespace Botan {
 
@@ -53,8 +59,42 @@ class XMSS_Tools final
                 void>::type>
       static void concat(secure_vector<uint8_t>& target, const T& src, size_t len);
 
+      /**
+       * @deprecated Determines the maximum number of threads to be used
+       * efficiently, based on runtime timing measurements. Ideally the
+       * result will correspond to the physical number of cores. On systems
+       * supporting simultaneous multi threading (SMT)
+       * std::thread::hardware_concurrency() usually reports a supported
+       * number of threads which is bigger (typically by a factor of 2) than
+       * the number of physical cores available. Using more threads than
+       * physically available cores for computationally intensive tasks
+       * resulted in slowdowns compared to using a number of threads equal to
+       * the number of physical cores on test systems. This function is a
+       * temporary workaround to prevent performance degradation due to
+       * overstressing the CPU with too many threads.
+       *
+       * @return Presumed number of physical cores based on timing measurements.
+       **/
+      static size_t max_threads(); // TODO: Remove max_threads() and use
+                                   // Botan::CPUID once proper platform
+                                   // independent detection of physical cores is
+                                   // available.
+
    private:
       XMSS_Tools();
+      /**
+       * @deprecated Measures the time t1 it takes to calculate hashes using
+       * std::thread::hardware_concurrency() many threads and the time t2
+       * calculating (roughly) the same number of hashes using
+       * std::thread::hardware_concurrency() / 2 threads.
+       *
+       * @return 1 if at most one hardware thread is reported; otherwise
+       * hardware_concurrency() if t1 < t2, else hardware_concurrency() / 2.
+       **/
+      static size_t bench_threads(); // TODO: Remove bench_threads() and use
+                                     // Botan::CPUID once proper platform
+                                     // independent detection of physical cores
+                                     // is available.
    };
 
 template <typename T, typename U>