aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
authorMatthias Gierlings <[email protected]>2017-11-21 19:34:41 +0100
committerMatthias Gierlings <[email protected]>2017-11-23 00:19:31 +0100
commit0e28426ca870e1e560b0f89baad92071f6813c4e (patch)
tree24c1cef78e5f1235f52d591e03e2b615c736963c /src/lib
parentf0af55db4e0f3b4424a56f36e2d1885445ce9535 (diff)
Adds runtime benchmark to guess phys. core count.
Adds a small runtime benchmark to prevent performance degradation by overprovisioning SMT CPUs with too many threads. This is a temporary workaround until a hardware and OS independent detection of the physical core count through Botan::CPUID is in place.
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/pubkey/xmss/xmss_privatekey.cpp6
-rw-r--r--src/lib/pubkey/xmss/xmss_tools.cpp79
-rw-r--r--src/lib/pubkey/xmss/xmss_tools.h40
3 files changed, 122 insertions, 3 deletions
diff --git a/src/lib/pubkey/xmss/xmss_privatekey.cpp b/src/lib/pubkey/xmss/xmss_privatekey.cpp
index 9ac89a571..8cfab7f75 100644
--- a/src/lib/pubkey/xmss/xmss_privatekey.cpp
+++ b/src/lib/pubkey/xmss/xmss_privatekey.cpp
@@ -100,7 +100,7 @@ XMSS_PrivateKey::tree_hash(size_t start_idx,
{
target_node_height,
static_cast<size_t>(
- std::ceil(std::log2(std::thread::hardware_concurrency())))
+ std::ceil(std::log2(XMSS_Tools::max_threads())))
});
// skip parallelization overhead for leaf nodes.
@@ -171,7 +171,7 @@ XMSS_PrivateKey::tree_hash(size_t start_idx,
node_addresses[i].set_tree_index(
(node_addresses[2 * i + 1].get_tree_index() - 1) >> 1);
using rnd_tree_hash_fn_t =
- void (XMSS_Common_Ops::*)(secure_vector<uint8_t>&,
+ void (XMSS_PrivateKey::*)(secure_vector<uint8_t>&,
const secure_vector<uint8_t>&,
const secure_vector<uint8_t>&,
XMSS_Address& adrs,
@@ -181,7 +181,7 @@ XMSS_PrivateKey::tree_hash(size_t start_idx,
threads.emplace_back(
std::thread(
static_cast<rnd_tree_hash_fn_t>(
- &XMSS_Common_Ops::randomize_tree_hash),
+ &XMSS_PrivateKey::randomize_tree_hash),
this,
std::ref(nodes[i]),
std::ref(ro_nodes[2 * i]),
diff --git a/src/lib/pubkey/xmss/xmss_tools.cpp b/src/lib/pubkey/xmss/xmss_tools.cpp
new file mode 100644
index 000000000..24553b144
--- /dev/null
+++ b/src/lib/pubkey/xmss/xmss_tools.cpp
@@ -0,0 +1,79 @@
+/*
+ * XMSS Tools
+ * (C) 2017 Matthias Gierlings
+ *
+ * Botan is released under the Simplified BSD License (see license.txt)
+ **/
+
+#include <botan/xmss_tools.h>
+
+namespace Botan {
+
+#if defined(BOTAN_TARGET_OS_HAS_THREADS)
+
+/*
+ * Returns the thread count selected by bench_threads(). The value is held
+ * in a function-local static, so the benchmark is executed only when that
+ * static is initialised (on the first call); later calls return the stored
+ * result.
+ */
+size_t XMSS_Tools::max_threads()
+   {
+   static const size_t cached_thread_count = bench_threads();
+   return cached_thread_count;
+   }
+
+/*
+ * Times the same amount of SHA-256 based XMSS hashing twice: once spread
+ * over std::thread::hardware_concurrency() threads and once over half as
+ * many threads.
+ *
+ * @return 1 if hardware_concurrency() reports 0 or 1; otherwise
+ *         hardware_concurrency() if the full-width run finished faster,
+ *         hardware_concurrency() / 2 if it did not.
+ */
+size_t XMSS_Tools::bench_threads()
+   {
+   // Query once so every size and loop bound below agrees on one value.
+   const size_t hw_threads = std::thread::hardware_concurrency();
+   if(hw_threads <= 1)
+      {
+      return 1;
+      }
+
+   const size_t BENCH_ITERATIONS = 1000;
+   const std::vector<size_t> concurrency { hw_threads, hw_threads / 2 };
+
+   std::vector<std::chrono::nanoseconds> durations;
+   durations.reserve(concurrency.size());
+   std::vector<std::thread> threads;
+   threads.reserve(hw_threads);
+
+   for(const size_t cc : concurrency)
+      {
+      AutoSeeded_RNG rng;
+      std::vector<XMSS_Hash> hash(hw_threads, XMSS_Hash("SHA-256"));
+      std::vector<secure_vector<uint8_t>> data(
+         hw_threads, rng.random_vec(hash[0].output_length()));
+
+      // Multiply before dividing so that both runs perform (almost) the
+      // same total number of hashes even when hw_threads is odd.
+      const size_t iterations = (BENCH_ITERATIONS * hw_threads) / cc;
+
+      const auto start = std::chrono::high_resolution_clock::now();
+      for(size_t i = 0; i < cc; ++i)
+         {
+         // Each worker gets its own hash object and buffer. Only these
+         // two references and the iteration count are captured; the loop
+         // counter itself is not needed inside the worker.
+         XMSS_Hash& hs = hash[i];
+         secure_vector<uint8_t>& d = data[i];
+         threads.emplace_back([iterations, &hs, &d]()
+            {
+            for(size_t n = 0; n < iterations; ++n)
+               {
+               hs.h(d, d, d);
+               }
+            });
+         }
+      for(auto& t : threads)
+         {
+         t.join();
+         }
+      // Stop the clock only after all workers have finished; otherwise
+      // just the thread start-up cost would be measured, not the hashing.
+      durations.emplace_back(
+         std::chrono::duration_cast<std::chrono::nanoseconds>(
+            std::chrono::high_resolution_clock::now() - start));
+      threads.clear();
+      }
+
+   if(durations[0].count() < durations[1].count())
+      {
+      return concurrency[0];
+      }
+   return concurrency[1];
+   }
+
+#endif
+
+}
+
diff --git a/src/lib/pubkey/xmss/xmss_tools.h b/src/lib/pubkey/xmss/xmss_tools.h
index bbd31fd9f..6e45e882d 100644
--- a/src/lib/pubkey/xmss/xmss_tools.h
+++ b/src/lib/pubkey/xmss/xmss_tools.h
@@ -12,6 +12,12 @@
#include <botan/secmem.h>
#include <iterator>
#include <type_traits>
+#if defined(BOTAN_TARGET_OS_HAS_THREADS)
+ #include <thread>
+ #include <chrono>
+ #include <botan/xmss_hash.h>
+ #include <botan/auto_rng.h>
+#endif
namespace Botan {
@@ -53,8 +59,42 @@ class XMSS_Tools final
void>::type>
static void concat(secure_vector<uint8_t>& target, const T& src, size_t len);
+ /**
+ * @deprecated Determines the maximum number of threads to be used
+ * efficiently, based on runtime timing measurements. Ideally the
+ * result will correspond to the physical number of cores. On systems
+ * supporting simultaneous multi threading (SMT)
+ * std::thread::hardware_concurrency() usually reports a supported
+ * number of threads which is bigger (typically by a factor of 2) than
+ * the number of physical cores available. Using more threads than
+ * physically available cores for computationally intensive tasks
+ * resulted in slowdowns compared to using a number of threads equal to
+ * the number of physical cores on test systems. This function is a
+ * temporary workaround to prevent performance degradation due to
+ * overstressing the CPU with too many threads.
+ *
+ * @return Presumed number of physical cores based on timing measurements.
+ **/
+ static size_t max_threads(); // TODO: Remove max_threads() and use
+ // Botan::CPUID once proper platform
+ // independent detection of physical cores is
+ // available.
+
private:
XMSS_Tools();
+ /**
+ * @deprecated Measures the time t1 it takes to calculate hashes using
+ * std::thread::hardware_concurrency() many threads and the time t2
+ * calculating the same number of hashes using
+ * std::thread::hardware_concurrency() / 2 threads.
+ *
+ * @return std::thread::hardware_concurrency() if t1 < t2
+ * std::thread::hardware_concurrency() / 2 otherwise.
+ **/
+ static size_t bench_threads(); // TODO: Remove bench_threads() and use
+ // Botan::CPUID once proper platform
+ // independent detection of physical cores
+ // is available.
};
template <typename T, typename U>