author     Richard Yao <[email protected]>  2022-10-20 17:14:42 -0400
committer  GitHub <[email protected]>  2022-10-20 14:14:42 -0700
commit     a06df8d7c1ec878d3717c16f6457f2b8693893c4 (patch)
tree       918a7abc300cfe0a06fc659fce97728dea602116 /module/os/linux/spl
parent     9dcdee788985b4aa9bbf250af3e018056402ba9f (diff)
Linux: Upgrade random_get_pseudo_bytes() to xoshiro256++ 1.0
The motivation for upgrading our PRNG is the recent buildbot failures in the
ZTS' tests/functional/fault/decompress_fault test. The probability of a
failure in that test is 0.8^256, which is approximately 1.6e-25, yet we have
observed multiple test failures in it. This suggests a problem with our
random number generation.

The xorshift128+ generator that we were using has been superseded by newer
generators with "better statistical properties". After doing some reading, it
turns out that these older generators have "low linear complexity of the
lowest bits", which could explain the ZTS test failures.

We do two things to try to fix this:

1. We upgrade from xorshift128+ to xoshiro256++ 1.0.

2. We tweak random_get_pseudo_bytes() to copy the higher order bytes first.

It is hoped that this will fix the test failures in
tests/functional/fault/decompress_fault, although I have not done
simulations. I am skeptical that any simulations I do on a PRNG with a period
of 2^256 - 1 would be meaningful.

Since we have raised the minimum kernel version to 3.10 since this was first
implemented, we have the option of using the Linux kernel's get_random_int().
However, I am not currently prepared to do performance tests to ensure that
this would not be a regression (for the time being), so we opt for upgrading
our PRNG to a newer one from Sebastiano Vigna.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tino Reichardt <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Closes #13983
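As a quick sanity check on the arithmetic above, here is a standalone
userspace sketch (not part of the commit; compile with cc -lm) that evaluates
the quoted failure probability:

#include <math.h>
#include <stdio.h>

int
main(void)
{
	/* The failure probability quoted above: 0.8^256 ~= 1.6e-25. */
	printf("0.8^256 = %e\n", pow(0.8, 256));
	return (0);
}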
Diffstat (limited to 'module/os/linux/spl')
-rw-r--r--  module/os/linux/spl/spl-generic.c  86
1 file changed, 61 insertions(+), 25 deletions(-)
diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index e3aa900ba..71eedf635 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -23,6 +23,7 @@
* Solaris Porting Layer (SPL) Generic Implementation.
*/
+#include <sys/isa_defs.h>
#include <sys/sysmacros.h>
#include <sys/systeminfo.h>
#include <sys/vmsystm.h>
@@ -61,10 +62,10 @@ proc_t p0;
EXPORT_SYMBOL(p0);
/*
- * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
+ * xoshiro256++ 1.0 PRNG by David Blackman and Sebastiano Vigna
*
- * "Further scramblings of Marsaglia's xorshift generators"
- * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ * "Scrambled Linear Pseudorandom Number Generators∗"
+ * https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
*
* random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
* is to provide bytes containing random numbers. It is mapped to /dev/urandom
@@ -76,14 +77,14 @@ EXPORT_SYMBOL(p0);
* free of atomic instructions.
*
* A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
- * to generate words larger than 128 bits will paradoxically be limited to
- * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
- * 128-bit words and selecting the first will implicitly select the second. If
+ * to generate words larger than 256 bits will paradoxically be limited to
+ * `2^256 - 1` possibilities. This is because we have a sequence of `2^256 - 1`
+ * 256-bit words and selecting the first will implicitly select the second. If
* a caller finds this behavior undesirable, random_get_bytes() should be used
* instead.
*
* XXX: Linux interrupt handlers that trigger within the critical section
- * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * formed by `s[3] = xp[3];` and `xp[0] = s[0];` and call this function will
* see the same numbers. Nothing in the code currently calls this in an
* interrupt handler, so this is considered to be okay. If that becomes a
* problem, we could create a set of per-cpu variables for interrupt handlers
@@ -93,49 +94,68 @@ EXPORT_SYMBOL(p0);
static void __percpu *spl_pseudo_entropy;
/*
- * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
- * file:
+ * rotl()/spl_rand_next()/spl_rand_jump() are copied from the following CC-0
+ * licensed file:
*
- * http://xorshift.di.unimi.it/xorshift128plus.c
+ * https://prng.di.unimi.it/xoshiro256plusplus.c
*/
+static inline uint64_t rotl(const uint64_t x, int k)
+{
+ return ((x << k) | (x >> (64 - k)));
+}
+
static inline uint64_t
spl_rand_next(uint64_t *s)
{
- uint64_t s1 = s[0];
- const uint64_t s0 = s[1];
- s[0] = s0;
- s1 ^= s1 << 23; // a
- s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
- return (s[1] + s0);
+ const uint64_t result = rotl(s[0] + s[3], 23) + s[0];
+
+ const uint64_t t = s[1] << 17;
+
+ s[2] ^= s[0];
+ s[3] ^= s[1];
+ s[1] ^= s[2];
+ s[0] ^= s[3];
+
+ s[2] ^= t;
+
+ s[3] = rotl(s[3], 45);
+
+ return (result);
}
static inline void
spl_rand_jump(uint64_t *s)
{
- static const uint64_t JUMP[] =
- { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+ static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba,
+ 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
uint64_t s0 = 0;
uint64_t s1 = 0;
+ uint64_t s2 = 0;
+ uint64_t s3 = 0;
int i, b;
for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
for (b = 0; b < 64; b++) {
if (JUMP[i] & 1ULL << b) {
s0 ^= s[0];
s1 ^= s[1];
+ s2 ^= s[2];
+ s3 ^= s[3];
}
(void) spl_rand_next(s);
}
s[0] = s0;
s[1] = s1;
+ s[2] = s2;
+ s[3] = s3;
}
int
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
{
- uint64_t *xp, s[2];
+ uint64_t *xp, s[4];
ASSERT(ptr);
@@ -143,6 +163,8 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
s[0] = xp[0];
s[1] = xp[1];
+ s[2] = xp[2];
+ s[3] = xp[3];
while (len) {
union {
@@ -154,12 +176,22 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
len -= i;
entropy.ui64 = spl_rand_next(s);
+ /*
+ * xoshiro256++ has low entropy lower bytes, so we copy the
+ * higher order bytes first.
+ */
while (i--)
+#ifdef _ZFS_BIG_ENDIAN
*ptr++ = entropy.byte[i];
+#else
+ *ptr++ = entropy.byte[7 - i];
+#endif
}
xp[0] = s[0];
xp[1] = s[1];
+ xp[2] = s[2];
+ xp[3] = s[3];
put_cpu_ptr(spl_pseudo_entropy);
@@ -765,10 +797,10 @@ spl_kvmem_init(void)
static int __init
spl_random_init(void)
{
- uint64_t s[2];
+ uint64_t s[4];
int i = 0;
- spl_pseudo_entropy = __alloc_percpu(2 * sizeof (uint64_t),
+ spl_pseudo_entropy = __alloc_percpu(4 * sizeof (uint64_t),
sizeof (uint64_t));
if (!spl_pseudo_entropy)
@@ -776,17 +808,19 @@ spl_random_init(void)
get_random_bytes(s, sizeof (s));
- if (s[0] == 0 && s[1] == 0) {
+ if (s[0] == 0 && s[1] == 0 && s[2] == 0 && s[3] == 0) {
if (jiffies != 0) {
s[0] = jiffies;
s[1] = ~0 - jiffies;
+ s[2] = ~jiffies;
+ s[3] = jiffies - ~0;
} else {
- (void) memcpy(s, "improbable seed", sizeof (s));
+ (void) memcpy(s, "improbable seed", 16);
}
printk("SPL: get_random_bytes() returned 0 "
"when generating random seed. Setting initial seed to "
- "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
- cpu_to_be64(s[1]));
+ "0x%016llx%016llx%016llx%016llx.\n", cpu_to_be64(s[0]),
+ cpu_to_be64(s[1]), cpu_to_be64(s[2]), cpu_to_be64(s[3]));
}
for_each_possible_cpu(i) {
@@ -796,6 +830,8 @@ spl_random_init(void)
wordp[0] = s[0];
wordp[1] = s[1];
+ wordp[2] = s[2];
+ wordp[3] = s[3];
}
return (0);
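
For reference, below is a minimal userspace harness for the generator added
above, transcribed from the diff so its output can be inspected outside the
kernel. The seed here is an arbitrary nonzero placeholder (the kernel seeds
from get_random_bytes()), and the partial-copy demo assumes a little-endian
host, mirroring the non-_ZFS_BIG_ENDIAN branch:

#include <stdint.h>
#include <stdio.h>

/*
 * rotl() and next() are transcribed from the diff above so the generator
 * can be exercised in userspace.
 */
static inline uint64_t
rotl(const uint64_t x, int k)
{
	return ((x << k) | (x >> (64 - k)));
}

static uint64_t
next(uint64_t *s)
{
	const uint64_t result = rotl(s[0] + s[3], 23) + s[0];
	const uint64_t t = s[1] << 17;

	s[2] ^= s[0];
	s[3] ^= s[1];
	s[1] ^= s[2];
	s[0] ^= s[3];
	s[2] ^= t;
	s[3] = rotl(s[3], 45);

	return (result);
}

int
main(void)
{
	uint64_t s[4] = { 1, 2, 3, 4 };	/* arbitrary nonzero seed */
	union {
		uint64_t ui64;
		uint8_t byte[sizeof (uint64_t)];
	} entropy;
	int i;

	/* A few raw 64-bit outputs. */
	for (i = 0; i < 4; i++)
		printf("%016llx\n", (unsigned long long)next(s));

	/*
	 * The little-endian partial-copy case from the diff: with only 4 of
	 * 8 bytes requested, byte[7 - i] selects the high-order half of the
	 * word, avoiding the generator's weaker low bits.
	 */
	entropy.ui64 = next(s);
	i = 4;
	while (i--)
		printf("%02x ", entropy.byte[7 - i]);
	printf("\n");

	return (0);
}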