author     Richard Yao <[email protected]>    2022-10-20 17:14:42 -0400
committer  GitHub <[email protected]>         2022-10-20 14:14:42 -0700
commit     a06df8d7c1ec878d3717c16f6457f2b8693893c4 (patch)
tree       918a7abc300cfe0a06fc659fce97728dea602116 /module/os/linux/spl
parent     9dcdee788985b4aa9bbf250af3e018056402ba9f (diff)
Linux: Upgrade random_get_pseudo_bytes() to xoshiro256++ 1.0
The motivation for upgrading our PRNG is the recent buildbot failures in
the ZTS' tests/functional/fault/decompress_fault test. The probability
of a failure in that test is 0.8^256, or roughly 1.6e-25, yet we have
observed multiple failures in it. This suggests a problem with our
random number generation.
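As a quick sanity check of that figure (a throwaway user-space
computation, not part of the patch): 0.8^256 = exp(256 * ln 0.8)
= exp(-57.12) = 1.55e-25.

/* Sanity check of the failure probability quoted above (user-space only). */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	/* 0.8^256 = exp(256 * log(0.8)) ~= 1.55e-25 */
	printf("%.3e\n", pow(0.8, 256));
	return (0);
}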
The xorshift128+ generator that we were using has since been superseded
upstream by newer generators with "better statistical properties". After
doing some reading, it turns out that xorshift-family generators such as
xorshift128+ suffer from "low linear complexity of the lowest bits",
which could explain the ZTS test failures.
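To make the low-bit weakness concrete, here is a user-space sketch (not
part of the patch): because addition carries nothing into bit 0, the low
bit of every xorshift128+ output is an exact XOR of three bits of the
previous state, i.e. a purely linear, LFSR-style function.

#include <assert.h>
#include <stdint.h>

/* The old xorshift128+ next() as it appeared in spl-generic.c. */
static uint64_t
xorshift128plus(uint64_t *s)
{
	uint64_t s1 = s[0];
	const uint64_t s0 = s[1];

	s[0] = s0;
	s1 ^= s1 << 23;
	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5);
	return (s[1] + s0);
}

int
main(void)
{
	uint64_t s[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
	int i;

	for (i = 0; i < 1000000; i++) {
		/*
		 * Linear prediction of the next output's low bit from the
		 * current state; the assert below never fires.
		 */
		uint64_t lin = (s[0] ^ (s[0] >> 18) ^ (s[1] >> 5)) & 1;

		assert((xorshift128plus(s) & 1) == lin);
	}
	return (0);
}

By contrast, xoshiro256++'s rotl-then-add output scrambler makes even
the low output bits depend on carry propagation, so they are no longer a
linear function of the state.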
We do two things to try to fix this:
1. We upgrade from xorshift128+ to xoshiro256++ 1.0.
2. We tweak random_get_pseudo_bytes() to copy the higher order
bytes first (see the sketch after this list).
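A minimal user-space sketch of change 2 (hypothetical helper name; the
in-kernel version is in the diff below): when fewer than eight bytes are
requested, the high-order bytes of the 64-bit output are the ones handed
out, so the statistically weaker low bytes are the ones discarded.

#include <stdint.h>
#include <stdio.h>

static void
copy_high_bytes_first(uint8_t *dst, size_t len, uint64_t r)
{
	union {
		uint64_t ui64;
		uint8_t byte[sizeof (uint64_t)];
	} entropy;
	size_t i = len < sizeof (entropy) ? len : sizeof (entropy);

	entropy.ui64 = r;
	while (i--) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		*dst++ = entropy.byte[i];	/* byte[0] is the MSB here */
#else
		*dst++ = entropy.byte[7 - i];	/* byte[7] is the MSB here */
#endif
	}
}

int
main(void)
{
	uint8_t buf[4];
	size_t i;

	copy_high_bytes_first(buf, sizeof (buf), 0x1122334455667788ULL);
	/*
	 * On little-endian this prints "44 33 22 11": only the four
	 * high-order bytes are emitted; the weak low bytes 88 77 66 55
	 * are never handed to the caller.
	 */
	for (i = 0; i < sizeof (buf); i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return (0);
}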
It is hoped that this will fix the test failures in
tests/functional/fault/decompress_fault, although I have not done
simulations. I am skeptical that any simulations I do on a PRNG with a
period of 2^256 - 1 would be meaningful.
Since this was first implemented, we have raised the minimum supported
kernel version to 3.10, so we now have the option of using the Linux
kernel's get_random_int(). However, I am not currently prepared to do
the performance tests needed to ensure that switching would not be a
regression, so for the time being we opt for upgrading our PRNG to a
newer one from Sebastiano Vigna.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tino Reichardt <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Closes #13983
Diffstat (limited to 'module/os/linux/spl')
-rw-r--r--   module/os/linux/spl/spl-generic.c   86
1 file changed, 61 insertions, 25 deletions
diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index e3aa900ba..71eedf635 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -23,6 +23,7 @@
  * Solaris Porting Layer (SPL) Generic Implementation.
  */
 
+#include <sys/isa_defs.h>
 #include <sys/sysmacros.h>
 #include <sys/systeminfo.h>
 #include <sys/vmsystm.h>
@@ -61,10 +62,10 @@ proc_t p0;
 EXPORT_SYMBOL(p0);
 
 /*
- * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
+ * xoshiro256++ 1.0 PRNG by David Blackman and Sebastiano Vigna
  *
- * "Further scramblings of Marsaglia's xorshift generators"
- * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ * "Scrambled Linear Pseudorandom Number Generators"
+ * https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
  *
  * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
  * is to provide bytes containing random numbers. It is mapped to /dev/urandom
@@ -76,14 +77,14 @@ EXPORT_SYMBOL(p0);
  * free of atomic instructions.
  *
  * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
- * to generate words larger than 128 bits will paradoxically be limited to
- * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
- * 128-bit words and selecting the first will implicitly select the second. If
+ * to generate words larger than 256 bits will paradoxically be limited to
+ * `2^256 - 1` possibilities. This is because we have a sequence of `2^256 - 1`
+ * 256-bit words and selecting the first will implicitly select the second. If
  * a caller finds this behavior undesirable, random_get_bytes() should be used
  * instead.
  *
  * XXX: Linux interrupt handlers that trigger within the critical section
- * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * formed by `s[3] = xp[3];` and `xp[0] = s[0];` and call this function will
  * see the same numbers. Nothing in the code currently calls this in an
  * interrupt handler, so this is considered to be okay. If that becomes a
  * problem, we could create a set of per-cpu variables for interrupt handlers
@@ -93,49 +94,68 @@ EXPORT_SYMBOL(p0);
 static void __percpu *spl_pseudo_entropy;
 
 /*
- * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
- * file:
+ * rotl()/spl_rand_next()/spl_rand_jump() are copied from the following CC-0
+ * licensed file:
  *
- * http://xorshift.di.unimi.it/xorshift128plus.c
+ * https://prng.di.unimi.it/xoshiro256plusplus.c
  */
 
+static inline uint64_t rotl(const uint64_t x, int k)
+{
+	return ((x << k) | (x >> (64 - k)));
+}
+
 static inline uint64_t
 spl_rand_next(uint64_t *s)
 {
-	uint64_t s1 = s[0];
-	const uint64_t s0 = s[1];
-	s[0] = s0;
-	s1 ^= s1 << 23; // a
-	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
-	return (s[1] + s0);
+	const uint64_t result = rotl(s[0] + s[3], 23) + s[0];
+
+	const uint64_t t = s[1] << 17;
+
+	s[2] ^= s[0];
+	s[3] ^= s[1];
+	s[1] ^= s[2];
+	s[0] ^= s[3];
+
+	s[2] ^= t;
+
+	s[3] = rotl(s[3], 45);
+
+	return (result);
 }
 
 static inline void
 spl_rand_jump(uint64_t *s)
 {
-	static const uint64_t JUMP[] =
-	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+	static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba,
+	    0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
 
 	uint64_t s0 = 0;
 	uint64_t s1 = 0;
+	uint64_t s2 = 0;
+	uint64_t s3 = 0;
 	int i, b;
 	for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
 		for (b = 0; b < 64; b++) {
 			if (JUMP[i] & 1ULL << b) {
 				s0 ^= s[0];
 				s1 ^= s[1];
+				s2 ^= s[2];
+				s3 ^= s[3];
 			}
 			(void) spl_rand_next(s);
 		}
 
 	s[0] = s0;
 	s[1] = s1;
+	s[2] = s2;
+	s[3] = s3;
 }
 
 int
 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 {
-	uint64_t *xp, s[2];
+	uint64_t *xp, s[4];
 
 	ASSERT(ptr);
 
@@ -143,6 +163,8 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 
 	s[0] = xp[0];
 	s[1] = xp[1];
+	s[2] = xp[2];
+	s[3] = xp[3];
 
 	while (len) {
 		union {
@@ -154,12 +176,22 @@ random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 		len -= i;
 		entropy.ui64 = spl_rand_next(s);
 
+		/*
+		 * xoshiro256++ has low entropy lower bytes, so we copy the
+		 * higher order bytes first.
+		 */
 		while (i--)
+#ifdef _ZFS_BIG_ENDIAN
 			*ptr++ = entropy.byte[i];
+#else
+			*ptr++ = entropy.byte[7 - i];
+#endif
 	}
 
 	xp[0] = s[0];
 	xp[1] = s[1];
+	xp[2] = s[2];
+	xp[3] = s[3];
 
 	put_cpu_ptr(spl_pseudo_entropy);
 
@@ -765,10 +797,10 @@ spl_kvmem_init(void)
 static int __init
 spl_random_init(void)
 {
-	uint64_t s[2];
+	uint64_t s[4];
 	int i = 0;
 
-	spl_pseudo_entropy = __alloc_percpu(2 * sizeof (uint64_t),
+	spl_pseudo_entropy = __alloc_percpu(4 * sizeof (uint64_t),
 	    sizeof (uint64_t));
 
 	if (!spl_pseudo_entropy)
@@ -776,17 +808,19 @@ spl_random_init(void)
 
 	get_random_bytes(s, sizeof (s));
 
-	if (s[0] == 0 && s[1] == 0) {
+	if (s[0] == 0 && s[1] == 0 && s[2] == 0 && s[3] == 0) {
 		if (jiffies != 0) {
 			s[0] = jiffies;
 			s[1] = ~0 - jiffies;
+			s[2] = ~jiffies;
+			s[3] = jiffies - ~0;
 		} else {
-			(void) memcpy(s, "improbable seed", sizeof (s));
+			(void) memcpy(s, "improbable seed", 16);
 		}
 		printk("SPL: get_random_bytes() returned 0 "
 		    "when generating random seed. Setting initial seed to "
-		    "0x%016llx%016llx.\n", cpu_to_be64(s[0]),
-		    cpu_to_be64(s[1]));
+		    "0x%016llx%016llx%016llx%016llx.\n", cpu_to_be64(s[0]),
+		    cpu_to_be64(s[1]), cpu_to_be64(s[2]), cpu_to_be64(s[3]));
 	}
 
 	for_each_possible_cpu(i) {
@@ -796,6 +830,8 @@ spl_random_init(void)
 
 		wordp[0] = s[0];
 		wordp[1] = s[1];
+		wordp[2] = s[2];
+		wordp[3] = s[3];
 	}
 
 	return (0);
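For context on why the patch carries the jump function along: outside
the hunk context shown above, spl_random_init() advances the seed state
with spl_rand_jump() when distributing it to each possible CPU, so that
every CPU owns a non-overlapping subsequence (each jump is equivalent to
2^128 calls to next()). A self-contained user-space sketch of that
scheme, with a fixed seed and a plain array standing in for the kernel's
per-cpu machinery:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	NCPUS	4	/* stand-in for the kernel's possible-CPU count */

static inline uint64_t
rotl(const uint64_t x, int k)
{
	return ((x << k) | (x >> (64 - k)));
}

/* xoshiro256++ 1.0 next(), same algorithm as spl_rand_next() above. */
static uint64_t
next(uint64_t *s)
{
	const uint64_t result = rotl(s[0] + s[3], 23) + s[0];
	const uint64_t t = s[1] << 17;

	s[2] ^= s[0];
	s[3] ^= s[1];
	s[1] ^= s[2];
	s[0] ^= s[3];
	s[2] ^= t;
	s[3] = rotl(s[3], 45);

	return (result);
}

/* Published jump polynomial: equivalent to 2^128 calls to next(). */
static void
jump(uint64_t *s)
{
	static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba,
	    0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
	uint64_t t[4] = { 0, 0, 0, 0 };
	size_t i;
	int b, j;

	for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
		for (b = 0; b < 64; b++) {
			if (JUMP[i] & 1ULL << b)
				for (j = 0; j < 4; j++)
					t[j] ^= s[j];
			(void) next(s);
		}
	memcpy(s, t, sizeof (t));
}

int
main(void)
{
	/* Fixed seed standing in for get_random_bytes(). */
	uint64_t s[4] = { 1, 2, 3, 4 };
	uint64_t percpu[NCPUS][4];
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		jump(s);	/* each CPU's stream starts 2^128 steps apart */
		memcpy(percpu[cpu], s, sizeof (s));
	}

	for (cpu = 0; cpu < NCPUS; cpu++)
		printf("cpu %d first output: %016llx\n", cpu,
		    (unsigned long long)next(percpu[cpu]));
	return (0);
}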