about summary refs log tree commit diff stats
path: root/module
diff options
context:
space:
mode:
author Tino Reichardt <[email protected]> 2023-02-27 16:14:37 +0100
committer Brian Behlendorf <[email protected]> 2023-03-02 13:52:27 -0800
commit f9f9bef22f96eb3092d7466112f62f7ad4567d71 (patch)
tree 0df1e36ff9ee5e3f14be1ba605b129f06a442f03 /module
parent 4c5fec01a48acc184614ab8735e6954961990235 (diff)
Update BLAKE3 for using the new impl handling
This commit changes the BLAKE3 implementation handling and also the calls to it from the ztest command.

Tested-by: Rich Ercolani <[email protected]>
Tested-by: Sebastian Gottschall <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tino Reichardt <[email protected]>
Closes #13741
Diffstat (limited to 'module')
-rw-r--r-- module/icp/algs/blake3/blake3.c 2
-rw-r--r-- module/icp/algs/blake3/blake3_generic.c 5
-rw-r--r-- module/icp/algs/blake3/blake3_impl.c 423
-rw-r--r-- module/icp/algs/blake3/blake3_impl.h 31
-rw-r--r-- module/icp/algs/blake3/blake3_x86-64.c 248
5 files changed, 241 insertions, 468 deletions
diff --git a/module/icp/algs/blake3/blake3.c b/module/icp/algs/blake3/blake3.c
index 8e441f454..4f93e4ff2 100644
--- a/module/icp/algs/blake3/blake3.c
+++ b/module/icp/algs/blake3/blake3.c
@@ -432,7 +432,7 @@ static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
memcpy(ctx->key, key, BLAKE3_KEY_LEN);
chunk_state_init(&ctx->chunk, key, flags);
ctx->cv_stack_len = 0;
- ctx->ops = blake3_impl_get_ops();
+ ctx->ops = blake3_get_ops();
}
/*
diff --git a/module/icp/algs/blake3/blake3_generic.c b/module/icp/algs/blake3/blake3_generic.c
index 94a1f1082..ca7197a26 100644
--- a/module/icp/algs/blake3/blake3_generic.c
+++ b/module/icp/algs/blake3/blake3_generic.c
@@ -187,7 +187,8 @@ static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
}
}
-static inline boolean_t blake3_is_generic_supported(void)
+/* the generic implementation is always okay */
+static boolean_t blake3_is_supported(void)
{
return (B_TRUE);
}
@@ -196,7 +197,7 @@ const blake3_ops_t blake3_generic_impl = {
.compress_in_place = blake3_compress_in_place_generic,
.compress_xof = blake3_compress_xof_generic,
.hash_many = blake3_hash_many_generic,
- .is_supported = blake3_is_generic_supported,
+ .is_supported = blake3_is_supported,
.degree = 4,
.name = "generic"
};
diff --git a/module/icp/algs/blake3/blake3_impl.c b/module/icp/algs/blake3/blake3_impl.c
index 7bc4db2c9..f68a5edfe 100644
--- a/module/icp/algs/blake3/blake3_impl.c
+++ b/module/icp/algs/blake3/blake3_impl.c
@@ -24,222 +24,266 @@
*/
#include <sys/zfs_context.h>
-#include <sys/zio_checksum.h>
+#include <sys/zfs_impl.h>
+#include <sys/blake3.h>
+#include <sys/simd.h>
#include "blake3_impl.h"
-static const blake3_ops_t *const blake3_impls[] = {
- &blake3_generic_impl,
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
- &blake3_sse2_impl,
-#endif
-#if defined(__aarch64__) || \
- (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
- &blake3_sse41_impl,
-#endif
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
- &blake3_avx2_impl,
-#endif
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
- &blake3_avx512_impl,
-#endif
-};
-
-/* Select BLAKE3 implementation */
-#define IMPL_FASTEST (UINT32_MAX)
-#define IMPL_CYCLE (UINT32_MAX - 1)
-
-#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
-
-/* Indicate that benchmark has been done */
-static boolean_t blake3_initialized = B_FALSE;
-
-/* Implementation that contains the fastest methods */
-static blake3_ops_t blake3_fastest_impl = {
- .name = "fastest"
-};
-/* Hold all supported implementations */
-static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
-static uint32_t blake3_supp_impls_cnt = 0;
+/* external SSE2 routines; keep ASMABI as on the former blake3_x86-64.c */
+extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
-/* Currently selected implementation */
-static uint32_t blake3_impl_chosen = IMPL_FASTEST;
+static void blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
-static struct blake3_impl_selector {
- const char *name;
- uint32_t sel;
-} blake3_impl_selectors[] = {
- { "cycle", IMPL_CYCLE },
- { "fastest", IMPL_FASTEST }
-};
+static void blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
+}
-/* check the supported implementations */
-static void blake3_impl_init(void)
+static boolean_t blake3_is_sse2_supported(void)
{
- int i, c;
-
- /* init only once */
- if (likely(blake3_initialized))
- return;
+#if defined(__x86_64)
+ return (kfpu_allowed() && zfs_sse2_available());
+#elif defined(__PPC64__)
+ return (kfpu_allowed() && zfs_vsx_available());
+#else
+ return (kfpu_allowed());
+#endif
+}
- /* move supported implementations into blake3_supp_impls */
- for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
- const blake3_ops_t *impl = blake3_impls[i];
+const blake3_ops_t blake3_sse2_impl = {
+ .compress_in_place = blake3_compress_in_place_sse2,
+ .compress_xof = blake3_compress_xof_sse2,
+ .hash_many = blake3_hash_many_sse2,
+ .is_supported = blake3_is_sse2_supported,
+ .degree = 4,
+ .name = "sse2"
+};
+#endif
- if (impl->is_supported && impl->is_supported())
- blake3_supp_impls[c++] = impl;
- }
- blake3_supp_impls_cnt = c;
+/* same condition as the blake3_sse41_impl entry in blake3_impls[] */
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
- /* first init generic impl, may be changed via set_fastest() */
- memcpy(&blake3_fastest_impl, blake3_impls[0],
- sizeof (blake3_fastest_impl));
- blake3_initialized = B_TRUE;
+/* external SSE4.1 routines; keep ASMABI as on the former blake3_x86-64.c */
+extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
}
-/* get number of supported implementations */
-uint32_t
-blake3_impl_getcnt(void)
-{
- blake3_impl_init();
- return (blake3_supp_impls_cnt);
+static void blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
}
-/* get id of selected implementation */
-uint32_t
-blake3_impl_getid(void)
-{
- return (IMPL_READ(blake3_impl_chosen));
+static void blake3_hash_many_sse41(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
}
-/* get name of selected implementation */
-const char *
-blake3_impl_getname(void)
+static boolean_t blake3_is_sse41_supported(void)
{
- uint32_t impl = IMPL_READ(blake3_impl_chosen);
-
- blake3_impl_init();
- switch (impl) {
- case IMPL_FASTEST:
- return ("fastest");
- case IMPL_CYCLE:
- return ("cycle");
- default:
- return (blake3_supp_impls[impl]->name);
- }
+#if defined(__x86_64)
+ return (kfpu_allowed() && zfs_sse4_1_available());
+#elif defined(__PPC64__)
+ return (kfpu_allowed() && zfs_vsx_available());
+#else
+ return (kfpu_allowed());
+#endif
}
-/* setup id as fastest implementation */
-void
-blake3_impl_set_fastest(uint32_t id)
-{
- /* setup fastest impl */
- memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
- sizeof (blake3_fastest_impl));
+const blake3_ops_t blake3_sse41_impl = {
+ .compress_in_place = blake3_compress_in_place_sse41,
+ .compress_xof = blake3_compress_xof_sse41,
+ .hash_many = blake3_hash_many_sse41,
+ .is_supported = blake3_is_sse41_supported,
+ .degree = 4,
+ .name = "sse41"
+};
+#endif
+
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+/* external AVX2 routine; keep ASMABI as on the former blake3_x86-64.c */
+extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
}
-/* set implementation by id */
-void
-blake3_impl_setid(uint32_t id)
+static boolean_t blake3_is_avx2_supported(void)
{
- blake3_impl_init();
- switch (id) {
- case IMPL_FASTEST:
- atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
- break;
- case IMPL_CYCLE:
- atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
- break;
- default:
- ASSERT3U(id, <, blake3_supp_impls_cnt);
- atomic_swap_32(&blake3_impl_chosen, id);
- break;
- }
+ return (kfpu_allowed() && zfs_sse4_1_available() &&
+ zfs_avx2_available());
}
-/* set implementation by name */
-int
-blake3_impl_setname(const char *val)
-{
- uint32_t impl = IMPL_READ(blake3_impl_chosen);
- size_t val_len;
- int i, err = -EINVAL;
-
- blake3_impl_init();
- val_len = strlen(val);
- while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
- val_len--;
-
- /* check mandatory implementations */
- for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
- const char *name = blake3_impl_selectors[i].name;
-
- if (val_len == strlen(name) &&
- strncmp(val, name, val_len) == 0) {
- impl = blake3_impl_selectors[i].sel;
- err = 0;
- break;
- }
- }
+const blake3_ops_t
+blake3_avx2_impl = {
+ .compress_in_place = blake3_compress_in_place_sse41,
+ .compress_xof = blake3_compress_xof_sse41,
+ .hash_many = blake3_hash_many_avx2,
+ .is_supported = blake3_is_avx2_supported,
+ .degree = 8,
+ .name = "avx2"
+};
+#endif
- if (err != 0 && blake3_initialized) {
- /* check all supported implementations */
- for (i = 0; i < blake3_supp_impls_cnt; i++) {
- const char *name = blake3_supp_impls[i]->name;
-
- if (val_len == strlen(name) &&
- strncmp(val, name, val_len) == 0) {
- impl = i;
- err = 0;
- break;
- }
- }
- }
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+/* external AVX512 routines; keep ASMABI as on the former blake3_x86-64.c */
+extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags) {
+ kfpu_begin();
+ zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
+ flags);
+ kfpu_end();
+}
- if (err == 0) {
- atomic_swap_32(&blake3_impl_chosen, impl);
- }
+static void blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]) {
+ kfpu_begin();
+ zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
+ out);
+ kfpu_end();
+}
- return (err);
+static void blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ kfpu_begin();
+ zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ kfpu_end();
}
-const blake3_ops_t *
-blake3_impl_get_ops(void)
+static boolean_t blake3_is_avx512_supported(void)
{
- const blake3_ops_t *ops = NULL;
- uint32_t impl = IMPL_READ(blake3_impl_chosen);
-
- blake3_impl_init();
- switch (impl) {
- case IMPL_FASTEST:
- ASSERT(blake3_initialized);
- ops = &blake3_fastest_impl;
- break;
- case IMPL_CYCLE:
- /* Cycle through supported implementations */
- ASSERT(blake3_initialized);
- ASSERT3U(blake3_supp_impls_cnt, >, 0);
- static uint32_t cycle_count = 0;
- uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
- ops = blake3_supp_impls[idx];
- break;
- default:
- ASSERT3U(blake3_supp_impls_cnt, >, 0);
- ASSERT3U(impl, <, blake3_supp_impls_cnt);
- ops = blake3_supp_impls[impl];
- break;
- }
-
- ASSERT3P(ops, !=, NULL);
- return (ops);
+ return (kfpu_allowed() && zfs_avx512f_available() &&
+ zfs_avx512vl_available());
}
-#if defined(_KERNEL)
+const blake3_ops_t blake3_avx512_impl = {
+ .compress_in_place = blake3_compress_in_place_avx512,
+ .compress_xof = blake3_compress_xof_avx512,
+ .hash_many = blake3_hash_many_avx512,
+ .is_supported = blake3_is_avx512_supported,
+ .degree = 16,
+ .name = "avx512"
+};
+#endif
+
+extern const blake3_ops_t blake3_generic_impl;
+
+static const blake3_ops_t *const blake3_impls[] = {
+ &blake3_generic_impl,
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse2_impl,
+#endif
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse41_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+ &blake3_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+ &blake3_avx512_impl,
+#endif
+};
+/* use the generic implementation functions */
+#define IMPL_NAME "blake3"
+#define IMPL_OPS_T blake3_ops_t
+#define IMPL_ARRAY blake3_impls
+#define IMPL_GET_OPS blake3_get_ops
+#define ZFS_IMPL_OPS zfs_blake3_ops
+#include <generic_impl.c>
+
+#ifdef _KERNEL
void **blake3_per_cpu_ctx;
void
@@ -253,9 +297,6 @@ blake3_per_cpu_ctx_init(void)
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
KM_SLEEP);
}
-
- /* init once in kernel mode */
- blake3_impl_init();
}
void
@@ -276,7 +317,7 @@ blake3_per_cpu_ctx_fini(void)
static int
blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
{
- const uint32_t impl = IMPL_READ(blake3_impl_chosen);
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
char *fmt;
int cnt = 0;
@@ -289,10 +330,11 @@ blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
/* list all supported implementations */
- for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
+ generic_impl_init();
+ /* i counts entries of the supported list, so take the name from it */
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
fmt = IMPL_FMT(impl, i);
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
- blake3_supp_impls[i]->name);
+ generic_supp_impls[i]->name);
}
return (cnt);
@@ -302,7 +344,7 @@ static int
blake3_param_set(const char *val, zfs_kernel_param_t *unused)
{
(void) unused;
- return (blake3_impl_setname(val));
+ return (generic_impl_setname(val));
}
#elif defined(__FreeBSD__)
@@ -314,8 +356,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
{
int err;
+ generic_impl_init();
if (req->newptr == NULL) {
- const uint32_t impl = IMPL_READ(blake3_impl_chosen);
+ const uint32_t impl = IMPL_READ(generic_impl_chosen);
const int init_buflen = 64;
const char *fmt;
struct sbuf *s;
@@ -331,9 +374,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
(void) sbuf_printf(s, fmt, "fastest");
/* list all supported implementations */
- for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
+ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
fmt = IMPL_FMT(impl, i);
- (void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
+ (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
}
err = sbuf_finish(s);
@@ -349,7 +392,7 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
return (err);
}
- return (-blake3_impl_setname(buf));
+ return (-generic_impl_setname(buf));
}
#endif
diff --git a/module/icp/algs/blake3/blake3_impl.h b/module/icp/algs/blake3/blake3_impl.h
index ecb51e3a3..90d508fac 100644
--- a/module/icp/algs/blake3/blake3_impl.h
+++ b/module/icp/algs/blake3/blake3_impl.h
@@ -25,14 +25,13 @@
* Copyright (c) 2021-2022 Tino Reichardt <[email protected]>
*/
-#ifndef BLAKE3_IMPL_H
+#ifndef BLAKE3_IMPL_H
#define BLAKE3_IMPL_H
#ifdef __cplusplus
extern "C" {
#endif
-#include <sys/types.h>
#include <sys/blake3.h>
#include <sys/simd.h>
#include <sys/asm_linkage.h>
@@ -56,7 +55,7 @@ typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
typedef boolean_t (*blake3_is_supported_f)(void);
-typedef struct blake3_impl_ops {
+typedef struct {
blake3_compress_in_place_f compress_in_place;
blake3_compress_xof_f compress_xof;
blake3_hash_many_f hash_many;
@@ -65,30 +64,8 @@ typedef struct blake3_impl_ops {
const char *name;
} blake3_ops_t;
-/* Return selected BLAKE3 implementation ops */
-extern const blake3_ops_t *blake3_impl_get_ops(void);
-
-extern const blake3_ops_t blake3_generic_impl;
-
-#if defined(__aarch64__) || \
- (defined(__x86_64) && defined(HAVE_SSE2)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-extern const blake3_ops_t blake3_sse2_impl;
-#endif
-
-#if defined(__aarch64__) || \
- (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-extern const blake3_ops_t blake3_sse41_impl;
-#endif
-
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern const blake3_ops_t blake3_avx2_impl;
-#endif
-
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern const blake3_ops_t blake3_avx512_impl;
-#endif
+/* return selected BLAKE3 implementation ops */
+extern const blake3_ops_t *blake3_get_ops(void);
#if defined(__x86_64)
#define MAX_SIMD_DEGREE 16
diff --git a/module/icp/algs/blake3/blake3_x86-64.c b/module/icp/algs/blake3/blake3_x86-64.c
deleted file mode 100644
index 04a8b3333..000000000
--- a/module/icp/algs/blake3/blake3_x86-64.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2021-2022 Tino Reichardt <[email protected]>
- */
-
-#include "blake3_impl.h"
-
-#if defined(__aarch64__) || \
- (defined(__x86_64) && defined(HAVE_SSE2)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-
-extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags);
-
-extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags, uint8_t out[64]);
-
-extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_compress_in_place_sse2(uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags) {
- kfpu_begin();
- zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
- flags);
- kfpu_end();
-}
-
-static void blake3_compress_xof_sse2(const uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags, uint8_t out[64]) {
- kfpu_begin();
- zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
- out);
- kfpu_end();
-}
-
-static void blake3_hash_many_sse2(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
- kfpu_begin();
- zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
- increment_counter, flags, flags_start, flags_end, out);
- kfpu_end();
-}
-
-static boolean_t blake3_is_sse2_supported(void)
-{
-#if defined(__x86_64)
- return (kfpu_allowed() && zfs_sse2_available());
-#elif defined(__PPC64__) && defined(__linux__)
- return (kfpu_allowed() && zfs_vsx_available());
-#else
- return (kfpu_allowed());
-#endif
-}
-
-const blake3_ops_t blake3_sse2_impl = {
- .compress_in_place = blake3_compress_in_place_sse2,
- .compress_xof = blake3_compress_xof_sse2,
- .hash_many = blake3_hash_many_sse2,
- .is_supported = blake3_is_sse2_supported,
- .degree = 4,
- .name = "sse2"
-};
-#endif
-
-#if defined(__aarch64__) || \
- (defined(__x86_64) && defined(HAVE_SSE2)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-
-extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags);
-
-extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags, uint8_t out[64]);
-
-extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_compress_in_place_sse41(uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags) {
- kfpu_begin();
- zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
- flags);
- kfpu_end();
-}
-
-static void blake3_compress_xof_sse41(const uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags, uint8_t out[64]) {
- kfpu_begin();
- zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
- out);
- kfpu_end();
-}
-
-static void blake3_hash_many_sse41(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
- kfpu_begin();
- zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
- increment_counter, flags, flags_start, flags_end, out);
- kfpu_end();
-}
-
-static boolean_t blake3_is_sse41_supported(void)
-{
-#if defined(__x86_64)
- return (kfpu_allowed() && zfs_sse4_1_available());
-#elif defined(__PPC64__) && defined(__linux__)
- return (kfpu_allowed() && zfs_vsx_available());
-#else
- return (kfpu_allowed());
-#endif
-}
-
-const blake3_ops_t blake3_sse41_impl = {
- .compress_in_place = blake3_compress_in_place_sse41,
- .compress_xof = blake3_compress_xof_sse41,
- .hash_many = blake3_hash_many_sse41,
- .is_supported = blake3_is_sse41_supported,
- .degree = 4,
- .name = "sse41"
-};
-#endif
-
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_hash_many_avx2(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
- kfpu_begin();
- zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
- increment_counter, flags, flags_start, flags_end, out);
- kfpu_end();
-}
-
-static boolean_t blake3_is_avx2_supported(void)
-{
- return (kfpu_allowed() && zfs_sse4_1_available() &&
- zfs_avx2_available());
-}
-
-const blake3_ops_t blake3_avx2_impl = {
- .compress_in_place = blake3_compress_in_place_sse41,
- .compress_xof = blake3_compress_xof_sse41,
- .hash_many = blake3_hash_many_avx2,
- .is_supported = blake3_is_avx2_supported,
- .degree = 8,
- .name = "avx2"
-};
-#endif
-
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags);
-
-extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags, uint8_t out[64]);
-
-extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_compress_in_place_avx512(uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags) {
- kfpu_begin();
- zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
- flags);
- kfpu_end();
-}
-
-static void blake3_compress_xof_avx512(const uint32_t cv[8],
- const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
- uint64_t counter, uint8_t flags, uint8_t out[64]) {
- kfpu_begin();
- zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
- out);
- kfpu_end();
-}
-
-static void blake3_hash_many_avx512(const uint8_t * const *inputs,
- size_t num_inputs, size_t blocks, const uint32_t key[8],
- uint64_t counter, boolean_t increment_counter, uint8_t flags,
- uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
- kfpu_begin();
- zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
- increment_counter, flags, flags_start, flags_end, out);
- kfpu_end();
-}
-
-static boolean_t blake3_is_avx512_supported(void)
-{
- return (kfpu_allowed() && zfs_avx512f_available() &&
- zfs_avx512vl_available());
-}
-
-const blake3_ops_t blake3_avx512_impl = {
- .compress_in_place = blake3_compress_in_place_avx512,
- .compress_xof = blake3_compress_xof_avx512,
- .hash_many = blake3_hash_many_avx512,
- .is_supported = blake3_is_avx512_supported,
- .degree = 16,
- .name = "avx512"
-};
-#endif