diff options
author | Tino Reichardt <[email protected]> | 2023-02-27 16:14:37 +0100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2023-03-02 13:52:27 -0800 |
commit | f9f9bef22f96eb3092d7466112f62f7ad4567d71 (patch) | |
tree | 0df1e36ff9ee5e3f14be1ba605b129f06a442f03 /module | |
parent | 4c5fec01a48acc184614ab8735e6954961990235 (diff) |
Update BLAKE3 to use the new impl handling
This commit switches the BLAKE3 code over to the new generic
implementation handling and updates the calls to it from the ztest command.
Tested-by: Rich Ercolani <[email protected]>
Tested-by: Sebastian Gottschall <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tino Reichardt <[email protected]>
Closes #13741
Diffstat (limited to 'module')
-rw-r--r-- | module/icp/algs/blake3/blake3.c | 2 | ||||
-rw-r--r-- | module/icp/algs/blake3/blake3_generic.c | 5 | ||||
-rw-r--r-- | module/icp/algs/blake3/blake3_impl.c | 423 | ||||
-rw-r--r-- | module/icp/algs/blake3/blake3_impl.h | 31 | ||||
-rw-r--r-- | module/icp/algs/blake3/blake3_x86-64.c | 248 |
5 files changed, 241 insertions, 468 deletions
diff --git a/module/icp/algs/blake3/blake3.c b/module/icp/algs/blake3/blake3.c index 8e441f454..4f93e4ff2 100644 --- a/module/icp/algs/blake3/blake3.c +++ b/module/icp/algs/blake3/blake3.c @@ -432,7 +432,7 @@ static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8], memcpy(ctx->key, key, BLAKE3_KEY_LEN); chunk_state_init(&ctx->chunk, key, flags); ctx->cv_stack_len = 0; - ctx->ops = blake3_impl_get_ops(); + ctx->ops = blake3_get_ops(); } /* diff --git a/module/icp/algs/blake3/blake3_generic.c b/module/icp/algs/blake3/blake3_generic.c index 94a1f1082..ca7197a26 100644 --- a/module/icp/algs/blake3/blake3_generic.c +++ b/module/icp/algs/blake3/blake3_generic.c @@ -187,7 +187,8 @@ static inline void blake3_hash_many_generic(const uint8_t * const *inputs, } } -static inline boolean_t blake3_is_generic_supported(void) +/* the generic implementation is always okay */ +static boolean_t blake3_is_supported(void) { return (B_TRUE); } @@ -196,7 +197,7 @@ const blake3_ops_t blake3_generic_impl = { .compress_in_place = blake3_compress_in_place_generic, .compress_xof = blake3_compress_xof_generic, .hash_many = blake3_hash_many_generic, - .is_supported = blake3_is_generic_supported, + .is_supported = blake3_is_supported, .degree = 4, .name = "generic" }; diff --git a/module/icp/algs/blake3/blake3_impl.c b/module/icp/algs/blake3/blake3_impl.c index 7bc4db2c9..f68a5edfe 100644 --- a/module/icp/algs/blake3/blake3_impl.c +++ b/module/icp/algs/blake3/blake3_impl.c @@ -24,222 +24,266 @@ */ #include <sys/zfs_context.h> -#include <sys/zio_checksum.h> +#include <sys/zfs_impl.h> +#include <sys/blake3.h> +#include <sys/simd.h> #include "blake3_impl.h" -static const blake3_ops_t *const blake3_impls[] = { - &blake3_generic_impl, #if defined(__aarch64__) || \ (defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) - &blake3_sse2_impl, -#endif -#if defined(__aarch64__) || \ - (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ - 
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) - &blake3_sse41_impl, -#endif -#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) - &blake3_avx2_impl, -#endif -#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) - &blake3_avx512_impl, -#endif -}; - -/* Select BLAKE3 implementation */ -#define IMPL_FASTEST (UINT32_MAX) -#define IMPL_CYCLE (UINT32_MAX - 1) - -#define IMPL_READ(i) (*(volatile uint32_t *) &(i)) - -/* Indicate that benchmark has been done */ -static boolean_t blake3_initialized = B_FALSE; - -/* Implementation that contains the fastest methods */ -static blake3_ops_t blake3_fastest_impl = { - .name = "fastest" -}; -/* Hold all supported implementations */ -static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)]; -static uint32_t blake3_supp_impls_cnt = 0; +extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags); + +extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_compress_in_place_sse2(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) { + kfpu_begin(); + zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter, + flags); + kfpu_end(); +} -/* Currently selected implementation */ -static uint32_t blake3_impl_chosen = IMPL_FASTEST; +static void blake3_compress_xof_sse2(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) { + kfpu_begin(); + 
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags, + out); + kfpu_end(); +} -static struct blake3_impl_selector { - const char *name; - uint32_t sel; -} blake3_impl_selectors[] = { - { "cycle", IMPL_CYCLE }, - { "fastest", IMPL_FASTEST } -}; +static void blake3_hash_many_sse2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); +} -/* check the supported implementations */ -static void blake3_impl_init(void) +static boolean_t blake3_is_sse2_supported(void) { - int i, c; - - /* init only once */ - if (likely(blake3_initialized)) - return; +#if defined(__x86_64) + return (kfpu_allowed() && zfs_sse2_available()); +#elif defined(__PPC64__) + return (kfpu_allowed() && zfs_vsx_available()); +#else + return (kfpu_allowed()); +#endif +} - /* move supported implementations into blake3_supp_impls */ - for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) { - const blake3_ops_t *impl = blake3_impls[i]; +const blake3_ops_t blake3_sse2_impl = { + .compress_in_place = blake3_compress_in_place_sse2, + .compress_xof = blake3_compress_xof_sse2, + .hash_many = blake3_hash_many_sse2, + .is_supported = blake3_is_sse2_supported, + .degree = 4, + .name = "sse2" +}; +#endif - if (impl->is_supported && impl->is_supported()) - blake3_supp_impls[c++] = impl; - } - blake3_supp_impls_cnt = c; +#if defined(__aarch64__) || \ + (defined(__x86_64) && defined(HAVE_SSE2)) || \ + (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) - /* first init generic impl, may be changed via set_fastest() */ - memcpy(&blake3_fastest_impl, blake3_impls[0], - sizeof (blake3_fastest_impl)); - blake3_initialized = B_TRUE; +extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8], + 
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags); + +extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_compress_in_place_sse41(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) { + kfpu_begin(); + zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter, + flags); + kfpu_end(); } -/* get number of supported implementations */ -uint32_t -blake3_impl_getcnt(void) -{ - blake3_impl_init(); - return (blake3_supp_impls_cnt); +static void blake3_compress_xof_sse41(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) { + kfpu_begin(); + zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags, + out); + kfpu_end(); } -/* get id of selected implementation */ -uint32_t -blake3_impl_getid(void) -{ - return (IMPL_READ(blake3_impl_chosen)); +static void blake3_hash_many_sse41(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); } -/* get name of selected implementation */ -const char * -blake3_impl_getname(void) +static boolean_t blake3_is_sse41_supported(void) { - uint32_t impl = IMPL_READ(blake3_impl_chosen); - - blake3_impl_init(); - switch (impl) { - case 
IMPL_FASTEST: - return ("fastest"); - case IMPL_CYCLE: - return ("cycle"); - default: - return (blake3_supp_impls[impl]->name); - } +#if defined(__x86_64) + return (kfpu_allowed() && zfs_sse4_1_available()); +#elif defined(__PPC64__) + return (kfpu_allowed() && zfs_vsx_available()); +#else + return (kfpu_allowed()); +#endif } -/* setup id as fastest implementation */ -void -blake3_impl_set_fastest(uint32_t id) -{ - /* setup fastest impl */ - memcpy(&blake3_fastest_impl, blake3_supp_impls[id], - sizeof (blake3_fastest_impl)); +const blake3_ops_t blake3_sse41_impl = { + .compress_in_place = blake3_compress_in_place_sse41, + .compress_xof = blake3_compress_xof_sse41, + .hash_many = blake3_hash_many_sse41, + .is_supported = blake3_is_sse41_supported, + .degree = 4, + .name = "sse41" +}; +#endif + +#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) +extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_hash_many_avx2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); } -/* set implementation by id */ -void -blake3_impl_setid(uint32_t id) +static boolean_t blake3_is_avx2_supported(void) { - blake3_impl_init(); - switch (id) { - case IMPL_FASTEST: - atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST); - break; - case IMPL_CYCLE: - atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE); - break; - default: - ASSERT3U(id, <, blake3_supp_impls_cnt); - atomic_swap_32(&blake3_impl_chosen, id); - break; - } + return (kfpu_allowed() && 
zfs_sse4_1_available() && + zfs_avx2_available()); } -/* set implementation by name */ -int -blake3_impl_setname(const char *val) -{ - uint32_t impl = IMPL_READ(blake3_impl_chosen); - size_t val_len; - int i, err = -EINVAL; - - blake3_impl_init(); - val_len = strlen(val); - while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */ - val_len--; - - /* check mandatory implementations */ - for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) { - const char *name = blake3_impl_selectors[i].name; - - if (val_len == strlen(name) && - strncmp(val, name, val_len) == 0) { - impl = blake3_impl_selectors[i].sel; - err = 0; - break; - } - } +const blake3_ops_t +blake3_avx2_impl = { + .compress_in_place = blake3_compress_in_place_sse41, + .compress_xof = blake3_compress_xof_sse41, + .hash_many = blake3_hash_many_avx2, + .is_supported = blake3_is_avx2_supported, + .degree = 8, + .name = "avx2" +}; +#endif - if (err != 0 && blake3_initialized) { - /* check all supported implementations */ - for (i = 0; i < blake3_supp_impls_cnt; i++) { - const char *name = blake3_supp_impls[i]->name; - - if (val_len == strlen(name) && - strncmp(val, name, val_len) == 0) { - impl = i; - err = 0; - break; - } - } - } +#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) +extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags); + +extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_compress_in_place_avx512(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t 
block_len, + uint64_t counter, uint8_t flags) { + kfpu_begin(); + zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter, + flags); + kfpu_end(); +} - if (err == 0) { - atomic_swap_32(&blake3_impl_chosen, impl); - } +static void blake3_compress_xof_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) { + kfpu_begin(); + zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags, + out); + kfpu_end(); +} - return (err); +static void blake3_hash_many_avx512(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); } -const blake3_ops_t * -blake3_impl_get_ops(void) +static boolean_t blake3_is_avx512_supported(void) { - const blake3_ops_t *ops = NULL; - uint32_t impl = IMPL_READ(blake3_impl_chosen); - - blake3_impl_init(); - switch (impl) { - case IMPL_FASTEST: - ASSERT(blake3_initialized); - ops = &blake3_fastest_impl; - break; - case IMPL_CYCLE: - /* Cycle through supported implementations */ - ASSERT(blake3_initialized); - ASSERT3U(blake3_supp_impls_cnt, >, 0); - static uint32_t cycle_count = 0; - uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt; - ops = blake3_supp_impls[idx]; - break; - default: - ASSERT3U(blake3_supp_impls_cnt, >, 0); - ASSERT3U(impl, <, blake3_supp_impls_cnt); - ops = blake3_supp_impls[impl]; - break; - } - - ASSERT3P(ops, !=, NULL); - return (ops); + return (kfpu_allowed() && zfs_avx512f_available() && + zfs_avx512vl_available()); } -#if defined(_KERNEL) +const blake3_ops_t blake3_avx512_impl = { + .compress_in_place = blake3_compress_in_place_avx512, + .compress_xof = blake3_compress_xof_avx512, + .hash_many = 
blake3_hash_many_avx512, + .is_supported = blake3_is_avx512_supported, + .degree = 16, + .name = "avx512" +}; +#endif + +extern const blake3_ops_t blake3_generic_impl; + +static const blake3_ops_t *const blake3_impls[] = { + &blake3_generic_impl, +#if defined(__aarch64__) || \ + (defined(__x86_64) && defined(HAVE_SSE2)) || \ + (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + &blake3_sse2_impl, +#endif +#if defined(__aarch64__) || \ + (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ + (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + &blake3_sse41_impl, +#endif +#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) + &blake3_avx2_impl, +#endif +#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) + &blake3_avx512_impl, +#endif +}; +/* use the generic implementation functions */ +#define IMPL_NAME "blake3" +#define IMPL_OPS_T blake3_ops_t +#define IMPL_ARRAY blake3_impls +#define IMPL_GET_OPS blake3_get_ops +#define ZFS_IMPL_OPS zfs_blake3_ops +#include <generic_impl.c> + +#ifdef _KERNEL void **blake3_per_cpu_ctx; void @@ -253,9 +297,6 @@ blake3_per_cpu_ctx_init(void) blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), KM_SLEEP); } - - /* init once in kernel mode */ - blake3_impl_init(); } void @@ -276,7 +317,7 @@ blake3_per_cpu_ctx_fini(void) static int blake3_param_get(char *buffer, zfs_kernel_param_t *unused) { - const uint32_t impl = IMPL_READ(blake3_impl_chosen); + const uint32_t impl = IMPL_READ(generic_impl_chosen); char *fmt; int cnt = 0; @@ -289,10 +330,11 @@ blake3_param_get(char *buffer, zfs_kernel_param_t *unused) cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest"); /* list all supported implementations */ - for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { + generic_impl_init(); + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { fmt = IMPL_FMT(impl, i); cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, - blake3_supp_impls[i]->name); + blake3_impls[i]->name); } return (cnt); 
@@ -302,7 +344,7 @@ static int blake3_param_set(const char *val, zfs_kernel_param_t *unused) { (void) unused; - return (blake3_impl_setname(val)); + return (generic_impl_setname(val)); } #elif defined(__FreeBSD__) @@ -314,8 +356,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS) { int err; + generic_impl_init(); if (req->newptr == NULL) { - const uint32_t impl = IMPL_READ(blake3_impl_chosen); + const uint32_t impl = IMPL_READ(generic_impl_chosen); const int init_buflen = 64; const char *fmt; struct sbuf *s; @@ -331,9 +374,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS) (void) sbuf_printf(s, fmt, "fastest"); /* list all supported implementations */ - for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { fmt = IMPL_FMT(impl, i); - (void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name); + (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); } err = sbuf_finish(s); @@ -349,7 +392,7 @@ blake3_param(ZFS_MODULE_PARAM_ARGS) return (err); } - return (-blake3_impl_setname(buf)); + return (-generic_impl_setname(buf)); } #endif diff --git a/module/icp/algs/blake3/blake3_impl.h b/module/icp/algs/blake3/blake3_impl.h index ecb51e3a3..90d508fac 100644 --- a/module/icp/algs/blake3/blake3_impl.h +++ b/module/icp/algs/blake3/blake3_impl.h @@ -25,14 +25,13 @@ * Copyright (c) 2021-2022 Tino Reichardt <[email protected]> */ -#ifndef BLAKE3_IMPL_H +#ifndef BLAKE3_IMPL_H #define BLAKE3_IMPL_H #ifdef __cplusplus extern "C" { #endif -#include <sys/types.h> #include <sys/blake3.h> #include <sys/simd.h> #include <sys/asm_linkage.h> @@ -56,7 +55,7 @@ typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs, typedef boolean_t (*blake3_is_supported_f)(void); -typedef struct blake3_impl_ops { +typedef struct { blake3_compress_in_place_f compress_in_place; blake3_compress_xof_f compress_xof; blake3_hash_many_f hash_many; @@ -65,30 +64,8 @@ typedef struct blake3_impl_ops { const char *name; } blake3_ops_t; -/* Return selected BLAKE3 
implementation ops */ -extern const blake3_ops_t *blake3_impl_get_ops(void); - -extern const blake3_ops_t blake3_generic_impl; - -#if defined(__aarch64__) || \ - (defined(__x86_64) && defined(HAVE_SSE2)) || \ - (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) -extern const blake3_ops_t blake3_sse2_impl; -#endif - -#if defined(__aarch64__) || \ - (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ - (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) -extern const blake3_ops_t blake3_sse41_impl; -#endif - -#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) -extern const blake3_ops_t blake3_avx2_impl; -#endif - -#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) -extern const blake3_ops_t blake3_avx512_impl; -#endif +/* return selected BLAKE3 implementation ops */ +extern const blake3_ops_t *blake3_get_ops(void); #if defined(__x86_64) #define MAX_SIMD_DEGREE 16 diff --git a/module/icp/algs/blake3/blake3_x86-64.c b/module/icp/algs/blake3/blake3_x86-64.c deleted file mode 100644 index 04a8b3333..000000000 --- a/module/icp/algs/blake3/blake3_x86-64.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2021-2022 Tino Reichardt <[email protected]> - */ - -#include "blake3_impl.h" - -#if defined(__aarch64__) || \ - (defined(__x86_64) && defined(HAVE_SSE2)) || \ - (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) - -extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags); - -extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags, uint8_t out[64]); - -extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out); - -static void blake3_compress_in_place_sse2(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags) { - kfpu_begin(); - zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter, - flags); - kfpu_end(); -} - -static void blake3_compress_xof_sse2(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags, uint8_t out[64]) { - kfpu_begin(); - zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags, - out); - kfpu_end(); -} - -static void blake3_hash_many_sse2(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { - kfpu_begin(); - zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, - increment_counter, 
flags, flags_start, flags_end, out); - kfpu_end(); -} - -static boolean_t blake3_is_sse2_supported(void) -{ -#if defined(__x86_64) - return (kfpu_allowed() && zfs_sse2_available()); -#elif defined(__PPC64__) && defined(__linux__) - return (kfpu_allowed() && zfs_vsx_available()); -#else - return (kfpu_allowed()); -#endif -} - -const blake3_ops_t blake3_sse2_impl = { - .compress_in_place = blake3_compress_in_place_sse2, - .compress_xof = blake3_compress_xof_sse2, - .hash_many = blake3_hash_many_sse2, - .is_supported = blake3_is_sse2_supported, - .degree = 4, - .name = "sse2" -}; -#endif - -#if defined(__aarch64__) || \ - (defined(__x86_64) && defined(HAVE_SSE2)) || \ - (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) - -extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags); - -extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags, uint8_t out[64]); - -extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out); - -static void blake3_compress_in_place_sse41(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags) { - kfpu_begin(); - zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter, - flags); - kfpu_end(); -} - -static void blake3_compress_xof_sse41(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags, uint8_t out[64]) { - kfpu_begin(); - zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags, - out); - kfpu_end(); -} - -static void blake3_hash_many_sse41(const uint8_t * const *inputs, - size_t num_inputs, size_t 
blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { - kfpu_begin(); - zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, out); - kfpu_end(); -} - -static boolean_t blake3_is_sse41_supported(void) -{ -#if defined(__x86_64) - return (kfpu_allowed() && zfs_sse4_1_available()); -#elif defined(__PPC64__) && defined(__linux__) - return (kfpu_allowed() && zfs_vsx_available()); -#else - return (kfpu_allowed()); -#endif -} - -const blake3_ops_t blake3_sse41_impl = { - .compress_in_place = blake3_compress_in_place_sse41, - .compress_xof = blake3_compress_xof_sse41, - .hash_many = blake3_hash_many_sse41, - .is_supported = blake3_is_sse41_supported, - .degree = 4, - .name = "sse41" -}; -#endif - -#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) -extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out); - -static void blake3_hash_many_avx2(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { - kfpu_begin(); - zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, out); - kfpu_end(); -} - -static boolean_t blake3_is_avx2_supported(void) -{ - return (kfpu_allowed() && zfs_sse4_1_available() && - zfs_avx2_available()); -} - -const blake3_ops_t blake3_avx2_impl = { - .compress_in_place = blake3_compress_in_place_sse41, - .compress_xof = blake3_compress_xof_sse41, - .hash_many = blake3_hash_many_avx2, - .is_supported = blake3_is_avx2_supported, - .degree = 8, - .name = "avx2" -}; 
-#endif - -#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) -extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags); - -extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags, uint8_t out[64]); - -extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out); - -static void blake3_compress_in_place_avx512(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags) { - kfpu_begin(); - zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter, - flags); - kfpu_end(); -} - -static void blake3_compress_xof_avx512(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, - uint64_t counter, uint8_t flags, uint8_t out[64]) { - kfpu_begin(); - zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags, - out); - kfpu_end(); -} - -static void blake3_hash_many_avx512(const uint8_t * const *inputs, - size_t num_inputs, size_t blocks, const uint32_t key[8], - uint64_t counter, boolean_t increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { - kfpu_begin(); - zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, out); - kfpu_end(); -} - -static boolean_t blake3_is_avx512_supported(void) -{ - return (kfpu_allowed() && zfs_avx512f_available() && - zfs_avx512vl_available()); -} - -const blake3_ops_t blake3_avx512_impl = { - .compress_in_place = blake3_compress_in_place_avx512, - .compress_xof = blake3_compress_xof_avx512, - .hash_many = 
blake3_hash_many_avx512, - .is_supported = blake3_is_avx512_supported, - .degree = 16, - .name = "avx512" -}; -#endif |