diff options
author | Romain Dolbeau <[email protected]> | 2016-10-21 19:55:49 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-10-21 10:55:49 -0700 |
commit | 24cdeaf12e9e546621902449699fc6d664aeac2b (patch) | |
tree | 12af696407ba15b157d591f44f23de6d5eee2882 /include | |
parent | e4ffa98dcaf2208c742609f0ab2bdb343071446a (diff) |
Fletcher4 algorithm implemented in pure NEON for Aarch64 / ARMv8 64 bits
This is not useful on micro-architectures with a weak NEON
implementation (only 64 bits wide); there the native version is slower
than scalar and the byteswap version is barely faster. On A53 or A57,
it is a small improvement over scalar for native and a decent one for byteswap.
Results from an A53 system:
0 0 0x01 -1 0 1499068294333000 1499101101878000
implementation native byteswap
scalar 1008227510 755880264
aarch64_neon 1198098720 1044818671
fastest aarch64_neon aarch64_neon
Results from an A57 system:
0 0 0x01 -1 0 4407214734807033 4407233933777404
implementation native byteswap
scalar 2302071241 1124873346
aarch64_neon 2542214946 2245570352
fastest aarch64_neon aarch64_neon
Reviewed-by: Gvozden Neskovic <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Romain Dolbeau <[email protected]>
Closes #5248
Diffstat (limited to 'include')
-rw-r--r-- | include/zfs_fletcher.h | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/include/zfs_fletcher.h b/include/zfs_fletcher.h
index 85c2b5a7e..1f113ae2f 100644
--- a/include/zfs_fletcher.h
+++ b/include/zfs_fletcher.h
@@ -77,6 +77,10 @@ typedef struct zfs_fletcher_avx512 {
 	uint64_t v[8] __attribute__((aligned(64)));
 } zfs_fletcher_avx512_t;
 
+typedef struct zfs_fletcher_aarch64_neon {
+	uint64_t v[2] __attribute__((aligned(16)));
+} zfs_fletcher_aarch64_neon_t;
+
 
 typedef union fletcher_4_ctx {
 	zio_cksum_t scalar;
@@ -90,6 +94,9 @@ typedef union fletcher_4_ctx {
 #if defined(__x86_64) && defined(HAVE_AVX512F)
 	zfs_fletcher_avx512_t avx512[4];
 #endif
+#if defined(__aarch64__)
+	zfs_fletcher_aarch64_neon_t aarch64_neon[4];
+#endif
 } fletcher_4_ctx_t;
 
 /*
@@ -128,6 +135,10 @@ extern const fletcher_4_ops_t fletcher_4_avx2_ops;
 extern const fletcher_4_ops_t fletcher_4_avx512f_ops;
 #endif
 
+#if defined(__aarch64__)
+extern const fletcher_4_ops_t fletcher_4_aarch64_neon_ops;
+#endif
+
 #ifdef __cplusplus
 }
 #endif