diff options
author | Gvozden Neskovic <[email protected]> | 2016-07-06 13:42:04 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-08-16 14:11:14 -0700 |
commit | 70b258fc962fd40673b9a47574cb83d8438e7d94 (patch) | |
tree | 6e45c08b144622dc78f1106681ce5566c77b588d /module/zcommon/zfs_fletcher.c | |
parent | 32ffaa3de58981814342fe6d3556c03d41d121f8 (diff) |
Fletcher4 implementation using avx512f instruction set
The algorithm runs 8 parallel sums, consuming 8x uint32_t elements per
loop iteration. The size alignment of the main fletcher4 methods is adjusted
accordingly. The new implementation is called 'avx512f'.
Note: the byteswap method can be implemented more efficiently once avx512bw hardware
becomes available. Currently, the byteswap method is ~2x slower than the native method.
The table shows the results of the full (native) fletcher4 calculation for different buffer sizes:
fletcher4 4KB 16KB 64KB 128KB 256KB 1MB 16MB
--------------------------------------------------------------------
[scalar] 1213 1228 1231 1231 1225 1200 1160
[sse2] 2374 2442 2459 2456 2462 2250 2220
[avx2] 4288 4753 4871 4893 4900 4050 3882
[avx512f] 5975 8445 9196 9221 9262 6307 5620
Signed-off-by: Gvozden Neskovic <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #4952
Diffstat (limited to 'module/zcommon/zfs_fletcher.c')
-rw-r--r-- | module/zcommon/zfs_fletcher.c | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index f3eae6791..e8ba93433 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -158,6 +158,9 @@ static const fletcher_4_ops_t *fletcher_4_algos[] = { #if defined(HAVE_AVX) && defined(HAVE_AVX2) &fletcher_4_avx2_ops, #endif +#if defined(__x86_64) && defined(HAVE_AVX512F) + &fletcher_4_avx512f_ops, +#endif }; static enum fletcher_selector { @@ -172,6 +175,9 @@ static enum fletcher_selector { #if defined(HAVE_AVX) && defined(HAVE_AVX2) FLETCHER_AVX2, #endif +#if defined(__x86_64) && defined(HAVE_AVX512F) + FLETCHER_AVX512F, +#endif FLETCHER_CYCLE } fletcher_4_impl_chosen = FLETCHER_SCALAR; @@ -190,6 +196,9 @@ static struct fletcher_4_impl_selector { #if defined(HAVE_AVX) && defined(HAVE_AVX2) [ FLETCHER_AVX2 ] = { "avx2", &fletcher_4_avx2_ops }, #endif +#if defined(__x86_64) && defined(HAVE_AVX512F) + [ FLETCHER_AVX512F ] = { "avx512f", &fletcher_4_avx512f_ops }, +#endif #if !defined(_KERNEL) [ FLETCHER_CYCLE ] = { "cycle", &fletcher_4_scalar_ops } #endif @@ -354,7 +363,7 @@ fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) { const fletcher_4_ops_t *ops; - if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t))) + if (IS_P2ALIGNED(size, 8 * sizeof (uint32_t))) ops = fletcher_4_impl_get(); else ops = &fletcher_4_scalar_ops; @@ -370,7 +379,7 @@ fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) { const fletcher_4_ops_t *ops; - if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t))) + if (IS_P2ALIGNED(size, 8 * sizeof (uint32_t))) ops = fletcher_4_impl_get(); else ops = &fletcher_4_scalar_ops; |