diff options
author | Tyler J. Stachecki <[email protected]> | 2016-06-23 23:32:40 -0400 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-07-15 10:42:35 -0700 |
commit | 35a76a0366372d89a0f1ac3cebd5bc7646aadec3 (patch) | |
tree | c5a736ca9254afde0f65a0991efd9da216fa112a /module/zcommon/zfs_fletcher.c | |
parent | dfbc86309fd8ebb70a55cafa876320dc1ea8e833 (diff) |
Implementation of SSE optimized Fletcher-4
Builds off of 1eeb4562 (Implementation of AVX2 optimized Fletcher-4)
This commit adds another implementation of the Fletcher-4 algorithm.
It is automatically selected at module load if it benchmarks higher
than all other available implementations.
The module benchmark was also amended to analyze the performance of
the byteswapped version of Fletcher-4, as well as the non-byteswapped
version. The average performance of the two is used to select the
fastest implementation available on the host system.
Adds a pair of accepted values to an existing zcommon module parameter:
- zfs_fletcher_4_impl (str)
"sse2" - new SSE2 implementation if available
"ssse3" - new SSSE3 implementation if available
Signed-off-by: Tyler J. Stachecki <[email protected]>
Signed-off-by: Gvozden Neskovic <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #4789
Diffstat (limited to 'module/zcommon/zfs_fletcher.c')
-rw-r--r-- | module/zcommon/zfs_fletcher.c | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index e76c5b8a5..a3888a32b 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -149,6 +149,12 @@ static const fletcher_4_ops_t fletcher_4_scalar_ops = { static const fletcher_4_ops_t *fletcher_4_algos[] = { &fletcher_4_scalar_ops, +#if defined(HAVE_SSE2) + &fletcher_4_sse2_ops, +#endif +#if defined(HAVE_SSE2) && defined(HAVE_SSSE3) + &fletcher_4_ssse3_ops, +#endif #if defined(HAVE_AVX) && defined(HAVE_AVX2) &fletcher_4_avx2_ops, #endif @@ -157,6 +163,12 @@ static const fletcher_4_ops_t *fletcher_4_algos[] = { static enum fletcher_selector { FLETCHER_FASTEST = 0, FLETCHER_SCALAR, +#if defined(HAVE_SSE2) + FLETCHER_SSE2, +#endif +#if defined(HAVE_SSE2) && defined(HAVE_SSSE3) + FLETCHER_SSSE3, +#endif #if defined(HAVE_AVX) && defined(HAVE_AVX2) FLETCHER_AVX2, #endif @@ -169,6 +181,12 @@ static struct fletcher_4_impl_selector { } fletcher_4_impl_selectors[] = { [ FLETCHER_FASTEST ] = { "fastest", NULL }, [ FLETCHER_SCALAR ] = { "scalar", &fletcher_4_scalar_ops }, +#if defined(HAVE_SSE2) + [ FLETCHER_SSE2 ] = { "sse2", &fletcher_4_sse2_ops }, +#endif +#if defined(HAVE_SSE2) && defined(HAVE_SSSE3) + [ FLETCHER_SSSE3 ] = { "ssse3", &fletcher_4_ssse3_ops }, +#endif #if defined(HAVE_AVX) && defined(HAVE_AVX2) [ FLETCHER_AVX2 ] = { "avx2", &fletcher_4_avx2_ops }, #endif @@ -407,6 +425,7 @@ fletcher_4_init(void) ops->init(&zc); do { ops->compute(databuf, data_size, &zc); + ops->compute_byteswap(databuf, data_size, &zc); run_count++; } while (gethrtime() < start + bench_ns); if (ops->fini != NULL) |