summaryrefslogtreecommitdiffstats
path: root/module/zcommon/zfs_fletcher.c
diff options
context:
space:
mode:
authorTyler J. Stachecki <[email protected]>2016-06-23 23:32:40 -0400
committerBrian Behlendorf <[email protected]>2016-07-15 10:42:35 -0700
commit35a76a0366372d89a0f1ac3cebd5bc7646aadec3 (patch)
treec5a736ca9254afde0f65a0991efd9da216fa112a /module/zcommon/zfs_fletcher.c
parentdfbc86309fd8ebb70a55cafa876320dc1ea8e833 (diff)
Implementation of SSE optimized Fletcher-4
Builds off of 1eeb4562 (Implementation of AVX2 optimized Fletcher-4). This commit adds another implementation of the Fletcher-4 algorithm. It is automatically selected at module load if it benchmarks higher than all other available implementations. The module benchmark was also amended to analyze the performance of the byteswapped version of Fletcher-4, as well as the non-byteswapped version. The average performance of the two is used to select the fastest implementation available on the host system. Adds a pair of fields to an existing zcommon module parameter: - zfs_fletcher_4_impl (str) "sse2" - new SSE2 implementation if available "ssse3" - new SSSE3 implementation if available Signed-off-by: Tyler J. Stachecki <[email protected]> Signed-off-by: Gvozden Neskovic <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #4789
Diffstat (limited to 'module/zcommon/zfs_fletcher.c')
-rw-r--r--module/zcommon/zfs_fletcher.c19
1 files changed, 19 insertions, 0 deletions
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c
index e76c5b8a5..a3888a32b 100644
--- a/module/zcommon/zfs_fletcher.c
+++ b/module/zcommon/zfs_fletcher.c
@@ -149,6 +149,12 @@ static const fletcher_4_ops_t fletcher_4_scalar_ops = {
static const fletcher_4_ops_t *fletcher_4_algos[] = {
&fletcher_4_scalar_ops,
+#if defined(HAVE_SSE2)
+ &fletcher_4_sse2_ops,
+#endif
+#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
+ &fletcher_4_ssse3_ops,
+#endif
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
&fletcher_4_avx2_ops,
#endif
@@ -157,6 +163,12 @@ static const fletcher_4_ops_t *fletcher_4_algos[] = {
static enum fletcher_selector {
FLETCHER_FASTEST = 0,
FLETCHER_SCALAR,
+#if defined(HAVE_SSE2)
+ FLETCHER_SSE2,
+#endif
+#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
+ FLETCHER_SSSE3,
+#endif
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
FLETCHER_AVX2,
#endif
@@ -169,6 +181,12 @@ static struct fletcher_4_impl_selector {
} fletcher_4_impl_selectors[] = {
[ FLETCHER_FASTEST ] = { "fastest", NULL },
[ FLETCHER_SCALAR ] = { "scalar", &fletcher_4_scalar_ops },
+#if defined(HAVE_SSE2)
+ [ FLETCHER_SSE2 ] = { "sse2", &fletcher_4_sse2_ops },
+#endif
+#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
+ [ FLETCHER_SSSE3 ] = { "ssse3", &fletcher_4_ssse3_ops },
+#endif
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
[ FLETCHER_AVX2 ] = { "avx2", &fletcher_4_avx2_ops },
#endif
@@ -407,6 +425,7 @@ fletcher_4_init(void)
ops->init(&zc);
do {
ops->compute(databuf, data_size, &zc);
+ ops->compute_byteswap(databuf, data_size, &zc);
run_count++;
} while (gethrtime() < start + bench_ns);
if (ops->fini != NULL)