Fletcher4: Incremental using SIMD

Combine incrementally computed fletcher4 checksums. Checksums are combined a posteriori, which allows parallel computation on chunks to be implemented if required. The algorithm is general and does not require changes to the individual SIMD implementations. A new test in ztest verifies incremental fletcher computations.

Checksum combining matrix for two buffers `a` and `b`, where `Ca` and `Cb` are the respective fletcher4 checksums, `Cab` is the combined checksum, and `s` is the size of buffer `b` divided by sizeof(uint32_t):

    Cab[A] = Cb[A] + Ca[A]
    Cab[B] = Cb[B] + Ca[B] + s * Ca[A]
    Cab[C] = Cb[C] + Ca[C] + s * Ca[B] + s(s+1)/2 * Ca[A]
    Cab[D] = Cb[D] + Ca[D] + s * Ca[C] + s(s+1)/2 * Ca[B] + s(s+1)(s+2)/6 * Ca[A]

NOTE: this calculation overflows for larger buffers. Thus, internally, the calculation is performed on 8MiB chunks.

Signed-off-by: Gvozden Neskovic <[email protected]>
author: Gvozden Neskovic <[email protected]> 2016-09-23 03:52:29 +0200
committer: Gvozden Neskovic <[email protected]> 2016-10-05 16:41:46 +0200
commit: 37f520db2d19389deb2a68065391ae2b229c6b50 (patch)
tree: 1a8e035b7190e3c7ae6cb3452b7d6e4ca8bbc080 /module/zcommon
parent: dc03fa3092472c40bf1b6c7d7ea3170e3ffa9e38 (diff)
1 files changed, 58 insertions, 18 deletions
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c
index 8a975ecb3..3ca70db13 100644
--- a/module/zcommon/zfs_fletcher.c
+++ b/module/zcommon/zfs_fletcher.c
@@ -383,24 +383,6 @@ fletcher_4_impl_get(void)
 	return (ops);
 }
 
-void
-fletcher_4_incremental_native(const void *buf, uint64_t size,
-    zio_cksum_t *zcp)
-{
-	ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
-
-	fletcher_4_scalar_native(buf, size, zcp);
-}
-
-void
-fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
-    zio_cksum_t *zcp)
-{
-	ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
-
-	fletcher_4_scalar_byteswap(buf, size, zcp);
-}
-
 static inline void
 fletcher_4_native_impl(const fletcher_4_ops_t *ops, const void *buf,
 	uint64_t size, zio_cksum_t *zcp)
@@ -477,6 +459,64 @@ fletcher_4_byteswap(const void *buf, uint64_t size,
 	}
 }
 
+/* Incremental Fletcher 4 */
+/* Fold nzcp, the checksum of the next `size` bytes, into running *zcp. */
+static inline void
+fletcher_4_incremental_combine(zio_cksum_t *zcp, const uint64_t size,
+    const zio_cksum_t *nzcp)
+{
+	const uint64_t c1 = size / sizeof (uint32_t);	/* s: 32-bit words */
+	const uint64_t c2 = c1 * (c1 + 1) / 2;	/* s(s+1)/2 */
+	const uint64_t c3 = c2 * (c1 + 2) / 3;	/* s(s+1)(s+2)/6 */
+	/* Word 3 first: each line reads lower words before they change. */
+	zcp->zc_word[3] += nzcp->zc_word[3] + c1 * zcp->zc_word[2] +
+	    c2 * zcp->zc_word[1] + c3 * zcp->zc_word[0];
+	zcp->zc_word[2] += nzcp->zc_word[2] + c1 * zcp->zc_word[1] +
+	    c2 * zcp->zc_word[0];
+	zcp->zc_word[1] += nzcp->zc_word[1] + c1 * zcp->zc_word[0];
+	zcp->zc_word[0] += nzcp->zc_word[0];
+}
+
+static inline void
+fletcher_4_incremental_impl(boolean_t native, const void *buf, uint64_t size,
+    zio_cksum_t *zcp)
+{
+	static const uint64_t FLETCHER_4_INC_MAX = 8ULL << 20;	/* 8 MiB */
+	uint64_t len;
+	/* Checksum buf chunk by chunk, folding each result into *zcp. */
+	while (size > 0) {
+		zio_cksum_t nzc;
+		/* Capped: the combine math overflows for larger buffers. */
+		len = MIN(size, FLETCHER_4_INC_MAX);
+
+		if (native)
+			fletcher_4_native(buf, len, NULL, &nzc);
+		else
+			fletcher_4_byteswap(buf, len, NULL, &nzc);
+
+		fletcher_4_incremental_combine(zcp, len, &nzc);
+
+		size -= len;
+		buf += len;	/* void * arithmetic: GNU C extension */
+	}
+}
+
+void
+fletcher_4_incremental_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	fletcher_4_incremental_impl(B_TRUE, buf, size, zcp);	/* native */
+}
+
+void
+fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
+    zio_cksum_t *zcp)
+{
+	fletcher_4_incremental_impl(B_FALSE, buf, size, zcp);	/* byteswap */
+}
+
+
+/* Fletcher 4 kstats */
+
 static int
 fletcher_4_kstat_headers(char *buf, size_t size)
 {
author	Gvozden Neskovic <[email protected]>	2016-09-23 03:52:29 +0200
committer	Gvozden Neskovic <[email protected]>	2016-10-05 16:41:46 +0200
commit	37f520db2d19389deb2a68065391ae2b229c6b50 (patch)
tree	1a8e035b7190e3c7ae6cb3452b7d6e4ca8bbc080 /module/zcommon
parent	dc03fa3092472c40bf1b6c7d7ea3170e3ffa9e38 (diff)