summaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorGvozden Neskovic <[email protected]>2016-08-24 15:51:33 +0200
committerBrian Behlendorf <[email protected]>2016-11-29 14:34:33 -0800
commitcbf484f8ad26b84a17c5308af47d2c202e1dc9e9 (patch)
treeb5739d61fe437b5f024eddaa061980b693a06088 /module
parenta206522c4fd31f03f14ba174d6159b72acfae0a9 (diff)
ABD Vectorized raidz
Enable vectorized raidz code on ABD buffers. The avx512f, avx512bw, aarch64_neon and aarch64_neonx2 implementations are disabled in this commit. With the exception of avx512bw, these implementations are updated for ABD in subsequent commits. Signed-off-by: Gvozden Neskovic <[email protected]>
Diffstat (limited to 'module')
-rw-r--r--module/zfs/vdev_raidz_math.c39
-rw-r--r--module/zfs/vdev_raidz_math_aarch64_neon.c5
-rw-r--r--module/zfs/vdev_raidz_math_aarch64_neonx2.c2
-rw-r--r--module/zfs/vdev_raidz_math_avx2.c84
-rw-r--r--module/zfs/vdev_raidz_math_avx512bw.c18
-rw-r--r--module/zfs/vdev_raidz_math_avx512f.c17
-rw-r--r--module/zfs/vdev_raidz_math_impl.h1849
-rw-r--r--module/zfs/vdev_raidz_math_scalar.c152
-rw-r--r--module/zfs/vdev_raidz_math_sse2.c92
-rw-r--r--module/zfs/vdev_raidz_math_ssse3.c84
10 files changed, 1363 insertions, 979 deletions
diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c
index 1e4bf8413..93d7964d2 100644
--- a/module/zfs/vdev_raidz_math.c
+++ b/module/zfs/vdev_raidz_math.c
@@ -44,16 +44,6 @@ static raidz_impl_ops_t vdev_raidz_fastest_impl = {
.name = "fastest"
};
-/* ABD BRINGUP -- not ready yet */
-#if 1
-#ifdef HAVE_SSSE3
-#undef HAVE_SSSE3
-#endif
-#ifdef HAVE_AVX2
-#undef HAVE_AVX2
-#endif
-#endif
-
/* All compiled in implementations */
const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_original_impl,
@@ -68,14 +58,14 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
&vdev_raidz_avx2_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */
- &vdev_raidz_avx512f_impl,
+ // &vdev_raidz_avx512f_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */
- &vdev_raidz_avx512bw_impl,
+ // &vdev_raidz_avx512bw_impl,
#endif
#if defined(__aarch64__)
- &vdev_raidz_aarch64_neon_impl,
- &vdev_raidz_aarch64_neonx2_impl,
+ // &vdev_raidz_aarch64_neon_impl,
+ // &vdev_raidz_aarch64_neonx2_impl,
#endif
};
@@ -159,8 +149,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
{
raidz_gen_f gen_parity = NULL;
-/* ABD Bringup -- vector code not ready */
-#if 0
switch (raidz_parity(rm)) {
case 1:
gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
@@ -177,7 +165,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
raidz_parity(rm));
break;
}
-#endif
/* if method is NULL execute the original implementation */
if (gen_parity == NULL)
@@ -188,8 +175,6 @@ vdev_raidz_math_generate(raidz_map_t *rm)
return (0);
}
-/* ABD Bringup -- vector code not ready */
-#if 0
static raidz_rec_f
reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
const int nbaddata)
@@ -244,7 +229,6 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
}
return ((raidz_rec_f) NULL);
}
-#endif
/*
* Select data reconstruction method for raidz_map
@@ -256,31 +240,28 @@ int
vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
const int *dt, const int nbaddata)
{
- raidz_rec_f rec_data = NULL;
+ raidz_rec_f rec_fn = NULL;
-/* ABD Bringup -- vector code not ready */
-#if 0
switch (raidz_parity(rm)) {
case PARITY_P:
- rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
+ rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
break;
case PARITY_PQ:
- rec_data = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
+ rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
break;
case PARITY_PQR:
- rec_data = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
+ rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
break;
default:
cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
raidz_parity(rm));
break;
}
-#endif
- if (rec_data == NULL)
+ if (rec_fn == NULL)
return (RAIDZ_ORIGINAL_IMPL);
else
- return (rec_data(rm, dt));
+ return (rec_fn(rm, dt));
}
const char *raidz_gen_name[] = {
diff --git a/module/zfs/vdev_raidz_math_aarch64_neon.c b/module/zfs/vdev_raidz_math_aarch64_neon.c
index f6a433f10..7ba30ba5e 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neon.c
+++ b/module/zfs/vdev_raidz_math_aarch64_neon.c
@@ -23,8 +23,9 @@
*/
#include <sys/isa_defs.h>
+#include <sys/types.h>
-#if defined(__aarch64__)
+#if 0 // defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
@@ -153,7 +154,7 @@ const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
#endif /* defined(__aarch64__) */
-#if defined(__aarch64__)
+#if 0 // defined(__aarch64__)
const uint8_t
__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {
diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
index d8d1f1bce..e05deeb98 100644
--- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c
+++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c
@@ -24,7 +24,7 @@
#include <sys/isa_defs.h>
-#if defined(__aarch64__)
+#if 0 // defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
diff --git a/module/zfs/vdev_raidz_math_avx2.c b/module/zfs/vdev_raidz_math_avx2.c
index 508c95f8d..25ba9fabd 100644
--- a/module/zfs/vdev_raidz_math_avx2.c
+++ b/module/zfs/vdev_raidz_math_avx2.c
@@ -334,59 +334,86 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
kfpu_end(); \
}
-#define GEN_P_DEFINE() {}
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1, 2, 3
+
#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3
-#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3
-#define GEN_PQ_P 4, 5, 6, 7
-#define GEN_PQ_Q 8, 9, 10, 11
+#define GEN_PQ_C 4, 5, 6, 7
+#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
-#define GEN_PQR_STRIDE 2
-#define GEN_PQR_D 0, 1
-#define GEN_PQR_P 2, 3
-#define GEN_PQR_Q 4, 5
-#define GEN_PQR_R 6, 7
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
-#define REC_P_DEFINE() {}
-#define REC_P_STRIDE 4
-#define REC_P_X 0, 1, 2, 3
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
-#define REC_Q_DEFINE() {}
-#define REC_Q_STRIDE 4
-#define REC_Q_X 0, 1, 2, 3
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
-#define REC_R_DEFINE() {}
-#define REC_R_STRIDE 4
-#define REC_R_X 0, 1, 2, 3
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
-#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
-#define REC_PQ_D 4, 5
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
-#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
-#define REC_PR_D 4, 5
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
-#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
-#define REC_QR_D 4, 5
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
-#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2
+#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
-#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
@@ -400,12 +427,7 @@ DEFINE_REC_METHODS(avx2);
static boolean_t
raidz_will_avx2_work(void)
{
-/* ABD Bringup -- vector code not ready */
-#if 1
- return (B_FALSE);
-#else
return (zfs_avx_available() && zfs_avx2_available());
-#endif
}
const raidz_impl_ops_t vdev_raidz_avx2_impl = {
diff --git a/module/zfs/vdev_raidz_math_avx512bw.c b/module/zfs/vdev_raidz_math_avx512bw.c
index bcbe657d0..465d1e569 100644
--- a/module/zfs/vdev_raidz_math_avx512bw.c
+++ b/module/zfs/vdev_raidz_math_avx512bw.c
@@ -24,7 +24,7 @@
#include <sys/isa_defs.h>
-#if defined(__x86_64) && defined(HAVE_AVX512BW)
+#if 0 // defined(__x86_64) && defined(HAVE_AVX512BW)
#include <sys/types.h>
#include <linux/simd_x86.h>
@@ -345,6 +345,22 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
kfpu_end(); \
}
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1, 2, 3
+
#define GEN_P_DEFINE() {}
#define GEN_P_STRIDE 4
#define GEN_P_P 0, 1, 2, 3
diff --git a/module/zfs/vdev_raidz_math_avx512f.c b/module/zfs/vdev_raidz_math_avx512f.c
index cc3868bce..c2ccd875e 100644
--- a/module/zfs/vdev_raidz_math_avx512f.c
+++ b/module/zfs/vdev_raidz_math_avx512f.c
@@ -24,7 +24,7 @@
#include <sys/isa_defs.h>
-#if defined(__x86_64) && defined(HAVE_AVX512F)
+#if 0 // defined(__x86_64) && defined(HAVE_AVX512F)
#include <sys/types.h>
#include <linux/simd_x86.h>
@@ -437,6 +437,21 @@ typedef struct v {
kfpu_end(); \
}
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 20, 21, 22, 23
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 20, 21, 22, 23
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 20, 21, 22, 23
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 20, 21, 22, 23
/*
* This use zmm16-zmm31 registers to free up zmm0-zmm15
* to use with the AVX2 pshufb, see above
diff --git a/module/zfs/vdev_raidz_math_impl.h b/module/zfs/vdev_raidz_math_impl.h
index 53800fd72..a8e4a0740 100644
--- a/module/zfs/vdev_raidz_math_impl.h
+++ b/module/zfs/vdev_raidz_math_impl.h
@@ -32,257 +32,14 @@
#define noinline __attribute__((noinline))
#endif
-/* Calculate data offset in raidz column, offset is in bytes */
-/* ADB BRINGUP -- needs to be refactored for ABD */
-#define COL_OFF(col, off) ((v_t *)(((char *)(col)->rc_abd) + (off)))
-
-/*
- * PARITY CALCULATION
- * An optimized function is called for a full length of data columns
- * If RAIDZ map contains remainder columns (shorter columns) the same function
- * is called for reminder of full columns.
- *
- * GEN_[P|PQ|PQR]_BLOCK() functions are designed to be efficiently in-lined by
- * the compiler. This removes a lot of conditionals from the inside loop which
- * makes the code faster, especially for vectorized code.
- * They are also highly parametrized, allowing for each implementation to define
- * most optimal stride, and register allocation.
- */
-
-static raidz_inline void
-GEN_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int ncols)
-{
- int c;
- size_t ioff;
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t *col;
-
- GEN_P_DEFINE();
-
- for (ioff = off; ioff < end; ioff += (GEN_P_STRIDE * sizeof (v_t))) {
- LOAD(COL_OFF(&(rm->rm_col[1]), ioff), GEN_P_P);
-
- for (c = 2; c < ncols; c++) {
- col = &rm->rm_col[c];
- XOR_ACC(COL_OFF(col, ioff), GEN_P_P);
- }
-
- STORE(COL_OFF(pcol, ioff), GEN_P_P);
- }
-}
-
-/*
- * Generate P parity (RAIDZ1)
- *
- * @rm RAIDZ map
- */
-static raidz_inline void
-raidz_generate_p_impl(raidz_map_t * const rm)
-{
- const int ncols = raidz_ncols(rm);
- const size_t psize = raidz_big_size(rm);
- const size_t short_size = raidz_short_size(rm);
-
- panic("not ABD ready");
-
- raidz_math_begin();
-
- /* short_size */
- GEN_P_BLOCK(rm, 0, short_size, ncols);
-
- /* fullcols */
- GEN_P_BLOCK(rm, short_size, psize, raidz_nbigcols(rm));
-
- raidz_math_end();
-}
-
-static raidz_inline void
-GEN_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int ncols, const int nbigcols)
-{
- int c;
- size_t ioff;
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
- raidz_col_t *col;
-
- GEN_PQ_DEFINE();
-
- MUL2_SETUP();
-
- for (ioff = off; ioff < end; ioff += (GEN_PQ_STRIDE * sizeof (v_t))) {
- LOAD(COL_OFF(&rm->rm_col[2], ioff), GEN_PQ_P);
- COPY(GEN_PQ_P, GEN_PQ_Q);
-
- for (c = 3; c < nbigcols; c++) {
- col = &rm->rm_col[c];
- LOAD(COL_OFF(col, ioff), GEN_PQ_D);
- MUL2(GEN_PQ_Q);
- XOR(GEN_PQ_D, GEN_PQ_P);
- XOR(GEN_PQ_D, GEN_PQ_Q);
- }
-
- STORE(COL_OFF(pcol, ioff), GEN_PQ_P);
-
- for (; c < ncols; c++)
- MUL2(GEN_PQ_Q);
-
- STORE(COL_OFF(qcol, ioff), GEN_PQ_Q);
- }
-}
-
-/*
- * Generate PQ parity (RAIDZ2)
- *
- * @rm RAIDZ map
- */
-static raidz_inline void
-raidz_generate_pq_impl(raidz_map_t * const rm)
-{
- const int ncols = raidz_ncols(rm);
- const size_t psize = raidz_big_size(rm);
- const size_t short_size = raidz_short_size(rm);
-
- panic("not ABD ready");
-
- raidz_math_begin();
-
- /* short_size */
- GEN_PQ_BLOCK(rm, 0, short_size, ncols, ncols);
-
- /* fullcols */
- GEN_PQ_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm));
-
- raidz_math_end();
-}
-
-
-static raidz_inline void
-GEN_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int ncols, const int nbigcols)
-{
- int c;
- size_t ioff;
- raidz_col_t *col;
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
- raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
-
- GEN_PQR_DEFINE();
-
- MUL2_SETUP();
-
- for (ioff = off; ioff < end; ioff += (GEN_PQR_STRIDE * sizeof (v_t))) {
- LOAD(COL_OFF(&rm->rm_col[3], ioff), GEN_PQR_P);
- COPY(GEN_PQR_P, GEN_PQR_Q);
- COPY(GEN_PQR_P, GEN_PQR_R);
-
- for (c = 4; c < nbigcols; c++) {
- col = &rm->rm_col[c];
- LOAD(COL_OFF(col, ioff), GEN_PQR_D);
- MUL2(GEN_PQR_Q);
- MUL4(GEN_PQR_R);
- XOR(GEN_PQR_D, GEN_PQR_P);
- XOR(GEN_PQR_D, GEN_PQR_Q);
- XOR(GEN_PQR_D, GEN_PQR_R);
- }
-
- STORE(COL_OFF(pcol, ioff), GEN_PQR_P);
-
- for (; c < ncols; c++) {
- MUL2(GEN_PQR_Q);
- MUL4(GEN_PQR_R);
- }
-
- STORE(COL_OFF(qcol, ioff), GEN_PQR_Q);
- STORE(COL_OFF(rcol, ioff), GEN_PQR_R);
- }
-}
-
-
-/*
- * Generate PQR parity (RAIDZ3)
- *
- * @rm RAIDZ map
- */
-static raidz_inline void
-raidz_generate_pqr_impl(raidz_map_t * const rm)
-{
- const int ncols = raidz_ncols(rm);
- const size_t psize = raidz_big_size(rm);
- const size_t short_size = raidz_short_size(rm);
-
- panic("not ABD ready");
-
- raidz_math_begin();
-
- /* short_size */
- GEN_PQR_BLOCK(rm, 0, short_size, ncols, ncols);
-
- /* fullcols */
- GEN_PQR_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm));
-
- raidz_math_end();
-}
-
-/*
- * DATA RECONSTRUCTION
- *
- * Data reconstruction process consists of two phases:
- * - Syndrome calculation
- * - Data reconstruction
- *
- * Syndrome is calculated by generating parity using available data columns
- * and zeros in places of erasure. Existing parity is added to corresponding
- * syndrome value to obtain the [P|Q|R]syn values from equation:
- * P = Psyn + Dx + Dy + Dz
- * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
- * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
- *
- * For data reconstruction phase, the corresponding equations are solved
- * for missing data (Dx, Dy, Dz). This generally involves multiplying known
- * symbols by an coefficient and adding them together. The multiplication
- * constant coefficients are calculated ahead of the operation in
- * raidz_rec_[q|r|pq|pq|qr|pqr]_coeff() functions.
- *
- * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
- * and "short" columns.
- * For this reason, reconstruction is performed in minimum of
- * two steps. First, from offset 0 to short_size, then from short_size to
- * short_size. Calculation functions REC_[*]_BLOCK() are implemented to work
- * over both ranges. The split also enables removal of conditional expressions
- * from loop bodies, improving throughput of SIMD implementations.
- * For the best performance, all functions marked with raidz_inline attribute
- * must be inlined by compiler.
- *
- * parity data
- * columns columns
- * <----------> <------------------>
- * x y <----+ missing columns (x, y)
- * | |
- * +---+---+---+---+-v-+---+-v-+---+ ^ 0
- * | | | | | | | | | |
- * | | | | | | | | | |
- * | P | Q | R | D | D | D | D | D | |
- * | | | | 0 | 1 | 2 | 3 | 4 | |
- * | | | | | | | | | v
- * | | | | | +---+---+---+ ^ short_size
- * | | | | | | |
- * +---+---+---+---+---+ v big_size
- * <------------------> <---------->
- * big columns short columns
- *
- */
-
/*
* Functions calculate multiplication constants for data reconstruction.
* Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
* used parity columns for reconstruction.
* @rm RAIDZ map
* @tgtidx array of missing data indexes
- * @coeff output array of coefficients. Array must be user
- * provided and must hold minimum MUL_CNT values
+ * @coeff output array of coefficients. Array must be provided by
+ * user and must hold minimum MUL_CNT values.
*/
static noinline void
raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
@@ -390,240 +147,602 @@ raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
coeff[MUL_PQR_YQ] = yd;
}
+/*
+ * Method for zeroing a buffer (can be implemented using SIMD).
+ * This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @dsize Destination buffer size
+ * @private Unused
+ */
+static int
+raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
+{
+ v_t *dst = (v_t *) dc;
+ size_t i;
+
+ ZERO_DEFINE();
+
+ (void) private; /* unused */
+
+ ZERO(ZERO_D);
+
+ for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
+ STORE(dst + i, ZERO_D);
+ STORE(dst + i + ZERO_STRIDE, ZERO_D);
+ }
+
+ return (0);
+}
+
+#define raidz_zero(dabd, size) \
+{ \
+ abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL); \
+}
/*
- * Reconstruction using P parity
- * @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @ncols number of column
+ * Method for copying two buffers (can be implemented using SIMD).
+ * This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @size Size of the buffers in bytes; one size is passed for
+ * both destination and source
+ * @private Unused
*/
-static raidz_inline void
-REC_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const int ncols)
+static int
+raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
{
- int c;
- size_t ioff;
- const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t *col;
+ v_t *dst = (v_t *) dc;
+ const v_t *src = (v_t *) sc;
+ size_t i;
- REC_P_DEFINE();
+ COPY_DEFINE();
- for (ioff = off; ioff < end; ioff += (REC_P_STRIDE * sizeof (v_t))) {
- LOAD(COL_OFF(pcol, ioff), REC_P_X);
+ (void) private; /* unused */
- for (c = firstdc; c < x; c++) {
- col = &rm->rm_col[c];
- XOR_ACC(COL_OFF(col, ioff), REC_P_X);
- }
+ for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
+ LOAD(src + i, COPY_D);
+ STORE(dst + i, COPY_D);
- for (c++; c < ncols; c++) {
- col = &rm->rm_col[c];
- XOR_ACC(COL_OFF(col, ioff), REC_P_X);
- }
+ LOAD(src + i + COPY_STRIDE, COPY_D);
+ STORE(dst + i + COPY_STRIDE, COPY_D);
+ }
- STORE(COL_OFF(xcol, ioff), REC_P_X);
+ return (0);
+}
+
+
+#define raidz_copy(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
+}
+
+/*
+ * Method for adding (XORing) two buffers.
+ * Source and destination are XORed together and result is stored in
+ * destination buffer. This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @size Size of the buffers in bytes; one size is passed for
+ * both destination and source
+ * @private Unused
+ */
+static int
+raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *) dc;
+ const v_t *src = (v_t *) sc;
+ size_t i;
+
+ ADD_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
+ LOAD(dst + i, ADD_D);
+ XOR_ACC(src + i, ADD_D);
+ STORE(dst + i, ADD_D);
+
+ LOAD(dst + i + ADD_STRIDE, ADD_D);
+ XOR_ACC(src + i + ADD_STRIDE, ADD_D);
+ STORE(dst + i + ADD_STRIDE, ADD_D);
}
+
+ return (0);
+}
+
+#define raidz_add(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
}
/*
- * Reconstruct single data column using P parity
- * @rec_method REC_P_BLOCK()
+ * Method for multiplying a buffer with a constant in GF(2^8).
+ * Symbols from buffer are multiplied by a constant and result is stored
+ * back in the same buffer.
*
- * @rm RAIDZ map
- * @tgtidx array of missing data indexes
+ * @dc In/Out data buffer.
+ * @size Size of the buffer
+ * @private pointer to the multiplication constant (unsigned)
*/
-static raidz_inline int
-raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
+static int
+raidz_mul_abd(void *dc, size_t size, void *private)
+{
+ const unsigned mul = *((unsigned *) private);
+ v_t *d = (v_t *) dc;
+ size_t i;
+
+ MUL_DEFINE();
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
+ LOAD(d + i, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i, MUL_D);
+
+ LOAD(d + i + MUL_STRIDE, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i + MUL_STRIDE, MUL_D);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Syndrome generation/update macros
+ *
+ * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
+ */
+#define P_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ STORE((t), T); \
+}
+
+#define R_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define R_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ STORE((t), T); \
+}
+
+
+/*
+ * PARITY CALCULATION
+ *
+ * Macros *_SYNDROME are used for parity/syndrome calculation.
+ * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
+ * length of data column, and *_SYNDROME() macros are only for updating
+ * the parity/syndrome if data column is shorter.
+ *
+ * P parity is calculated using raidz_add_abd_cb().
+ */
+
+/*
+ * Generate P parity (RAIDZ1)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_p_impl(raidz_map_t * const rm)
{
- const int x = tgtidx[TARGET_X];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t short_size = raidz_short_size(rm);
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t psize = rm->rm_col[CODE_P].rc_size;
+ abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
+ size_t size;
+ abd_t *dabd;
raidz_math_begin();
- /* 0 - short_size */
- REC_P_BLOCK(rm, 0, short_size, x, ncols);
+ /* start with first data column */
+ raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
- /* short_size - xsize */
- REC_P_BLOCK(rm, short_size, xsize, x, nbigcols);
+ for (c = 2; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ size = rm->rm_col[c].rc_size;
- raidz_math_end();
+ /* add data column */
+ raidz_add(pabd, dabd, size);
+ }
- return (1 << CODE_P);
+ raidz_math_end();
}
+
/*
- * Reconstruct using Q parity
+ * Generate PQ parity (RAIDZ2)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
*/
+static void
+raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *) c[0];
+ v_t *q = (v_t *) c[1];
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQ_DEFINE();
-#define REC_Q_SYN_UPDATE() MUL2(REC_Q_X)
+ MUL2_SETUP();
-#define REC_Q_INNER_LOOP(c) \
-{ \
- col = &rm->rm_col[c]; \
- REC_Q_SYN_UPDATE(); \
- XOR_ACC(COL_OFF(col, ioff), REC_Q_X); \
+ for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
+ q += GEN_PQ_STRIDE) {
+ LOAD(d, GEN_PQ_D);
+ P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
+ Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
+ }
+ for (; q < qend; q += GEN_PQ_STRIDE) {
+ Q_SYNDROME(GEN_PQ_C, q);
+ }
}
+
/*
- * Reconstruction using Q parity
- * @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @coeff multiplication coefficients
- * @ncols number of column
- * @nbigcols number of big columns
+ * Generate PQ parity (RAIDZ2)
+ *
+ * @rm RAIDZ map
*/
static raidz_inline void
-REC_Q_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const unsigned *coeff, const int ncols, const int nbigcols)
+raidz_generate_pq_impl(raidz_map_t * const rm)
{
- int c;
- size_t ioff = 0;
- const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t *col;
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
- REC_Q_DEFINE();
+ raidz_math_begin();
- for (ioff = off; ioff < end; ioff += (REC_Q_STRIDE * sizeof (v_t))) {
- MUL2_SETUP();
+ raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
- ZERO(REC_Q_X);
+ for (c = 3; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
- if (ncols == nbigcols) {
- for (c = firstdc; c < x; c++)
- REC_Q_INNER_LOOP(c);
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
+ raidz_gen_pq_add);
+ }
- REC_Q_SYN_UPDATE();
- for (c++; c < nbigcols; c++)
- REC_Q_INNER_LOOP(c);
- } else {
- for (c = firstdc; c < nbigcols; c++) {
- REC_Q_SYN_UPDATE();
- if (x != c) {
- col = &rm->rm_col[c];
- XOR_ACC(COL_OFF(col, ioff), REC_Q_X);
- }
- }
- for (; c < ncols; c++)
- REC_Q_SYN_UPDATE();
- }
+ raidz_math_end();
+}
- XOR_ACC(COL_OFF(qcol, ioff), REC_Q_X);
- MUL(coeff[MUL_Q_X], REC_Q_X);
- STORE(COL_OFF(xcol, ioff), REC_Q_X);
+
+/*
+ * Generate PQR parity (RAIDZ3)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *) c[0];
+ v_t *q = (v_t *) c[1];
+ v_t *r = (v_t *) c[CODE_R];
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
+ q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ LOAD(d, GEN_PQR_D);
+ P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
+ Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
+ R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
+ }
+ for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ Q_SYNDROME(GEN_PQR_C, q);
+ R_SYNDROME(GEN_PQR_C, r);
}
}
+
/*
- * Reconstruct single data column using Q parity
- * @rec_method REC_Q_BLOCK()
+ * Generate PQR parity (RAIDZ3)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pqr_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
+
+ for (c = 4; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
+ raidz_gen_pqr_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * DATA RECONSTRUCTION
+ *
+ * Data reconstruction process consists of two phases:
+ * - Syndrome calculation
+ * - Data reconstruction
+ *
+ * Syndrome is calculated by generating parity using available data columns
+ * and zeros in places of erasure. Existing parity is added to corresponding
+ * syndrome value to obtain the [P|Q|R]syn values from equation:
+ * P = Psyn + Dx + Dy + Dz
+ * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
+ * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
+ *
+ * For data reconstruction phase, the corresponding equations are solved
+ * for missing data (Dx, Dy, Dz). This generally involves multiplying known
+ * symbols by a coefficient and adding them together. The multiplication
+ * constant coefficients are calculated ahead of the operation in
+ * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
+ *
+ * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
+ * and "short" columns.
+ * For this reason, reconstruction is performed in a minimum of
+ * two steps: first from offset 0 to short_size, then from short_size to
+ * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
+ * over both ranges. The split also enables removal of conditional expressions
+ * from loop bodies, improving throughput of SIMD implementations.
+ * For the best performance, all functions marked with raidz_inline attribute
+ * must be inlined by compiler.
+ *
+ * parity data
+ * columns columns
+ * <----------> <------------------>
+ * x y <----+ missing columns (x, y)
+ * | |
+ * +---+---+---+---+-v-+---+-v-+---+ ^ 0
+ * | | | | | | | | | |
+ * | | | | | | | | | |
+ * | P | Q | R | D | D | D | D | D | |
+ * | | | | 0 | 1 | 2 | 3 | 4 | |
+ * | | | | | | | | | v
+ * | | | | | +---+---+---+ ^ short_size
+ * | | | | | | |
+ * +---+---+---+---+---+ v big_size
+ * <------------------> <---------->
+ * big columns short columns
+ *
+ */
+
+
+
+
+/*
+ * Reconstruct single data column using P parity
+ *
+ * @syn_method raidz_add_abd_cb()
+ * @rec_method not applicable
*
* @rm RAIDZ map
* @tgtidx array of missing data indexes
*/
static raidz_inline int
-raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
+raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
{
- const int x = tgtidx[TARGET_X];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t short_size = raidz_short_size(rm);
- unsigned coeff[MUL_CNT];
-
- raidz_rec_q_coeff(rm, tgtidx, coeff);
+ size_t c;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ size_t size;
+ abd_t *dabd;
raidz_math_begin();
- /* 0 - short_size */
- REC_Q_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols);
+ /* copy P into target */
+ raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
+
+ /* generate p_syndrome */
+ for (c = firstdc; c < ncols; c++) {
+ if (c == x)
+ continue;
- /* short_size - xsize */
- REC_Q_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols);
+ dabd = rm->rm_col[c].rc_abd;
+ size = MIN(rm->rm_col[c].rc_size, xsize);
+
+ raidz_add(xabd, dabd, size);
+ }
raidz_math_end();
- return (1 << CODE_Q);
+ return (1 << CODE_P);
}
+
/*
- * Reconstruct using R parity
+ * Generate Q syndrome (Qsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @xsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
*/
+static void
+raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *) xc[TARGET_X];
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (xsize / sizeof (v_t));
+
+ SYN_Q_DEFINE();
+
+ MUL2_SETUP();
-#define REC_R_SYN_UPDATE() MUL4(REC_R_X)
-#define REC_R_INNER_LOOP(c) \
-{ \
- col = &rm->rm_col[c]; \
- REC_R_SYN_UPDATE(); \
- XOR_ACC(COL_OFF(col, ioff), REC_R_X); \
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_Q_D);
+ Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ Q_SYNDROME(SYN_Q_X, x);
+ }
}
+
/*
- * Reconstruction using R parity
+ * Reconstruct single data column using Q parity
+ *
+ * @syn_method raidz_syn_q_abd()
+ * @rec_method raidz_mul_abd()
+ *
* @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @coeff multiplication coefficients
- * @ncols number of column
- * @nbigcols number of big columns
+ * @tgtidx array of missing data indexes
*/
-static raidz_inline void
-REC_R_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const unsigned *coeff, const int ncols, const int nbigcols)
+static raidz_inline int
+raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
{
- int c;
- size_t ioff = 0;
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t *col;
-
- REC_R_DEFINE();
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *tabds[] = { xabd };
- for (ioff = off; ioff < end; ioff += (REC_R_STRIDE * sizeof (v_t))) {
- MUL2_SETUP();
+ unsigned coeff[MUL_CNT];
+ raidz_rec_q_coeff(rm, tgtidx, coeff);
- ZERO(REC_R_X);
+ raidz_math_begin();
- if (ncols == nbigcols) {
- for (c = firstdc; c < x; c++)
- REC_R_INNER_LOOP(c);
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
- REC_R_SYN_UPDATE();
- for (c++; c < nbigcols; c++)
- REC_R_INNER_LOOP(c);
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
} else {
- for (c = firstdc; c < nbigcols; c++) {
- REC_R_SYN_UPDATE();
- if (c != x) {
- col = &rm->rm_col[c];
- XOR_ACC(COL_OFF(col, ioff), REC_R_X);
- }
- }
- for (; c < ncols; c++)
- REC_R_SYN_UPDATE();
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
}
- XOR_ACC(COL_OFF(rcol, ioff), REC_R_X);
- MUL(coeff[MUL_R_X], REC_R_X);
- STORE(COL_OFF(xcol, ioff), REC_R_X);
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_q_abd);
}
+
+ /* add Q to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd, (void*) coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_Q);
}
+
+/*
+ * Generate R syndrome (Rsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *) xc[TARGET_X];
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+
+ SYN_R_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_R_D);
+ R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ R_SYNDROME(SYN_R_X, x);
+ }
+}
+
+
/*
* Reconstruct single data column using R parity
- * @rec_method REC_R_BLOCK()
+ *
+ * @syn_method raidz_syn_r_abd()
+ * @rec_method raidz_mul_abd()
*
* @rm RAIDZ map
* @tgtidx array of missing data indexes
@@ -631,122 +750,136 @@ REC_R_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
static raidz_inline int
raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
{
- const int x = tgtidx[TARGET_X];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t short_size = raidz_short_size(rm);
- unsigned coeff[MUL_CNT];
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *tabds[] = { xabd };
+ unsigned coeff[MUL_CNT];
raidz_rec_r_coeff(rm, tgtidx, coeff);
raidz_math_begin();
- /* 0 - short_size */
- REC_R_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols);
+ /* Seed the syndrome with the first data column, or zero if it is x */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+
+ /* generate r_syndrome; the missing column is passed as NULL/0 */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_r_abd);
+ }
- /* short_size - xsize */
- REC_R_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols);
+ /* add R to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
+
+ /* transform the syndrome by multiplying it with coeff */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd, (void *)coeff);
raidz_math_end();
return (1 << CODE_R);
}
+
/*
- * Reconstruct using PQ parity
+ * Generate P and Q syndromes
+ *
+ * @tc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
*/
+static void
+raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *) tc[TARGET_X];
+ v_t *y = (v_t *) tc[TARGET_Y];
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
-#define REC_PQ_SYN_UPDATE() MUL2(REC_PQ_Y)
-#define REC_PQ_INNER_LOOP(c) \
-{ \
- col = &rm->rm_col[c]; \
- LOAD(COL_OFF(col, ioff), REC_PQ_D); \
- REC_PQ_SYN_UPDATE(); \
- XOR(REC_PQ_D, REC_PQ_X); \
- XOR(REC_PQ_D, REC_PQ_Y); \
+ SYN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PQ_D);
+ P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
+ Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQ_X, y);
+ }
}
/*
- * Reconstruction using PQ parity
- * @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @y missing data column
- * @coeff multiplication coefficients
- * @ncols number of column
- * @nbigcols number of big columns
- * @calcy calculate second data column
+ * Reconstruct data using PQ parity and PQ syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
*/
-static raidz_inline void
-REC_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const int y, const unsigned *coeff, const int ncols,
- const int nbigcols, const boolean_t calcy)
+static void
+raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
+ const unsigned *mul)
{
- int c;
- size_t ioff = 0;
- const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t * const ycol = raidz_col_p(rm, y);
- raidz_col_t *col;
+ v_t *x = (v_t *) tc[TARGET_X];
+ v_t *y = (v_t *) tc[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *) c[CODE_P];
+ const v_t *q = (v_t *) c[CODE_Q];
REC_PQ_DEFINE();
- for (ioff = off; ioff < end; ioff += (REC_PQ_STRIDE * sizeof (v_t))) {
- LOAD(COL_OFF(pcol, ioff), REC_PQ_X);
- ZERO(REC_PQ_Y);
- MUL2_SETUP();
+ for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
+ p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
+ LOAD(x, REC_PQ_X);
+ LOAD(y, REC_PQ_Y);
- if (ncols == nbigcols) {
- for (c = firstdc; c < x; c++)
- REC_PQ_INNER_LOOP(c);
-
- REC_PQ_SYN_UPDATE();
- for (c++; c < y; c++)
- REC_PQ_INNER_LOOP(c);
-
- REC_PQ_SYN_UPDATE();
- for (c++; c < nbigcols; c++)
- REC_PQ_INNER_LOOP(c);
- } else {
- for (c = firstdc; c < nbigcols; c++) {
- REC_PQ_SYN_UPDATE();
- if (c != x && c != y) {
- col = &rm->rm_col[c];
- LOAD(COL_OFF(col, ioff), REC_PQ_D);
- XOR(REC_PQ_D, REC_PQ_X);
- XOR(REC_PQ_D, REC_PQ_Y);
- }
- }
- for (; c < ncols; c++)
- REC_PQ_SYN_UPDATE();
- }
-
- XOR_ACC(COL_OFF(qcol, ioff), REC_PQ_Y);
+ XOR_ACC(p, REC_PQ_X);
+ XOR_ACC(q, REC_PQ_Y);
/* Save Pxy */
- COPY(REC_PQ_X, REC_PQ_D);
+ COPY(REC_PQ_X, REC_PQ_T);
/* Calc X */
- MUL(coeff[MUL_PQ_X], REC_PQ_X);
- MUL(coeff[MUL_PQ_Y], REC_PQ_Y);
+ MUL(mul[MUL_PQ_X], REC_PQ_X);
+ MUL(mul[MUL_PQ_Y], REC_PQ_Y);
XOR(REC_PQ_Y, REC_PQ_X);
- STORE(COL_OFF(xcol, ioff), REC_PQ_X);
+ STORE(x, REC_PQ_X);
- if (calcy) {
- /* Calc Y */
- XOR(REC_PQ_D, REC_PQ_X);
- STORE(COL_OFF(ycol, ioff), REC_PQ_X);
- }
+ /* Calc Y */
+ XOR(REC_PQ_T, REC_PQ_X);
+ STORE(y, REC_PQ_X);
}
}
+
/*
* Reconstruct two data columns using PQ parity
- * @rec_method REC_PQ_BLOCK()
+ *
+ * @syn_method raidz_syn_pq_abd()
+ * @rec_method raidz_rec_pq_abd()
*
* @rm RAIDZ map
* @tgtidx array of missing data indexes
@@ -754,126 +887,156 @@ REC_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
static raidz_inline int
raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
{
- const int x = tgtidx[TARGET_X];
- const int y = tgtidx[TARGET_Y];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t ysize = raidz_col_size(rm, y);
- const size_t short_size = raidz_short_size(rm);
- unsigned coeff[MUL_CNT];
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
raidz_rec_pq_coeff(rm, tgtidx, coeff);
+ /*
+ * Check if the second target is shorter than the first.
+ * In this case, the shorter target needs to be replaced with a
+ * new xsize buffer so that the syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
raidz_math_begin();
- /* 0 - short_size */
- REC_PQ_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
+ /* Seed both syndromes with the first data column, or zero if it is x */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
- /* short_size - xsize */
- REC_PQ_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
- xsize == ysize);
+ /* generate P and Q syndromes; missing columns are passed as NULL/0 */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pq_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
+
+ /* Copy the shorter target back to the original abd buffer */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
raidz_math_end();
+ if (ysize < xsize)
+ abd_free(yabd);
+
return ((1 << CODE_P) | (1 << CODE_Q));
}
+
/*
- * Reconstruct using PR parity
+ * Generate P and R syndromes
+ *
+ * @c array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
*/
+static void
+raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *) c[TARGET_X];
+ v_t *y = (v_t *) c[TARGET_Y];
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PR_DEFINE();
-#define REC_PR_SYN_UPDATE() MUL4(REC_PR_Y)
-#define REC_PR_INNER_LOOP(c) \
-{ \
- col = &rm->rm_col[c]; \
- LOAD(COL_OFF(col, ioff), REC_PR_D); \
- REC_PR_SYN_UPDATE(); \
- XOR(REC_PR_D, REC_PR_X); \
- XOR(REC_PR_D, REC_PR_Y); \
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PR_D);
+ P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
+ R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ R_SYNDROME(SYN_PR_X, y);
+ }
}
/*
- * Reconstruction using PR parity
- * @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @y missing data column
- * @coeff multiplication coefficients
- * @ncols number of column
- * @nbigcols number of big columns
- * @calcy calculate second data column
+ * Reconstruct data using PR parity and PR syndromes
+ *
+ * @t syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns, built as { P, R } by raidz_reconstruct_pr_impl()
+ * @mul array of multiplication constants
*/
-static raidz_inline void
-REC_PR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const int y, const unsigned *coeff, const int ncols,
- const int nbigcols, const boolean_t calcy)
+static void
+raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
{
- int c;
- size_t ioff;
- const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t * const ycol = raidz_col_p(rm, y);
- raidz_col_t *col;
+ v_t *x = (v_t *) t[TARGET_X];
+ v_t *y = (v_t *) t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *) c[CODE_P];
+ const v_t *q = (v_t *) c[CODE_Q];
REC_PR_DEFINE();
- for (ioff = off; ioff < end; ioff += (REC_PR_STRIDE * sizeof (v_t))) {
- LOAD(COL_OFF(pcol, ioff), REC_PR_X);
- ZERO(REC_PR_Y);
- MUL2_SETUP();
-
- if (ncols == nbigcols) {
- for (c = firstdc; c < x; c++)
- REC_PR_INNER_LOOP(c);
-
- REC_PR_SYN_UPDATE();
- for (c++; c < y; c++)
- REC_PR_INNER_LOOP(c);
-
- REC_PR_SYN_UPDATE();
- for (c++; c < nbigcols; c++)
- REC_PR_INNER_LOOP(c);
- } else {
- for (c = firstdc; c < nbigcols; c++) {
- REC_PR_SYN_UPDATE();
- if (c != x && c != y) {
- col = &rm->rm_col[c];
- LOAD(COL_OFF(col, ioff), REC_PR_D);
- XOR(REC_PR_D, REC_PR_X);
- XOR(REC_PR_D, REC_PR_Y);
- }
- }
- for (; c < ncols; c++)
- REC_PR_SYN_UPDATE();
- }
-
- XOR_ACC(COL_OFF(rcol, ioff), REC_PR_Y);
+ for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
+ p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
+ LOAD(x, REC_PR_X);
+ LOAD(y, REC_PR_Y);
+ XOR_ACC(p, REC_PR_X);
+ XOR_ACC(q, REC_PR_Y);
/* Save Pxy */
- COPY(REC_PR_X, REC_PR_D);
+ COPY(REC_PR_X, REC_PR_T);
/* Calc X */
- MUL(coeff[MUL_PR_X], REC_PR_X);
- MUL(coeff[MUL_PR_Y], REC_PR_Y);
+ MUL(mul[MUL_PR_X], REC_PR_X);
+ MUL(mul[MUL_PR_Y], REC_PR_Y);
XOR(REC_PR_Y, REC_PR_X);
- STORE(COL_OFF(xcol, ioff), REC_PR_X);
+ STORE(x, REC_PR_X);
- if (calcy) {
- /* Calc Y */
- XOR(REC_PR_D, REC_PR_X);
- STORE(COL_OFF(ycol, ioff), REC_PR_X);
- }
+ /* Calc Y */
+ XOR(REC_PR_T, REC_PR_X);
+ STORE(y, REC_PR_X);
}
}
/*
* Reconstruct two data columns using PR parity
- * @rec_method REC_PR_BLOCK()
+ *
+ * @syn_method raidz_syn_pr_abd()
+ * @rec_method raidz_rec_pr_abd()
*
* @rm RAIDZ map
* @tgtidx array of missing data indexes
@@ -881,134 +1044,162 @@ REC_PR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
static raidz_inline int
raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
{
- const int x = tgtidx[TARGET_X];
- const int y = tgtidx[TARGET_Y];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t ysize = raidz_col_size(rm, y);
- const size_t short_size = raidz_short_size(rm);
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
unsigned coeff[MUL_CNT];
-
raidz_rec_pr_coeff(rm, tgtidx, coeff);
+ /*
+ * Check if the second target is shorter than the first.
+ * It needs to be replaced with a new buffer so that the syndrome can
+ * be calculated on the full xsize length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
raidz_math_begin();
- /* 0 - short_size */
- REC_PR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
+ /* Seed both syndromes with the first data column, or zero if it is x */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate P and R syndromes; missing columns are passed as NULL/0 */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
- /* short_size - xsize */
- REC_PR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
- xsize == ysize);
+ /*
+ * Copy the shorter target back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
raidz_math_end();
- return ((1 << CODE_P) | (1 << CODE_R));
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_R));
}
/*
- * Reconstruct using QR parity
+ * Generate Q and R syndromes
+ *
+ * @c array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
*/
+static void
+raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *) c[TARGET_X];
+ v_t *y = (v_t *) c[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
-#define REC_QR_SYN_UPDATE() \
-{ \
- MUL2(REC_QR_X); \
- MUL4(REC_QR_Y); \
-}
+ SYN_QR_DEFINE();
+
+ MUL2_SETUP();
-#define REC_QR_INNER_LOOP(c) \
-{ \
- col = &rm->rm_col[c]; \
- LOAD(COL_OFF(col, ioff), REC_QR_D); \
- REC_QR_SYN_UPDATE(); \
- XOR(REC_QR_D, REC_QR_X); \
- XOR(REC_QR_D, REC_QR_Y); \
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_QR_D);
+ Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
+ R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
+ }
+ for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_QR_X, x);
+ R_SYNDROME(SYN_QR_X, y);
+ }
}
+
/*
- * Reconstruction using QR parity
- * @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @y missing data column
- * @coeff multiplication coefficients
- * @ncols number of column
- * @nbigcols number of big columns
- * @calcy calculate second data column
+ * Reconstruct data using QR parity and QR syndromes
+ *
+ * @t syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns, built as { Q, R } by raidz_reconstruct_qr_impl()
+ * @mul array of multiplication constants
*/
-static raidz_inline void
-REC_QR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const int y, const unsigned *coeff, const int ncols,
- const int nbigcols, const boolean_t calcy)
+static void
+raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
{
- int c;
- size_t ioff;
- const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
- raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t * const ycol = raidz_col_p(rm, y);
- raidz_col_t *col;
+ v_t *x = (v_t *) t[TARGET_X];
+ v_t *y = (v_t *) t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *) c[CODE_P];
+ const v_t *q = (v_t *) c[CODE_Q];
REC_QR_DEFINE();
- for (ioff = off; ioff < end; ioff += (REC_QR_STRIDE * sizeof (v_t))) {
- MUL2_SETUP();
- ZERO(REC_QR_X);
- ZERO(REC_QR_Y);
-
- if (ncols == nbigcols) {
- for (c = firstdc; c < x; c++)
- REC_QR_INNER_LOOP(c);
+ for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
+ p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
+ LOAD(x, REC_QR_X);
+ LOAD(y, REC_QR_Y);
- REC_QR_SYN_UPDATE();
- for (c++; c < y; c++)
- REC_QR_INNER_LOOP(c);
+ XOR_ACC(p, REC_QR_X);
+ XOR_ACC(q, REC_QR_Y);
- REC_QR_SYN_UPDATE();
- for (c++; c < nbigcols; c++)
- REC_QR_INNER_LOOP(c);
- } else {
- for (c = firstdc; c < nbigcols; c++) {
- REC_QR_SYN_UPDATE();
- if (c != x && c != y) {
- col = &rm->rm_col[c];
- LOAD(COL_OFF(col, ioff), REC_QR_D);
- XOR(REC_QR_D, REC_QR_X);
- XOR(REC_QR_D, REC_QR_Y);
- }
- }
- for (; c < ncols; c++)
- REC_QR_SYN_UPDATE();
- }
-
- XOR_ACC(COL_OFF(qcol, ioff), REC_QR_X);
- XOR_ACC(COL_OFF(rcol, ioff), REC_QR_Y);
-
- /* Save Qxy */
- COPY(REC_QR_X, REC_QR_D);
+ /* Save Qxy */
+ COPY(REC_QR_X, REC_QR_T);
/* Calc X */
- MUL(coeff[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
- XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
- MUL(coeff[MUL_QR_X], REC_QR_X); /* X = X * xm */
- STORE(COL_OFF(xcol, ioff), REC_QR_X);
-
- if (calcy) {
- /* Calc Y */
- MUL(coeff[MUL_QR_YQ], REC_QR_D); /* X = Q * xqm */
- XOR(REC_QR_Y, REC_QR_D); /* X = R ^ X */
- MUL(coeff[MUL_QR_Y], REC_QR_D); /* X = X * xm */
- STORE(COL_OFF(ycol, ioff), REC_QR_D);
- }
+ MUL(mul[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
+ XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
+ MUL(mul[MUL_QR_X], REC_QR_X); /* X = X * xm */
+ STORE(x, REC_QR_X);
+
+ /* Calc Y */
+ MUL(mul[MUL_QR_YQ], REC_QR_T); /* T = Q * yqm */
+ XOR(REC_QR_Y, REC_QR_T); /* T = R ^ T */
+ MUL(mul[MUL_QR_Y], REC_QR_T); /* T = T * ym */
+ STORE(y, REC_QR_T);
}
}
+
/*
* Reconstruct two data columns using QR parity
- * @rec_method REC_QR_BLOCK()
+ *
+ * @syn_method raidz_syn_qr_abd()
+ * @rec_method raidz_rec_qr_abd()
*
* @rm RAIDZ map
* @tgtidx array of missing data indexes
@@ -1016,158 +1207,182 @@ REC_QR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
static raidz_inline int
raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
{
- const int x = tgtidx[TARGET_X];
- const int y = tgtidx[TARGET_Y];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t ysize = raidz_col_size(rm, y);
- const size_t short_size = raidz_short_size(rm);
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
unsigned coeff[MUL_CNT];
-
raidz_rec_qr_coeff(rm, tgtidx, coeff);
+ /*
+ * Check if the second target is shorter than the first.
+ * In this case, the shorter target needs to be replaced with a
+ * new xsize buffer so that the syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
raidz_math_begin();
- /* 0 - short_size */
- REC_QR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
+ /* Seed both syndromes with the first data column, or zero if it is x */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
- /* short_size - xsize */
- REC_QR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
- xsize == ysize);
+ /* generate Q and R syndromes; missing columns are passed as NULL/0 */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_qr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
+
+ /*
+ * Copy the shorter target back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
raidz_math_end();
+ if (ysize < xsize)
+ abd_free(yabd);
+
+
return ((1 << CODE_Q) | (1 << CODE_R));
}
+
/*
- * Reconstruct using PQR parity
+ * Generate P, Q, and R syndromes
+ *
+ * @c array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
*/
+static void
+raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *) c[TARGET_X];
+ v_t *y = (v_t *) c[TARGET_Y];
+ v_t *z = (v_t *) c[TARGET_Z];
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+ const v_t *d = (v_t *) dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
-#define REC_PQR_SYN_UPDATE() \
-{ \
- MUL2(REC_PQR_Y); \
- MUL4(REC_PQR_Z); \
-}
+ SYN_PQR_DEFINE();
-#define REC_PQR_INNER_LOOP(c) \
-{ \
- col = &rm->rm_col[(c)]; \
- LOAD(COL_OFF(col, ioff), REC_PQR_D); \
- REC_PQR_SYN_UPDATE(); \
- XOR(REC_PQR_D, REC_PQR_X); \
- XOR(REC_PQR_D, REC_PQR_Y); \
- XOR(REC_PQR_D, REC_PQR_Z); \
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
+ z += SYN_STRIDE) {
+ LOAD(d, SYN_PQR_D);
+ P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x);
+ Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
+ R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
+ }
+ for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQR_X, y);
+ R_SYNDROME(SYN_PQR_X, z);
+ }
}
+
/*
- * Reconstruction using PQR parity
- * @rm RAIDZ map
- * @off starting offset
- * @end ending offset
- * @x missing data column
- * @y missing data column
- * @z missing data column
- * @coeff multiplication coefficients
- * @ncols number of column
- * @nbigcols number of big columns
- * @calcy calculate second data column
- * @calcz calculate third data column
+ * Reconstruct data using PQR parity and PQR syndromes
+ *
+ * @t syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
*/
-static raidz_inline void
-REC_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
- const int x, const int y, const int z, const unsigned *coeff,
- const int ncols, const int nbigcols, const boolean_t calcy,
- const boolean_t calcz)
+static void
+raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
+ const unsigned * const mul)
{
- int c;
- size_t ioff;
- const size_t firstdc = raidz_parity(rm);
- raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
- raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
- raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
- raidz_col_t * const xcol = raidz_col_p(rm, x);
- raidz_col_t * const ycol = raidz_col_p(rm, y);
- raidz_col_t * const zcol = raidz_col_p(rm, z);
- raidz_col_t *col;
+ v_t *x = (v_t *) t[TARGET_X];
+ v_t *y = (v_t *) t[TARGET_Y];
+ v_t *z = (v_t *) t[TARGET_Z];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *) c[CODE_P];
+ const v_t *q = (v_t *) c[CODE_Q];
+ const v_t *r = (v_t *) c[CODE_R];
REC_PQR_DEFINE();
- for (ioff = off; ioff < end; ioff += (REC_PQR_STRIDE * sizeof (v_t))) {
- MUL2_SETUP();
- LOAD(COL_OFF(pcol, ioff), REC_PQR_X);
- ZERO(REC_PQR_Y);
- ZERO(REC_PQR_Z);
+ for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
+ z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
+ r += REC_PQR_STRIDE) {
+ LOAD(x, REC_PQR_X);
+ LOAD(y, REC_PQR_Y);
+ LOAD(z, REC_PQR_Z);
- if (ncols == nbigcols) {
- for (c = firstdc; c < x; c++)
- REC_PQR_INNER_LOOP(c);
-
- REC_PQR_SYN_UPDATE();
- for (c++; c < y; c++)
- REC_PQR_INNER_LOOP(c);
-
- REC_PQR_SYN_UPDATE();
- for (c++; c < z; c++)
- REC_PQR_INNER_LOOP(c);
-
- REC_PQR_SYN_UPDATE();
- for (c++; c < nbigcols; c++)
- REC_PQR_INNER_LOOP(c);
- } else {
- for (c = firstdc; c < nbigcols; c++) {
- REC_PQR_SYN_UPDATE();
- if (c != x && c != y && c != z) {
- col = &rm->rm_col[c];
- LOAD(COL_OFF(col, ioff), REC_PQR_D);
- XOR(REC_PQR_D, REC_PQR_X);
- XOR(REC_PQR_D, REC_PQR_Y);
- XOR(REC_PQR_D, REC_PQR_Z);
- }
- }
- for (; c < ncols; c++)
- REC_PQR_SYN_UPDATE();
- }
-
- XOR_ACC(COL_OFF(qcol, ioff), REC_PQR_Y);
- XOR_ACC(COL_OFF(rcol, ioff), REC_PQR_Z);
+ XOR_ACC(p, REC_PQR_X);
+ XOR_ACC(q, REC_PQR_Y);
+ XOR_ACC(r, REC_PQR_Z);
/* Save Pxyz and Qxyz */
COPY(REC_PQR_X, REC_PQR_XS);
COPY(REC_PQR_Y, REC_PQR_YS);
/* Calc X */
- MUL(coeff[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
- MUL(coeff[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
+ MUL(mul[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
+ MUL(mul[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
XOR(REC_PQR_Y, REC_PQR_X);
- MUL(coeff[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
+ MUL(mul[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */
- STORE(COL_OFF(xcol, ioff), REC_PQR_X);
-
- if (calcy) {
- /* Calc Y */
- XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
- MUL(coeff[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
- XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
- COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
- MUL(coeff[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
- MUL(coeff[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
- XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
- STORE(COL_OFF(ycol, ioff), REC_PQR_YS);
- }
-
- if (calcz) {
- /* Calc Z */
- XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
- STORE(COL_OFF(zcol, ioff), REC_PQR_YS);
- }
+ STORE(x, REC_PQR_X);
+
+ /* Calc Y */
+ XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
+ MUL(mul[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
+ COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
+ MUL(mul[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
+ MUL(mul[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
+ STORE(y, REC_PQR_YS);
+
+ /* Calc Z */
+ XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
+ STORE(z, REC_PQR_YS);
}
}
+
/*
* Reconstruct three data columns using PQR parity
- * @rec_method REC_PQR_BLOCK()
+ *
+ * @syn_method raidz_syn_pqr_abd()
+ * @rec_method raidz_rec_pqr_abd()
*
* @rm RAIDZ map
* @tgtidx array of missing data indexes
@@ -1175,31 +1390,87 @@ REC_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
static raidz_inline int
raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
{
- const int x = tgtidx[TARGET_X];
- const int y = tgtidx[TARGET_Y];
- const int z = tgtidx[TARGET_Z];
- const int ncols = raidz_ncols(rm);
- const int nbigcols = raidz_nbigcols(rm);
- const size_t xsize = raidz_col_size(rm, x);
- const size_t ysize = raidz_col_size(rm, y);
- const size_t zsize = raidz_col_size(rm, z);
- const size_t short_size = raidz_short_size(rm);
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t z = tgtidx[TARGET_Z];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ const size_t zsize = rm->rm_col[z].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *zabd = rm->rm_col[z].rc_abd;
+ abd_t *tabds[] = { xabd, yabd, zabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
unsigned coeff[MUL_CNT];
-
raidz_rec_pqr_coeff(rm, tgtidx, coeff);
+ /*
+ * Check if some of the targets are shorter than the first one.
+ * In this case, each shorter target needs to be replaced with a
+ * new xsize buffer so that the syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+ if (zsize < xsize) {
+ zabd = abd_alloc(xsize, B_FALSE);
+ tabds[2] = zabd;
+ }
+
raidz_math_begin();
- /* 0 - short_size */
- REC_PQR_BLOCK(rm, 0, short_size, x, y, z, coeff, ncols, ncols,
- B_TRUE, B_TRUE);
+ /* Seed all syndromes with the first data column, or zero if it is x */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ raidz_zero(zabd, xsize);
+ }
+
+ /* generate P, Q and R syndromes; missing columns are passed as NULL/0 */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y || c == z) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
- /* short_size - xsize */
- REC_PQR_BLOCK(rm, short_size, xsize, x, y, z, coeff, ncols, nbigcols,
- xsize == ysize, xsize == zsize);
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
+ raidz_syn_pqr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
+
+ /*
+ * Copy the shorter targets back to the original abd buffers
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+ if (zsize < xsize)
+ raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
raidz_math_end();
+ if (ysize < xsize)
+ abd_free(yabd);
+ if (zsize < xsize)
+ abd_free(zabd);
+
return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
}
diff --git a/module/zfs/vdev_raidz_math_scalar.c b/module/zfs/vdev_raidz_math_scalar.c
index 1d782b633..a693bff63 100644
--- a/module/zfs/vdev_raidz_math_scalar.c
+++ b/module/zfs/vdev_raidz_math_scalar.c
@@ -24,6 +24,7 @@
*/
#include <sys/vdev_raidz_impl.h>
+
/*
* Provide native CPU scalar routines.
* Support 32bit and 64bit CPUs.
@@ -153,71 +154,96 @@ static const struct {
#define raidz_math_begin() {}
#define raidz_math_end() {}
-#define GEN_P_DEFINE() v_t p0
-#define GEN_P_STRIDE 1
-#define GEN_P_P p0
-
-#define GEN_PQ_DEFINE() v_t d0, p0, q0
-#define GEN_PQ_STRIDE 1
-#define GEN_PQ_D d0
-#define GEN_PQ_P p0
-#define GEN_PQ_Q q0
-
-#define GEN_PQR_DEFINE() v_t d0, p0, q0, r0
-#define GEN_PQR_STRIDE 1
-#define GEN_PQR_D d0
-#define GEN_PQR_P p0
-#define GEN_PQR_Q q0
-#define GEN_PQR_R r0
-
-#define REC_P_DEFINE() v_t x0
-#define REC_P_STRIDE 1
-#define REC_P_X x0
-
-#define REC_Q_DEFINE() v_t x0
-#define REC_Q_STRIDE 1
-#define REC_Q_X x0
-
-#define REC_R_DEFINE() v_t x0
-#define REC_R_STRIDE 1
-#define REC_R_X x0
-
-#define REC_PQ_DEFINE() v_t x0, y0, d0
-#define REC_PQ_STRIDE 1
-#define REC_PQ_X x0
-#define REC_PQ_Y y0
-#define REC_PQ_D d0
-
-#define REC_PR_DEFINE() v_t x0, y0, d0
-#define REC_PR_STRIDE 1
-#define REC_PR_X x0
-#define REC_PR_Y y0
-#define REC_PR_D d0
-
-#define REC_QR_DEFINE() v_t x0, y0, d0
-#define REC_QR_STRIDE 1
-#define REC_QR_X x0
-#define REC_QR_Y y0
-#define REC_QR_D d0
-
-#define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
-#define REC_PQR_STRIDE 1
-#define REC_PQR_X x0
-#define REC_PQR_Y y0
-#define REC_PQR_Z z0
-#define REC_PQR_D d0
-#define REC_PQR_XS d0
-#define REC_PQR_YS t0
+#define SYN_STRIDE 1
-#include "vdev_raidz_math_impl.h"
+#define ZERO_DEFINE() v_t d0
+#define ZERO_STRIDE 1
+#define ZERO_D d0
-/*
- * If compiled with -O0, gcc doesn't do any stack frame coalescing
- * and -Wframe-larger-than=1024 is triggered in debug mode.
- * Starting with gcc 4.8, new opt level -Og is introduced for debugging, which
- * does not trigger this warning.
- */
-#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#define COPY_DEFINE() v_t d0
+#define COPY_STRIDE 1
+#define COPY_D d0
+
+#define ADD_DEFINE() v_t d0
+#define ADD_STRIDE 1
+#define ADD_D d0
+
+#define MUL_DEFINE() v_t d0
+#define MUL_STRIDE 1
+#define MUL_D d0
+
+#define GEN_P_STRIDE 1
+#define GEN_P_DEFINE() v_t p0
+#define GEN_P_P p0
+
+#define GEN_PQ_STRIDE 1
+#define GEN_PQ_DEFINE() v_t d0, c0
+#define GEN_PQ_D d0
+#define GEN_PQ_C c0
+
+#define GEN_PQR_STRIDE 1
+#define GEN_PQR_DEFINE() v_t d0, c0
+#define GEN_PQR_D d0
+#define GEN_PQR_C c0
+
+#define SYN_Q_DEFINE() v_t d0, x0
+#define SYN_Q_D d0
+#define SYN_Q_X x0
+
+
+#define SYN_R_DEFINE() v_t d0, x0
+#define SYN_R_D d0
+#define SYN_R_X x0
+
+
+#define SYN_PQ_DEFINE() v_t d0, x0
+#define SYN_PQ_D d0
+#define SYN_PQ_X x0
+
+
+#define REC_PQ_STRIDE 1
+#define REC_PQ_DEFINE() v_t x0, y0, t0
+#define REC_PQ_X x0
+#define REC_PQ_Y y0
+#define REC_PQ_T t0
+
+
+#define SYN_PR_DEFINE() v_t d0, x0
+#define SYN_PR_D d0
+#define SYN_PR_X x0
+
+#define REC_PR_STRIDE 1
+#define REC_PR_DEFINE() v_t x0, y0, t0
+#define REC_PR_X x0
+#define REC_PR_Y y0
+#define REC_PR_T t0
+
+
+#define SYN_QR_DEFINE() v_t d0, x0
+#define SYN_QR_D d0
+#define SYN_QR_X x0
+
+
+#define REC_QR_STRIDE 1
+#define REC_QR_DEFINE() v_t x0, y0, t0
+#define REC_QR_X x0
+#define REC_QR_Y y0
+#define REC_QR_T t0
+
+
+#define SYN_PQR_DEFINE() v_t d0, x0
+#define SYN_PQR_D d0
+#define SYN_PQR_X x0
+
+#define REC_PQR_STRIDE 1
+#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
+#define REC_PQR_X x0
+#define REC_PQR_Y y0
+#define REC_PQR_Z z0
+#define REC_PQR_XS xs0
+#define REC_PQR_YS ys0
+
+#include "vdev_raidz_math_impl.h"
DEFINE_GEN_METHODS(scalar);
DEFINE_REC_METHODS(scalar);
diff --git a/module/zfs/vdev_raidz_math_sse2.c b/module/zfs/vdev_raidz_math_sse2.c
index 6fc81215a..97ddfc989 100644
--- a/module/zfs/vdev_raidz_math_sse2.c
+++ b/module/zfs/vdev_raidz_math_sse2.c
@@ -236,6 +236,10 @@ typedef struct v {
#define MUL2(r...) \
{ \
switch (REG_CNT(r)) { \
+ case 4: \
+ _MUL2_x2(VR0(r), VR1(r)); \
+ _MUL2_x2(VR2(r), VR3(r)); \
+ break; \
case 2: \
_MUL2_x2(VR0(r), VR1(r)); \
break; \
@@ -271,8 +275,8 @@ typedef struct v {
if (x & 0x80) { MUL2(in); XOR(in, acc); } \
}
-#define _mul_x1_in 9
-#define _mul_x1_acc 11
+#define _mul_x1_in 11
+#define _mul_x1_acc 12
#define MUL_x1_DEFINE(x) \
static void \
@@ -533,61 +537,87 @@ gf_x2_mul_fns[256] = {
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end()
-#define GEN_P_DEFINE() {}
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 2
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1
+
#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3
+#define GEN_PQ_STRIDE 4
#define GEN_PQ_DEFINE() {}
-#define GEN_PQ_STRIDE 2
-#define GEN_PQ_D 0, 1
-#define GEN_PQ_P 2, 3
-#define GEN_PQ_Q 4, 5
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
-#define GEN_PQR_STRIDE 2
-#define GEN_PQR_D 0, 1
-#define GEN_PQR_P 2, 3
-#define GEN_PQR_Q 4, 5
-#define GEN_PQR_R 6, 7
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
-#define REC_P_DEFINE() {}
-#define REC_P_STRIDE 4
-#define REC_P_X 0, 1, 2, 3
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
-#define REC_Q_DEFINE() {}
-#define REC_Q_STRIDE 2
-#define REC_Q_X 0, 1
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
-#define REC_R_DEFINE() {}
-#define REC_R_STRIDE 2
-#define REC_R_X 0, 1
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
-#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
-#define REC_PQ_D 4, 5
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
-#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
-#define REC_PR_D 4, 5
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
-#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
-#define REC_QR_D 4, 5
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
-#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 1
+#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0
#define REC_PQR_Y 1
#define REC_PQR_Z 2
-#define REC_PQR_D 3
-#define REC_PQR_XS 4
-#define REC_PQR_YS 5
+#define REC_PQR_XS 3
+#define REC_PQR_YS 4
#include <sys/vdev_raidz_impl.h>
diff --git a/module/zfs/vdev_raidz_math_ssse3.c b/module/zfs/vdev_raidz_math_ssse3.c
index 81f1b9a07..d8fa8fb82 100644
--- a/module/zfs/vdev_raidz_math_ssse3.c
+++ b/module/zfs/vdev_raidz_math_ssse3.c
@@ -337,59 +337,86 @@ typedef struct v {
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end()
-#define GEN_P_DEFINE() {}
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1, 2, 3
+
#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
#define GEN_P_P 0, 1, 2, 3
-#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
#define GEN_PQ_D 0, 1, 2, 3
-#define GEN_PQ_P 4, 5, 6, 7
-#define GEN_PQ_Q 8, 9, 10, 11
+#define GEN_PQ_C 4, 5, 6, 7
+#define GEN_PQR_STRIDE 4
#define GEN_PQR_DEFINE() {}
-#define GEN_PQR_STRIDE 2
-#define GEN_PQR_D 0, 1
-#define GEN_PQR_P 2, 3
-#define GEN_PQR_Q 4, 5
-#define GEN_PQR_R 6, 7
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
-#define REC_P_DEFINE() {}
-#define REC_P_STRIDE 4
-#define REC_P_X 0, 1, 2, 3
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
-#define REC_Q_DEFINE() {}
-#define REC_Q_STRIDE 4
-#define REC_Q_X 0, 1, 2, 3
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
-#define REC_R_DEFINE() {}
-#define REC_R_STRIDE 4
-#define REC_R_X 0, 1, 2, 3
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
-#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() {}
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
-#define REC_PQ_D 4, 5
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
-#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() {}
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
-#define REC_PR_D 4, 5
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
-#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() {}
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
-#define REC_QR_D 4, 5
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
-#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2
+#define REC_PQR_DEFINE() {}
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
-#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
@@ -403,13 +430,8 @@ DEFINE_REC_METHODS(ssse3);
static boolean_t
raidz_will_ssse3_work(void)
{
-/* ABD Bringup -- vector code not ready */
-#if 1
- return (B_FALSE);
-#else
return (zfs_sse_available() && zfs_sse2_available() &&
zfs_ssse3_available());
-#endif
}
const raidz_impl_ops_t vdev_raidz_ssse3_impl = {