diff options
author | Gvozden Neskovic <[email protected]> | 2016-08-24 15:51:33 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-11-29 14:34:33 -0800 |
commit | cbf484f8ad26b84a17c5308af47d2c202e1dc9e9 (patch) | |
tree | b5739d61fe437b5f024eddaa061980b693a06088 /module | |
parent | a206522c4fd31f03f14ba174d6159b72acfae0a9 (diff) |
ABD Vectorized raidz
Enable vectorized raidz code on ABD buffers. The avx512f,
avx512bw, aarch64_neon and aarch64_neonx2 implementations are
disabled in this commit. With the exception of avx512bw, these
implementations are updated for ABD in subsequent commits.
Signed-off-by: Gvozden Neskovic <[email protected]>
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/vdev_raidz_math.c | 39 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_aarch64_neon.c | 5 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_aarch64_neonx2.c | 2 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_avx2.c | 84 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_avx512bw.c | 18 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_avx512f.c | 17 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_impl.h | 1849 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_scalar.c | 152 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_sse2.c | 92 | ||||
-rw-r--r-- | module/zfs/vdev_raidz_math_ssse3.c | 84 |
10 files changed, 1363 insertions, 979 deletions
diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index 1e4bf8413..93d7964d2 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -44,16 +44,6 @@ static raidz_impl_ops_t vdev_raidz_fastest_impl = { .name = "fastest" }; -/* ABD BRINGUP -- not ready yet */ -#if 1 -#ifdef HAVE_SSSE3 -#undef HAVE_SSSE3 -#endif -#ifdef HAVE_AVX2 -#undef HAVE_AVX2 -#endif -#endif - /* All compiled in implementations */ const raidz_impl_ops_t *raidz_all_maths[] = { &vdev_raidz_original_impl, @@ -68,14 +58,14 @@ const raidz_impl_ops_t *raidz_all_maths[] = { &vdev_raidz_avx2_impl, #endif #if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */ - &vdev_raidz_avx512f_impl, + // &vdev_raidz_avx512f_impl, #endif #if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */ - &vdev_raidz_avx512bw_impl, + // &vdev_raidz_avx512bw_impl, #endif #if defined(__aarch64__) - &vdev_raidz_aarch64_neon_impl, - &vdev_raidz_aarch64_neonx2_impl, + // &vdev_raidz_aarch64_neon_impl, + // &vdev_raidz_aarch64_neonx2_impl, #endif }; @@ -159,8 +149,6 @@ vdev_raidz_math_generate(raidz_map_t *rm) { raidz_gen_f gen_parity = NULL; -/* ABD Bringup -- vector code not ready */ -#if 0 switch (raidz_parity(rm)) { case 1: gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P]; @@ -177,7 +165,6 @@ vdev_raidz_math_generate(raidz_map_t *rm) raidz_parity(rm)); break; } -#endif /* if method is NULL execute the original implementation */ if (gen_parity == NULL) @@ -188,8 +175,6 @@ vdev_raidz_math_generate(raidz_map_t *rm) return (0); } -/* ABD Bringup -- vector code not ready */ -#if 0 static raidz_rec_f reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid, const int nbaddata) @@ -244,7 +229,6 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid, } return ((raidz_rec_f) NULL); } -#endif /* * Select data reconstruction method for raidz_map @@ -256,31 +240,28 @@ int vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid, const int 
*dt, const int nbaddata) { - raidz_rec_f rec_data = NULL; + raidz_rec_f rec_fn = NULL; -/* ABD Bringup -- vector code not ready */ -#if 0 switch (raidz_parity(rm)) { case PARITY_P: - rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata); + rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata); break; case PARITY_PQ: - rec_data = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata); + rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata); break; case PARITY_PQR: - rec_data = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata); + rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata); break; default: cmn_err(CE_PANIC, "invalid RAID-Z configuration %d", raidz_parity(rm)); break; } -#endif - if (rec_data == NULL) + if (rec_fn == NULL) return (RAIDZ_ORIGINAL_IMPL); else - return (rec_data(rm, dt)); + return (rec_fn(rm, dt)); } const char *raidz_gen_name[] = { diff --git a/module/zfs/vdev_raidz_math_aarch64_neon.c b/module/zfs/vdev_raidz_math_aarch64_neon.c index f6a433f10..7ba30ba5e 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon.c +++ b/module/zfs/vdev_raidz_math_aarch64_neon.c @@ -23,8 +23,9 @@ */ #include <sys/isa_defs.h> +#include <sys/types.h> -#if defined(__aarch64__) +#if 0 // defined(__aarch64__) #include "vdev_raidz_math_aarch64_neon_common.h" @@ -153,7 +154,7 @@ const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = { #endif /* defined(__aarch64__) */ -#if defined(__aarch64__) +#if 0 // defined(__aarch64__) const uint8_t __attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = { diff --git a/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/module/zfs/vdev_raidz_math_aarch64_neonx2.c index d8d1f1bce..e05deeb98 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neonx2.c +++ b/module/zfs/vdev_raidz_math_aarch64_neonx2.c @@ -24,7 +24,7 @@ #include <sys/isa_defs.h> -#if defined(__aarch64__) +#if 0 // defined(__aarch64__) #include "vdev_raidz_math_aarch64_neon_common.h" diff --git a/module/zfs/vdev_raidz_math_avx2.c 
b/module/zfs/vdev_raidz_math_avx2.c index 508c95f8d..25ba9fabd 100644 --- a/module/zfs/vdev_raidz_math_avx2.c +++ b/module/zfs/vdev_raidz_math_avx2.c @@ -334,59 +334,86 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F; kfpu_end(); \ } -#define GEN_P_DEFINE() {} + +#define SYN_STRIDE 4 + +#define ZERO_STRIDE 4 +#define ZERO_DEFINE() {} +#define ZERO_D 0, 1, 2, 3 + +#define COPY_STRIDE 4 +#define COPY_DEFINE() {} +#define COPY_D 0, 1, 2, 3 + +#define ADD_STRIDE 4 +#define ADD_DEFINE() {} +#define ADD_D 0, 1, 2, 3 + +#define MUL_STRIDE 4 +#define MUL_DEFINE() {} +#define MUL_D 0, 1, 2, 3 + #define GEN_P_STRIDE 4 +#define GEN_P_DEFINE() {} #define GEN_P_P 0, 1, 2, 3 -#define GEN_PQ_DEFINE() {} #define GEN_PQ_STRIDE 4 +#define GEN_PQ_DEFINE() {} #define GEN_PQ_D 0, 1, 2, 3 -#define GEN_PQ_P 4, 5, 6, 7 -#define GEN_PQ_Q 8, 9, 10, 11 +#define GEN_PQ_C 4, 5, 6, 7 +#define GEN_PQR_STRIDE 4 #define GEN_PQR_DEFINE() {} -#define GEN_PQR_STRIDE 2 -#define GEN_PQR_D 0, 1 -#define GEN_PQR_P 2, 3 -#define GEN_PQR_Q 4, 5 -#define GEN_PQR_R 6, 7 +#define GEN_PQR_D 0, 1, 2, 3 +#define GEN_PQR_C 4, 5, 6, 7 -#define REC_P_DEFINE() {} -#define REC_P_STRIDE 4 -#define REC_P_X 0, 1, 2, 3 +#define SYN_Q_DEFINE() {} +#define SYN_Q_D 0, 1, 2, 3 +#define SYN_Q_X 4, 5, 6, 7 -#define REC_Q_DEFINE() {} -#define REC_Q_STRIDE 4 -#define REC_Q_X 0, 1, 2, 3 +#define SYN_R_DEFINE() {} +#define SYN_R_D 0, 1, 2, 3 +#define SYN_R_X 4, 5, 6, 7 -#define REC_R_DEFINE() {} -#define REC_R_STRIDE 4 -#define REC_R_X 0, 1, 2, 3 +#define SYN_PQ_DEFINE() {} +#define SYN_PQ_D 0, 1, 2, 3 +#define SYN_PQ_X 4, 5, 6, 7 -#define REC_PQ_DEFINE() {} #define REC_PQ_STRIDE 2 +#define REC_PQ_DEFINE() {} #define REC_PQ_X 0, 1 #define REC_PQ_Y 2, 3 -#define REC_PQ_D 4, 5 +#define REC_PQ_T 4, 5 + +#define SYN_PR_DEFINE() {} +#define SYN_PR_D 0, 1, 2, 3 +#define SYN_PR_X 4, 5, 6, 7 -#define REC_PR_DEFINE() {} #define REC_PR_STRIDE 2 +#define REC_PR_DEFINE() {} #define REC_PR_X 0, 1 #define REC_PR_Y 2, 3 
-#define REC_PR_D 4, 5 +#define REC_PR_T 4, 5 + +#define SYN_QR_DEFINE() {} +#define SYN_QR_D 0, 1, 2, 3 +#define SYN_QR_X 4, 5, 6, 7 -#define REC_QR_DEFINE() {} #define REC_QR_STRIDE 2 +#define REC_QR_DEFINE() {} #define REC_QR_X 0, 1 #define REC_QR_Y 2, 3 -#define REC_QR_D 4, 5 +#define REC_QR_T 4, 5 + +#define SYN_PQR_DEFINE() {} +#define SYN_PQR_D 0, 1, 2, 3 +#define SYN_PQR_X 4, 5, 6, 7 -#define REC_PQR_DEFINE() {} #define REC_PQR_STRIDE 2 +#define REC_PQR_DEFINE() {} #define REC_PQR_X 0, 1 #define REC_PQR_Y 2, 3 #define REC_PQR_Z 4, 5 -#define REC_PQR_D 6, 7 #define REC_PQR_XS 6, 7 #define REC_PQR_YS 8, 9 @@ -400,12 +427,7 @@ DEFINE_REC_METHODS(avx2); static boolean_t raidz_will_avx2_work(void) { -/* ABD Bringup -- vector code not ready */ -#if 1 - return (B_FALSE); -#else return (zfs_avx_available() && zfs_avx2_available()); -#endif } const raidz_impl_ops_t vdev_raidz_avx2_impl = { diff --git a/module/zfs/vdev_raidz_math_avx512bw.c b/module/zfs/vdev_raidz_math_avx512bw.c index bcbe657d0..465d1e569 100644 --- a/module/zfs/vdev_raidz_math_avx512bw.c +++ b/module/zfs/vdev_raidz_math_avx512bw.c @@ -24,7 +24,7 @@ #include <sys/isa_defs.h> -#if defined(__x86_64) && defined(HAVE_AVX512BW) +#if 0 // defined(__x86_64) && defined(HAVE_AVX512BW) #include <sys/types.h> #include <linux/simd_x86.h> @@ -345,6 +345,22 @@ static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F; kfpu_end(); \ } +#define ZERO_STRIDE 4 +#define ZERO_DEFINE() {} +#define ZERO_D 0, 1, 2, 3 + +#define COPY_STRIDE 4 +#define COPY_DEFINE() {} +#define COPY_D 0, 1, 2, 3 + +#define ADD_STRIDE 4 +#define ADD_DEFINE() {} +#define ADD_D 0, 1, 2, 3 + +#define MUL_STRIDE 4 +#define MUL_DEFINE() {} +#define MUL_D 0, 1, 2, 3 + #define GEN_P_DEFINE() {} #define GEN_P_STRIDE 4 #define GEN_P_P 0, 1, 2, 3 diff --git a/module/zfs/vdev_raidz_math_avx512f.c b/module/zfs/vdev_raidz_math_avx512f.c index cc3868bce..c2ccd875e 100644 --- a/module/zfs/vdev_raidz_math_avx512f.c +++ 
b/module/zfs/vdev_raidz_math_avx512f.c @@ -24,7 +24,7 @@ #include <sys/isa_defs.h> -#if defined(__x86_64) && defined(HAVE_AVX512F) +#if 0 // defined(__x86_64) && defined(HAVE_AVX512F) #include <sys/types.h> #include <linux/simd_x86.h> @@ -437,6 +437,21 @@ typedef struct v { kfpu_end(); \ } +#define ZERO_STRIDE 4 +#define ZERO_DEFINE() {} +#define ZERO_D 20, 21, 22, 23 + +#define COPY_STRIDE 4 +#define COPY_DEFINE() {} +#define COPY_D 20, 21, 22, 23 + +#define ADD_STRIDE 4 +#define ADD_DEFINE() {} +#define ADD_D 20, 21, 22, 23 + +#define MUL_STRIDE 4 +#define MUL_DEFINE() {} +#define MUL_D 20, 21, 22, 23 /* * This use zmm16-zmm31 registers to free up zmm0-zmm15 * to use with the AVX2 pshufb, see above diff --git a/module/zfs/vdev_raidz_math_impl.h b/module/zfs/vdev_raidz_math_impl.h index 53800fd72..a8e4a0740 100644 --- a/module/zfs/vdev_raidz_math_impl.h +++ b/module/zfs/vdev_raidz_math_impl.h @@ -32,257 +32,14 @@ #define noinline __attribute__((noinline)) #endif -/* Calculate data offset in raidz column, offset is in bytes */ -/* ADB BRINGUP -- needs to be refactored for ABD */ -#define COL_OFF(col, off) ((v_t *)(((char *)(col)->rc_abd) + (off))) - -/* - * PARITY CALCULATION - * An optimized function is called for a full length of data columns - * If RAIDZ map contains remainder columns (shorter columns) the same function - * is called for reminder of full columns. - * - * GEN_[P|PQ|PQR]_BLOCK() functions are designed to be efficiently in-lined by - * the compiler. This removes a lot of conditionals from the inside loop which - * makes the code faster, especially for vectorized code. - * They are also highly parametrized, allowing for each implementation to define - * most optimal stride, and register allocation. 
- */ - -static raidz_inline void -GEN_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int ncols) -{ - int c; - size_t ioff; - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t *col; - - GEN_P_DEFINE(); - - for (ioff = off; ioff < end; ioff += (GEN_P_STRIDE * sizeof (v_t))) { - LOAD(COL_OFF(&(rm->rm_col[1]), ioff), GEN_P_P); - - for (c = 2; c < ncols; c++) { - col = &rm->rm_col[c]; - XOR_ACC(COL_OFF(col, ioff), GEN_P_P); - } - - STORE(COL_OFF(pcol, ioff), GEN_P_P); - } -} - -/* - * Generate P parity (RAIDZ1) - * - * @rm RAIDZ map - */ -static raidz_inline void -raidz_generate_p_impl(raidz_map_t * const rm) -{ - const int ncols = raidz_ncols(rm); - const size_t psize = raidz_big_size(rm); - const size_t short_size = raidz_short_size(rm); - - panic("not ABD ready"); - - raidz_math_begin(); - - /* short_size */ - GEN_P_BLOCK(rm, 0, short_size, ncols); - - /* fullcols */ - GEN_P_BLOCK(rm, short_size, psize, raidz_nbigcols(rm)); - - raidz_math_end(); -} - -static raidz_inline void -GEN_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int ncols, const int nbigcols) -{ - int c; - size_t ioff; - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q); - raidz_col_t *col; - - GEN_PQ_DEFINE(); - - MUL2_SETUP(); - - for (ioff = off; ioff < end; ioff += (GEN_PQ_STRIDE * sizeof (v_t))) { - LOAD(COL_OFF(&rm->rm_col[2], ioff), GEN_PQ_P); - COPY(GEN_PQ_P, GEN_PQ_Q); - - for (c = 3; c < nbigcols; c++) { - col = &rm->rm_col[c]; - LOAD(COL_OFF(col, ioff), GEN_PQ_D); - MUL2(GEN_PQ_Q); - XOR(GEN_PQ_D, GEN_PQ_P); - XOR(GEN_PQ_D, GEN_PQ_Q); - } - - STORE(COL_OFF(pcol, ioff), GEN_PQ_P); - - for (; c < ncols; c++) - MUL2(GEN_PQ_Q); - - STORE(COL_OFF(qcol, ioff), GEN_PQ_Q); - } -} - -/* - * Generate PQ parity (RAIDZ2) - * - * @rm RAIDZ map - */ -static raidz_inline void -raidz_generate_pq_impl(raidz_map_t * const rm) -{ - const int ncols = raidz_ncols(rm); - const size_t 
psize = raidz_big_size(rm); - const size_t short_size = raidz_short_size(rm); - - panic("not ABD ready"); - - raidz_math_begin(); - - /* short_size */ - GEN_PQ_BLOCK(rm, 0, short_size, ncols, ncols); - - /* fullcols */ - GEN_PQ_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm)); - - raidz_math_end(); -} - - -static raidz_inline void -GEN_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int ncols, const int nbigcols) -{ - int c; - size_t ioff; - raidz_col_t *col; - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q); - raidz_col_t * const rcol = raidz_col_p(rm, CODE_R); - - GEN_PQR_DEFINE(); - - MUL2_SETUP(); - - for (ioff = off; ioff < end; ioff += (GEN_PQR_STRIDE * sizeof (v_t))) { - LOAD(COL_OFF(&rm->rm_col[3], ioff), GEN_PQR_P); - COPY(GEN_PQR_P, GEN_PQR_Q); - COPY(GEN_PQR_P, GEN_PQR_R); - - for (c = 4; c < nbigcols; c++) { - col = &rm->rm_col[c]; - LOAD(COL_OFF(col, ioff), GEN_PQR_D); - MUL2(GEN_PQR_Q); - MUL4(GEN_PQR_R); - XOR(GEN_PQR_D, GEN_PQR_P); - XOR(GEN_PQR_D, GEN_PQR_Q); - XOR(GEN_PQR_D, GEN_PQR_R); - } - - STORE(COL_OFF(pcol, ioff), GEN_PQR_P); - - for (; c < ncols; c++) { - MUL2(GEN_PQR_Q); - MUL4(GEN_PQR_R); - } - - STORE(COL_OFF(qcol, ioff), GEN_PQR_Q); - STORE(COL_OFF(rcol, ioff), GEN_PQR_R); - } -} - - -/* - * Generate PQR parity (RAIDZ3) - * - * @rm RAIDZ map - */ -static raidz_inline void -raidz_generate_pqr_impl(raidz_map_t * const rm) -{ - const int ncols = raidz_ncols(rm); - const size_t psize = raidz_big_size(rm); - const size_t short_size = raidz_short_size(rm); - - panic("not ABD ready"); - - raidz_math_begin(); - - /* short_size */ - GEN_PQR_BLOCK(rm, 0, short_size, ncols, ncols); - - /* fullcols */ - GEN_PQR_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm)); - - raidz_math_end(); -} - -/* - * DATA RECONSTRUCTION - * - * Data reconstruction process consists of two phases: - * - Syndrome calculation - * - Data reconstruction - * - * Syndrome is 
calculated by generating parity using available data columns - * and zeros in places of erasure. Existing parity is added to corresponding - * syndrome value to obtain the [P|Q|R]syn values from equation: - * P = Psyn + Dx + Dy + Dz - * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz - * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz - * - * For data reconstruction phase, the corresponding equations are solved - * for missing data (Dx, Dy, Dz). This generally involves multiplying known - * symbols by an coefficient and adding them together. The multiplication - * constant coefficients are calculated ahead of the operation in - * raidz_rec_[q|r|pq|pq|qr|pqr]_coeff() functions. - * - * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big" - * and "short" columns. - * For this reason, reconstruction is performed in minimum of - * two steps. First, from offset 0 to short_size, then from short_size to - * short_size. Calculation functions REC_[*]_BLOCK() are implemented to work - * over both ranges. The split also enables removal of conditional expressions - * from loop bodies, improving throughput of SIMD implementations. - * For the best performance, all functions marked with raidz_inline attribute - * must be inlined by compiler. - * - * parity data - * columns columns - * <----------> <------------------> - * x y <----+ missing columns (x, y) - * | | - * +---+---+---+---+-v-+---+-v-+---+ ^ 0 - * | | | | | | | | | | - * | | | | | | | | | | - * | P | Q | R | D | D | D | D | D | | - * | | | | 0 | 1 | 2 | 3 | 4 | | - * | | | | | | | | | v - * | | | | | +---+---+---+ ^ short_size - * | | | | | | | - * +---+---+---+---+---+ v big_size - * <------------------> <----------> - * big columns short columns - * - */ - /* * Functions calculate multiplication constants for data reconstruction. * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and * used parity columns for reconstruction. 
* @rm RAIDZ map * @tgtidx array of missing data indexes - * @coeff output array of coefficients. Array must be user - * provided and must hold minimum MUL_CNT values + * @coeff output array of coefficients. Array must be provided by + * user and must hold minimum MUL_CNT values. */ static noinline void raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) @@ -390,240 +147,602 @@ raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) coeff[MUL_PQR_YQ] = yd; } +/* + * Method for zeroing a buffer (can be implemented using SIMD). + * This method is used by multiple for gen/rec functions. + * + * @dc Destination buffer + * @dsize Destination buffer size + * @private Unused + */ +static int +raidz_zero_abd_cb(void *dc, size_t dsize, void *private) +{ + v_t *dst = (v_t *) dc; + size_t i; + + ZERO_DEFINE(); + + (void) private; /* unused */ + + ZERO(ZERO_D); + + for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) { + STORE(dst + i, ZERO_D); + STORE(dst + i + ZERO_STRIDE, ZERO_D); + } + + return (0); +} + +#define raidz_zero(dabd, size) \ +{ \ + abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL); \ +} /* - * Reconstruction using P parity - * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @ncols number of column + * Method for copying two buffers (can be implemented using SIMD). + * This method is used by multiple for gen/rec functions. 
+ * + * @dc Destination buffer + * @sc Source buffer + * @dsize Destination buffer size + * @ssize Source buffer size + * @private Unused */ -static raidz_inline void -REC_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const int ncols) +static int +raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private) { - int c; - size_t ioff; - const size_t firstdc = raidz_parity(rm); - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t * const xcol = raidz_col_p(rm, x); - raidz_col_t *col; + v_t *dst = (v_t *) dc; + const v_t *src = (v_t *) sc; + size_t i; - REC_P_DEFINE(); + COPY_DEFINE(); - for (ioff = off; ioff < end; ioff += (REC_P_STRIDE * sizeof (v_t))) { - LOAD(COL_OFF(pcol, ioff), REC_P_X); + (void) private; /* unused */ - for (c = firstdc; c < x; c++) { - col = &rm->rm_col[c]; - XOR_ACC(COL_OFF(col, ioff), REC_P_X); - } + for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) { + LOAD(src + i, COPY_D); + STORE(dst + i, COPY_D); - for (c++; c < ncols; c++) { - col = &rm->rm_col[c]; - XOR_ACC(COL_OFF(col, ioff), REC_P_X); - } + LOAD(src + i + COPY_STRIDE, COPY_D); + STORE(dst + i + COPY_STRIDE, COPY_D); + } - STORE(COL_OFF(xcol, ioff), REC_P_X); + return (0); +} + + +#define raidz_copy(dabd, sabd, size) \ +{ \ + abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\ +} + +/* + * Method for adding (XORing) two buffers. + * Source and destination are XORed together and result is stored in + * destination buffer. This method is used by multiple for gen/rec functions. 
+ * + * @dc Destination buffer + * @sc Source buffer + * @dsize Destination buffer size + * @ssize Source buffer size + * @private Unused + */ +static int +raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private) +{ + v_t *dst = (v_t *) dc; + const v_t *src = (v_t *) sc; + size_t i; + + ADD_DEFINE(); + + (void) private; /* unused */ + + for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) { + LOAD(dst + i, ADD_D); + XOR_ACC(src + i, ADD_D); + STORE(dst + i, ADD_D); + + LOAD(dst + i + ADD_STRIDE, ADD_D); + XOR_ACC(src + i + ADD_STRIDE, ADD_D); + STORE(dst + i + ADD_STRIDE, ADD_D); } + + return (0); +} + +#define raidz_add(dabd, sabd, size) \ +{ \ + abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\ } /* - * Reconstruct single data column using P parity - * @rec_method REC_P_BLOCK() + * Method for multiplying a buffer with a constant in GF(2^8). + * Symbols from buffer are multiplied by a constant and result is stored + * back in the same buffer. * - * @rm RAIDZ map - * @tgtidx array of missing data indexes + * @dc In/Out data buffer. 
+ * @size Size of the buffer + * @private pointer to the multiplication constant (unsigned) */ -static raidz_inline int -raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx) +static int +raidz_mul_abd(void *dc, size_t size, void *private) +{ + const unsigned mul = *((unsigned *) private); + v_t *d = (v_t *) dc; + size_t i; + + MUL_DEFINE(); + + for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) { + LOAD(d + i, MUL_D); + MUL(mul, MUL_D); + STORE(d + i, MUL_D); + + LOAD(d + i + MUL_STRIDE, MUL_D); + MUL(mul, MUL_D); + STORE(d + i + MUL_STRIDE, MUL_D); + } + + return (0); +} + + +/* + * Syndrome generation/update macros + * + * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros + */ +#define P_D_SYNDROME(D, T, t) \ +{ \ + LOAD((t), T); \ + XOR(D, T); \ + STORE((t), T); \ +} + +#define Q_D_SYNDROME(D, T, t) \ +{ \ + LOAD((t), T); \ + MUL2(T); \ + XOR(D, T); \ + STORE((t), T); \ +} + +#define Q_SYNDROME(T, t) \ +{ \ + LOAD((t), T); \ + MUL2(T); \ + STORE((t), T); \ +} + +#define R_D_SYNDROME(D, T, t) \ +{ \ + LOAD((t), T); \ + MUL4(T); \ + XOR(D, T); \ + STORE((t), T); \ +} + +#define R_SYNDROME(T, t) \ +{ \ + LOAD((t), T); \ + MUL4(T); \ + STORE((t), T); \ +} + + +/* + * PARITY CALCULATION + * + * Macros *_SYNDROME are used for parity/syndrome calculation. + * *_D_SYNDROME() macros are used to calculate syndrome between 0 and + * length of data column, and *_SYNDROME() macros are only for updating + * the parity/syndrome if data column is shorter. + * + * P parity is calculated using raidz_add_abd(). 
+ */ + +/* + * Generate P parity (RAIDZ1) + * + * @rm RAIDZ map + */ +static raidz_inline void +raidz_generate_p_impl(raidz_map_t * const rm) { - const int x = tgtidx[TARGET_X]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t short_size = raidz_short_size(rm); + size_t c; + const size_t ncols = raidz_ncols(rm); + const size_t psize = rm->rm_col[CODE_P].rc_size; + abd_t *pabd = rm->rm_col[CODE_P].rc_abd; + size_t size; + abd_t *dabd; raidz_math_begin(); - /* 0 - short_size */ - REC_P_BLOCK(rm, 0, short_size, x, ncols); + /* start with first data column */ + raidz_copy(pabd, rm->rm_col[1].rc_abd, psize); - /* short_size - xsize */ - REC_P_BLOCK(rm, short_size, xsize, x, nbigcols); + for (c = 2; c < ncols; c++) { + dabd = rm->rm_col[c].rc_abd; + size = rm->rm_col[c].rc_size; - raidz_math_end(); + /* add data column */ + raidz_add(pabd, dabd, size); + } - return (1 << CODE_P); + raidz_math_end(); } + /* - * Reconstruct using Q parity + * Generate PQ parity (RAIDZ2) + * The function is called per data column. 
+ * + * @c array of pointers to parity (code) columns + * @dc pointer to data column + * @csize size of parity columns + * @dsize size of data column */ +static void +raidz_gen_pq_add(void **c, const void *dc, const size_t csize, + const size_t dsize) +{ + v_t *p = (v_t *) c[0]; + v_t *q = (v_t *) c[1]; + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); + const v_t * const qend = q + (csize / sizeof (v_t)); + + GEN_PQ_DEFINE(); -#define REC_Q_SYN_UPDATE() MUL2(REC_Q_X) + MUL2_SETUP(); -#define REC_Q_INNER_LOOP(c) \ -{ \ - col = &rm->rm_col[c]; \ - REC_Q_SYN_UPDATE(); \ - XOR_ACC(COL_OFF(col, ioff), REC_Q_X); \ + for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE, + q += GEN_PQ_STRIDE) { + LOAD(d, GEN_PQ_D); + P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p); + Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q); + } + for (; q < qend; q += GEN_PQ_STRIDE) { + Q_SYNDROME(GEN_PQ_C, q); + } } + /* - * Reconstruction using Q parity - * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @coeff multiplication coefficients - * @ncols number of column - * @nbigcols number of big columns + * Generate PQ parity (RAIDZ2) + * + * @rm RAIDZ map */ static raidz_inline void -REC_Q_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const unsigned *coeff, const int ncols, const int nbigcols) +raidz_generate_pq_impl(raidz_map_t * const rm) { - int c; - size_t ioff = 0; - const size_t firstdc = raidz_parity(rm); - raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q); - raidz_col_t * const xcol = raidz_col_p(rm, x); - raidz_col_t *col; + size_t c; + const size_t ncols = raidz_ncols(rm); + const size_t csize = rm->rm_col[CODE_P].rc_size; + size_t dsize; + abd_t *dabd; + abd_t *cabds[] = { + rm->rm_col[CODE_P].rc_abd, + rm->rm_col[CODE_Q].rc_abd + }; - REC_Q_DEFINE(); + raidz_math_begin(); - for (ioff = off; ioff < end; ioff += (REC_Q_STRIDE * sizeof (v_t))) { - MUL2_SETUP(); + raidz_copy(cabds[CODE_P], 
rm->rm_col[2].rc_abd, csize); + raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize); - ZERO(REC_Q_X); + for (c = 3; c < ncols; c++) { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; - if (ncols == nbigcols) { - for (c = firstdc; c < x; c++) - REC_Q_INNER_LOOP(c); + abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2, + raidz_gen_pq_add); + } - REC_Q_SYN_UPDATE(); - for (c++; c < nbigcols; c++) - REC_Q_INNER_LOOP(c); - } else { - for (c = firstdc; c < nbigcols; c++) { - REC_Q_SYN_UPDATE(); - if (x != c) { - col = &rm->rm_col[c]; - XOR_ACC(COL_OFF(col, ioff), REC_Q_X); - } - } - for (; c < ncols; c++) - REC_Q_SYN_UPDATE(); - } + raidz_math_end(); +} - XOR_ACC(COL_OFF(qcol, ioff), REC_Q_X); - MUL(coeff[MUL_Q_X], REC_Q_X); - STORE(COL_OFF(xcol, ioff), REC_Q_X); + +/* + * Generate PQR parity (RAIDZ3) + * The function is called per data column. + * + * @c array of pointers to parity (code) columns + * @dc pointer to data column + * @csize size of parity columns + * @dsize size of data column + */ +static void +raidz_gen_pqr_add(void **c, const void *dc, const size_t csize, + const size_t dsize) +{ + v_t *p = (v_t *) c[0]; + v_t *q = (v_t *) c[1]; + v_t *r = (v_t *) c[CODE_R]; + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); + const v_t * const qend = q + (csize / sizeof (v_t)); + + GEN_PQR_DEFINE(); + + MUL2_SETUP(); + + for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE, + q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) { + LOAD(d, GEN_PQR_D); + P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p); + Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q); + R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r); + } + for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) { + Q_SYNDROME(GEN_PQR_C, q); + R_SYNDROME(GEN_PQR_C, r); } } + /* - * Reconstruct single data column using Q parity - * @rec_method REC_Q_BLOCK() + * Generate PQR parity (RAIDZ2) + * + * @rm RAIDZ map + */ +static raidz_inline void +raidz_generate_pqr_impl(raidz_map_t * const rm) +{ + size_t 
c; + const size_t ncols = raidz_ncols(rm); + const size_t csize = rm->rm_col[CODE_P].rc_size; + size_t dsize; + abd_t *dabd; + abd_t *cabds[] = { + rm->rm_col[CODE_P].rc_abd, + rm->rm_col[CODE_Q].rc_abd, + rm->rm_col[CODE_R].rc_abd + }; + + raidz_math_begin(); + + raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize); + raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize); + raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize); + + for (c = 4; c < ncols; c++) { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; + + abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3, + raidz_gen_pqr_add); + } + + raidz_math_end(); +} + + +/* + * DATA RECONSTRUCTION + * + * Data reconstruction process consists of two phases: + * - Syndrome calculation + * - Data reconstruction + * + * Syndrome is calculated by generating parity using available data columns + * and zeros in places of erasure. Existing parity is added to corresponding + * syndrome value to obtain the [P|Q|R]syn values from equation: + * P = Psyn + Dx + Dy + Dz + * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz + * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz + * + * For data reconstruction phase, the corresponding equations are solved + * for missing data (Dx, Dy, Dz). This generally involves multiplying known + * symbols by an coefficient and adding them together. The multiplication + * constant coefficients are calculated ahead of the operation in + * raidz_rec_[q|r|pq|pq|qr|pqr]_coeff() functions. + * + * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big" + * and "short" columns. + * For this reason, reconstruction is performed in minimum of + * two steps. First, from offset 0 to short_size, then from short_size to + * short_size. Calculation functions REC_[*]_BLOCK() are implemented to work + * over both ranges. The split also enables removal of conditional expressions + * from loop bodies, improving throughput of SIMD implementations. 
+ * For the best performance, all functions marked with raidz_inline attribute + * must be inlined by compiler. + * + * parity data + * columns columns + * <----------> <------------------> + * x y <----+ missing columns (x, y) + * | | + * +---+---+---+---+-v-+---+-v-+---+ ^ 0 + * | | | | | | | | | | + * | | | | | | | | | | + * | P | Q | R | D | D | D | D | D | | + * | | | | 0 | 1 | 2 | 3 | 4 | | + * | | | | | | | | | v + * | | | | | +---+---+---+ ^ short_size + * | | | | | | | + * +---+---+---+---+---+ v big_size + * <------------------> <----------> + * big columns short columns + * + */ + + + + +/* + * Reconstruct single data column using P parity + * + * @syn_method raidz_add_abd() + * @rec_method not applicable * * @rm RAIDZ map * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx) { - const int x = tgtidx[TARGET_X]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t short_size = raidz_short_size(rm); - unsigned coeff[MUL_CNT]; - - raidz_rec_q_coeff(rm, tgtidx, coeff); + size_t c; + const size_t firstdc = raidz_parity(rm); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + const size_t xsize = rm->rm_col[x].rc_size; + abd_t *xabd = rm->rm_col[x].rc_abd; + size_t size; + abd_t *dabd; raidz_math_begin(); - /* 0 - short_size */ - REC_Q_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols); + /* copy P into target */ + raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize); + + /* generate p_syndrome */ + for (c = firstdc; c < ncols; c++) { + if (c == x) + continue; - /* short_size - xsize */ - REC_Q_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols); + dabd = rm->rm_col[c].rc_abd; + size = MIN(rm->rm_col[c].rc_size, xsize); + + raidz_add(xabd, dabd, size); + } raidz_math_end(); - return (1 << CODE_Q); + return (1 << 
CODE_P); } + /* - * Reconstruct using R parity + * Generate Q syndrome (Qsyn) + * + * @xc array of pointers to syndrome columns + * @dc data column (NULL if missing) + * @xsize size of syndrome columns + * @dsize size of data column (0 if missing) */ +static void +raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize, + const size_t dsize) +{ + v_t *x = (v_t *) xc[TARGET_X]; + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); + const v_t * const xend = x + (xsize / sizeof (v_t)); + + SYN_Q_DEFINE(); + + MUL2_SETUP(); -#define REC_R_SYN_UPDATE() MUL4(REC_R_X) -#define REC_R_INNER_LOOP(c) \ -{ \ - col = &rm->rm_col[c]; \ - REC_R_SYN_UPDATE(); \ - XOR_ACC(COL_OFF(col, ioff), REC_R_X); \ + for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) { + LOAD(d, SYN_Q_D); + Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x); + } + for (; x < xend; x += SYN_STRIDE) { + Q_SYNDROME(SYN_Q_X, x); + } } + /* - * Reconstruction using R parity + * Reconstruct single data column using Q parity + * + * @syn_method raidz_add_abd() + * @rec_method raidz_mul_abd() + * * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @coeff multiplication coefficients - * @ncols number of column - * @nbigcols number of big columns + * @tgtidx array of missing data indexes */ -static raidz_inline void -REC_R_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const unsigned *coeff, const int ncols, const int nbigcols) +static raidz_inline int +raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx) { - int c; - size_t ioff = 0; + size_t c; + size_t dsize; + abd_t *dabd; const size_t firstdc = raidz_parity(rm); - raidz_col_t * const rcol = raidz_col_p(rm, CODE_R); - raidz_col_t * const xcol = raidz_col_p(rm, x); - raidz_col_t *col; - - REC_R_DEFINE(); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + abd_t *xabd = rm->rm_col[x].rc_abd; + const size_t xsize = rm->rm_col[x].rc_size; + 
abd_t *tabds[] = { xabd }; - for (ioff = off; ioff < end; ioff += (REC_R_STRIDE * sizeof (v_t))) { - MUL2_SETUP(); + unsigned coeff[MUL_CNT]; + raidz_rec_q_coeff(rm, tgtidx, coeff); - ZERO(REC_R_X); + raidz_math_begin(); - if (ncols == nbigcols) { - for (c = firstdc; c < x; c++) - REC_R_INNER_LOOP(c); + /* Start with first data column if present */ + if (firstdc != x) { + raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + } else { + raidz_zero(xabd, xsize); + } - REC_R_SYN_UPDATE(); - for (c++; c < nbigcols; c++) - REC_R_INNER_LOOP(c); + /* generate q_syndrome */ + for (c = firstdc+1; c < ncols; c++) { + if (c == x) { + dabd = NULL; + dsize = 0; } else { - for (c = firstdc; c < nbigcols; c++) { - REC_R_SYN_UPDATE(); - if (c != x) { - col = &rm->rm_col[c]; - XOR_ACC(COL_OFF(col, ioff), REC_R_X); - } - } - for (; c < ncols; c++) - REC_R_SYN_UPDATE(); + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; } - XOR_ACC(COL_OFF(rcol, ioff), REC_R_X); - MUL(coeff[MUL_R_X], REC_R_X); - STORE(COL_OFF(xcol, ioff), REC_R_X); + abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1, + raidz_syn_q_abd); } + + /* add Q to the syndrome */ + raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize); + + /* transform the syndrome */ + abd_iterate_func(xabd, 0, xsize, raidz_mul_abd, (void*) coeff); + + raidz_math_end(); + + return (1 << CODE_Q); } + +/* + * Generate R syndrome (Rsyn) + * + * @xc array of pointers to syndrome columns + * @dc data column (NULL if missing) + * @tsize size of syndrome columns + * @dsize size of data column (0 if missing) + */ +static void +raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize, + const size_t dsize) +{ + v_t *x = (v_t *) xc[TARGET_X]; + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); + const v_t * const xend = x + (tsize / sizeof (v_t)); + + SYN_R_DEFINE(); + + MUL2_SETUP(); + + for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) { + LOAD(d, SYN_R_D); + R_D_SYNDROME(SYN_R_D, SYN_R_X, x); + } + 
for (; x < xend; x += SYN_STRIDE) { + R_SYNDROME(SYN_R_X, x); + } +} + + /* * Reconstruct single data column using R parity - * @rec_method REC_R_BLOCK() + * + * @syn_method raidz_add_abd() + * @rec_method raidz_mul_abd() * * @rm RAIDZ map * @tgtidx array of missing data indexes @@ -631,122 +750,136 @@ REC_R_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, static raidz_inline int raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx) { - const int x = tgtidx[TARGET_X]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t short_size = raidz_short_size(rm); - unsigned coeff[MUL_CNT]; + size_t c; + size_t dsize; + abd_t *dabd; + const size_t firstdc = raidz_parity(rm); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + const size_t xsize = rm->rm_col[x].rc_size; + abd_t *xabd = rm->rm_col[x].rc_abd; + abd_t *tabds[] = { xabd }; + unsigned coeff[MUL_CNT]; raidz_rec_r_coeff(rm, tgtidx, coeff); raidz_math_begin(); - /* 0 - short_size */ - REC_R_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols); + /* Start with first data column if present */ + if (firstdc != x) { + raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + } else { + raidz_zero(xabd, xsize); + } + + + /* generate r_syndrome */ + for (c = firstdc+1; c < ncols; c++) { + if (c == x) { + dabd = NULL; + dsize = 0; + } else { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; + } + + abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1, + raidz_syn_r_abd); + } - /* short_size - xsize */ - REC_R_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols); + /* add R to the syndrome */ + raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize); + + /* transform the syndrome */ + abd_iterate_func(xabd, 0, xsize, raidz_mul_abd, (void *)coeff); raidz_math_end(); return (1 << CODE_R); } + /* - * Reconstruct using PQ parity + * Generate P and Q syndromes + * + * @xc array of pointers
to syndrome columns + * @dc data column (NULL if missing) + * @tsize size of syndrome columns + * @dsize size of data column (0 if missing) */ +static void +raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize, + const size_t dsize) +{ + v_t *x = (v_t *) tc[TARGET_X]; + v_t *y = (v_t *) tc[TARGET_Y]; + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); + const v_t * const yend = y + (tsize / sizeof (v_t)); -#define REC_PQ_SYN_UPDATE() MUL2(REC_PQ_Y) -#define REC_PQ_INNER_LOOP(c) \ -{ \ - col = &rm->rm_col[c]; \ - LOAD(COL_OFF(col, ioff), REC_PQ_D); \ - REC_PQ_SYN_UPDATE(); \ - XOR(REC_PQ_D, REC_PQ_X); \ - XOR(REC_PQ_D, REC_PQ_Y); \ + SYN_PQ_DEFINE(); + + MUL2_SETUP(); + + for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) { + LOAD(d, SYN_PQ_D); + P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x); + Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y); + } + for (; y < yend; y += SYN_STRIDE) { + Q_SYNDROME(SYN_PQ_X, y); + } } /* - * Reconstruction using PQ parity - * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @y missing data column - * @coeff multiplication coefficients - * @ncols number of column - * @nbigcols number of big columns - * @calcy calculate second data column + * Reconstruct data using PQ parity and PQ syndromes + * + * @tc syndrome/result columns + * @tsize size of syndrome/result columns + * @c parity columns + * @mul array of multiplication constants */ -static raidz_inline void -REC_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const int y, const unsigned *coeff, const int ncols, - const int nbigcols, const boolean_t calcy) +static void +raidz_rec_pq_abd(void **tc, const size_t tsize, void **c, + const unsigned *mul) { - int c; - size_t ioff = 0; - const size_t firstdc = raidz_parity(rm); - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q); - raidz_col_t * const xcol = 
raidz_col_p(rm, x); - raidz_col_t * const ycol = raidz_col_p(rm, y); - raidz_col_t *col; + v_t *x = (v_t *) tc[TARGET_X]; + v_t *y = (v_t *) tc[TARGET_Y]; + const v_t * const xend = x + (tsize / sizeof (v_t)); + const v_t *p = (v_t *) c[CODE_P]; + const v_t *q = (v_t *) c[CODE_Q]; REC_PQ_DEFINE(); - for (ioff = off; ioff < end; ioff += (REC_PQ_STRIDE * sizeof (v_t))) { - LOAD(COL_OFF(pcol, ioff), REC_PQ_X); - ZERO(REC_PQ_Y); - MUL2_SETUP(); + for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE, + p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) { + LOAD(x, REC_PQ_X); + LOAD(y, REC_PQ_Y); - if (ncols == nbigcols) { - for (c = firstdc; c < x; c++) - REC_PQ_INNER_LOOP(c); - - REC_PQ_SYN_UPDATE(); - for (c++; c < y; c++) - REC_PQ_INNER_LOOP(c); - - REC_PQ_SYN_UPDATE(); - for (c++; c < nbigcols; c++) - REC_PQ_INNER_LOOP(c); - } else { - for (c = firstdc; c < nbigcols; c++) { - REC_PQ_SYN_UPDATE(); - if (c != x && c != y) { - col = &rm->rm_col[c]; - LOAD(COL_OFF(col, ioff), REC_PQ_D); - XOR(REC_PQ_D, REC_PQ_X); - XOR(REC_PQ_D, REC_PQ_Y); - } - } - for (; c < ncols; c++) - REC_PQ_SYN_UPDATE(); - } - - XOR_ACC(COL_OFF(qcol, ioff), REC_PQ_Y); + XOR_ACC(p, REC_PQ_X); + XOR_ACC(q, REC_PQ_Y); /* Save Pxy */ - COPY(REC_PQ_X, REC_PQ_D); + COPY(REC_PQ_X, REC_PQ_T); /* Calc X */ - MUL(coeff[MUL_PQ_X], REC_PQ_X); - MUL(coeff[MUL_PQ_Y], REC_PQ_Y); + MUL(mul[MUL_PQ_X], REC_PQ_X); + MUL(mul[MUL_PQ_Y], REC_PQ_Y); XOR(REC_PQ_Y, REC_PQ_X); - STORE(COL_OFF(xcol, ioff), REC_PQ_X); + STORE(x, REC_PQ_X); - if (calcy) { - /* Calc Y */ - XOR(REC_PQ_D, REC_PQ_X); - STORE(COL_OFF(ycol, ioff), REC_PQ_X); - } + /* Calc Y */ + XOR(REC_PQ_T, REC_PQ_X); + STORE(y, REC_PQ_X); } } + /* * Reconstruct two data columns using PQ parity - * @rec_method REC_PQ_BLOCK() + * + * @syn_method raidz_syn_pq_abd() + * @rec_method raidz_rec_pq_abd() * * @rm RAIDZ map * @tgtidx array of missing data indexes @@ -754,126 +887,156 @@ REC_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, static raidz_inline 
int raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx) { - const int x = tgtidx[TARGET_X]; - const int y = tgtidx[TARGET_Y]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t ysize = raidz_col_size(rm, y); - const size_t short_size = raidz_short_size(rm); - unsigned coeff[MUL_CNT]; + size_t c; + size_t dsize; + abd_t *dabd; + const size_t firstdc = raidz_parity(rm); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + const size_t y = tgtidx[TARGET_Y]; + const size_t xsize = rm->rm_col[x].rc_size; + const size_t ysize = rm->rm_col[y].rc_size; + abd_t *xabd = rm->rm_col[x].rc_abd; + abd_t *yabd = rm->rm_col[y].rc_abd; + abd_t *tabds[2] = { xabd, yabd }; + abd_t *cabds[] = { + rm->rm_col[CODE_P].rc_abd, + rm->rm_col[CODE_Q].rc_abd + }; + unsigned coeff[MUL_CNT]; raidz_rec_pq_coeff(rm, tgtidx, coeff); + /* + * Check if some of the targets are shorter than others. + * In this case, the shorter target needs to be replaced with + * a new buffer so that the syndrome can be calculated.
+ */ + if (ysize < xsize) { + yabd = abd_alloc(xsize, B_FALSE); + tabds[1] = yabd; + } + raidz_math_begin(); - /* 0 - short_size */ - REC_PQ_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE); + /* Start with first data column if present */ + if (firstdc != x) { + raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + } else { + raidz_zero(xabd, xsize); + raidz_zero(yabd, xsize); + } - /* short_size - xsize */ - REC_PQ_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols, - xsize == ysize); + /* generate q_syndrome */ + for (c = firstdc+1; c < ncols; c++) { + if (c == x || c == y) { + dabd = NULL; + dsize = 0; + } else { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; + } + + abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, + raidz_syn_pq_abd); + } + + abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff); + + /* Copy shorter targets back to the original abd buffer */ + if (ysize < xsize) + raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); raidz_math_end(); + if (ysize < xsize) + abd_free(yabd); + return ((1 << CODE_P) | (1 << CODE_Q)); } + /* - * Reconstruct using PR parity + * Generate P and R syndromes + * + * @xc array of pointers to syndrome columns + * @dc data column (NULL if missing) + * @tsize size of syndrome columns + * @dsize size of data column (0 if missing) */ +static void +raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize, + const size_t dsize) +{ + v_t *x = (v_t *) c[TARGET_X]; + v_t *y = (v_t *) c[TARGET_Y]; + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); + const v_t * const yend = y + (tsize / sizeof (v_t)); + + SYN_PR_DEFINE(); -#define REC_PR_SYN_UPDATE() MUL4(REC_PR_Y) -#define REC_PR_INNER_LOOP(c) \ -{ \ - col = &rm->rm_col[c]; \ - LOAD(COL_OFF(col, ioff), REC_PR_D); \ - REC_PR_SYN_UPDATE(); \ - XOR(REC_PR_D, REC_PR_X); \ - XOR(REC_PR_D, REC_PR_Y); \ + MUL2_SETUP(); + + for (; d < dend; d += 
SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) { + LOAD(d, SYN_PR_D); + P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x); + R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y); + } + for (; y < yend; y += SYN_STRIDE) { + R_SYNDROME(SYN_PR_X, y); + } } /* - * Reconstruction using PR parity - * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @y missing data column - * @coeff multiplication coefficients - * @ncols number of column - * @nbigcols number of big columns - * @calcy calculate second data column + * Reconstruct data using PR parity and PR syndromes + * + * @tc syndrome/result columns + * @tsize size of syndrome/result columns + * @c parity columns + * @mul array of multiplication constants */ -static raidz_inline void -REC_PR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const int y, const unsigned *coeff, const int ncols, - const int nbigcols, const boolean_t calcy) +static void +raidz_rec_pr_abd(void **t, const size_t tsize, void **c, + const unsigned *mul) { - int c; - size_t ioff; - const size_t firstdc = raidz_parity(rm); - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t * const rcol = raidz_col_p(rm, CODE_R); - raidz_col_t * const xcol = raidz_col_p(rm, x); - raidz_col_t * const ycol = raidz_col_p(rm, y); - raidz_col_t *col; + v_t *x = (v_t *) t[TARGET_X]; + v_t *y = (v_t *) t[TARGET_Y]; + const v_t * const xend = x + (tsize / sizeof (v_t)); + const v_t *p = (v_t *) c[CODE_P]; + const v_t *q = (v_t *) c[CODE_Q]; REC_PR_DEFINE(); - for (ioff = off; ioff < end; ioff += (REC_PR_STRIDE * sizeof (v_t))) { - LOAD(COL_OFF(pcol, ioff), REC_PR_X); - ZERO(REC_PR_Y); - MUL2_SETUP(); - - if (ncols == nbigcols) { - for (c = firstdc; c < x; c++) - REC_PR_INNER_LOOP(c); - - REC_PR_SYN_UPDATE(); - for (c++; c < y; c++) - REC_PR_INNER_LOOP(c); - - REC_PR_SYN_UPDATE(); - for (c++; c < nbigcols; c++) - REC_PR_INNER_LOOP(c); - } else { - for (c = firstdc; c < nbigcols; c++) { - REC_PR_SYN_UPDATE(); - if 
(c != x && c != y) { - col = &rm->rm_col[c]; - LOAD(COL_OFF(col, ioff), REC_PR_D); - XOR(REC_PR_D, REC_PR_X); - XOR(REC_PR_D, REC_PR_Y); - } - } - for (; c < ncols; c++) - REC_PR_SYN_UPDATE(); - } - - XOR_ACC(COL_OFF(rcol, ioff), REC_PR_Y); + for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE, + p += REC_PR_STRIDE, q += REC_PR_STRIDE) { + LOAD(x, REC_PR_X); + LOAD(y, REC_PR_Y); + XOR_ACC(p, REC_PR_X); + XOR_ACC(q, REC_PR_Y); /* Save Pxy */ - COPY(REC_PR_X, REC_PR_D); + COPY(REC_PR_X, REC_PR_T); /* Calc X */ - MUL(coeff[MUL_PR_X], REC_PR_X); - MUL(coeff[MUL_PR_Y], REC_PR_Y); + MUL(mul[MUL_PR_X], REC_PR_X); + MUL(mul[MUL_PR_Y], REC_PR_Y); XOR(REC_PR_Y, REC_PR_X); - STORE(COL_OFF(xcol, ioff), REC_PR_X); + STORE(x, REC_PR_X); - if (calcy) { - /* Calc Y */ - XOR(REC_PR_D, REC_PR_X); - STORE(COL_OFF(ycol, ioff), REC_PR_X); - } + /* Calc Y */ + XOR(REC_PR_T, REC_PR_X); + STORE(y, REC_PR_X); } } /* * Reconstruct two data columns using PR parity - * @rec_method REC_PR_BLOCK() + * + * @syn_method raidz_syn_pr_abd() + * @rec_method raidz_rec_pr_abd() * * @rm RAIDZ map * @tgtidx array of missing data indexes @@ -881,134 +1044,162 @@ REC_PR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, static raidz_inline int raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx) { - const int x = tgtidx[TARGET_X]; - const int y = tgtidx[TARGET_Y]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t ysize = raidz_col_size(rm, y); - const size_t short_size = raidz_short_size(rm); + size_t c; + size_t dsize; + abd_t *dabd; + const size_t firstdc = raidz_parity(rm); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + const size_t y = tgtidx[TARGET_Y]; + const size_t xsize = rm->rm_col[x].rc_size; + const size_t ysize = rm->rm_col[y].rc_size; + abd_t *xabd = rm->rm_col[x].rc_abd; + abd_t *yabd = rm->rm_col[y].rc_abd; + abd_t *tabds[2] = { xabd, yabd }; + abd_t
*cabds[] = { + rm->rm_col[CODE_P].rc_abd, + rm->rm_col[CODE_R].rc_abd + }; unsigned coeff[MUL_CNT]; - raidz_rec_pr_coeff(rm, tgtidx, coeff); + /* + * Check if some of targets are shorter than others. + * They need to be replaced with a new buffer so that syndrome can + * be calculated on full length. + */ + if (ysize < xsize) { + yabd = abd_alloc(xsize, B_FALSE); + tabds[1] = yabd; + } + raidz_math_begin(); - /* 0 - short_size */ - REC_PR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE); + /* Start with first data column if present */ + if (firstdc != x) { + raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + } else { + raidz_zero(xabd, xsize); + raidz_zero(yabd, xsize); + } + + /* generate P and R syndromes */ + for (c = firstdc+1; c < ncols; c++) { + if (c == x || c == y) { + dabd = NULL; + dsize = 0; + } else { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; + } + + abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, + raidz_syn_pr_abd); + } + + abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff); - /* short_size - xsize */ - REC_PR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols, - xsize == ysize); + /* + * Copy shorter targets back to the original abd buffer + */ + if (ysize < xsize) + raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); raidz_math_end(); - return ((1 << CODE_P) | (1 << CODE_R)); + if (ysize < xsize) + abd_free(yabd); + + return ((1 << CODE_P) | (1 << CODE_R)); } /* - * Reconstruct using QR parity + * Generate Q and R syndromes + * + * @xc array of pointers to syndrome columns + * @dc data column (NULL if missing) + * @tsize size of syndrome columns + * @dsize size of data column (0 if missing) */ +static void +raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize, + const size_t dsize) +{ + v_t *x = (v_t *) c[TARGET_X]; + v_t *y = (v_t *) c[TARGET_Y]; + const v_t * const xend = x + (tsize / sizeof (v_t)); + const v_t *d = (v_t *) dc; +
const v_t * const dend = d + (dsize / sizeof (v_t)); -#define REC_QR_SYN_UPDATE() \ -{ \ - MUL2(REC_QR_X); \ - MUL4(REC_QR_Y); \ -} + SYN_QR_DEFINE(); + + MUL2_SETUP(); -#define REC_QR_INNER_LOOP(c) \ -{ \ - col = &rm->rm_col[c]; \ - LOAD(COL_OFF(col, ioff), REC_QR_D); \ - REC_QR_SYN_UPDATE(); \ - XOR(REC_QR_D, REC_QR_X); \ - XOR(REC_QR_D, REC_QR_Y); \ + for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) { + LOAD(d, SYN_QR_D); + Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x); + R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y); + } + for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) { + Q_SYNDROME(SYN_QR_X, x); + R_SYNDROME(SYN_QR_X, y); + } } + /* - * Reconstruction using QR parity - * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @y missing data column - * @coeff multiplication coefficients - * @ncols number of column - * @nbigcols number of big columns - * @calcy calculate second data column + * Reconstruct data using QR parity and QR syndromes + * + * @tc syndrome/result columns + * @tsize size of syndrome/result columns + * @c parity columns + * @mul array of multiplication constants */ -static raidz_inline void -REC_QR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const int y, const unsigned *coeff, const int ncols, - const int nbigcols, const boolean_t calcy) +static void +raidz_rec_qr_abd(void **t, const size_t tsize, void **c, + const unsigned *mul) { - int c; - size_t ioff; - const size_t firstdc = raidz_parity(rm); - raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q); - raidz_col_t * const rcol = raidz_col_p(rm, CODE_R); - raidz_col_t * const xcol = raidz_col_p(rm, x); - raidz_col_t * const ycol = raidz_col_p(rm, y); - raidz_col_t *col; + v_t *x = (v_t *) t[TARGET_X]; + v_t *y = (v_t *) t[TARGET_Y]; + const v_t * const xend = x + (tsize / sizeof (v_t)); + const v_t *p = (v_t *) c[CODE_P]; + const v_t *q = (v_t *) c[CODE_Q]; REC_QR_DEFINE(); - for (ioff = off; ioff < end; ioff +=
(REC_QR_STRIDE * sizeof (v_t))) { - MUL2_SETUP(); - ZERO(REC_QR_X); - ZERO(REC_QR_Y); - - if (ncols == nbigcols) { - for (c = firstdc; c < x; c++) - REC_QR_INNER_LOOP(c); + for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE, + p += REC_QR_STRIDE, q += REC_QR_STRIDE) { + LOAD(x, REC_QR_X); + LOAD(y, REC_QR_Y); - REC_QR_SYN_UPDATE(); - for (c++; c < y; c++) - REC_QR_INNER_LOOP(c); + XOR_ACC(p, REC_QR_X); + XOR_ACC(q, REC_QR_Y); - REC_QR_SYN_UPDATE(); - for (c++; c < nbigcols; c++) - REC_QR_INNER_LOOP(c); - } else { - for (c = firstdc; c < nbigcols; c++) { - REC_QR_SYN_UPDATE(); - if (c != x && c != y) { - col = &rm->rm_col[c]; - LOAD(COL_OFF(col, ioff), REC_QR_D); - XOR(REC_QR_D, REC_QR_X); - XOR(REC_QR_D, REC_QR_Y); - } - } - for (; c < ncols; c++) - REC_QR_SYN_UPDATE(); - } - - XOR_ACC(COL_OFF(qcol, ioff), REC_QR_X); - XOR_ACC(COL_OFF(rcol, ioff), REC_QR_Y); - - /* Save Qxy */ - COPY(REC_QR_X, REC_QR_D); + /* Save Qxy */ + COPY(REC_QR_X, REC_QR_T); /* Calc X */ - MUL(coeff[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */ - XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */ - MUL(coeff[MUL_QR_X], REC_QR_X); /* X = X * xm */ - STORE(COL_OFF(xcol, ioff), REC_QR_X); - - if (calcy) { - /* Calc Y */ - MUL(coeff[MUL_QR_YQ], REC_QR_D); /* X = Q * xqm */ - XOR(REC_QR_Y, REC_QR_D); /* X = R ^ X */ - MUL(coeff[MUL_QR_Y], REC_QR_D); /* X = X * xm */ - STORE(COL_OFF(ycol, ioff), REC_QR_D); - } + MUL(mul[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */ + XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */ + MUL(mul[MUL_QR_X], REC_QR_X); /* X = X * xm */ + STORE(x, REC_QR_X); + + /* Calc Y */ + MUL(mul[MUL_QR_YQ], REC_QR_T); /* Y = Q * yqm */ + XOR(REC_QR_Y, REC_QR_T); /* Y = R ^ Y */ + MUL(mul[MUL_QR_Y], REC_QR_T); /* Y = Y * ym */ + STORE(y, REC_QR_T); } } + /* * Reconstruct two data columns using QR parity - * @rec_method REC_QR_BLOCK() + * + * @syn_method raidz_syn_qr_abd() + * @rec_method raidz_rec_qr_abd() * * @rm RAIDZ map * @tgtidx array of missing data indexes @@ -1016,158 +1207,182 @@
REC_QR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, static raidz_inline int raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx) { - const int x = tgtidx[TARGET_X]; - const int y = tgtidx[TARGET_Y]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t ysize = raidz_col_size(rm, y); - const size_t short_size = raidz_short_size(rm); + size_t c; + size_t dsize; + abd_t *dabd; + const size_t firstdc = raidz_parity(rm); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + const size_t y = tgtidx[TARGET_Y]; + const size_t xsize = rm->rm_col[x].rc_size; + const size_t ysize = rm->rm_col[y].rc_size; + abd_t *xabd = rm->rm_col[x].rc_abd; + abd_t *yabd = rm->rm_col[y].rc_abd; + abd_t *tabds[2] = { xabd, yabd }; + abd_t *cabds[] = { + rm->rm_col[CODE_Q].rc_abd, + rm->rm_col[CODE_R].rc_abd + }; unsigned coeff[MUL_CNT]; - raidz_rec_qr_coeff(rm, tgtidx, coeff); + /* + * Check if some of the targets are shorter than others. + * In this case, the shorter target needs to be replaced with + * a new buffer so that the syndrome can be calculated.
+ */ + if (ysize < xsize) { + yabd = abd_alloc(xsize, B_FALSE); + tabds[1] = yabd; + } + raidz_math_begin(); - /* 0 - short_size */ - REC_QR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE); + /* Start with first data column if present */ + if (firstdc != x) { + raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + } else { + raidz_zero(xabd, xsize); + raidz_zero(yabd, xsize); + } - /* short_size - xsize */ - REC_QR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols, - xsize == ysize); + /* generate q_syndrome */ + for (c = firstdc+1; c < ncols; c++) { + if (c == x || c == y) { + dabd = NULL; + dsize = 0; + } else { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; + } + + abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, + raidz_syn_qr_abd); + } + + abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff); + + /* + * Copy shorter targets back to the original abd buffer + */ + if (ysize < xsize) + raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); raidz_math_end(); + if (ysize < xsize) + abd_free(yabd); + + return ((1 << CODE_Q) | (1 << CODE_R)); } + /* - * Reconstruct using PQR parity + * Generate P, Q, and R syndromes + * + * @xc array of pointers to syndrome columns + * @dc data column (NULL if missing) + * @tsize size of syndrome columns + * @dsize size of data column (0 if missing) */ +static void +raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize, + const size_t dsize) +{ + v_t *x = (v_t *) c[TARGET_X]; + v_t *y = (v_t *) c[TARGET_Y]; + v_t *z = (v_t *) c[TARGET_Z]; + const v_t * const yend = y + (tsize / sizeof (v_t)); + const v_t *d = (v_t *) dc; + const v_t * const dend = d + (dsize / sizeof (v_t)); -#define REC_PQR_SYN_UPDATE() \ -{ \ - MUL2(REC_PQR_Y); \ - MUL4(REC_PQR_Z); \ -} + SYN_PQR_DEFINE(); -#define REC_PQR_INNER_LOOP(c) \ -{ \ - col = &rm->rm_col[(c)]; \ - LOAD(COL_OFF(col, ioff), REC_PQR_D); \ - REC_PQR_SYN_UPDATE(); \ - 
XOR(REC_PQR_D, REC_PQR_X); \ - XOR(REC_PQR_D, REC_PQR_Y); \ - XOR(REC_PQR_D, REC_PQR_Z); \ + MUL2_SETUP(); + + for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE, + z += SYN_STRIDE) { + LOAD(d, SYN_PQR_D); + P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x) + Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y); + R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z); + } + for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) { + Q_SYNDROME(SYN_PQR_X, y); + R_SYNDROME(SYN_PQR_X, z); + } } + /* - * Reconstruction using PQR parity - * @rm RAIDZ map - * @off starting offset - * @end ending offset - * @x missing data column - * @y missing data column - * @z missing data column - * @coeff multiplication coefficients - * @ncols number of column - * @nbigcols number of big columns - * @calcy calculate second data column - * @calcz calculate third data column + * Reconstruct data using PQR parity and PQR syndromes + * + * @tc syndrome/result columns + * @tsize size of syndrome/result columns + * @c parity columns + * @mul array of multiplication constants */ -static raidz_inline void -REC_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, - const int x, const int y, const int z, const unsigned *coeff, - const int ncols, const int nbigcols, const boolean_t calcy, - const boolean_t calcz) +static void +raidz_rec_pqr_abd(void **t, const size_t tsize, void **c, + const unsigned * const mul) { - int c; - size_t ioff; - const size_t firstdc = raidz_parity(rm); - raidz_col_t * const pcol = raidz_col_p(rm, CODE_P); - raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q); - raidz_col_t * const rcol = raidz_col_p(rm, CODE_R); - raidz_col_t * const xcol = raidz_col_p(rm, x); - raidz_col_t * const ycol = raidz_col_p(rm, y); - raidz_col_t * const zcol = raidz_col_p(rm, z); - raidz_col_t *col; + v_t *x = (v_t *) t[TARGET_X]; + v_t *y = (v_t *) t[TARGET_Y]; + v_t *z = (v_t *) t[TARGET_Z]; + const v_t * const xend = x + (tsize / sizeof (v_t)); + const v_t *p = (v_t *) c[CODE_P]; + const v_t *q = (v_t
*) c[CODE_Q]; + const v_t *r = (v_t *) c[CODE_R]; REC_PQR_DEFINE(); - for (ioff = off; ioff < end; ioff += (REC_PQR_STRIDE * sizeof (v_t))) { - MUL2_SETUP(); - LOAD(COL_OFF(pcol, ioff), REC_PQR_X); - ZERO(REC_PQR_Y); - ZERO(REC_PQR_Z); + for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE, + z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE, + r += REC_PQR_STRIDE) { + LOAD(x, REC_PQR_X); + LOAD(y, REC_PQR_Y); + LOAD(z, REC_PQR_Z); - if (ncols == nbigcols) { - for (c = firstdc; c < x; c++) - REC_PQR_INNER_LOOP(c); - - REC_PQR_SYN_UPDATE(); - for (c++; c < y; c++) - REC_PQR_INNER_LOOP(c); - - REC_PQR_SYN_UPDATE(); - for (c++; c < z; c++) - REC_PQR_INNER_LOOP(c); - - REC_PQR_SYN_UPDATE(); - for (c++; c < nbigcols; c++) - REC_PQR_INNER_LOOP(c); - } else { - for (c = firstdc; c < nbigcols; c++) { - REC_PQR_SYN_UPDATE(); - if (c != x && c != y && c != z) { - col = &rm->rm_col[c]; - LOAD(COL_OFF(col, ioff), REC_PQR_D); - XOR(REC_PQR_D, REC_PQR_X); - XOR(REC_PQR_D, REC_PQR_Y); - XOR(REC_PQR_D, REC_PQR_Z); - } - } - for (; c < ncols; c++) - REC_PQR_SYN_UPDATE(); - } - - XOR_ACC(COL_OFF(qcol, ioff), REC_PQR_Y); - XOR_ACC(COL_OFF(rcol, ioff), REC_PQR_Z); + XOR_ACC(p, REC_PQR_X); + XOR_ACC(q, REC_PQR_Y); + XOR_ACC(r, REC_PQR_Z); /* Save Pxyz and Qxyz */ COPY(REC_PQR_X, REC_PQR_XS); COPY(REC_PQR_Y, REC_PQR_YS); /* Calc X */ - MUL(coeff[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */ - MUL(coeff[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */ + MUL(mul[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */ + MUL(mul[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */ XOR(REC_PQR_Y, REC_PQR_X); - MUL(coeff[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */ + MUL(mul[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */ XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */ - STORE(COL_OFF(xcol, ioff), REC_PQR_X); - - if (calcy) { - /* Calc Y */ - XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */ - MUL(coeff[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */ - XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + 
Xq */ - COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */ - MUL(coeff[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */ - MUL(coeff[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */ - XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */ - STORE(COL_OFF(ycol, ioff), REC_PQR_YS); - } - - if (calcz) { - /* Calc Z */ - XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */ - STORE(COL_OFF(zcol, ioff), REC_PQR_YS); - } + STORE(x, REC_PQR_X); + + /* Calc Y */ + XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */ + MUL(mul[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */ + XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */ + COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */ + MUL(mul[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */ + MUL(mul[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */ + XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */ + STORE(y, REC_PQR_YS); + + /* Calc Z */ + XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */ + STORE(z, REC_PQR_YS); } } + /* * Reconstruct three data columns using PQR parity - * @rec_method REC_PQR_BLOCK() + * + * @syn_method raidz_syn_pqr_abd() + * @rec_method raidz_rec_pqr_abd() * * @rm RAIDZ map * @tgtidx array of missing data indexes @@ -1175,31 +1390,87 @@ REC_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end, static raidz_inline int raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx) { - const int x = tgtidx[TARGET_X]; - const int y = tgtidx[TARGET_Y]; - const int z = tgtidx[TARGET_Z]; - const int ncols = raidz_ncols(rm); - const int nbigcols = raidz_nbigcols(rm); - const size_t xsize = raidz_col_size(rm, x); - const size_t ysize = raidz_col_size(rm, y); - const size_t zsize = raidz_col_size(rm, z); - const size_t short_size = raidz_short_size(rm); + size_t c; + size_t dsize; + abd_t *dabd; + const size_t firstdc = raidz_parity(rm); + const size_t ncols = raidz_ncols(rm); + const size_t x = tgtidx[TARGET_X]; + const size_t y = tgtidx[TARGET_Y]; + const size_t z = tgtidx[TARGET_Z]; + const size_t xsize = rm->rm_col[x].rc_size; + const 
size_t ysize = rm->rm_col[y].rc_size; + const size_t zsize = rm->rm_col[z].rc_size; + abd_t *xabd = rm->rm_col[x].rc_abd; + abd_t *yabd = rm->rm_col[y].rc_abd; + abd_t *zabd = rm->rm_col[z].rc_abd; + abd_t *tabds[] = { xabd, yabd, zabd }; + abd_t *cabds[] = { + rm->rm_col[CODE_P].rc_abd, + rm->rm_col[CODE_Q].rc_abd, + rm->rm_col[CODE_R].rc_abd + }; unsigned coeff[MUL_CNT]; - raidz_rec_pqr_coeff(rm, tgtidx, coeff); + /* + * Check if some of the targets are shorter than others. + * In this case, the shorter target needs to be replaced with + * a new buffer so that the syndrome can be calculated. + */ + if (ysize < xsize) { + yabd = abd_alloc(xsize, B_FALSE); + tabds[1] = yabd; + } + if (zsize < xsize) { + zabd = abd_alloc(xsize, B_FALSE); + tabds[2] = zabd; + } + raidz_math_begin(); - /* 0 - short_size */ - REC_PQR_BLOCK(rm, 0, short_size, x, y, z, coeff, ncols, ncols, - B_TRUE, B_TRUE); + /* Start with first data column if present */ + if (firstdc != x) { + raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize); + } else { + raidz_zero(xabd, xsize); + raidz_zero(yabd, xsize); + raidz_zero(zabd, xsize); + } + + /* generate q_syndrome */ + for (c = firstdc+1; c < ncols; c++) { + if (c == x || c == y || c == z) { + dabd = NULL; + dsize = 0; + } else { + dabd = rm->rm_col[c].rc_abd; + dsize = rm->rm_col[c].rc_size; + } - /* short_size - xsize */ - REC_PQR_BLOCK(rm, short_size, xsize, x, y, z, coeff, ncols, nbigcols, - xsize == ysize, xsize == zsize); + abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3, + raidz_syn_pqr_abd); + } + + abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff); + + /* + * Copy shorter targets back to the original abd buffer + */ + if (ysize < xsize) + raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); + if (zsize < xsize) + raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize); raidz_math_end(); + if (ysize < xsize) + abd_free(yabd); + if (zsize <
xsize) + abd_free(zabd); + return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R)); } diff --git a/module/zfs/vdev_raidz_math_scalar.c b/module/zfs/vdev_raidz_math_scalar.c index 1d782b633..a693bff63 100644 --- a/module/zfs/vdev_raidz_math_scalar.c +++ b/module/zfs/vdev_raidz_math_scalar.c @@ -24,6 +24,7 @@ */ #include <sys/vdev_raidz_impl.h> + /* * Provide native CPU scalar routines. * Support 32bit and 64bit CPUs. @@ -153,71 +154,96 @@ static const struct { #define raidz_math_begin() {} #define raidz_math_end() {} -#define GEN_P_DEFINE() v_t p0 -#define GEN_P_STRIDE 1 -#define GEN_P_P p0 - -#define GEN_PQ_DEFINE() v_t d0, p0, q0 -#define GEN_PQ_STRIDE 1 -#define GEN_PQ_D d0 -#define GEN_PQ_P p0 -#define GEN_PQ_Q q0 - -#define GEN_PQR_DEFINE() v_t d0, p0, q0, r0 -#define GEN_PQR_STRIDE 1 -#define GEN_PQR_D d0 -#define GEN_PQR_P p0 -#define GEN_PQR_Q q0 -#define GEN_PQR_R r0 - -#define REC_P_DEFINE() v_t x0 -#define REC_P_STRIDE 1 -#define REC_P_X x0 - -#define REC_Q_DEFINE() v_t x0 -#define REC_Q_STRIDE 1 -#define REC_Q_X x0 - -#define REC_R_DEFINE() v_t x0 -#define REC_R_STRIDE 1 -#define REC_R_X x0 - -#define REC_PQ_DEFINE() v_t x0, y0, d0 -#define REC_PQ_STRIDE 1 -#define REC_PQ_X x0 -#define REC_PQ_Y y0 -#define REC_PQ_D d0 - -#define REC_PR_DEFINE() v_t x0, y0, d0 -#define REC_PR_STRIDE 1 -#define REC_PR_X x0 -#define REC_PR_Y y0 -#define REC_PR_D d0 - -#define REC_QR_DEFINE() v_t x0, y0, d0 -#define REC_QR_STRIDE 1 -#define REC_QR_X x0 -#define REC_QR_Y y0 -#define REC_QR_D d0 - -#define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0 -#define REC_PQR_STRIDE 1 -#define REC_PQR_X x0 -#define REC_PQR_Y y0 -#define REC_PQR_Z z0 -#define REC_PQR_D d0 -#define REC_PQR_XS d0 -#define REC_PQR_YS t0 +#define SYN_STRIDE 1 -#include "vdev_raidz_math_impl.h" +#define ZERO_DEFINE() v_t d0 +#define ZERO_STRIDE 1 +#define ZERO_D d0 -/* - * If compiled with -O0, gcc doesn't do any stack frame coalescing - * and -Wframe-larger-than=1024 is triggered in debug mode. 
- * Starting with gcc 4.8, new opt level -Og is introduced for debugging, which - * does not trigger this warning. - */ -#pragma GCC diagnostic ignored "-Wframe-larger-than=" +#define COPY_DEFINE() v_t d0 +#define COPY_STRIDE 1 +#define COPY_D d0 + +#define ADD_DEFINE() v_t d0 +#define ADD_STRIDE 1 +#define ADD_D d0 + +#define MUL_DEFINE() v_t d0 +#define MUL_STRIDE 1 +#define MUL_D d0 + +#define GEN_P_STRIDE 1 +#define GEN_P_DEFINE() v_t p0 +#define GEN_P_P p0 + +#define GEN_PQ_STRIDE 1 +#define GEN_PQ_DEFINE() v_t d0, c0 +#define GEN_PQ_D d0 +#define GEN_PQ_C c0 + +#define GEN_PQR_STRIDE 1 +#define GEN_PQR_DEFINE() v_t d0, c0 +#define GEN_PQR_D d0 +#define GEN_PQR_C c0 + +#define SYN_Q_DEFINE() v_t d0, x0 +#define SYN_Q_D d0 +#define SYN_Q_X x0 + + +#define SYN_R_DEFINE() v_t d0, x0 +#define SYN_R_D d0 +#define SYN_R_X x0 + + +#define SYN_PQ_DEFINE() v_t d0, x0 +#define SYN_PQ_D d0 +#define SYN_PQ_X x0 + + +#define REC_PQ_STRIDE 1 +#define REC_PQ_DEFINE() v_t x0, y0, t0 +#define REC_PQ_X x0 +#define REC_PQ_Y y0 +#define REC_PQ_T t0 + + +#define SYN_PR_DEFINE() v_t d0, x0 +#define SYN_PR_D d0 +#define SYN_PR_X x0 + +#define REC_PR_STRIDE 1 +#define REC_PR_DEFINE() v_t x0, y0, t0 +#define REC_PR_X x0 +#define REC_PR_Y y0 +#define REC_PR_T t0 + + +#define SYN_QR_DEFINE() v_t d0, x0 +#define SYN_QR_D d0 +#define SYN_QR_X x0 + + +#define REC_QR_STRIDE 1 +#define REC_QR_DEFINE() v_t x0, y0, t0 +#define REC_QR_X x0 +#define REC_QR_Y y0 +#define REC_QR_T t0 + + +#define SYN_PQR_DEFINE() v_t d0, x0 +#define SYN_PQR_D d0 +#define SYN_PQR_X x0 + +#define REC_PQR_STRIDE 1 +#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0 +#define REC_PQR_X x0 +#define REC_PQR_Y y0 +#define REC_PQR_Z z0 +#define REC_PQR_XS xs0 +#define REC_PQR_YS ys0 + +#include "vdev_raidz_math_impl.h" DEFINE_GEN_METHODS(scalar); DEFINE_REC_METHODS(scalar); diff --git a/module/zfs/vdev_raidz_math_sse2.c b/module/zfs/vdev_raidz_math_sse2.c index 6fc81215a..97ddfc989 100644 --- 
a/module/zfs/vdev_raidz_math_sse2.c +++ b/module/zfs/vdev_raidz_math_sse2.c @@ -236,6 +236,10 @@ typedef struct v { #define MUL2(r...) \ { \ switch (REG_CNT(r)) { \ + case 4: \ + _MUL2_x2(VR0(r), VR1(r)); \ + _MUL2_x2(VR2(r), VR3(r)); \ + break; \ case 2: \ _MUL2_x2(VR0(r), VR1(r)); \ break; \ @@ -271,8 +275,8 @@ typedef struct v { if (x & 0x80) { MUL2(in); XOR(in, acc); } \ } -#define _mul_x1_in 9 -#define _mul_x1_acc 11 +#define _mul_x1_in 11 +#define _mul_x1_acc 12 #define MUL_x1_DEFINE(x) \ static void \ @@ -533,61 +537,87 @@ gf_x2_mul_fns[256] = { #define raidz_math_begin() kfpu_begin() #define raidz_math_end() kfpu_end() -#define GEN_P_DEFINE() {} +#define SYN_STRIDE 4 + +#define ZERO_STRIDE 4 +#define ZERO_DEFINE() {} +#define ZERO_D 0, 1, 2, 3 + +#define COPY_STRIDE 4 +#define COPY_DEFINE() {} +#define COPY_D 0, 1, 2, 3 + +#define ADD_STRIDE 4 +#define ADD_DEFINE() {} +#define ADD_D 0, 1, 2, 3 + +#define MUL_STRIDE 2 +#define MUL_DEFINE() {} +#define MUL_D 0, 1 + #define GEN_P_STRIDE 4 +#define GEN_P_DEFINE() {} #define GEN_P_P 0, 1, 2, 3 +#define GEN_PQ_STRIDE 4 #define GEN_PQ_DEFINE() {} -#define GEN_PQ_STRIDE 2 -#define GEN_PQ_D 0, 1 -#define GEN_PQ_P 2, 3 -#define GEN_PQ_Q 4, 5 +#define GEN_PQ_D 0, 1, 2, 3 +#define GEN_PQ_C 4, 5, 6, 7 +#define GEN_PQR_STRIDE 4 #define GEN_PQR_DEFINE() {} -#define GEN_PQR_STRIDE 2 -#define GEN_PQR_D 0, 1 -#define GEN_PQR_P 2, 3 -#define GEN_PQR_Q 4, 5 -#define GEN_PQR_R 6, 7 +#define GEN_PQR_D 0, 1, 2, 3 +#define GEN_PQR_C 4, 5, 6, 7 -#define REC_P_DEFINE() {} -#define REC_P_STRIDE 4 -#define REC_P_X 0, 1, 2, 3 +#define SYN_Q_DEFINE() {} +#define SYN_Q_D 0, 1, 2, 3 +#define SYN_Q_X 4, 5, 6, 7 -#define REC_Q_DEFINE() {} -#define REC_Q_STRIDE 2 -#define REC_Q_X 0, 1 +#define SYN_R_DEFINE() {} +#define SYN_R_D 0, 1, 2, 3 +#define SYN_R_X 4, 5, 6, 7 -#define REC_R_DEFINE() {} -#define REC_R_STRIDE 2 -#define REC_R_X 0, 1 +#define SYN_PQ_DEFINE() {} +#define SYN_PQ_D 0, 1, 2, 3 +#define SYN_PQ_X 4, 5, 6, 7 -#define 
REC_PQ_DEFINE() {} #define REC_PQ_STRIDE 2 +#define REC_PQ_DEFINE() {} #define REC_PQ_X 0, 1 #define REC_PQ_Y 2, 3 -#define REC_PQ_D 4, 5 +#define REC_PQ_T 4, 5 + +#define SYN_PR_DEFINE() {} +#define SYN_PR_D 0, 1, 2, 3 +#define SYN_PR_X 4, 5, 6, 7 -#define REC_PR_DEFINE() {} #define REC_PR_STRIDE 2 +#define REC_PR_DEFINE() {} #define REC_PR_X 0, 1 #define REC_PR_Y 2, 3 -#define REC_PR_D 4, 5 +#define REC_PR_T 4, 5 + +#define SYN_QR_DEFINE() {} +#define SYN_QR_D 0, 1, 2, 3 +#define SYN_QR_X 4, 5, 6, 7 -#define REC_QR_DEFINE() {} #define REC_QR_STRIDE 2 +#define REC_QR_DEFINE() {} #define REC_QR_X 0, 1 #define REC_QR_Y 2, 3 -#define REC_QR_D 4, 5 +#define REC_QR_T 4, 5 + +#define SYN_PQR_DEFINE() {} +#define SYN_PQR_D 0, 1, 2, 3 +#define SYN_PQR_X 4, 5, 6, 7 -#define REC_PQR_DEFINE() {} #define REC_PQR_STRIDE 1 +#define REC_PQR_DEFINE() {} #define REC_PQR_X 0 #define REC_PQR_Y 1 #define REC_PQR_Z 2 -#define REC_PQR_D 3 -#define REC_PQR_XS 4 -#define REC_PQR_YS 5 +#define REC_PQR_XS 3 +#define REC_PQR_YS 4 #include <sys/vdev_raidz_impl.h> diff --git a/module/zfs/vdev_raidz_math_ssse3.c b/module/zfs/vdev_raidz_math_ssse3.c index 81f1b9a07..d8fa8fb82 100644 --- a/module/zfs/vdev_raidz_math_ssse3.c +++ b/module/zfs/vdev_raidz_math_ssse3.c @@ -337,59 +337,86 @@ typedef struct v { #define raidz_math_begin() kfpu_begin() #define raidz_math_end() kfpu_end() -#define GEN_P_DEFINE() {} + +#define SYN_STRIDE 4 + +#define ZERO_STRIDE 4 +#define ZERO_DEFINE() {} +#define ZERO_D 0, 1, 2, 3 + +#define COPY_STRIDE 4 +#define COPY_DEFINE() {} +#define COPY_D 0, 1, 2, 3 + +#define ADD_STRIDE 4 +#define ADD_DEFINE() {} +#define ADD_D 0, 1, 2, 3 + +#define MUL_STRIDE 4 +#define MUL_DEFINE() {} +#define MUL_D 0, 1, 2, 3 + #define GEN_P_STRIDE 4 +#define GEN_P_DEFINE() {} #define GEN_P_P 0, 1, 2, 3 -#define GEN_PQ_DEFINE() {} #define GEN_PQ_STRIDE 4 +#define GEN_PQ_DEFINE() {} #define GEN_PQ_D 0, 1, 2, 3 -#define GEN_PQ_P 4, 5, 6, 7 -#define GEN_PQ_Q 8, 9, 10, 11 +#define GEN_PQ_C 4, 5, 
6, 7 +#define GEN_PQR_STRIDE 4 #define GEN_PQR_DEFINE() {} -#define GEN_PQR_STRIDE 2 -#define GEN_PQR_D 0, 1 -#define GEN_PQR_P 2, 3 -#define GEN_PQR_Q 4, 5 -#define GEN_PQR_R 6, 7 +#define GEN_PQR_D 0, 1, 2, 3 +#define GEN_PQR_C 4, 5, 6, 7 -#define REC_P_DEFINE() {} -#define REC_P_STRIDE 4 -#define REC_P_X 0, 1, 2, 3 +#define SYN_Q_DEFINE() {} +#define SYN_Q_D 0, 1, 2, 3 +#define SYN_Q_X 4, 5, 6, 7 -#define REC_Q_DEFINE() {} -#define REC_Q_STRIDE 4 -#define REC_Q_X 0, 1, 2, 3 +#define SYN_R_DEFINE() {} +#define SYN_R_D 0, 1, 2, 3 +#define SYN_R_X 4, 5, 6, 7 -#define REC_R_DEFINE() {} -#define REC_R_STRIDE 4 -#define REC_R_X 0, 1, 2, 3 +#define SYN_PQ_DEFINE() {} +#define SYN_PQ_D 0, 1, 2, 3 +#define SYN_PQ_X 4, 5, 6, 7 -#define REC_PQ_DEFINE() {} #define REC_PQ_STRIDE 2 +#define REC_PQ_DEFINE() {} #define REC_PQ_X 0, 1 #define REC_PQ_Y 2, 3 -#define REC_PQ_D 4, 5 +#define REC_PQ_T 4, 5 + +#define SYN_PR_DEFINE() {} +#define SYN_PR_D 0, 1, 2, 3 +#define SYN_PR_X 4, 5, 6, 7 -#define REC_PR_DEFINE() {} #define REC_PR_STRIDE 2 +#define REC_PR_DEFINE() {} #define REC_PR_X 0, 1 #define REC_PR_Y 2, 3 -#define REC_PR_D 4, 5 +#define REC_PR_T 4, 5 + +#define SYN_QR_DEFINE() {} +#define SYN_QR_D 0, 1, 2, 3 +#define SYN_QR_X 4, 5, 6, 7 -#define REC_QR_DEFINE() {} #define REC_QR_STRIDE 2 +#define REC_QR_DEFINE() {} #define REC_QR_X 0, 1 #define REC_QR_Y 2, 3 -#define REC_QR_D 4, 5 +#define REC_QR_T 4, 5 + +#define SYN_PQR_DEFINE() {} +#define SYN_PQR_D 0, 1, 2, 3 +#define SYN_PQR_X 4, 5, 6, 7 -#define REC_PQR_DEFINE() {} #define REC_PQR_STRIDE 2 +#define REC_PQR_DEFINE() {} #define REC_PQR_X 0, 1 #define REC_PQR_Y 2, 3 #define REC_PQR_Z 4, 5 -#define REC_PQR_D 6, 7 #define REC_PQR_XS 6, 7 #define REC_PQR_YS 8, 9 @@ -403,13 +430,8 @@ DEFINE_REC_METHODS(ssse3); static boolean_t raidz_will_ssse3_work(void) { -/* ABD Bringup -- vector code not ready */ -#if 1 - return (B_FALSE); -#else return (zfs_sse_available() && zfs_sse2_available() && zfs_ssse3_available()); -#endif } const 
raidz_impl_ops_t vdev_raidz_ssse3_impl = { |