author | Gvozden Neskovic <[email protected]> | 2016-07-17 19:41:11 +0200
---|---|---
committer | Brian Behlendorf <[email protected]> | 2016-07-19 16:43:07 -0700
commit | c9187d867fee3972de48b71762407ae7dabb2563 (patch) |
tree | c1d7fdb7f5761cba9aa14f0743b8c3a57def392f /module |
parent | 1d9b3bd8fb2b633b7523d9f39149d76e24ffb535 (diff) |
Fixes and enhancements of SIMD raidz parity
- Implementation lock replaced with atomic variable
- Trailing whitespace is removed from the user-specified parameter, to improve the
experience when using commands that append a newline, e.g. `echo`
- raidz_test: remove dependency on `getrusage()` and RUSAGE_THREAD, Issue #4813
- silence `cppcheck` warnings in vdev_raidz, partially addressing Issue #1392
- Minor fixes and cleanups
- Enable use of original parity methods in the [fastest] configuration.
A new opaque original ops structure, representing the native methods, is added
to the supported raidz methods. The original parity methods are executed when the
selected implementation has a NULL fn pointer (see the sketch after the issue references below).
Signed-off-by: Gvozden Neskovic <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #4813
Issue #1392
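For readers skimming the diff below, the change boils down to two mechanisms: the active implementation is tracked in a plain 32-bit selector that readers load through a volatile cast (`RAIDZ_IMPL_READ`) and writers update with `atomic_swap_32()`, replacing the old rwlock; and the new opaque "original" entry is an ops table whose method pointers are left NULL, so the generate/reconstruct wrappers return a `RAIDZ_ORIGINAL_IMPL` marker and the caller falls back to the pre-SIMD code path. The following is a minimal userspace sketch of that pattern only — the struct layout, the C11 atomics, and the names `impl_ops_t`, `get_ops()` and `generate_parity()` are invented for illustration and are not the ZFS interfaces.

```c
#include <stdatomic.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel types; not the real ZFS definitions. */
typedef void (*gen_fn_t)(void);

typedef struct {
    const char *name;
    gen_fn_t gen_p;     /* NULL means "fall back to the original code" */
} impl_ops_t;

#define RAIDZ_ORIGINAL_IMPL (-1)

static void
scalar_gen_p(void)
{
    puts("scalar parity generation");
}

/* Index 0 plays the role of the opaque "original" entry: no methods set. */
static const impl_ops_t impls[] = {
    { .name = "original", .gen_p = NULL },
    { .name = "scalar", .gen_p = scalar_gen_p },
};

/* The selector replaces the rwlock: readers load it, writers swap it. */
static _Atomic unsigned int selected_impl = 1;  /* start on "scalar" */

static const impl_ops_t *
get_ops(void)
{
    unsigned int i = atomic_load_explicit(&selected_impl,
        memory_order_relaxed);
    return (&impls[i]);
}

/* Mirrors the new vdev_raidz_math_generate() contract: NULL method => marker. */
static int
generate_parity(void)
{
    const impl_ops_t *ops = get_ops();

    if (ops->gen_p == NULL)
        return (RAIDZ_ORIGINAL_IMPL);
    ops->gen_p();
    return (0);
}

int
main(void)
{
    if (generate_parity() == RAIDZ_ORIGINAL_IMPL)
        puts("falling back to original parity code");

    /* A writer (e.g. a parameter handler) just swaps the selector. */
    atomic_store_explicit(&selected_impl, 0, memory_order_relaxed);

    if (generate_parity() == RAIDZ_ORIGINAL_IMPL)
        puts("falling back to original parity code");
    return (0);
}
```

The benefit of this shape is that the hot path performs a single relaxed load instead of taking a reader/writer lock; a racing update of the selector at worst lets an in-flight request finish with the previously selected implementation.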
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/vdev_raidz.c | 24
-rw-r--r-- | module/zfs/vdev_raidz_math.c | 281
-rw-r--r-- | module/zfs/vdev_raidz_math_avx2.c | 8
-rw-r--r-- | module/zfs/vdev_raidz_math_scalar.c | 2
-rw-r--r-- | module/zfs/vdev_raidz_math_ssse3.c | 8
5 files changed, 173 insertions, 150 deletions
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index 7f17d18b6..b67de0896 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -458,8 +458,8 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
 	zio->io_vsd = rm;
 	zio->io_vsd_ops = &vdev_raidz_vsd_ops;
 
-	/* RAIDZ ops init */
-	vdev_raidz_math_get_ops(rm);
+	/* init RAIDZ parity ops */
+	rm->rm_ops = vdev_raidz_math_get_ops();
 
 	return (rm);
 }
@@ -611,10 +611,9 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
 void
 vdev_raidz_generate_parity(raidz_map_t *rm)
 {
-	if (rm->rm_ops) {
-		vdev_raidz_math_generate(rm);
+	/* Generate using the new math implementation */
+	if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL)
 		return;
-	}
 
 	switch (rm->rm_firstdatacol) {
 	case 1:
@@ -1284,7 +1283,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
 {
 	int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
 	int ntgts;
-	int i, c;
+	int i, c, ret;
 	int code;
 	int nbadparity, nbaddata;
 	int parity_valid[VDEV_RAIDZ_MAXPARITY];
@@ -1322,14 +1321,11 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
 
 	dt = &tgts[nbadparity];
 
-	/*
-	 * Reconstruct using the new math implementation if
-	 * rm_ops is set.
-	 */
-	if (rm->rm_ops) {
-		return (vdev_raidz_math_reconstruct(rm, parity_valid, dt,
-		    nbaddata));
-	}
+
+	/* Reconstruct using the new math implementation */
+	ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata);
+	if (ret != RAIDZ_ORIGINAL_IMPL)
+		return (ret);
 
 	/*
 	 * See if we can use any of our optimized reconstruction routines.
diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c
index d4bde37cc..c257fe7ee 100644
--- a/module/zfs/vdev_raidz_math.c
+++ b/module/zfs/vdev_raidz_math.c
@@ -31,8 +31,22 @@
 #include <sys/vdev_raidz.h>
 #include <sys/vdev_raidz_impl.h>
 
+extern boolean_t raidz_will_scalar_work(void);
+
+/* Opaque implementation with NULL methods to represent original methods */
+static const raidz_impl_ops_t vdev_raidz_original_impl = {
+	.name = "original",
+	.is_supported = raidz_will_scalar_work,
+};
+
+/* RAIDZ parity op that contain the fastest methods */
+static raidz_impl_ops_t vdev_raidz_fastest_impl = {
+	.name = "fastest"
+};
+
 /* All compiled in implementations */
 const raidz_impl_ops_t *raidz_all_maths[] = {
+	&vdev_raidz_original_impl,
 	&vdev_raidz_scalar_impl,
 #if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */
 	&vdev_raidz_sse2_impl,
@@ -49,30 +63,19 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
 static boolean_t raidz_math_initialized = B_FALSE;
 
 /* Select raidz implementation */
-static enum vdev_raidz_impl_sel {
-	IMPL_FASTEST = -1,
-	IMPL_ORIGINAL = -2,
-	IMPL_CYCLE = -3,
-	IMPL_SCALAR = 0,
-} zfs_vdev_raidz_impl = IMPL_SCALAR;
-
-/* selected implementation and its lock */
-static krwlock_t vdev_raidz_impl_lock;
-static raidz_impl_ops_t *vdev_raidz_used_impl =
-	(raidz_impl_ops_t *) &vdev_raidz_scalar_impl;
-static boolean_t vdev_raidz_impl_user_set = B_FALSE;
-
-/* RAIDZ op that contain the fastest routines */
-static raidz_impl_ops_t vdev_raidz_fastest_impl = {
-	.name = "fastest"
-};
+#define	IMPL_FASTEST	(UINT32_MAX)
+#define	IMPL_CYCLE	(UINT32_MAX - 1)
+#define	IMPL_ORIGINAL	(0)
+#define	IMPL_SCALAR	(1)
+
+#define	RAIDZ_IMPL_READ(i)	(*(volatile uint32_t *) &(i))
+
+static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
+static uint32_t user_sel_impl = IMPL_FASTEST;
 
 /* Hold all supported implementations */
-size_t raidz_supp_impl_cnt = 1;
-raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths) + 1] = {
-	(raidz_impl_ops_t *) &vdev_raidz_scalar_impl,	/* scalar is supported */
-	NULL
-};
+static size_t raidz_supp_impl_cnt = 0;
+static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
 
 /*
  * kstats values for supported impl & original methods
@@ -87,33 +90,52 @@ static kstat_t *raidz_math_kstat = NULL;
  * Selects the raidz operation for raidz_map
  * If rm_ops is set to NULL original raidz implementation will be used
  */
-void
-vdev_raidz_math_get_ops(raidz_map_t *rm)
+raidz_impl_ops_t *
+vdev_raidz_math_get_ops()
 {
-	rw_enter(&vdev_raidz_impl_lock, RW_READER);
-
-	rm->rm_ops = vdev_raidz_used_impl;
-
+	raidz_impl_ops_t *ops = NULL;
+	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+	switch (impl) {
+	case IMPL_FASTEST:
+		ASSERT(raidz_math_initialized);
+		ops = &vdev_raidz_fastest_impl;
+		break;
 #if !defined(_KERNEL)
-	if (zfs_vdev_raidz_impl == IMPL_CYCLE) {
+	case IMPL_CYCLE:
+	{
+		ASSERT(raidz_math_initialized);
+		ASSERT3U(raidz_supp_impl_cnt, >, 0);
+		/* Cycle through all supported implementations */
 		static size_t cycle_impl_idx = 0;
-		size_t idx;
-		/*
-		 * Cycle through all supported new implementations, and
-		 * when idx == raidz_supp_impl_cnt, use the original
-		 */
-		idx = (++cycle_impl_idx) % (raidz_supp_impl_cnt + 1);
-		rm->rm_ops = raidz_supp_impl[idx];
+		size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
+		ops = raidz_supp_impl[idx];
 	}
+	break;
 #endif
+	case IMPL_ORIGINAL:
+		ops = (raidz_impl_ops_t *) &vdev_raidz_original_impl;
+		break;
+	case IMPL_SCALAR:
+		ops = (raidz_impl_ops_t *) &vdev_raidz_scalar_impl;
+		break;
+	default:
+		ASSERT(raidz_math_initialized);
+		ASSERT3U(impl, <, raidz_supp_impl_cnt);
+		ASSERT3U(raidz_supp_impl_cnt, >, 0);
+		ops = raidz_supp_impl[impl];
+		break;
+	}
 
-	rw_exit(&vdev_raidz_impl_lock);
+	ASSERT3P(ops, !=, NULL);
+
+	return (ops);
 }
 
 /*
  * Select parity generation method for raidz_map
  */
-void
+int
 vdev_raidz_math_generate(raidz_map_t *rm)
 {
 	raidz_gen_f gen_parity = NULL;
@@ -135,13 +157,17 @@ vdev_raidz_math_generate(raidz_map_t *rm)
 		break;
 	}
 
-	ASSERT(gen_parity != NULL);
+	/* if method is NULL execute the original implementation */
+	if (gen_parity == NULL)
+		return (RAIDZ_ORIGINAL_IMPL);
 
 	gen_parity(rm);
+
+	return (0);
 }
 
 static raidz_rec_f
-_reconstruct_fun_raidz1(raidz_map_t *rm, const int *parity_valid,
+reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
     const int nbaddata)
 {
 	if (nbaddata == 1 && parity_valid[CODE_P]) {
@@ -151,7 +177,7 @@ _reconstruct_fun_raidz1(raidz_map_t *rm, const int *parity_valid,
 }
 
 static raidz_rec_f
-_reconstruct_fun_raidz2(raidz_map_t *rm, const int *parity_valid,
+reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
     const int nbaddata)
 {
 	if (nbaddata == 1) {
@@ -168,7 +194,7 @@ _reconstruct_fun_raidz2(raidz_map_t *rm, const int *parity_valid,
 }
 
 static raidz_rec_f
-_reconstruct_fun_raidz3(raidz_map_t *rm, const int *parity_valid,
+reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
    const int nbaddata)
 {
 	if (nbaddata == 1) {
@@ -208,27 +234,25 @@ vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
 	raidz_rec_f rec_data = NULL;
 
 	switch (raidz_parity(rm)) {
-	case 1:
-		rec_data = _reconstruct_fun_raidz1(rm, parity_valid,
-		    nbaddata);
-		break;
-	case 2:
-		rec_data = _reconstruct_fun_raidz2(rm, parity_valid,
-		    nbaddata);
-		break;
-	case 3:
-		rec_data = _reconstruct_fun_raidz3(rm, parity_valid,
-		    nbaddata);
-		break;
-	default:
-		cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
-		    raidz_parity(rm));
-		break;
+	case PARITY_P:
+		rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
+		break;
+	case PARITY_PQ:
+		rec_data = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
+		break;
+	case PARITY_PQR:
+		rec_data = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
+		break;
+	default:
+		cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
+		    raidz_parity(rm));
+		break;
 	}
 
-	ASSERT(rec_data != NULL);
-
-	return (rec_data(rm, dt));
+	if (rec_data == NULL)
+		return (RAIDZ_ORIGINAL_IMPL);
+	else
+		return (rec_data(rm, dt));
 }
 
 const char *raidz_gen_name[] = {
@@ -309,13 +333,10 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
 	uint64_t run_cnt, speed, best_speed = 0;
 	hrtime_t t_start, t_diff;
 	raidz_impl_ops_t *curr_impl;
+	raidz_impl_kstat_t * fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
 	int impl, i;
 
-	/*
-	 * Use the sentinel (NULL) from the end of raidz_supp_impl_cnt
-	 * to run "original" implementation (bench_rm->rm_ops = NULL)
-	 */
-	for (impl = 0; impl <= raidz_supp_impl_cnt; impl++) {
+	for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
 		/* set an implementation to benchmark */
 		curr_impl = raidz_supp_impl[impl];
 		bench_rm->rm_ops = curr_impl;
@@ -338,16 +359,19 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
 		else
 			raidz_impl_kstats[impl].rec[fn].value.ui64 = speed;
 
-		/* if curr_impl==NULL the original impl is benchmarked */
-		if (curr_impl != NULL && speed > best_speed) {
+		/* Update fastest implementation method */
+		if (speed > best_speed) {
 			best_speed = speed;
 
-			if (bench_fn == benchmark_gen_impl)
+			if (bench_fn == benchmark_gen_impl) {
 				vdev_raidz_fastest_impl.gen[fn] =
 				    curr_impl->gen[fn];
-			else
+				fstat->gen[fn].value.ui64 = speed;
+			} else {
 				vdev_raidz_fastest_impl.rec[fn] =
 				    curr_impl->rec[fn];
+				fstat->rec[fn].value.ui64 = speed;
+			}
 		}
 	}
 }
@@ -361,9 +385,6 @@ vdev_raidz_math_init(void)
 	uint64_t bench_parity;
 	int i, c, fn;
 
-	/* init & vdev_raidz_impl_lock */
-	rw_init(&vdev_raidz_impl_lock, NULL, RW_DEFAULT, NULL);
-
 	/* move supported impl into raidz_supp_impl */
 	for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
 		curr_impl = (raidz_impl_ops_t *) raidz_all_maths[i];
@@ -379,20 +400,16 @@ vdev_raidz_math_init(void)
 			raidz_supp_impl[c++] = (raidz_impl_ops_t *) curr_impl;
 		}
 	}
+	membar_producer();		/* complete raidz_supp_impl[] init */
 	raidz_supp_impl_cnt = c;	/* number of supported impl */
-	raidz_supp_impl[c] = NULL;	/* sentinel */
 
-	/* init kstat for original routines */
-	init_raidz_kstat(&(raidz_impl_kstats[raidz_supp_impl_cnt]), "original");
+	init_raidz_kstat(&(raidz_impl_kstats[raidz_supp_impl_cnt]), "fastest");
 
 #if !defined(_KERNEL)
-	/*
-	 * Skip benchmarking and use last implementation as fastest
-	 */
+	/* Skip benchmarking and use last implementation as fastest */
 	memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
 	    sizeof (vdev_raidz_fastest_impl));
-
-	vdev_raidz_fastest_impl.name = "fastest";
+	strcpy(vdev_raidz_fastest_impl.name, "fastest");
 
 	raidz_math_initialized = B_TRUE;
 
@@ -407,12 +424,13 @@ vdev_raidz_math_init(void)
 	bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
 	bench_zio->io_data = zio_data_buf_alloc(BENCH_ZIO_SIZE);
 	VERIFY(bench_zio->io_data);
+	memset(bench_zio->io_data, 0xAA, BENCH_ZIO_SIZE); /* warm up */
 
 	/* Benchmark parity generation methods */
 	for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
 		bench_parity = fn + 1;
 		/* New raidz_map is needed for each generate_p/q/r */
-		bench_rm = vdev_raidz_map_alloc(bench_zio, 9,
+		bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
 		    BENCH_D_COLS + bench_parity, bench_parity);
 
 		benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
@@ -421,7 +439,8 @@ vdev_raidz_math_init(void)
 	}
 
 	/* Benchmark data reconstruction methods */
-	bench_rm = vdev_raidz_map_alloc(bench_zio, 9, BENCH_COLS, PARITY_PQR);
+	bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+	    BENCH_COLS, PARITY_PQR);
 
 	for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
 		benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
@@ -444,9 +463,8 @@ vdev_raidz_math_init(void)
 	}
 
 	/* Finish initialization */
+	atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
 	raidz_math_initialized = B_TRUE;
-	if (!vdev_raidz_impl_user_set)
-		VERIFY0(vdev_raidz_impl_set("fastest"));
 }
 
 void
@@ -460,33 +478,33 @@ vdev_raidz_math_fini(void)
 		raidz_math_kstat = NULL;
 	}
 
-	rw_destroy(&vdev_raidz_impl_lock);
-
 	/* fini impl */
 	for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
 		curr_impl = raidz_all_maths[i];
-
 		if (curr_impl->fini)
 			curr_impl->fini();
 	}
 }
 
-static const
-struct {
-	char *name;
-	raidz_impl_ops_t *impl;
-	enum vdev_raidz_impl_sel sel;
+static const struct {
+	char *name;
+	uint32_t sel;
 } math_impl_opts[] = {
-	{ "fastest", &vdev_raidz_fastest_impl, IMPL_FASTEST },
-	{ "original", NULL, IMPL_ORIGINAL },
 #if !defined(_KERNEL)
-	{ "cycle", NULL, IMPL_CYCLE },
+	{ "cycle", IMPL_CYCLE },
 #endif
+	{ "fastest", IMPL_FASTEST },
+	{ "original", IMPL_ORIGINAL },
+	{ "scalar", IMPL_SCALAR }
 };
 
 /*
  * Function sets desired raidz implementation.
- * If called after module_init(), vdev_raidz_impl_lock must be held for writing.
+ *
+ * If we are called before init(), user preference will be saved in
+ * user_sel_impl, and applied in later init() call. This occurs when module
+ * parameter is specified on module load. Otherwise, directly update
+ * zfs_vdev_raidz_impl.
 *
 * @val		Name of raidz implementation to use
 * @param	Unused.
@@ -494,42 +512,58 @@ struct {
 static int
 zfs_vdev_raidz_impl_set(const char *val, struct kernel_param *kp)
 {
+	int err = -EINVAL;
+	char req_name[RAIDZ_IMPL_NAME_MAX];
+	uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
 	size_t i;
 
+	/* sanitize input */
+	i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
+	if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
+		return (err);
+
+	strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
+	while (i > 0 && !!isspace(req_name[i-1]))
+		i--;
+	req_name[i] = '\0';
+
 	/* Check mandatory options */
 	for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
-		if (strcmp(val, math_impl_opts[i].name) == 0) {
-			zfs_vdev_raidz_impl = math_impl_opts[i].sel;
-			vdev_raidz_used_impl = math_impl_opts[i].impl;
-			vdev_raidz_impl_user_set = B_TRUE;
-			return (0);
+		if (strcmp(req_name, math_impl_opts[i].name) == 0) {
+			impl = math_impl_opts[i].sel;
+			err = 0;
+			break;
 		}
 	}
 
-	/* check all supported implementations */
-	for (i = 0; i < raidz_supp_impl_cnt; i++) {
-		if (strcmp(val, raidz_supp_impl[i]->name) == 0) {
-			zfs_vdev_raidz_impl = i;
-			vdev_raidz_used_impl = raidz_supp_impl[i];
-			vdev_raidz_impl_user_set = B_TRUE;
-			return (0);
+	/* check all supported impl if init() was already called */
+	if (err != 0 && raidz_math_initialized) {
+		/* check all supported implementations */
+		for (i = 0; i < raidz_supp_impl_cnt; i++) {
+			if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
+				impl = i;
+				err = 0;
+				break;
+			}
 		}
 	}
 
-	return (-EINVAL);
+	if (err == 0) {
+		if (raidz_math_initialized)
+			atomic_swap_32(&zfs_vdev_raidz_impl, impl);
+		else
+			atomic_swap_32(&user_sel_impl, impl);
+	}
+
+	return (err);
 }
 
 int
 vdev_raidz_impl_set(const char *val)
 {
-	int err;
-
 	ASSERT(raidz_math_initialized);
 
-	rw_enter(&vdev_raidz_impl_lock, RW_WRITER);
-	err = zfs_vdev_raidz_impl_set(val, NULL);
-	rw_exit(&vdev_raidz_impl_lock);
-	return (err);
+	return (zfs_vdev_raidz_impl_set(val, NULL));
 }
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
@@ -538,29 +572,22 @@ zfs_vdev_raidz_impl_get(char *buffer, struct kernel_param *kp)
 {
 	int i, cnt = 0;
 	char *fmt;
+	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
 
 	ASSERT(raidz_math_initialized);
 
-	rw_enter(&vdev_raidz_impl_lock, RW_READER);
-
 	/* list mandatory options */
-	for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
-		if (math_impl_opts[i].sel == zfs_vdev_raidz_impl)
-			fmt = "[%s] ";
-		else
-			fmt = "%s ";
-
+	for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
+		fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
 		cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
 	}
 
 	/* list all supported implementations */
 	for (i = 0; i < raidz_supp_impl_cnt; i++) {
-		fmt = (i == zfs_vdev_raidz_impl) ? "[%s] " : "%s ";
+		fmt = (i == impl) ? "[%s] " : "%s ";
 		cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
 	}
 
-	rw_exit(&vdev_raidz_impl_lock);
-
 	return (cnt);
 }
diff --git a/module/zfs/vdev_raidz_math_avx2.c b/module/zfs/vdev_raidz_math_avx2.c
index 1df9c55b1..9ca1688c1 100644
--- a/module/zfs/vdev_raidz_math_avx2.c
+++ b/module/zfs/vdev_raidz_math_avx2.c
@@ -47,10 +47,10 @@
 #define	VR1(r...)	VR1_(r)
 #define	VR2(r...)	VR2_(r, 1)
 #define	VR3(r...)	VR3_(r, 1, 2)
-#define	VR4(r...)	VR4_(r, 1)
-#define	VR5(r...)	VR5_(r, 1, 2)
-#define	VR6(r...)	VR6_(r, 1, 2, 3)
-#define	VR7(r...)	VR7_(r, 1, 2, 3, 4)
+#define	VR4(r...)	VR4_(r, 1, 2)
+#define	VR5(r...)	VR5_(r, 1, 2, 3)
+#define	VR6(r...)	VR6_(r, 1, 2, 3, 4)
+#define	VR7(r...)	VR7_(r, 1, 2, 3, 4, 5)
 
 #define	R_01(REG1, REG2, ...)		REG1, REG2
 #define	_R_23(_0, _1, REG2, REG3, ...)	REG2, REG3
diff --git a/module/zfs/vdev_raidz_math_scalar.c b/module/zfs/vdev_raidz_math_scalar.c
index 846b2e5e4..540cb9314 100644
--- a/module/zfs/vdev_raidz_math_scalar.c
+++ b/module/zfs/vdev_raidz_math_scalar.c
@@ -222,7 +222,7 @@ static const struct {
 DEFINE_GEN_METHODS(scalar);
 DEFINE_REC_METHODS(scalar);
 
-static boolean_t
+boolean_t
 raidz_will_scalar_work(void)
 {
 	return (B_TRUE); /* always */
diff --git a/module/zfs/vdev_raidz_math_ssse3.c b/module/zfs/vdev_raidz_math_ssse3.c
index 24b4dccc4..982e27efc 100644
--- a/module/zfs/vdev_raidz_math_ssse3.c
+++ b/module/zfs/vdev_raidz_math_ssse3.c
@@ -47,10 +47,10 @@
 #define	VR1(r...)	VR1_(r)
 #define	VR2(r...)	VR2_(r, 1)
 #define	VR3(r...)	VR3_(r, 1, 2)
-#define	VR4(r...)	VR4_(r, 1)
-#define	VR5(r...)	VR5_(r, 1, 2)
-#define	VR6(r...)	VR6_(r, 1, 2, 3)
-#define	VR7(r...)	VR7_(r, 1, 2, 3, 4)
+#define	VR4(r...)	VR4_(r, 1, 2)
+#define	VR5(r...)	VR5_(r, 1, 2, 3)
+#define	VR6(r...)	VR6_(r, 1, 2, 3, 4)
+#define	VR7(r...)	VR7_(r, 1, 2, 3, 4, 5)
 
 #define	R_01(REG1, REG2, ...)		REG1, REG2
 #define	_R_23(_0, _1, REG2, REG3, ...)	REG2, REG3
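A practical consequence of the parameter sanitization above: because zfs_vdev_raidz_impl_set() now trims trailing whitespace, selecting an implementation at runtime with a plain `echo fastest > /sys/module/zfs/parameters/zfs_vdev_raidz_impl` (the usual Linux sysfs location for module parameters — the exact path may vary with how the module is packaged) no longer fails on the newline that `echo` appends, and reading the same file back lists the available implementations with the active one in brackets, as implemented by zfs_vdev_raidz_impl_get().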