diff options
author | George Amanakis <[email protected]> | 2021-12-17 21:35:28 +0100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2023-05-18 11:59:42 -0700 |
commit | 482eeef804f0f325faddb102f112c0f1ec86a1b6 (patch) | |
tree | f5b052e7fed06ad527285841ec6de2d7503d39ed /cmd | |
parent | e34e15ed6d1882d29e314321b7642305d99f1b78 (diff) |
Teach zpool scrub to scrub only blocks in error log
Added a flag '-e' in zpool scrub to scrub only blocks in error log. A
user can pause, resume and cancel the error scrub by passing additional
command line arguments -p -s just like a regular scrub. This involves
adding a new flag, creating new libzfs interfaces, a new ioctl, and the
actual iteration and read-issuing logic. Error scrubbing is executed in
multiple txg to make sure pool performance is not affected.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tony Hutter <[email protected]>
Co-authored-by: TulsiJain [email protected]
Signed-off-by: George Amanakis <[email protected]>
Closes #8995
Closes #12355
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zpool/zpool_main.c | 111 |
1 files changed, 101 insertions, 10 deletions
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 3e08e0314..013dd4a23 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -401,7 +401,7 @@ get_usage(zpool_help_t idx) return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> " "[<device> ...]\n")); case HELP_SCRUB: - return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n")); + return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n")); case HELP_RESILVER: return (gettext("\tresilver <pool> ...\n")); case HELP_TRIM: @@ -7309,8 +7309,9 @@ wait_callback(zpool_handle_t *zhp, void *data) } /* - * zpool scrub [-s | -p] [-w] <pool> ... + * zpool scrub [-s | -p] [-w] [-e] <pool> ... * + * -e Only scrub blocks in the error log. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. * -w Wait. Blocks until scrub has completed. @@ -7326,14 +7327,21 @@ zpool_do_scrub(int argc, char **argv) cb.cb_type = POOL_SCAN_SCRUB; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + boolean_t is_error_scrub = B_FALSE; + boolean_t is_pause = B_FALSE; + boolean_t is_stop = B_FALSE; + /* check options */ - while ((c = getopt(argc, argv, "spw")) != -1) { + while ((c = getopt(argc, argv, "spwe")) != -1) { switch (c) { + case 'e': + is_error_scrub = B_TRUE; + break; case 's': - cb.cb_type = POOL_SCAN_NONE; + is_stop = B_TRUE; break; case 'p': - cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + is_pause = B_TRUE; break; case 'w': wait = B_TRUE; @@ -7345,11 +7353,21 @@ zpool_do_scrub(int argc, char **argv) } } - if (cb.cb_type == POOL_SCAN_NONE && - cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) { - (void) fprintf(stderr, gettext("invalid option combination: " - "-s and -p are mutually exclusive\n")); + if (is_pause && is_stop) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-s and -p are mutually exclusive\n")); usage(B_FALSE); + } else { + if (is_error_scrub) + cb.cb_type = POOL_SCAN_ERRORSCRUB; + + if (is_pause) { + cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + } else if (is_stop) { + cb.cb_type = POOL_SCAN_NONE; + } else { + cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + } } if (wait && (cb.cb_type == POOL_SCAN_NONE || @@ -7574,6 +7592,70 @@ secs_to_dhms(uint64_t total, char *buf) } /* + * Print out detailed error scrub status. + */ +static void +print_err_scrub_status(pool_scan_stat_t *ps) +{ + time_t start, end, pause; + uint64_t total_secs_left; + uint64_t secs_left, mins_left, hours_left, days_left; + uint64_t examined, to_be_examined; + + if (ps == NULL || ps->pss_error_scrub_func != POOL_SCAN_ERRORSCRUB) { + return; + } + + (void) printf(gettext(" scrub: ")); + + start = ps->pss_error_scrub_start; + end = ps->pss_error_scrub_end; + pause = ps->pss_pass_error_scrub_pause; + examined = ps->pss_error_scrub_examined; + to_be_examined = ps->pss_error_scrub_to_be_examined; + + assert(ps->pss_error_scrub_func == POOL_SCAN_ERRORSCRUB); + + if (ps->pss_error_scrub_state == DSS_FINISHED) { + total_secs_left = end - start; + days_left = total_secs_left / 60 / 60 / 24; + hours_left = (total_secs_left / 60 / 60) % 24; + mins_left = (total_secs_left / 60) % 60; + secs_left = (total_secs_left % 60); + + (void) printf(gettext("scrubbed %llu error blocks in %llu days " + "%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined, + (u_longlong_t)days_left, (u_longlong_t)hours_left, + (u_longlong_t)mins_left, (u_longlong_t)secs_left, + ctime(&end)); + + return; + } else if (ps->pss_error_scrub_state == DSS_CANCELED) { + (void) printf(gettext("error scrub canceled on %s"), + ctime(&end)); + return; + } + assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING); + + /* Error scrub is in progress. */ + if (pause == 0) { + (void) printf(gettext("error scrub in progress since %s"), + ctime(&start)); + } else { + (void) printf(gettext("error scrub paused since %s"), + ctime(&pause)); + (void) printf(gettext("\terror scrub started on %s"), + ctime(&start)); + } + + double fraction_done = (double)examined / (to_be_examined + examined); + (void) printf(gettext("\t%.2f%% done, issued I/O for %llu error" + " blocks"), 100 * fraction_done, (u_longlong_t)examined); + + (void) printf("\n"); +} + +/* * Print out detailed scrub status. */ static void @@ -7909,10 +7991,12 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot) { uint64_t rebuild_end_time = 0, resilver_end_time = 0; boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE; + boolean_t have_errorscrub = B_FALSE; boolean_t active_resilver = B_FALSE; pool_checkpoint_stat_t *pcs = NULL; pool_scan_stat_t *ps = NULL; uint_t c; + time_t scrub_start = 0, errorscrub_start = 0; if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c) == 0) { @@ -7921,16 +8005,23 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot) active_resilver = (ps->pss_state == DSS_SCANNING); } + have_resilver = (ps->pss_func == POOL_SCAN_RESILVER); have_scrub = (ps->pss_func == POOL_SCAN_SCRUB); + scrub_start = ps->pss_start_time; + have_errorscrub = (ps->pss_error_scrub_func == + POOL_SCAN_ERRORSCRUB); + errorscrub_start = ps->pss_error_scrub_start; } boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time); boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0)); /* Always print the scrub status when available. */ - if (have_scrub) + if (have_scrub && scrub_start > errorscrub_start) print_scan_scrub_resilver_status(ps); + else if (have_errorscrub && errorscrub_start >= scrub_start) + print_err_scrub_status(ps); /* * When there is an active resilver or rebuild print its status. |