aboutsummaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
authorGeorge Amanakis <[email protected]>2021-12-17 21:35:28 +0100
committerBrian Behlendorf <[email protected]>2023-05-18 11:59:42 -0700
commit482eeef804f0f325faddb102f112c0f1ec86a1b6 (patch)
treef5b052e7fed06ad527285841ec6de2d7503d39ed /cmd
parente34e15ed6d1882d29e314321b7642305d99f1b78 (diff)
Teach zpool scrub to scrub only blocks in error log
Added a flag '-e' to zpool scrub to scrub only the blocks in the error log. A user can pause, resume and cancel the error scrub by passing the additional command line arguments -p and -s, just like a regular scrub. This involves adding a new flag, creating new libzfs interfaces, a new ioctl, and the actual iteration and read-issuing logic. Error scrubbing is executed across multiple txgs to make sure pool performance is not affected. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Tony Hutter <[email protected]> Co-authored-by: TulsiJain <[email protected]> Signed-off-by: George Amanakis <[email protected]> Closes #8995 Closes #12355
Diffstat (limited to 'cmd')
-rw-r--r--cmd/zpool/zpool_main.c111
1 files changed, 101 insertions, 10 deletions
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 3e08e0314..013dd4a23 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -401,7 +401,7 @@ get_usage(zpool_help_t idx)
return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
"[<device> ...]\n"));
case HELP_SCRUB:
- return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
+ return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
@@ -7309,8 +7309,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
}
/*
- * zpool scrub [-s | -p] [-w] <pool> ...
+ * zpool scrub [-s | -p] [-w] [-e] <pool> ...
*
+ * -e Only scrub blocks in the error log.
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
* -w Wait. Blocks until scrub has completed.
@@ -7326,14 +7327,21 @@ zpool_do_scrub(int argc, char **argv)
cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
+ boolean_t is_error_scrub = B_FALSE;
+ boolean_t is_pause = B_FALSE;
+ boolean_t is_stop = B_FALSE;
+
/* check options */
- while ((c = getopt(argc, argv, "spw")) != -1) {
+ while ((c = getopt(argc, argv, "spwe")) != -1) {
switch (c) {
+ case 'e':
+ is_error_scrub = B_TRUE;
+ break;
case 's':
- cb.cb_type = POOL_SCAN_NONE;
+ is_stop = B_TRUE;
break;
case 'p':
- cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
+ is_pause = B_TRUE;
break;
case 'w':
wait = B_TRUE;
@@ -7345,11 +7353,21 @@ zpool_do_scrub(int argc, char **argv)
}
}
- if (cb.cb_type == POOL_SCAN_NONE &&
- cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
- (void) fprintf(stderr, gettext("invalid option combination: "
- "-s and -p are mutually exclusive\n"));
+ if (is_pause && is_stop) {
+ (void) fprintf(stderr, gettext("invalid option "
+ "combination :-s and -p are mutually exclusive\n"));
usage(B_FALSE);
+ } else {
+ if (is_error_scrub)
+ cb.cb_type = POOL_SCAN_ERRORSCRUB;
+
+ if (is_pause) {
+ cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
+ } else if (is_stop) {
+ cb.cb_type = POOL_SCAN_NONE;
+ } else {
+ cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
+ }
}
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
@@ -7574,6 +7592,70 @@ secs_to_dhms(uint64_t total, char *buf)
}
/*
+ * Print out detailed error scrub status.
+ */
+static void
+print_err_scrub_status(pool_scan_stat_t *ps)
+{
+	/*
+	 * Prints the error scrub line of the status output from the scan
+	 * statistics in ps.  Nothing is printed when ps is NULL or when it
+	 * does not record an error scrub.  Depending on the recorded state
+	 * the line reports a finished run (block count and elapsed time),
+	 * a canceled run, or an in-progress/paused run with its percentage
+	 * of completion.
+	 */
+	time_t start, end, pause;
+	uint64_t elapsed_secs, days, hours, mins, secs;
+	uint64_t examined, to_be_examined, total;
+	double fraction_done;
+
+	if (ps == NULL || ps->pss_error_scrub_func != POOL_SCAN_ERRORSCRUB)
+		return;
+
+	(void) printf(gettext(" scrub: "));
+
+	start = ps->pss_error_scrub_start;
+	end = ps->pss_error_scrub_end;
+	pause = ps->pss_pass_error_scrub_pause;
+	examined = ps->pss_error_scrub_examined;
+	to_be_examined = ps->pss_error_scrub_to_be_examined;
+
+	if (ps->pss_error_scrub_state == DSS_FINISHED) {
+		/* Split the run time (end - start) into d/h/m/s. */
+		elapsed_secs = end - start;
+		days = elapsed_secs / 60 / 60 / 24;
+		hours = (elapsed_secs / 60 / 60) % 24;
+		mins = (elapsed_secs / 60) % 60;
+		secs = (elapsed_secs % 60);
+
+		(void) printf(gettext("scrubbed %llu error blocks in %llu days "
+		    "%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
+		    (u_longlong_t)days, (u_longlong_t)hours,
+		    (u_longlong_t)mins, (u_longlong_t)secs,
+		    ctime(&end));
+		return;
+	}
+
+	if (ps->pss_error_scrub_state == DSS_CANCELED) {
+		(void) printf(gettext("error scrub canceled on %s"),
+		    ctime(&end));
+		return;
+	}
+
+	assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);
+
+	/* Error scrub is in progress; a non-zero pause time means paused. */
+	if (pause == 0) {
+		(void) printf(gettext("error scrub in progress since %s"),
+		    ctime(&start));
+	} else {
+		(void) printf(gettext("error scrub paused since %s"),
+		    ctime(&pause));
+		(void) printf(gettext("\terror scrub started on %s"),
+		    ctime(&start));
+	}
+
+	/*
+	 * Both counters can be zero while the scrub is in progress; without
+	 * this guard the division yields NaN and prints "nan% done".
+	 */
+	total = to_be_examined + examined;
+	fraction_done = (total == 0) ? 0.0 : (double)examined / total;
+	(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
+	    " blocks"), 100 * fraction_done, (u_longlong_t)examined);
+
+	(void) printf("\n");
+}
+
+/*
* Print out detailed scrub status.
*/
static void
@@ -7909,10 +7991,12 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
{
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
+ boolean_t have_errorscrub = B_FALSE;
boolean_t active_resilver = B_FALSE;
pool_checkpoint_stat_t *pcs = NULL;
pool_scan_stat_t *ps = NULL;
uint_t c;
+ time_t scrub_start = 0, errorscrub_start = 0;
if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &c) == 0) {
@@ -7921,16 +8005,23 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
active_resilver = (ps->pss_state == DSS_SCANNING);
}
+
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
+ scrub_start = ps->pss_start_time;
+ have_errorscrub = (ps->pss_error_scrub_func ==
+ POOL_SCAN_ERRORSCRUB);
+ errorscrub_start = ps->pss_error_scrub_start;
}
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
boolean_t have_rebuild = (active_rebuild || (rebuild_end_time > 0));
/* Always print the scrub status when available. */
- if (have_scrub)
+ if (have_scrub && scrub_start > errorscrub_start)
print_scan_scrub_resilver_status(ps);
+ else if (have_errorscrub && errorscrub_start >= scrub_start)
+ print_err_scrub_status(ps);
/*
* When there is an active resilver or rebuild print its status.