aboutsummaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
author: Tom Caputi <[email protected]> 2018-10-19 00:06:18 -0400
committer: Brian Behlendorf <[email protected]> 2018-10-18 21:06:18 -0700
commit: 80a91e7469669e2a5da5873b8f09a752f7869062 (patch)
tree: ef5a4462892becccb939b2cd42a54ed580f5894f /cmd
parent: 9f438c5f948c0072f16431407a373ead34fabf6e (diff)
Defer new resilvers until the current one ends
Currently, if a resilver is triggered for any reason while an existing one is running, zfs will immediately restart the existing resilver from the beginning to include the new drive. This causes problems for system administrators when a drive fails while another is already resilvering. In this case, the optimal thing to do to reduce risk of data loss is to wait for the current resilver to end before immediately replacing the second failed drive, which allows the system to operate with two incomplete drives for the minimum amount of time.

This patch introduces the resilver_defer feature that essentially does this for the admin without forcing them to wait and monitor the resilver manually. The change requires an on-disk feature since we must mark drives that are part of a deferred resilver in the vdev config to ensure that we do not assume they are done resilvering when an existing resilver completes.

Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: @mmaybee
Signed-off-by: Tom Caputi <[email protected]>
Closes #7732
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/zpool/zpool_main.c 58
1 files changed, 52 insertions, 6 deletions
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 5af626558..4845956e5 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -97,6 +97,7 @@ static int zpool_do_replace(int, char **);
static int zpool_do_split(int, char **);
static int zpool_do_scrub(int, char **);
+static int zpool_do_resilver(int, char **);
static int zpool_do_import(int, char **);
static int zpool_do_export(int, char **);
@@ -149,6 +150,7 @@ typedef enum {
HELP_REPLACE,
HELP_REMOVE,
HELP_SCRUB,
+ HELP_RESILVER,
HELP_STATUS,
HELP_UPGRADE,
HELP_EVENTS,
@@ -276,6 +278,7 @@ static zpool_command_t command_table[] = {
{ "split", zpool_do_split, HELP_SPLIT },
{ NULL },
{ "scrub", zpool_do_scrub, HELP_SCRUB },
+ { "resilver", zpool_do_resilver, HELP_RESILVER },
{ NULL },
{ "import", zpool_do_import, HELP_IMPORT },
{ "export", zpool_do_export, HELP_EXPORT },
@@ -358,6 +361,8 @@ get_usage(zpool_help_t idx)
return (gettext("\treopen [-n] <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
+ case HELP_RESILVER:
+ return (gettext("\tresilver <pool> ...\n"));
case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]"
"[-T d|u] [pool] ... \n"
@@ -1874,11 +1879,14 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
(void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &c);
- if (ps != NULL && ps->pss_state == DSS_SCANNING &&
- vs->vs_scan_processed != 0 && children == 0) {
- (void) printf(gettext(" (%s)"),
- (ps->pss_func == POOL_SCAN_RESILVER) ?
- "resilvering" : "repairing");
+ if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0) {
+ if (vs->vs_scan_processed != 0) {
+ (void) printf(gettext(" (%s)"),
+ (ps->pss_func == POOL_SCAN_RESILVER) ?
+ "resilvering" : "repairing");
+ } else if (vs->vs_resilver_deferred) {
+ (void) printf(gettext(" (awaiting resilver)"));
+ }
}
if (cb->vcdl != NULL) {
@@ -6251,7 +6259,7 @@ scrub_callback(zpool_handle_t *zhp, void *data)
* Ignore faulted pools.
*/
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
- (void) fprintf(stderr, gettext("cannot scrub '%s': pool is "
+ (void) fprintf(stderr, gettext("cannot scan '%s': pool is "
"currently unavailable\n"), zpool_get_name(zhp));
return (1);
}
@@ -6320,6 +6328,44 @@ zpool_do_scrub(int argc, char **argv)
}
/*
+ * zpool resilver <pool> ...
+ *
+ * Restarts any in-progress resilver; no options, one or more pool names.
+ */
+int
+zpool_do_resilver(int argc, char **argv)
+{
+ int c;
+ scrub_cbdata_t cb;
+
+ cb.cb_type = POOL_SCAN_RESILVER;
+ cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
+ cb.cb_argc = argc; /* captured before the optind shift below */
+ cb.cb_argv = argv;
+
+ /* check options: optstring is empty, so any option is reported as '?' */
+ while ((c = getopt(argc, argv, "")) != -1) {
+ switch (c) {
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE); /* presumably does not return -- TODO confirm */
+ }
+ }
+
+ argc -= optind; /* drop what getopt consumed; the rest are pool names */
+ argv += optind;
+
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("missing pool name argument\n"));
+ usage(B_FALSE);
+ }
+ /* reuses scrub_callback, with cb.cb_type selecting a resilver scan */
+ return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
+}
+
+
+/*
* Print out detailed scrub status.
*/
static void