summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
author Brian Behlendorf <[email protected]> 2020-07-03 11:05:50 -0700
committer GitHub <[email protected]> 2020-07-03 11:05:50 -0700
commit 9a49d3f3d3bfa26df4e5e54d574cb490f0ee284b (patch)
tree 715c2fa00e55762764cadef8460da09f919910ad /lib
parent 7ddb753d17f2c12f152647c0e34eb9c42ee5e4af (diff)
Add device rebuild feature
The device_rebuild feature enables sequential reconstruction when resilvering. Mirror vdevs can be rebuilt in LBA order, which may more quickly restore redundancy depending on the pool's average block size, overall fragmentation, and the performance characteristics of the devices. However, block checksums cannot be verified as part of the rebuild; thus a scrub is automatically started after the sequential resilver completes.

The new '-s' option has been added to the `zpool attach` and `zpool replace` commands to request sequential reconstruction instead of healing reconstruction when resilvering.

    zpool attach -s <pool> <existing vdev> <new vdev>
    zpool replace -s <pool> <old vdev> <new vdev>

The `zpool status` output has been updated to report the progress of sequential resilvering in the same way as healing resilvering. The one notable difference is that multiple sequential resilvers may be in progress as long as they're operating on different top-level vdevs.

The `zpool wait -t resilver` command was extended to wait on sequential resilvers. From this perspective they are no different than healing resilvers.

Sequential resilvers cannot be supported for RAIDZ, but are compatible with the dRAID feature being developed.

As part of this change the resilver_restart_* tests were moved into the functional/replacement directory. Additionally, the replacement tests were renamed and extended to verify both resilvering and rebuilding.

Original-patch-by: Isaac Huang <[email protected]>
Reviewed-by: Tony Hutter <[email protected]>
Reviewed-by: John Poduska <[email protected]>
Co-authored-by: Mark Maybee <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #10349
Diffstat (limited to 'lib')
-rw-r--r-- lib/libzfs/libzfs_pool.c 26
-rw-r--r-- lib/libzfs/libzfs_status.c 47
-rw-r--r-- lib/libzfs/libzfs_util.c 9
-rw-r--r-- lib/libzpool/Makefile.am 1
4 files changed, 76 insertions, 7 deletions
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 11b3d4cd9..f848cb3cf 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -2446,7 +2446,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
- if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
+ if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
+ ps->pss_state == DSS_SCANNING) {
if (cmd == POOL_SCRUB_PAUSE)
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
else
@@ -3128,8 +3129,8 @@ is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
* If 'replacing' is specified, the new disk will replace the old one.
*/
int
-zpool_vdev_attach(zpool_handle_t *zhp,
- const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
+zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
+ const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
@@ -3164,6 +3165,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
zc.zc_cookie = replacing;
+ zc.zc_simple = rebuild;
+
+ if (rebuild &&
+ zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "the loaded zfs module doesn't support device rebuilds"));
+ return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+ }
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0 || children != 1) {
@@ -3224,16 +3233,21 @@ zpool_vdev_attach(zpool_handle_t *zhp,
uint64_t version = zpool_get_prop_int(zhp,
ZPOOL_PROP_VERSION, NULL);
- if (islog)
+ if (islog) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a log with a spare"));
- else if (version >= SPA_VERSION_MULTI_REPLACE)
+ } else if (rebuild) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "only mirror vdevs support sequential "
+ "reconstruction"));
+ } else if (version >= SPA_VERSION_MULTI_REPLACE) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"already in replacing/spare config; wait "
"for completion or use 'zpool detach'"));
- else
+ } else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a replacing device"));
+ }
} else {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"can only attach to mirrors and top-level "
diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
index ebf497db6..67b8ea33e 100644
--- a/lib/libzfs/libzfs_status.c
+++ b/lib/libzfs/libzfs_status.c
@@ -84,6 +84,8 @@ static char *zfs_msgid_table[] = {
* ZPOOL_STATUS_RESILVERING
* ZPOOL_STATUS_OFFLINE_DEV
* ZPOOL_STATUS_REMOVED_DEV
+ * ZPOOL_STATUS_REBUILDING
+ * ZPOOL_STATUS_REBUILD_SCRUB
* ZPOOL_STATUS_OK
*/
};
@@ -195,7 +197,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
* - Check for any data errors
* - Check for any faulted or missing devices in a replicated config
* - Look for any devices showing errors
- * - Check for any resilvering devices
+ * - Check for any resilvering or rebuilding devices
*
* There can obviously be multiple errors within a single pool, so this routine
* only picks the most damaging of all the current errors to report.
@@ -234,6 +236,49 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
return (ZPOOL_STATUS_RESILVERING);
/*
+ * Currently rebuilding a vdev, check top-level vdevs.
+ */
+ vdev_rebuild_stat_t *vrs = NULL;
+ nvlist_t **child;
+ uint_t c, i, children;
+ uint64_t rebuild_end_time = 0;
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if ((nvlist_lookup_uint64_array(child[c],
+ ZPOOL_CONFIG_REBUILD_STATS,
+ (uint64_t **)&vrs, &i) == 0) && (vrs != NULL)) {
+ uint64_t state = vrs->vrs_state;
+
+ if (state == VDEV_REBUILD_ACTIVE) {
+ return (ZPOOL_STATUS_REBUILDING);
+ } else if (state == VDEV_REBUILD_COMPLETE &&
+ vrs->vrs_end_time > rebuild_end_time) {
+ rebuild_end_time = vrs->vrs_end_time;
+ }
+ }
+ }
+
+ /*
+ * If we can determine when the last scrub was run, and it
+ * was before the last rebuild completed, then recommend
+ * that the pool be scrubbed to verify all checksums. When
+ * ps is NULL we can infer the pool has never been scrubbed.
+ */
+ if (rebuild_end_time > 0) {
+ if (ps != NULL) {
+ if ((ps->pss_state == DSS_FINISHED &&
+ ps->pss_func == POOL_SCAN_SCRUB &&
+ rebuild_end_time > ps->pss_end_time) ||
+ ps->pss_state == DSS_NONE)
+ return (ZPOOL_STATUS_REBUILD_SCRUB);
+ } else {
+ return (ZPOOL_STATUS_REBUILD_SCRUB);
+ }
+ }
+ }
+
+ /*
* The multihost property is set and the pool may be active.
*/
if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 21bd8289c..2f4aaed32 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -286,6 +286,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
"resilver_defer feature"));
case EZFS_EXPORT_IN_PROGRESS:
return (dgettext(TEXT_DOMAIN, "pool export in progress"));
+ case EZFS_REBUILDING:
+ return (dgettext(TEXT_DOMAIN, "currently sequentially "
+ "resilvering"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -693,6 +696,12 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_EXPORT_IN_PROGRESS:
zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
break;
+ case ZFS_ERR_RESILVER_IN_PROGRESS:
+ zfs_verror(hdl, EZFS_RESILVERING, fmt, ap);
+ break;
+ case ZFS_ERR_REBUILD_IN_PROGRESS:
+ zfs_verror(hdl, EZFS_REBUILDING, fmt, ap);
+ break;
case ZFS_ERR_IOC_CMD_UNAVAIL:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
"module does not support this operation. A reboot may "
diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am
index 46befa7d4..06b89fe0a 100644
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@@ -132,6 +132,7 @@ KERNEL_C = \
vdev_raidz_math_sse2.c \
vdev_raidz_math_ssse3.c \
vdev_raidz_math_powerpc_altivec.c \
+ vdev_rebuild.c \
vdev_removal.c \
vdev_root.c \
vdev_trim.c \