diff options
author | Tony Hutter <[email protected]> | 2017-05-19 12:30:16 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2017-05-19 12:30:16 -0700 |
commit | 4a283c7f77eb5065e9f03b122bf8ead4f4a1e2be (patch) | |
tree | 2a4daf72ae62343aa7f24305a592fc2f31bb2eeb /module | |
parent | a32df59e187a6187eb96c52b93cf02e8af51f327 (diff) |
Force fault a vdev with 'zpool offline -f'
This patch adds a '-f' option to 'zpool offline' that faults a vdev
instead of bringing it offline. Unlike the OFFLINE state, the
FAULTED state will trigger the FMA code, allowing for things like
autoreplace and triggering the slot fault LED. Faults forced with
'-f' persist across imports unless they were set with the temporary
('-t') flag. Both persistent and temporary faults can be cleared
with 'zpool clear'.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tony Hutter <[email protected]>
Closes #6094
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/spa_misc.c | 14 | ||||
-rw-r--r-- | module/zfs/vdev.c | 58 | ||||
-rw-r--r-- | module/zfs/vdev_label.c | 10 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 6 |
4 files changed, 76 insertions, 12 deletions
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 831f83b33..fb425e121 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1181,13 +1181,21 @@ int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) { boolean_t config_changed = B_FALSE; + vdev_t *vdev_top; + + if (vd == NULL || vd == spa->spa_root_vdev) { + vdev_top = spa->spa_root_vdev; + } else { + vdev_top = vd->vdev_top; + } if (vd != NULL || error == 0) - vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev, - 0, 0, B_FALSE); + vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE); if (vd != NULL) { - vdev_state_dirty(vd->vdev_top); + if (vd != spa->spa_root_vdev) + vdev_state_dirty(vdev_top); + config_changed = B_TRUE; spa->spa_config_generation++; } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index f44d338ef..1bca227db 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -394,6 +394,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, char *type; uint64_t guid = 0, islog, nparity; vdev_t *vd; + char *tmp = NULL; + int rc; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); @@ -487,6 +489,19 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) vd->vdev_path = spa_strdup(vd->vdev_path); + + /* + * ZPOOL_CONFIG_AUX_STATE = "external" means we previously forced a + * fault on a vdev and want it to persist across imports (like with + * zpool offline -f). 
+ */ + rc = nvlist_lookup_string(nv, ZPOOL_CONFIG_AUX_STATE, &tmp); + if (rc == 0 && tmp != NULL && strcmp(tmp, "external") == 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL; + vd->vdev_faulted = 1; + vd->vdev_label_aux = VDEV_AUX_EXTERNAL; + } + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) vd->vdev_devid = spa_strdup(vd->vdev_devid); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, @@ -591,12 +606,17 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_resilver_txg); /* - * When importing a pool, we want to ignore the persistent fault - * state, as the diagnosis made on another system may not be - * valid in the current context. Local vdevs will - * remain in the faulted state. + * In general, when importing a pool we want to ignore the + * persistent fault state, as the diagnosis made on another + * system may not be valid in the current context. The only + * exception is if we forced a vdev to a persistently faulted + * state with 'zpool offline -f'. The persistent fault will + * remain across imports until cleared. + * + * Local vdevs will remain in the faulted state. */ - if (spa_load_state(spa) == SPA_LOAD_OPEN) { + if (spa_load_state(spa) == SPA_LOAD_OPEN || + spa_load_state(spa) == SPA_LOAD_IMPORT) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &vd->vdev_faulted); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED, @@ -2479,6 +2499,32 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux) tvd = vd->vdev_top; /* + * If user did a 'zpool offline -f' then make the fault persist across + * reboots. + */ + if (aux == VDEV_AUX_EXTERNAL_PERSIST) { + /* + * There are two kinds of forced faults: temporary and + * persistent. Temporary faults go away at pool import, while + * persistent faults stay set. Both types of faults can be + * cleared with a zpool clear. + * + * We tell if a vdev is persistently faulted by looking at the + * ZPOOL_CONFIG_AUX_STATE nvpair. 
If it's set to "external" at + * import then it's a persistent fault. Otherwise, it's + * temporary. We get ZPOOL_CONFIG_AUX_STATE set to "external" + * by setting vd.vdev_stat.vs_aux to VDEV_AUX_EXTERNAL. This + * tells vdev_config_generate() (which gets run later) to set + * ZPOOL_CONFIG_AUX_STATE to "external" in the nvlist. + */ + vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL; + vd->vdev_tmpoffline = B_FALSE; + aux = VDEV_AUX_EXTERNAL; + } else { + vd->vdev_tmpoffline = B_TRUE; + } + + /* * We don't directly use the aux state here, but if we do a * vdev_reopen(), we need this value to be present to remember why we * were faulted. @@ -2753,7 +2799,6 @@ vdev_clear(spa_t *spa, vdev_t *vd) */ if (vd->vdev_faulted || vd->vdev_degraded || !vdev_readable(vd) || !vdev_writeable(vd)) { - /* * When reopening in response to a clear event, it may be due to * a fmadm repair request. In this case, if the device is @@ -2764,6 +2809,7 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_faulted = vd->vdev_degraded = 0ULL; vd->vdev_cant_read = B_FALSE; vd->vdev_cant_write = B_FALSE; + vd->vdev_stat.vs_aux = 0; vdev_reopen(vd == rvd ? rvd : vd->vdev_top); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 20c0ac86a..021f4774b 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -519,6 +519,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_ishole) fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE); + /* Set the reason why we're FAULTED/DEGRADED. */ switch (vd->vdev_stat.vs_aux) { case VDEV_AUX_ERR_EXCEEDED: aux = "err_exceeded"; @@ -529,8 +530,15 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, break; } - if (aux != NULL) + if (aux != NULL && !vd->vdev_tmpoffline) { fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux); + } else { + /* + * We're healthy - clear any previous AUX_STATE values. 
+ */ + if (nvlist_exists(nv, ZPOOL_CONFIG_AUX_STATE)) + nvlist_remove_all(nv, ZPOOL_CONFIG_AUX_STATE); + } if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index f94bf3b1c..268e79714 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -156,6 +156,7 @@ #include <sys/spa.h> #include <sys/spa_impl.h> #include <sys/vdev.h> +#include <sys/vdev_impl.h> #include <sys/priv_impl.h> #include <sys/dmu.h> #include <sys/dsl_dir.h> @@ -1896,7 +1897,8 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc) case VDEV_STATE_FAULTED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && - zc->zc_obj != VDEV_AUX_EXTERNAL) + zc->zc_obj != VDEV_AUX_EXTERNAL && + zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); @@ -4919,7 +4921,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) vdev_clear(spa, vd); - (void) spa_vdev_state_exit(spa, NULL, 0); + (void) spa_vdev_state_exit(spa, spa->spa_root_vdev, 0); /* * Resume any suspended I/Os. |