diff options
author | Tom Caputi <[email protected]> | 2018-10-19 00:06:18 -0400 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-10-18 21:06:18 -0700 |
commit | 80a91e7469669e2a5da5873b8f09a752f7869062 (patch) | |
tree | ef5a4462892becccb939b2cd42a54ed580f5894f /module/zfs/vdev.c | |
parent | 9f438c5f948c0072f16431407a373ead34fabf6e (diff) |
Defer new resilvers until the current one ends
Currently, if a resilver is triggered for any reason while an
existing one is running, zfs will immediately restart the existing
resilver from the beginning to include the new drive. This causes
problems for system administrators when a drive fails while another
is already resilvering. In this case, the optimal thing to do to
reduce risk of data loss is to wait for the current resilver to end
before immediately replacing the second failed drive, which allows
the system to operate with two incomplete drives for the minimum
amount of time.
This patch introduces the resilver_defer feature that essentially
does this for the admin without forcing them to wait and monitor
the resilver manually. The change requires an on-disk feature
since we must mark drives that are part of a deferred resilver in
the vdev config to ensure that we do not assume they are done
resilvering when an existing resilver completes.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: @mmaybee
Signed-off-by: Tom Caputi <[email protected]>
Closes #7732
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r-- | module/zfs/vdev.c | 35 |
1 files changed, 31 insertions, 4 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 2c95626c4..1521acc40 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -790,6 +790,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, &vd->vdev_resilver_txg); + if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER)) + vdev_set_deferred_resilver(spa, vd); + /* * In general, when importing a pool we want to ignore the * persistent fault state, as the diagnosis made on another @@ -1798,8 +1801,13 @@ vdev_open(vdev_t *vd) * since this would just restart the scrub we are already doing. */ if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && - vdev_resilver_needed(vd, NULL, NULL)) - spa_async_request(spa, SPA_ASYNC_RESILVER); + vdev_resilver_needed(vd, NULL, NULL)) { + if (dsl_scan_resilvering(spa->spa_dsl_pool) && + spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) + vdev_set_deferred_resilver(spa, vd); + else + spa_async_request(spa, SPA_ASYNC_RESILVER); + } return (0); } @@ -2488,6 +2496,9 @@ vdev_dtl_should_excise(vdev_t *vd) if (vd->vdev_state < VDEV_STATE_DEGRADED) return (B_FALSE); + if (vd->vdev_resilver_deferred) + return (B_FALSE); + if (vd->vdev_resilver_txg == 0 || range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) return (B_TRUE); @@ -3618,8 +3629,14 @@ vdev_clear(spa_t *spa, vdev_t *vd) if (vd != rvd && vdev_writeable(vd->vdev_top)) vdev_state_dirty(vd->vdev_top); - if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) - spa_async_request(spa, SPA_ASYNC_RESILVER); + if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) { + if (dsl_scan_resilvering(spa->spa_dsl_pool) && + spa_feature_is_enabled(spa, + SPA_FEATURE_RESILVER_DEFER)) + vdev_set_deferred_resilver(spa, vd); + else + spa_async_request(spa, SPA_ASYNC_RESILVER); + } spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR); } @@ -3840,6 +3857,8 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) vs->vs_fragmentation = (vd->vdev_mg != NULL) ? vd->vdev_mg->mg_fragmentation : 0; } + if (vd->vdev_ops->vdev_op_leaf) + vs->vs_resilver_deferred = vd->vdev_resilver_deferred; } ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0); @@ -4578,6 +4597,14 @@ vdev_deadman(vdev_t *vd, char *tag) } } +void +vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd) +{ + ASSERT(vd->vdev_ops->vdev_op_leaf); + vd->vdev_resilver_deferred = B_TRUE; + spa->spa_resilver_deferred = B_TRUE; +} + #if defined(_KERNEL) EXPORT_SYMBOL(vdev_fault); EXPORT_SYMBOL(vdev_degrade); |