Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/dsl_scan.c    103
-rw-r--r--  module/zfs/spa.c          16
-rw-r--r--  module/zfs/vdev.c         35
-rw-r--r--  module/zfs/vdev_label.c    6
4 files changed, 150 insertions, 10 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index b84c2aa45..aff99f275 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -175,6 +175,8 @@ enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
/* max number of blocks to free in a single TXG */
unsigned long zfs_async_block_max_blocks = 100000;
+int zfs_resilver_disable_defer = 0; /* set to disable resilver deferring */
+
/*
* We wait a few txgs after importing a pool to begin scanning so that
* the import / mounting code isn't held up by scrub / resilver IO.
@@ -720,6 +722,11 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
+ if (func == POOL_SCAN_RESILVER) {
+ dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ return (0);
+ }
+
if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
/* got scrub start cmd, resume paused scrub */
int err = dsl_scrub_set_pause_resume(scn->scn_dp,
@@ -736,6 +743,41 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
+/*
+ * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
+ * B_TRUE if we have devices that need to be resilvered and are available to
+ * accept resilver I/Os.
+ */
+static boolean_t
+dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+ boolean_t resilver_needed = B_FALSE;
+ spa_t *spa = vd->vdev_spa;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ resilver_needed |=
+ dsl_scan_clear_deferred(vd->vdev_child[c], tx);
+ }
+
+ if (vd == spa->spa_root_vdev &&
+ spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+ vdev_config_dirty(vd);
+ spa->spa_resilver_deferred = B_FALSE;
+ return (resilver_needed);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (resilver_needed);
+
+ if (vd->vdev_resilver_deferred)
+ vd->vdev_resilver_deferred = B_FALSE;
+
+ return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+ vdev_resilver_needed(vd, NULL, NULL));
+}
+
/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@@ -835,6 +877,25 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
* Let the async thread assess this and handle the detach.
*/
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
+
+ /*
+ * Clear any deferred_resilver flags in the config.
+ * If there are drives that need resilvering, kick
+ * off an asynchronous request to start resilver.
+ * dsl_scan_clear_deferred() may update the config
+ * before the resilver can restart. In the event of
+ * a crash during this period, the spa loading code
+ * will find the drives that need to be resilvered
+ * when the machine reboots and start the resilver then.
+ */
+ boolean_t resilver_needed =
+ dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
+ if (resilver_needed) {
+ spa_history_log_internal(spa,
+ "starting deferred resilver", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
}
scn->scn_phys.scn_end_time = gethrestime_sec();
@@ -2967,6 +3028,26 @@ dsl_scan_active(dsl_scan_t *scn)
}
static boolean_t
+dsl_scan_check_deferred(vdev_t *vd)
+{
+ boolean_t need_resilver = B_FALSE;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ need_resilver |=
+ dsl_scan_check_deferred(vd->vdev_child[c]);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (need_resilver);
+
+ if (!vd->vdev_resilver_deferred)
+ need_resilver = B_TRUE;
+
+ return (need_resilver);
+}
+
+static boolean_t
dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
uint64_t phys_birth)
{
@@ -3013,6 +3094,13 @@ dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
return (B_FALSE);
+ /*
+ * Check that this top-level vdev has a device under it which
+ * is resilvering and is not deferred.
+ */
+ if (!dsl_scan_check_deferred(vd))
+ return (B_FALSE);
+
return (B_TRUE);
}
@@ -3173,12 +3261,19 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
spa_t *spa = dp->dp_spa;
state_sync_type_t sync_type = SYNC_OPTIONAL;
+ if (spa->spa_resilver_deferred &&
+ !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
+ spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+
/*
* Check for scn_restart_txg before checking spa_load_state, so
* that we can restart an old-style scan while the pool is being
- * imported (see dsl_scan_init).
+ * imported (see dsl_scan_init). We also restart scans if there
+ * is a deferred resilver and the user has manually disabled
+ * deferred resilvers via the tunable.
*/
- if (dsl_scan_restarting(scn, tx)) {
+ if (dsl_scan_restarting(scn, tx) ||
+ (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
@@ -4000,4 +4095,8 @@ MODULE_PARM_DESC(zfs_scan_strict_mem_lim,
module_param(zfs_scan_fill_weight, int, 0644);
MODULE_PARM_DESC(zfs_scan_fill_weight,
"Tunable to adjust bias towards more filled segments during scans");
+
+module_param(zfs_resilver_disable_defer, int, 0644);
+MODULE_PARM_DESC(zfs_resilver_disable_defer,
+ "Process all resilvers immediately");
#endif
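
The two helpers added to dsl_scan.c above, dsl_scan_clear_deferred() and dsl_scan_check_deferred(), share one traversal shape: recurse over every child, OR the per-leaf result upward, and only act once a leaf vdev is reached. The short standalone sketch below shows that pattern in isolation; struct toy_vdev, clear_deferred(), and the sample tree are illustrative stand-ins rather than ZFS code, and the real functions additionally skip aux/non-concrete vdevs and manage the SPA_FEATURE_RESILVER_DEFER refcount at the root.

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-in for a vdev: interior nodes have children, leaves carry flags. */
    struct toy_vdev {
            struct toy_vdev **children;
            int nchildren;
            bool is_leaf;
            bool resilver_deferred;  /* analogous to vd->vdev_resilver_deferred */
            bool needs_resilver;     /* analogous to vdev_resilver_needed() on a leaf */
    };

    /*
     * Walk the tree bottom-up: clear the deferred flag on every leaf and
     * report whether any leaf still needs resilvering, mirroring the shape
     * of dsl_scan_clear_deferred() in the patch above.
     */
    static bool
    clear_deferred(struct toy_vdev *vd)
    {
            bool resilver_needed = false;

            for (int c = 0; c < vd->nchildren; c++)
                    resilver_needed |= clear_deferred(vd->children[c]);

            if (!vd->is_leaf)
                    return (resilver_needed);

            vd->resilver_deferred = false;
            return (resilver_needed || vd->needs_resilver);
    }

    int
    main(void)
    {
            struct toy_vdev leaf0 = { .is_leaf = true, .resilver_deferred = true, .needs_resilver = true };
            struct toy_vdev leaf1 = { .is_leaf = true };
            struct toy_vdev *kids[] = { &leaf0, &leaf1 };
            struct toy_vdev root = { .children = kids, .nchildren = 2 };

            printf("resilver needed: %d\n", clear_deferred(&root));
            printf("leaf0 deferred flag now: %d\n", leaf0.resilver_deferred);
            return (0);
    }

Because the OR happens before the leaf check, the root caller sees B_TRUE as soon as any leaf in the tree still wants a resilver, which is how dsl_scan_done() above decides whether to queue SPA_ASYNC_RESILVER.
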
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index fdce49c40..3785981b7 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -6059,9 +6059,14 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
/*
* Schedule the resilver to restart in the future. We do this to
* ensure that dmu_sync-ed blocks have been stitched into the
- * respective datasets.
+ * respective datasets. We do not do this if resilvers have been
+ * deferred.
*/
- dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+ if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, newvd);
+ else
+ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -6933,6 +6938,7 @@ static void
spa_async_thread(void *arg)
{
spa_t *spa = (spa_t *)arg;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
int tasks;
ASSERT(spa->spa_sync_on);
@@ -7008,8 +7014,10 @@ spa_async_thread(void *arg)
/*
* Kick off a resilver.
*/
- if (tasks & SPA_ASYNC_RESILVER)
- dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ if (tasks & SPA_ASYNC_RESILVER &&
+ (!dsl_scan_resilvering(dp) ||
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
+ dsl_resilver_restart(dp, 0);
/*
* Let the world know that we're done.
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 2c95626c4..1521acc40 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -790,6 +790,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
&vd->vdev_resilver_txg);
+ if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, vd);
+
/*
* In general, when importing a pool we want to ignore the
* persistent fault state, as the diagnosis made on another
@@ -1798,8 +1801,13 @@ vdev_open(vdev_t *vd)
* since this would just restart the scrub we are already doing.
*/
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
- vdev_resilver_needed(vd, NULL, NULL))
- spa_async_request(spa, SPA_ASYNC_RESILVER);
+ vdev_resilver_needed(vd, NULL, NULL)) {
+ if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, vd);
+ else
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
return (0);
}
@@ -2488,6 +2496,9 @@ vdev_dtl_should_excise(vdev_t *vd)
if (vd->vdev_state < VDEV_STATE_DEGRADED)
return (B_FALSE);
+ if (vd->vdev_resilver_deferred)
+ return (B_FALSE);
+
if (vd->vdev_resilver_txg == 0 ||
range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
return (B_TRUE);
@@ -3618,8 +3629,14 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);
- if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
- spa_async_request(spa, SPA_ASYNC_RESILVER);
+ if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
+ if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ spa_feature_is_enabled(spa,
+ SPA_FEATURE_RESILVER_DEFER))
+ vdev_set_deferred_resilver(spa, vd);
+ else
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
@@ -3840,6 +3857,8 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
vd->vdev_mg->mg_fragmentation : 0;
}
+ if (vd->vdev_ops->vdev_op_leaf)
+ vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
}
ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0);
@@ -4578,6 +4597,14 @@ vdev_deadman(vdev_t *vd, char *tag)
}
}
+void
+vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+{
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ vd->vdev_resilver_deferred = B_TRUE;
+ spa->spa_resilver_deferred = B_TRUE;
+}
+
#if defined(_KERNEL)
EXPORT_SYMBOL(vdev_fault);
EXPORT_SYMBOL(vdev_degrade);
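
The vdev_label.c hunk below persists the deferred state as a bare boolean nvlist entry, so its mere presence in the label is the signal; that is why the vdev_alloc() change above only needs nvlist_exists() to pick it up on import. Below is a minimal userland sketch of that presence-only round trip, assuming libnvpair from a ZFS build is available and the program is linked with -lnvpair; the "resilver_defer_demo" key is a hypothetical stand-in for the real ZPOOL_CONFIG_RESILVER_DEFER name.

    #include <stdio.h>
    #include <libnvpair.h>  /* fnvlist_*() and nvlist_exists() from libnvpair */

    int
    main(void)
    {
            nvlist_t *nv = fnvlist_alloc();

            /*
             * Adding a DATA_TYPE_BOOLEAN entry records presence only, in the
             * same way the patch calls fnvlist_add_boolean() for the defer flag.
             */
            fnvlist_add_boolean(nv, "resilver_defer_demo");

            /* The reader side only has to test for existence, as vdev_alloc() does. */
            printf("deferred flag present: %d\n", nvlist_exists(nv, "resilver_defer_demo"));

            fnvlist_free(nv);
            return (0);
    }
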
diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c
index 439ab7438..7e86e3a8b 100644
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@@ -524,6 +524,12 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
vd->vdev_top_zap);
}
+
+ if (vd->vdev_resilver_deferred) {
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ ASSERT(spa->spa_resilver_deferred);
+ fnvlist_add_boolean(nv, ZPOOL_CONFIG_RESILVER_DEFER);
+ }
}
if (getstats) {