aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Poduska <[email protected]>2020-05-13 13:54:27 -0400
committerGitHub <[email protected]>2020-05-13 10:54:27 -0700
commit41035a049643ff7083a6cb6cd43b8eb70a7d18a1 (patch)
tree121cec91a2b10ca6e9fb7d376a7599d47f4aee8c
parentb29e31d80d6cb78dbd889e9b529333944b4c3ba1 (diff)
Resilver restarts unnecessarily when it encounters errors
When a resilver finishes, vdev_dtl_reassess is called to hopefully excise DTL_MISSING (amongst other things). If there are errors during the resilver, they are tracked in DTL_SCRUB, as spelled out in the block comment in vdev.c. DTL_SCRUB is in-core only, so it can only be used if the pool was online for the whole resilver. This state is tracked with the spa_scrub_started flag, which only gets set when the scan is initialized. Unfortunately, this flag gets cleared right before vdev_dtl_reassess gets called, so if there are any errors during the scan, DTL_MISSING will never get excised and the resilver will just continually restart. This fix simply moves clearing that flag until after the call to vdev_dtl_reasses. In addition, if a pool is imported and already has scn_errors > 0, this change will restart the resilver immediately instead of doing the rest of the scan and then restarting it from the beginning. On the other hand, if scn_errors == 0 at import, then no errors have been encountered so far, so the spa_scrub_started flag can be safely set. A test has been added to verify that resilver does not restart when relevant DTL's are available. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Paul Zuchowski <[email protected]> Signed-off-by: John Poduska <[email protected]> Closes #10291
-rw-r--r--module/zfs/dsl_scan.c23
-rw-r--r--module/zfs/vdev.c22
-rw-r--r--tests/runfiles/common.run2
-rw-r--r--tests/zfs-tests/include/tunables.cfg1
-rw-r--r--tests/zfs-tests/tests/functional/resilver/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh102
6 files changed, 149 insertions, 4 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 74ef2e155..f09501793 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -542,6 +542,22 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
zfs_dbgmsg("new-style scrub was modified "
"by old software; restarting in txg %llu",
(longlong_t)scn->scn_restart_txg);
+ } else if (dsl_scan_resilvering(dp)) {
+ /*
+ * If a resilver is in progress and there are already
+ * errors, restart it instead of finishing this scan and
+ * then restarting it. If there haven't been any errors
+ * then remember that the incore DTL is valid.
+ */
+ if (scn->scn_phys.scn_errors > 0) {
+ scn->scn_restart_txg = txg;
+ zfs_dbgmsg("resilver can't excise DTL_MISSING "
+ "when finished; restarting in txg %llu",
+ (u_longlong_t)scn->scn_restart_txg);
+ } else {
+ /* it's safe to excise DTL when finished */
+ spa->spa_scrub_started = B_TRUE;
+ }
}
}
@@ -887,7 +903,6 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
- spa->spa_scrub_started = B_FALSE;
spa->spa_scrub_active = B_FALSE;
/*
@@ -915,6 +930,12 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_errlog_rotate(spa);
/*
+ * Don't clear flag until after vdev_dtl_reassess to ensure that
+ * DTL_MISSING will get updated when possible.
+ */
+ spa->spa_scrub_started = B_FALSE;
+
+ /*
* We may have finished replacing a device.
* Let the async thread assess this and handle the detach.
*/
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 3c2135029..923bf2e33 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2583,7 +2583,6 @@ vdev_dtl_should_excise(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
- ASSERT0(scn->scn_phys.scn_errors);
ASSERT0(vd->vdev_children);
if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2634,6 +2633,7 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (vd->vdev_ops->vdev_op_leaf) {
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+ boolean_t wasempty = B_TRUE;
mutex_enter(&vd->vdev_dtl_lock);
@@ -2643,6 +2643,18 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (zfs_scan_ignore_errors && scn)
scn->scn_phys.scn_errors = 0;
+ if (scrub_txg != 0 &&
+ !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ wasempty = B_FALSE;
+ zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d "
+ "dtl:%llu/%llu errors:%llu",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg,
+ (u_longlong_t)scrub_txg, spa->spa_scrub_started,
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd),
+ (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0));
+ }
+
/*
* If we've completed a scan cleanly then determine
* if this vdev should remove any DTLs. We only want to
@@ -2679,6 +2691,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
space_reftree_generate_map(&reftree,
vd->vdev_dtl[DTL_MISSING], 1);
space_reftree_destroy(&reftree);
+
+ if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ zfs_dbgmsg("update DTL_MISSING:%llu/%llu",
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd));
+ } else if (!wasempty) {
+ zfs_dbgmsg("DTL_MISSING is now empty");
+ }
}
range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 2fcde83b3..a9bede475 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -758,7 +758,7 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
tags = ['functional', 'reservation']
[tests/functional/resilver]
-tests = ['resilver_restart_001']
+tests = ['resilver_restart_001', 'resilver_restart_002']
tags = ['functional', 'resilver']
[tests/functional/rootpool]
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index f85b15658..efbcc09e7 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -59,6 +59,7 @@ OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_esti
REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress
REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment
RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms
+SCAN_LEGACY scan_legacy zfs_scan_legacy
SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress
SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit
SEND_HOLES_WITHOUT_BIRTH_TIME send_holes_without_birth_time send_holes_without_birth_time
diff --git a/tests/zfs-tests/tests/functional/resilver/Makefile.am b/tests/zfs-tests/tests/functional/resilver/Makefile.am
index 465d8f3a3..38136a843 100644
--- a/tests/zfs-tests/tests/functional/resilver/Makefile.am
+++ b/tests/zfs-tests/tests/functional/resilver/Makefile.am
@@ -2,7 +2,8 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/resilver
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
- resilver_restart_001.ksh
+ resilver_restart_001.ksh \
+ resilver_restart_002.ksh
dist_pkgdata_DATA = \
resilver.cfg
diff --git a/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh b/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh
new file mode 100755
index 000000000..ebe5e693b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh
@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/resilver/resilver.cfg
+
+#
+# DESCRIPTION:
+# Testing resilver completes when scan errors are encountered, but relevant
+# DTL's have not been lost.
+#
+# STRATEGY:
+# 1. Create a pool (1k recordsize)
+# 2. Create a 32m file (32k records)
+# 3. Inject an error halfway through the file
+# 4. Start a resilver, ensure the error is triggered and that the resilver
+# does not restart after finishing
+#
+# NB: use legacy scanning to ensure scan of specific block causes error
+#
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL
+ rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+ log_must set_tunable32 SCAN_LEGACY $ORIG_SCAN_LEGACY
+}
+
+log_assert "Check for resilver restarts caused by scan errors"
+
+ORIG_SCAN_LEGACY=$(get_tunable SCAN_LEGACY)
+
+log_onexit cleanup
+
+# use legacy scan to ensure injected error will be triggered
+log_must set_tunable32 SCAN_LEGACY 1
+
+ # create the pool and a 32M file (32k blocks)
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+log_must zpool create -f -O recordsize=1k $TESTPOOL ${VDEV_FILES[0]}
+log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=32 > /dev/null 2>&1
+
+# determine objset/object
+objset=$(zdb -d $TESTPOOL/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p')
+object=$(ls -i /$TESTPOOL/file | awk '{print $1}')
+
+# inject event to cause error during resilver
+log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL
+
+# clear events and start resilver
+log_must zpool events -c
+log_must zpool attach $TESTPOOL ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+
+log_note "waiting for read errors to start showing up"
+for iter in {0..59}
+do
+ zpool sync $TESTPOOL
+ err=$(zpool status $TESTPOOL | grep ${VDEV_FILES[0]} | awk '{print $3}')
+ (( $err > 0 )) && break
+ sleep 1
+done
+
+(( $err == 0 )) && log_fail "Unable to induce errors in resilver"
+
+log_note "waiting for resilver to finish"
+for iter in {0..59}
+do
+ finish=$(zpool events | grep "sysevent.fs.zfs.resilver_finish" | wc -l)
+ (( $finish > 0 )) && break
+ sleep 1
+done
+
+(( $finish == 0 )) && log_fail "resilver took too long to finish"
+
+# wait a few syncs to ensure that zfs does not restart the resilver
+log_must zpool sync $TESTPOOL
+log_must zpool sync $TESTPOOL
+
+# check if resilver was restarted
+start=$(zpool events | grep "sysevent.fs.zfs.resilver_start" | wc -l)
+(( $start != 1 )) && log_fail "resilver restarted unnecessarily"
+
+log_pass "Resilver did not restart unnecessarily from scan errors"