aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--module/zfs/vdev.c43
-rw-r--r--tests/runfiles/linux.run3
-rw-r--r--tests/zfs-tests/tests/Makefile.am1
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/fault_limits.ksh96
4 files changed, 132 insertions, 11 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 9305bd894..860572336 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -3149,9 +3149,9 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
* Reassess DTLs after a config change or scrub completion. If txg == 0 no
* write operations will be issued to the pool.
*/
-void
-vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
- boolean_t scrub_done, boolean_t rebuild_done)
+static void
+vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
+ boolean_t scrub_done, boolean_t rebuild_done, boolean_t faulting)
{
spa_t *spa = vd->vdev_spa;
avl_tree_t reftree;
@@ -3160,8 +3160,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
for (int c = 0; c < vd->vdev_children; c++)
- vdev_dtl_reassess(vd->vdev_child[c], txg,
- scrub_txg, scrub_done, rebuild_done);
+ vdev_dtl_reassess_impl(vd->vdev_child[c], txg,
+ scrub_txg, scrub_done, rebuild_done, faulting);
if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
return;
@@ -3255,11 +3255,21 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
if (scrub_done)
range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
- if (!vdev_readable(vd))
+
+ /*
+ * For the faulting case, treat members of a replacing vdev
+ * as if they are not available. It's more likely than not that
+ * a vdev in a replacing vdev could encounter read errors so
+ * treat it as not being able to contribute.
+ */
+ if (!vdev_readable(vd) ||
+ (faulting && vd->vdev_parent != NULL &&
+ vd->vdev_parent->vdev_ops == &vdev_replacing_ops)) {
range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
- else
+ } else {
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
range_tree_add, vd->vdev_dtl[DTL_OUTAGE]);
+ }
/*
* If the vdev was resilvering or rebuilding and no longer
@@ -3321,6 +3331,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
}
}
+void
+vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
+ boolean_t scrub_done, boolean_t rebuild_done)
+{
+ return (vdev_dtl_reassess_impl(vd, txg, scrub_txg, scrub_done,
+ rebuild_done, B_FALSE));
+}
+
/*
* Iterate over all the vdevs except spare, and post kobj events
*/
@@ -3548,7 +3566,11 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
}
/*
- * Determine whether the specified vdev can be offlined/detached/removed
+ * Determine whether the specified vdev can be
+ * - offlined
+ * - detached
+ * - removed
+ * - faulted
* without losing data.
*/
boolean_t
@@ -3558,6 +3580,7 @@ vdev_dtl_required(vdev_t *vd)
vdev_t *tvd = vd->vdev_top;
uint8_t cant_read = vd->vdev_cant_read;
boolean_t required;
+ boolean_t faulting = vd->vdev_state == VDEV_STATE_FAULTED;
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
@@ -3570,10 +3593,10 @@ vdev_dtl_required(vdev_t *vd)
* If not, we can safely offline/detach/remove the device.
*/
vd->vdev_cant_read = B_TRUE;
- vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
+ vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
vd->vdev_cant_read = cant_read;
- vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
+ vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
if (!required && zio_injection_enabled) {
required = !!zio_handle_device_injection(vd, NULL,
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index d791ce57c..4e262affb 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -125,7 +125,8 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
- 'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s']
+ 'fault_limits', 'scrub_after_resilver', 'suspend_resume_single',
+ 'zpool_status_-s']
tags = ['functional', 'fault']
[tests/functional/features/large_dnode:Linux]
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 994fba173..da6dc235c 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1525,6 +1525,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/fault/cleanup.ksh \
functional/fault/decompress_fault.ksh \
functional/fault/decrypt_fault.ksh \
+ functional/fault/fault_limits.ksh \
functional/fault/scrub_after_resilver.ksh \
functional/fault/suspend_resume_single.ksh \
functional/fault/setup.ksh \
diff --git a/tests/zfs-tests/tests/functional/fault/fault_limits.ksh b/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
new file mode 100755
index 000000000..2f62a15c3
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/fault_limits.ksh
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2024 by Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION: Verify that raidz children vdev fault count is restricted
+#
+# STRATEGY:
+# 1. Create a raidz2 or raidz3 pool and add some data to it
+# 2. Replace one of the child vdevs to create a replacing vdev
+# 3. While it is resilvering, attempt to fault disks
+# 4. Verify that less than parity count was faulted while replacing
+#
+
+TESTPOOL="fault-test-pool"
+PARITY=$((RANDOM%(2) + 2))
+VDEV_CNT=$((4 + (2 * PARITY)))
+VDEV_SIZ=512M
+
+function cleanup
+{
+ poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"
+
+ for i in {0..$((VDEV_CNT - 1))}; do
+ log_must rm -f "$TEST_BASE_DIR/dev-$i"
+ done
+}
+
+log_onexit cleanup
+log_assert "restricts raidz children vdev fault count"
+
+log_note "creating $VDEV_CNT vdevs for parity $PARITY test"
+typeset -a disks
+for i in {0..$((VDEV_CNT - 1))}; do
+ device=$TEST_BASE_DIR/dev-$i
+ log_must truncate -s $VDEV_SIZ $device
+ disks[${#disks[*]}+1]=$device
+done
+
+log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))]}
+
+# Add some data to the pool
+log_must zfs create $TESTPOOL/fs
+MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
+log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
+sync_pool $TESTPOOL
+
+# Replace the last child vdev to form a replacing vdev
+log_must zpool replace ${TESTPOOL} ${disks[$((VDEV_CNT - 1))]} ${disks[$VDEV_CNT]}
+# imediately offline replacement disk to keep replacing vdev around
+log_must zpool offline ${TESTPOOL} ${disks[$VDEV_CNT]}
+
+# Fault disks while a replacing vdev is still active
+for disk in ${disks[0..$PARITY]}; do
+ log_must zpool offline -tf ${TESTPOOL} $disk
+done
+
+zpool status $TESTPOOL
+
+# Count the faults that succeeded
+faults=0
+for disk in ${disks[0..$PARITY]}; do
+ state=$(zpool get -H -o value state ${TESTPOOL} ${disk})
+ if [ "$state" = "FAULTED" ] ; then
+ ((faults=faults+1))
+ fi
+done
+
+log_must test "$faults" -lt "$PARITY"
+log_must test "$faults" -gt 0
+
+log_pass "restricts raidz children vdev fault count"