aboutsummaryrefslogtreecommitdiffstats
path: root/tests/zfs-tests
diff options
context:
space:
mode:
authorLOLi <[email protected]>2017-12-09 01:58:41 +0100
committerBrian Behlendorf <[email protected]>2017-12-08 16:58:41 -0800
commit4e9b156960562373e005798575a3fbc6d66e32ff (patch)
treea554474cae9721dbb6756620e5009b1fa4f31ad6 /tests/zfs-tests
parent3ab3166347b84c6004002f9a6d06f7a87fe1cd4a (diff)
Various ZED fixes
* Teach ZED to handle spares using the configured ashift: if the zpool 'ashift' property is set then ZED should use its value when kicking in a hotspare; with this change 512e disks can be used as spares for VDEVs that were created with ashift=9, even if ZFS natively detects them as 4K block devices. * Introduce an additional auto_spare test case which verifies that in the face of multiple device failures an appropriate number of spares are kicked in. * Fix zed_stop() in "libtest.shlib" which did not correctly wait for the target pid. * Fix ZED crashing on startup caused by a race condition in libzfs when used in a multi-threaded context. * Convert ZED over to using the tpool library which is already present in the Illumos FMA code. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: loli10K <[email protected]> Closes #2562 Closes #6858
Diffstat (limited to 'tests/zfs-tests')
-rw-r--r--tests/zfs-tests/include/blkdev.shlib36
-rw-r--r--tests/zfs-tests/include/libtest.shlib16
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh2
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh2
-rw-r--r--tests/zfs-tests/tests/functional/fault/Makefile.am4
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh9
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh20
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh5
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh11
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh101
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh152
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/cleanup.ksh10
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/setup.ksh4
13 files changed, 331 insertions, 41 deletions
diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib
index 876c84356..28ac1052c 100644
--- a/tests/zfs-tests/include/blkdev.shlib
+++ b/tests/zfs-tests/include/blkdev.shlib
@@ -353,16 +353,35 @@ function insert_disk #disk scsi_host
#
# Load scsi_debug module with specified parameters
+# $blksz can be either one of: < 512b | 512e | 4Kn >
#
-function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
+function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
typeset devsize=$1
typeset hosts=$2
typeset tgts=$3
typeset luns=$4
+ typeset blksz=$5
[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
- [[ -z $luns ]] && log_fail "Arguments invalid or missing"
+ [[ -z $luns ]] || [[ -z $blksz ]] && \
+ log_fail "Arguments invalid or missing"
+
+ case "$5" in
+ '512b')
+ typeset sector=512
+ typeset blkexp=0
+ ;;
+ '512e')
+ typeset sector=512
+ typeset blkexp=3
+ ;;
+ '4Kn')
+ typeset sector=4096
+ typeset blkexp=0
+ ;;
+ *) log_fail "Unsupported blksz value: $5" ;;
+ esac
if is_linux; then
modprobe -n scsi_debug
@@ -375,7 +394,8 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
log_fail "scsi_debug module already installed"
else
log_must modprobe scsi_debug dev_size_mb=$devsize \
- add_host=$hosts num_tgts=$tgts max_luns=$luns
+ add_host=$hosts num_tgts=$tgts max_luns=$luns \
+ sector_size=$sector physblk_exp=$blkexp
block_device_wait
lsscsi | egrep scsi_debug > /dev/null
if (($? == 1)); then
@@ -386,6 +406,16 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
}
#
+# Unload scsi_debug module, if needed.
+#
+function unload_scsi_debug
+{
+ if lsmod | grep scsi_debug >/dev/null; then
+ log_must modprobe -r scsi_debug
+ fi
+}
+
+#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
index 6b1658c1b..cc9fddd41 100644
--- a/tests/zfs-tests/include/libtest.shlib
+++ b/tests/zfs-tests/include/libtest.shlib
@@ -3158,14 +3158,26 @@ function zed_stop
if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
zedpid=$(cat ${ZEDLET_DIR}/zed.pid)
kill $zedpid
- wait $zedpid
+ while ps -p $zedpid > /dev/null; do
+ sleep 1
+ done
rm -f ${ZEDLET_DIR}/zed.pid
fi
-
return 0
}
#
+# Drain all zevents
+#
+function zed_events_drain
+{
+ while [ $(zpool events -H | wc -l) -ne 0 ]; do
+ sleep 1
+ zpool events -c >/dev/null
+ done
+}
+
+#
# Check is provided device is being active used as a swap device.
#
function is_swap_inuse
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
index 4477e5402..99c51351c 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
@@ -27,7 +27,7 @@ if is_linux; then
for SDDEVICE in $(get_debug_device); do
unplug $SDDEVICE
done
- modprobe -r scsi_debug
+ unload_scsi_debug
fi
log_pass
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh
index 4dbf8965d..59b8764ce 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh
@@ -22,7 +22,7 @@ verify_runnable "global"
# Create scsi_debug devices for the reopen tests
if is_linux; then
- load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
+ load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
else
log_unsupported "scsi debug module unsupported"
fi
diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am
index 436f3e8be..ef4380835 100644
--- a/tests/zfs-tests/tests/functional/fault/Makefile.am
+++ b/tests/zfs-tests/tests/functional/fault/Makefile.am
@@ -6,4 +6,6 @@ dist_pkgdata_SCRIPTS = \
auto_online_001_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
- auto_spare_002_pos.ksh
+ auto_spare_002_pos.ksh \
+ auto_spare_ashift.ksh \
+ auto_spare_multiple.ksh
diff --git a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh
index 0f6e38ac2..beff5dc87 100755
--- a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh
@@ -54,9 +54,8 @@ fi
function cleanup
{
- #online last disk before fail
- insert_disk $offline_disk $host
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
+ unload_scsi_debug
}
log_assert "Testing automated auto-online FMA test"
@@ -65,8 +64,8 @@ log_onexit cleanup
# If using the default loop devices, need a scsi_debug device for auto-online
if is_loop_device $DISK1; then
- SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}')
- SDDEVICE=$(echo $SD | nawk -F / '{print $3}')
+ load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
+ SDDEVICE=$(get_debug_device)
SDDEVICE_ID=$(get_persistent_disk_name $SDDEVICE)
autoonline_disks="$SDDEVICE"
else
diff --git a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
index 40a680a9b..8e48b2ab4 100755
--- a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
@@ -57,27 +57,23 @@ fi
function setup
{
- lsmod | egrep scsi_debug > /dev/null
- if (($? == 1)); then
- load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
- fi
+ load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
+ SD=$(get_debug_device)
+ SDDEVICE_ID=$(get_persistent_disk_name $SD)
# Register vdev_id alias rule for scsi_debug device to create a
# persistent path
- SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' \
- | nawk -F / '{print $3}')
- SDDEVICE_ID=$(get_persistent_disk_name $SD)
log_must eval "echo "alias scsidebug /dev/disk/by-id/$SDDEVICE_ID" \
>> $VDEVID_CONF"
block_device_wait
-
- SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | egrep ID_VDEV \
- | nawk '{print $2; exit}' | nawk -F = '{print $2; exit}')
+ SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD \
+ | awk -F'=' '/ID_VDEV=/{print $2; exit}')
[[ -z $SDDEVICE ]] && log_fail "vdev rule was not registered properly"
}
function cleanup
{
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
+ unload_scsi_debug
}
log_assert "Testing automated auto-replace FMA test"
@@ -112,7 +108,7 @@ log_must zpool export -F $TESTPOOL
# Offline disk
remove_disk $SD
block_device_wait
-log_must modprobe -r scsi_debug
+unload_scsi_debug
# Reimport pool with drive missing
log_must zpool import $TESTPOOL
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
index 82f7f4834..b6af1a3f4 100755
--- a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
@@ -42,7 +42,7 @@ verify_runnable "both"
function cleanup
{
log_must zinject -c all
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
rm -f $VDEV_FILES $SPARE_FILE
}
@@ -50,6 +50,9 @@ log_assert "Testing automated auto-spare FMA test"
log_onexit cleanup
+# Clear events from previous runs
+zed_events_drain
+
TESTFILE="/$TESTPOOL/$TESTFS/testfile"
for type in "mirror" "raidz" "raidz2"; do
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
index f0ddac35c..cfa748d30 100755
--- a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
@@ -42,7 +42,7 @@ verify_runnable "both"
function cleanup
{
log_must zinject -c all
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
rm -f $VDEV_FILES $SPARE_FILE
}
@@ -50,6 +50,9 @@ log_assert "Testing automated auto-spare FMA test"
log_onexit cleanup
+# Clear events from previous runs
+zed_events_drain
+
TESTFILE="/$TESTPOOL/$TESTFS/testfile"
for type in "mirror" "raidz" "raidz2"; do
@@ -65,8 +68,14 @@ for type in "mirror" "raidz" "raidz2"; do
log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
+ # NOTE: checksum events are ratelimited to max 5 per second, ZED needs
+ # 10 to kick in a spare
log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
log_must cp $TESTFILE /dev/null
+ log_must sleep 1
+ log_must cp $TESTFILE /dev/null
+ log_must sleep 1
+ log_must cp $TESTFILE /dev/null
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
log_note "Wait for ZED to auto-spare"
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh
new file mode 100755
index 000000000..e9857518e
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh
@@ -0,0 +1,101 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright 2017, loli10K <[email protected]>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/math.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# drive is faulted and a custom ashift value needs to be provided to replace it.
+#
+# STRATEGY:
+# 1. Create a pool from 512b devices and set "ashift" pool property accordingly
+# 2. Add one 512e spare device (4Kn would generate IO errors on replace)
+# 3. Inject IO errors with a zinject error handler
+# 4. Start a scrub
+# 5. Verify the ZED kicks in the hot spare and expected pool/device status
+# 6. Clear the fault
+# 7. Verify the hot spare is available and expected pool/device status
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL
+ unload_scsi_debug
+ rm -f $SAFE_DEVICE $FAIL_DEVICE
+}
+
+log_assert "ZED should replace a device using the configured ashift property"
+log_onexit cleanup
+
+# Clear events from previous runs
+zed_events_drain
+
+SAFE_DEVICE="$TEST_BASE_DIR/safe-dev"
+FAIL_DEVICE="$TEST_BASE_DIR/fail-dev"
+
+# 1. Create a pool from 512b devices and set "ashift" pool property accordingly
+for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
+ truncate -s $SPA_MINDEVSIZE $vdev
+done
+log_must zpool create -f $TESTPOOL mirror $SAFE_DEVICE $FAIL_DEVICE
+# NOTE: file VDEVs should be added as 512b devices, verify this "just in case"
+for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
+ verify_eq "9" "$(zdb -e -l $vdev | awk '/ashift: /{print $2}')" "ashift"
+done
+log_must zpool set ashift=9 $TESTPOOL
+
+# 2. Add one 512e spare device (4Kn would generate IO errors on replace)
+# NOTE: must be larger than the existing 512b devices, add 32m of fudge
+load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) $SDHOSTS $SDTGTS $SDLUNS '512e'
+SPARE_DEVICE=$(get_debug_device)
+log_must_busy zpool add $TESTPOOL spare $SPARE_DEVICE
+
+# 3. Inject IO errors with a zinject error handler
+log_must zinject -d $FAIL_DEVICE -e io -T all -f 100 $TESTPOOL
+
+# 4. Start a scrub
+log_must zpool scrub $TESTPOOL
+
+# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+log_note "Wait for ZED to auto-spare"
+log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "FAULTED" 60
+log_must wait_vdev_state $TESTPOOL $SPARE_DEVICE "ONLINE" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "INUSE"
+log_must check_state $TESTPOOL "" "DEGRADED"
+
+# 6. Clear the fault
+log_must zinject -c all
+log_must zpool clear $TESTPOOL $FAIL_DEVICE
+
+# 7. Verify the hot spare is available and expected pool/device status
+log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "ONLINE" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "AVAIL"
+log_must is_pool_resilvered $TESTPOOL
+log_must check_state $TESTPOOL "" "ONLINE"
+
+log_pass "ZED successfully replaces a device using the configured ashift property"
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
new file mode 100755
index 000000000..8650ceff7
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
@@ -0,0 +1,152 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright 2017, loli10K <[email protected]>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# multiple drives are faulted.
+#
+# STRATEGY:
+# 1. Create a pool with two hot spares
+# 2. Inject IO ERRORS with a zinject error handler on the first device
+# 3. Start a scrub
+# 4. Verify the ZED kicks in a hot spare and expected pool/device status
+# 5. Inject IO ERRORS on a second device
+# 6. Start a scrub
+# 7. Verify the ZED kicks in a second hot spare
+# 8. Clear the fault on both devices
+# 9. Verify the hot spares are available and expected pool/device status
+# 10. Rinse and repeat, this time faulting both devices at the same time
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL
+ rm -f $DATA_DEVS $SPARE_DEVS
+}
+
+log_assert "ZED should be able to handle multiple faulted devices"
+log_onexit cleanup
+
+# Clear events from previous runs
+zed_events_drain
+
+FAULT_DEV1="$TEST_BASE_DIR/fault-dev1"
+FAULT_DEV2="$TEST_BASE_DIR/fault-dev2"
+SAFE_DEV1="$TEST_BASE_DIR/safe-dev1"
+SAFE_DEV2="$TEST_BASE_DIR/safe-dev2"
+DATA_DEVS="$FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 $SAFE_DEV2"
+SPARE_DEV1="$TEST_BASE_DIR/spare-dev1"
+SPARE_DEV2="$TEST_BASE_DIR/spare-dev2"
+SPARE_DEVS="$SPARE_DEV1 $SPARE_DEV2"
+
+for type in "mirror" "raidz" "raidz2" "raidz3"; do
+ # 1. Create a pool with two hot spares
+ truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
+ log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+
+ # 2. Inject IO ERRORS with a zinject error handler on the first device
+ log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL
+
+ # 3. Start a scrub
+ log_must zpool scrub $TESTPOOL
+
+ # 4. Verify the ZED kicks in a hot spare and expected pool/device status
+ log_note "Wait for ZED to auto-spare"
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+ log_must check_state $TESTPOOL "" "DEGRADED"
+
+ # 5. Inject IO ERRORS on a second device
+ log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL
+
+ # 6. Start a scrub
+ while is_pool_scrubbing $TESTPOOL || is_pool_resilvering $TESTPOOL; do
+ sleep 1
+ done
+ log_must zpool scrub $TESTPOOL
+
+ # 7. Verify the ZED kicks in a second hot spare
+ log_note "Wait for ZED to auto-spare"
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+ log_must check_state $TESTPOOL "" "DEGRADED"
+
+ # 8. Clear the fault on both devices
+ log_must zinject -c all
+ log_must zpool clear $TESTPOOL $FAULT_DEV1
+ log_must zpool clear $TESTPOOL $FAULT_DEV2
+
+ # 9. Verify the hot spares are available and expected pool/device status
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL"
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL"
+ log_must check_state $TESTPOOL "" "ONLINE"
+
+ # Cleanup
+ cleanup
+done
+
+# Rinse and repeat, this time faulting both devices at the same time
+# NOTE: "raidz" is excluded since it cannot survive 2 faulted devices
+# NOTE: "mirror" is a 4-way mirror here and should survive this test
+for type in "mirror" "raidz2" "raidz3"; do
+ # 1. Create a pool with two hot spares
+ truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
+ log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+
+ # 2. Inject IO ERRORS with a zinject error handler on two devices
+ log_must eval "zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL &"
+ log_must eval "zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL &"
+
+ # 3. Start a scrub
+ log_must zpool scrub $TESTPOOL
+
+ # 4. Verify the ZED kicks in two hot spares and expected pool/device status
+ log_note "Wait for ZED to auto-spare"
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+ log_must check_state $TESTPOOL "" "DEGRADED"
+
+ # 5. Clear the fault on both devices
+ log_must zinject -c all
+ log_must zpool clear $TESTPOOL $FAULT_DEV1
+ log_must zpool clear $TESTPOOL $FAULT_DEV2
+
+ # Cleanup
+ cleanup
+done
+
+log_pass "ZED successfully handles multiple faulted devices"
diff --git a/tests/zfs-tests/tests/functional/fault/cleanup.ksh b/tests/zfs-tests/tests/functional/fault/cleanup.ksh
index 82e379b0d..9d354f30e 100755
--- a/tests/zfs-tests/tests/functional/fault/cleanup.ksh
+++ b/tests/zfs-tests/tests/functional/fault/cleanup.ksh
@@ -33,14 +33,4 @@ cleanup_devices $DISKS
zed_stop
zed_cleanup
-SDDEVICE=$(get_debug_device)
-
-# Offline disk and remove scsi_debug module
-if is_linux; then
- if [ -n "$SDDEVICE" ]; then
- remove_disk $SDDEVICE
- fi
- modprobe -r scsi_debug
-fi
-
log_pass
diff --git a/tests/zfs-tests/tests/functional/fault/setup.ksh b/tests/zfs-tests/tests/functional/fault/setup.ksh
index 3d54d4f21..3d3cbc9e5 100755
--- a/tests/zfs-tests/tests/functional/fault/setup.ksh
+++ b/tests/zfs-tests/tests/functional/fault/setup.ksh
@@ -31,8 +31,4 @@ verify_runnable "global"
zed_setup
zed_start
-# Create a scsi_debug device to be used with auto-online (if using loop devices)
-# and auto-replace regardless of other devices
-load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
-
log_pass