summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Quigley <[email protected]>2017-10-23 12:42:37 -0600
committerBrian Behlendorf <[email protected]>2017-10-23 11:42:37 -0700
commitd9daa7abcf04f75ba013ec954c4f2d4854ba1cbc (patch)
tree794726af3d7e4b9f28e1a930f39725bb8a83a7a2
parentf8cd871a01a5e38e35cb4d44768dce09265265f4 (diff)
ZTS: Add auto-spare tests
The ZED is expected to automatically kick in a hot spare device when there's one available in the pool and a sufficient number of read errors have been encountered. Use zinject to simulate the failure condition and verify the hot spare is used. auto_spare_001_pos.ksh: read IO errors, the vdev is FAULTED auto_spare_002_pos.ksh: read CHECKSUM errors, the vdev is DEGRADED Reviewed-by: Richard Elling <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: David Quigley <[email protected]> Closes #6280
-rw-r--r--tests/runfiles/linux.run3
-rw-r--r--tests/zfs-tests/include/libtest.shlib50
-rw-r--r--tests/zfs-tests/tests/functional/fault/Makefile.am4
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh91
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh90
-rw-r--r--tests/zfs-tests/tests/functional/fault/fault.cfg5
6 files changed, 241 insertions, 2 deletions
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 47ddc6bc3..6fe6b6588 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -381,7 +381,8 @@ tests = ['events_001_pos', 'events_002_pos']
tests = ['exec_001_pos', 'exec_002_neg']
[tests/functional/fault]
-tests = ['auto_online_001_pos', 'auto_replace_001_pos']
+tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
+ 'auto_spare_002_pos']
[tests/functional/features/async_destroy]
tests = ['async_destroy_001_pos']
diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
index eef678455..0e7f20f0e 100644
--- a/tests/zfs-tests/include/libtest.shlib
+++ b/tests/zfs-tests/include/libtest.shlib
@@ -2030,6 +2030,31 @@ function check_hotspare_state # pool disk state{inuse,avail}
}
#
+# Wait until a hotspare transitions to a given state or times out.
+#
+# Return 0 when pool/disk matches expected state, 1 on timeout.
+#
+function wait_hotspare_state # pool disk state timeout
+{
+	typeset pool=$1
+	typeset disk=${2#$DEV_DSKDIR/}	# strip a leading "$DEV_DSKDIR/", if present
+	typeset state=$3
+	typeset timeout=${4:-60}	# number of 1-second polls; defaults to 60
+	typeset -i i=0
+
+	while [[ $i -lt $timeout ]]; do	# poll once per second
+		if check_hotspare_state $pool $disk $state; then
+			return 0
+		fi
+
+		i=$((i+1))
+		sleep 1
+	done
+
+	return 1	# state never matched within the timeout
+}
+
+#
# Verify a given slog disk is inuse or avail
#
# Return 0 is pool/disk matches expected state, 1 otherwise
@@ -2068,6 +2093,31 @@ function check_vdev_state # pool disk state{online,offline,unavail}
}
#
+# Wait until a vdev transitions to a given state or times out.
+#
+# Return 0 when pool/disk matches expected state, 1 on timeout.
+#
+function wait_vdev_state # pool disk state timeout
+{
+	typeset pool=$1
+	typeset disk=${2#$DEV_DSKDIR/}	# strip a leading "$DEV_DSKDIR/", if present
+	typeset state=$3
+	typeset timeout=${4:-60}	# number of 1-second polls; defaults to 60
+	typeset -i i=0
+
+	while [[ $i -lt $timeout ]]; do	# poll once per second
+		if check_vdev_state $pool $disk $state; then
+			return 0
+		fi
+
+		i=$((i+1))
+		sleep 1
+	done
+
+	return 1	# state never matched within the timeout
+}
+
+#
# Check the output of 'zpool status -v <pool>',
# and to see if the content of <token> contain the <keyword> specified.
#
diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am
index eeff31261..436f3e8be 100644
--- a/tests/zfs-tests/tests/functional/fault/Makefile.am
+++ b/tests/zfs-tests/tests/functional/fault/Makefile.am
@@ -4,4 +4,6 @@ dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
auto_online_001_pos.ksh \
- auto_replace_001_pos.ksh
+ auto_replace_001_pos.ksh \
+ auto_spare_001_pos.ksh \
+ auto_spare_002_pos.ksh
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
new file mode 100755
index 000000000..82f7f4834
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh
@@ -0,0 +1,91 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# drive is faulted due to IO ERRORS.
+#
+# STRATEGY:
+# 1. Create a pool with hot spares
+# 2. Create a filesystem with the primary cache disabled to force reads
+# 3. Write a file to the pool to be read back
+# 4. Inject IO ERRORS on read with a zinject error handler
+# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+# 6. Clear the fault
+# 7. Verify the hot spare is available and expected pool/device status
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ rm -f $VDEV_FILES $SPARE_FILE
+}
+
+log_assert "Testing automated auto-spare FMA test"
+
+log_onexit cleanup
+
+TESTFILE="/$TESTPOOL/$TESTFS/testfile"
+
+for type in "mirror" "raidz" "raidz2"; do
+	# 1. Create a pool with hot spares
+	log_must truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
+	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE
+
+	# 2. Create a filesystem with the primary cache disabled to force reads
+	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
+	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS
+
+	# 3. Write a file to the pool to be read back
+	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
+
+	# 4. Inject IO ERRORS on read with a zinject error handler
+	log_must zinject -d $FAULT_FILE -e io -T read $TESTPOOL
+	log_must cp $TESTFILE /dev/null
+
+	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+	log_note "Wait for ZED to auto-spare"
+	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "FAULTED" 60
+	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
+	log_must check_state $TESTPOOL "" "DEGRADED"
+
+	# 6. Clear the fault
+	log_must zinject -c all
+	log_must zpool clear $TESTPOOL $FAULT_FILE
+
+	# 7. Verify the hot spare is available and expected pool/device status
+	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
+	log_must is_pool_resilvered $TESTPOOL
+	log_must check_state $TESTPOOL "" "ONLINE"
+
+	cleanup		# tear down so the next layout starts from scratch
+done
+
+log_pass "Auto-spare test successful"
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
new file mode 100755
index 000000000..f0ddac35c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
@@ -0,0 +1,90 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# drive is faulted due to CHECKSUM ERRORS.
+#
+# STRATEGY:
+# 1. Create a pool with hot spares
+# 2. Create a filesystem with the primary cache disabled to force reads
+# 3. Write a file to the pool to be read back
+# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
+# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+# 6. Clear the fault
+# 7. Verify the hot spare is available and expected pool/device status
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ rm -f $VDEV_FILES $SPARE_FILE
+}
+
+log_assert "Testing automated auto-spare FMA test"
+
+log_onexit cleanup
+
+TESTFILE="/$TESTPOOL/$TESTFS/testfile"
+
+for type in "mirror" "raidz" "raidz2"; do
+	# 1. Create a pool with hot spares
+	log_must truncate -s $SPA_MINDEVSIZE $VDEV_FILES $SPARE_FILE
+	log_must zpool create -f $TESTPOOL $type $VDEV_FILES spare $SPARE_FILE
+
+	# 2. Create a filesystem with the primary cache disabled to force reads
+	log_must zfs create -o primarycache=none $TESTPOOL/$TESTFS
+	log_must zfs set recordsize=16k $TESTPOOL/$TESTFS
+
+	# 3. Write a file to the pool to be read back
+	log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
+
+	# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
+	log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
+	log_must cp $TESTFILE /dev/null
+
+	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+	log_note "Wait for ZED to auto-spare"
+	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "DEGRADED" 60
+	log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE"
+	log_must check_state $TESTPOOL "" "DEGRADED"
+
+	# 6. Clear the fault
+	log_must zinject -c all
+	log_must zpool clear $TESTPOOL $FAULT_FILE
+
+	# 7. Verify the hot spare is available and expected pool/device status
+	log_must wait_vdev_state $TESTPOOL $FAULT_FILE "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "AVAIL"
+	log_must check_state $TESTPOOL "" "ONLINE"
+
+	cleanup		# tear down so the next layout starts from scratch
+done
+
+log_pass "Auto-spare test successful"
diff --git a/tests/zfs-tests/tests/functional/fault/fault.cfg b/tests/zfs-tests/tests/functional/fault/fault.cfg
index e6e4fe582..16a4fb835 100644
--- a/tests/zfs-tests/tests/functional/fault/fault.cfg
+++ b/tests/zfs-tests/tests/functional/fault/fault.cfg
@@ -51,3 +51,8 @@ if is_linux; then
else
DEV_DSKDIR="/dev"
fi
+
+export VDEV_FILES="$TEST_BASE_DIR/file-1 $TEST_BASE_DIR/file-2 \
+ $TEST_BASE_DIR/file-3 $TEST_BASE_DIR/file-4"	# space-separated list of data vdev files for 'zpool create'
+export SPARE_FILE="$TEST_BASE_DIR/spare-1"	# file passed after 'spare' in 'zpool create'
+export FAULT_FILE="$TEST_BASE_DIR/file-1"	# zinject target; same path as the first VDEV_FILES entry