author	Tom Caputi <tcaputi@datto.com>	2019-03-15 17:14:31 -0400
committer	Brian Behlendorf <behlendorf1@llnl.gov>	2019-03-15 14:14:31 -0700
commit	ab7615d92c9bf4bdbbc6a675724b763d16f05280 (patch)
tree	4a61231e1be8887feb8cf3ccdae070a016cc45b7 /tests
parent	2bbec1c910a24bf61c6f41e0762e50face4b8907 (diff)
Multiple DVA Scrubbing Fix
Currently, there is an issue in the sequential scrub code which prevents self healing from working in some cases. The scrub code will split up all DVA copies of a bp and issue each of them separately. The problem is that, since each of the DVAs is no longer associated with the others, the self healing code doesn't have the opportunity to repair problems that show up in one of the DVAs with the data from the others.

This patch fixes this issue by ensuring that all IOs issued by the sequential scrub code include all DVAs. Initially, only the first DVA of each is attempted. If an issue arises, the IO is retried with all available copies, giving the self healing code a chance to correct the issue.

To test this change, this patch also adds the ability for zinject to specify individual DVAs to inject read errors into. We then add a new test case that utilizes this functionality to ensure scrubs and self-healing reads can handle and transparently fix issues with individual copies of blocks.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #8453
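As a quick illustration of the DVA-targeted injection the new test relies on, the flow looks roughly like this (a minimal sketch adapted from the zpool_scrub_multiple_copies.ksh test added below; the tank/fs dataset and its default /tank/fs mountpoint are placeholder names, not part of the patch):

    # Keep three copies of every block so two of them can be damaged.
    zfs create -o copies=3 tank/fs
    mkfile 10m /tank/fs/file
    zpool sync tank

    # New in this patch: -C takes a list of DVA indices, so read errors are
    # injected only into copies 0 and 1, leaving copy 2 intact.
    zinject -a -t data -C 0,1 -e io /tank/fs/file

    # A scrub (or a normal read) should now repair the damaged copies from
    # the remaining good one, reporting no known data errors afterwards.
    zpool scrub tank
    zinject -c all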
Diffstat (limited to 'tests')
-rw-r--r--tests/runfiles/linux.run2
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh77
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh22
4 files changed, 91 insertions, 13 deletions
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 8a3b4d4ee..93f0c03aa 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -444,7 +444,7 @@ tags = ['functional', 'cli_root', 'zpool_resilver']
tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing',
- 'zpool_scrub_offline_device']
+ 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies']
tags = ['functional', 'cli_root', 'zpool_scrub']
[tests/functional/cli_root/zpool_set]
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am
index 69f4b5d3b..e2dfd9d64 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am
@@ -9,7 +9,8 @@ dist_pkgdata_SCRIPTS = \
zpool_scrub_005_pos.ksh \
zpool_scrub_encrypted_unloaded.ksh \
zpool_scrub_offline_device.ksh \
- zpool_scrub_print_repairing.ksh
+ zpool_scrub_print_repairing.ksh \
+ zpool_scrub_multiple_copies.ksh
dist_pkgdata_DATA = \
zpool_scrub.cfg
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh
new file mode 100755
index 000000000..2dd33c99c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh
@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Scrubs and self-healing should be able to repair data from additional
+# copies that may be stored.
+#
+#
+# STRATEGY:
+# 1. Create a dataset with copies=3
+# 2. Write a file to the dataset
+# 3. zinject errors into the first and second DVAs of that file
+# 4. Scrub and verify the scrub repaired all errors
+# 5. Read the file normally to check that self healing also works
+# 6. Remove the zinject handler
+# 7. Scrub again and confirm 0 bytes were scrubbed
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ destroy_dataset $TESTPOOL/$TESTFS2
+ log_must zinject -c all
+}
+log_onexit cleanup
+
+log_assert "Scrubs and self healing must work with additional copies"
+
+log_must zfs create -o copies=3 $TESTPOOL/$TESTFS2
+typeset mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS2)
+log_must mkfile 10m $mntpnt/file
+log_must zpool sync $TESTPOOL
+
+log_must zinject -a -t data -C 0,1 -e io $mntpnt/file
+
+log_must zpool scrub $TESTPOOL
+log_must wait_scrubbed $TESTPOOL
+
+log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+log_must dd if=$mntpnt/file of=/dev/null bs=1M iflag=fullblock
+log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+log_must zinject -c all
+
+log_must zpool scrub $TESTPOOL
+log_must wait_scrubbed $TESTPOOL
+
+zpool status
+
+log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+
+log_pass "Scrubs and self healing work with additional copies"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh
index fdf315dea..7a07e6433 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh
@@ -49,7 +49,7 @@ verify_runnable "global"
function cleanup
{
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2
log_must rm -f $DISK1 $DISK2 $DISK3 $DISK4
}
@@ -103,31 +103,31 @@ log_must truncate -s $DEVSIZE $DISK1
log_must truncate -s $DEVSIZE $DISK2
log_must truncate -s $DEVSIZE $DISK3
log_must truncate -s $DEVSIZE $DISK4
-poolexists $TESTPOOL && destroy_pool $TESTPOOL
-log_must zpool create -O mountpoint=$TESTDIR $TESTPOOL \
+poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2
+log_must zpool create -O mountpoint=$TESTDIR $TESTPOOL2 \
raidz2 $DISK1 $DISK2 $DISK3 $DISK4
# 2. Offline the first device
-zpool_do_sync 'offline' $TESTPOOL $DISK1
+zpool_do_sync 'offline' $TESTPOOL2 $DISK1
# 3. Write to the pool
log_must mkfile $FILESIZE "$TESTDIR/data.bin"
# 4. Scrub the pool
-zpool_scrub_sync $TESTPOOL
+zpool_scrub_sync $TESTPOOL2
# 5. Online the first device and offline the second device
-zpool_do_sync 'online' $TESTPOOL $DISK1
-zpool_do_sync 'offline' $TESTPOOL $DISK2
-log_must wait_for_resilver_end $TESTPOOL $RESILVER_TIMEOUT
+zpool_do_sync 'online' $TESTPOOL2 $DISK1
+zpool_do_sync 'offline' $TESTPOOL2 $DISK2
+log_must wait_for_resilver_end $TESTPOOL2 $RESILVER_TIMEOUT
# 6. Scrub the pool again
-zpool_scrub_sync $TESTPOOL
+zpool_scrub_sync $TESTPOOL2
# 7. Verify data integrity
-cksum=$(zpool status $TESTPOOL | awk 'L{print $NF;L=0} /CKSUM$/{L=1}')
+cksum=$(zpool status $TESTPOOL2 | awk 'L{print $NF;L=0} /CKSUM$/{L=1}')
if [[ $cksum != 0 ]]; then
- log_fail "Unexpected CKSUM errors found on $TESTPOOL ($cksum)"
+ log_fail "Unexpected CKSUM errors found on $TESTPOOL2 ($cksum)"
fi
log_pass "Scrubbing a pool with offline devices correctly preserves DTLs"