diff options
author | Tom Caputi <[email protected]> | 2018-10-19 00:06:18 -0400 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-10-18 21:06:18 -0700 |
commit | 80a91e7469669e2a5da5873b8f09a752f7869062 (patch) | |
tree | ef5a4462892becccb939b2cd42a54ed580f5894f /tests | |
parent | 9f438c5f948c0072f16431407a373ead34fabf6e (diff) |
Defer new resilvers until the current one ends
Currently, if a resilver is triggered for any reason while an
existing one is running, zfs will immediately restart the existing
resilver from the beginning to include the new drive. This causes
problems for system administrators when a drive fails while another
is already resilvering. In this case, the best way to reduce the
risk of data loss is to wait for the current resilver to finish and
then immediately replace the second failed drive, which minimizes
the amount of time the system operates with two incomplete drives.
This patch introduces the resilver_defer feature that essentially
does this for the admin without forcing them to wait and monitor
the resilver manually. The change requires an on-disk feature
since we must mark drives that are part of a deferred resilver in
the vdev config to ensure that we do not assume they are done
resilvering when an existing resilver completes.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: @mmaybee
Signed-off-by: Tom Caputi <[email protected]>
Closes #7732
Diffstat (limited to 'tests')
13 files changed, 289 insertions, 5 deletions
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 613660df9..4f556acde 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -421,6 +421,10 @@ tags = ['functional', 'cli_root', 'zpool_reopen'] tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift'] tags = ['functional', 'cli_root', 'zpool_replace'] +[tests/functional/cli_root/zpool_resilver] +tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart'] +tags = ['functional', 'cli_root', 'zpool_resilver'] + [tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', diff --git a/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/Makefile.am index 7a765a160..13ff889d8 100644 --- a/tests/zfs-tests/tests/functional/cli_root/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/Makefile.am @@ -52,6 +52,7 @@ SUBDIRS = \ zpool_remove \ zpool_reopen \ zpool_replace \ + zpool_resilver \ zpool_scrub \ zpool_set \ zpool_split \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 8907533c9..48a32174f 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -87,5 +87,6 @@ if is_linux; then "feature@encryption" "feature@project_quota" "feature@allocation_classes" + "feature@resilver_defer" ) fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib index 82860deb3..075ad85e9 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib @@ -115,3 +115,10 @@ function is_scan_restarted #pool zpool history -i $pool 
| grep -q "scan aborted, restarting" return $? } + +function is_deferred_scan_started #pool +{ + typeset pool=$1 + zpool history -i $pool | grep -q "starting deferred resilver" + return $? +} diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh index 30c389ce8..956ceebaf 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh @@ -29,7 +29,7 @@ # 4. Execute scrub. # 5. "Plug back" disk. # 6. Reopen a pool with an -n flag. -# 7. Check if scrub scan is NOT replaced by resilver. +# 7. Check if resilver was deferred. # 8. Check if trying to put device to offline fails because of no valid # replicas. # @@ -75,11 +75,12 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "online" log_must zinject -c all # 7. Check if scrub scan is NOT replaced by resilver. log_must wait_for_scrub_end $TESTPOOL $MAXTIMEOUT -log_mustnot is_scan_restarted $TESTPOOL +log_must is_deferred_scan_started $TESTPOOL # 8. Check if trying to put device to offline fails because of no valid # replicas. -log_mustnot zpool offline $TESTPOOL $DISK2 +log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT +log_must zpool offline $TESTPOOL $DISK2 # clean up log_must zpool destroy $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh index 95029a8b6..fc298d010 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh @@ -72,13 +72,13 @@ log_must zinject -d $REMOVED_DISK_ID -D25:1 $TESTPOOL log_must wait_for_resilver_start $TESTPOOL $MAXTIMEOUT # 6. Reopen a pool again with -n flag. 
-zpool reopen -n $TESTPOOL +log_must zpool reopen -n $TESTPOOL # 7. Wait until resilvering is finished and check if it was restarted. log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT # remove delay from disk log_must zinject -c all -log_must is_scan_restarted $TESTPOOL +log_mustnot is_scan_restarted $TESTPOOL # clean up log_must zpool destroy $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am new file mode 100644 index 000000000..2cec5335f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am @@ -0,0 +1,9 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_resilver +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zpool_resilver_bad_args.ksh \ + zpool_resilver_restart.ksh + +dist_pkgdata_DATA = \ + zpool_resilver.cfg diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/cleanup.ksh new file mode 100755 index 000000000..c74e23919 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/cleanup.ksh @@ -0,0 +1,33 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg + +verify_runnable "global" + +destroy_mirrors diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/setup.ksh new file mode 100755 index 000000000..48ceecdf9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/setup.ksh @@ -0,0 +1,39 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Datto. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg + +verify_runnable "global" +verify_disk_count "$DISKS" 3 + +default_mirror_setup_noexit $DISK1 $DISK2 $DISK3 + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +# Create 256M of data +log_must file_write -b 1048576 -c 256 -o create -d 0 -f $mntpnt/bigfile +log_pass diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg new file mode 100644 index 000000000..7d92984d6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Datto. All rights reserved. 
+# + +export DISK1=$(echo $DISKS | nawk '{print $1}') +export DISK2=$(echo $DISKS | nawk '{print $2}') +export DISK3=$(echo $DISKS | nawk '{print $3}') + +export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024)) +export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024)) + +export MAXTIMEOUT=80 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh new file mode 100755 index 000000000..9d973bec7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Datto. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# A badly formed parameter passed to 'zpool resilver' should +# return an error. +# +# STRATEGY: +# 1. Create an array containing bad 'zpool reilver' parameters. +# 2. For each element, execute the sub-command. +# 3. Verify it returns an error. +# + +verify_runnable "global" + +set -A args "" "-?" "blah blah" "-%" "--?" 
"-*" "-=" \ + "-a" "-b" "-c" "-d" "-e" "-f" "-g" "-h" "-i" "-j" "-k" "-l" \ + "-m" "-n" "-o" "-p" "-q" "-r" "-s" "-t" "-u" "-v" "-w" "-x" "-y" "-z" \ + "-A" "-B" "-C" "-D" "-E" "-F" "-G" "-H" "-I" "-J" "-K" "-L" \ + "-M" "-N" "-O" "-P" "-Q" "-R" "-S" "-T" "-U" "-V" "-W" "-X" "-W" "-Z" + + +log_assert "Execute 'zpool resilver' using invalid parameters." + +typeset -i i=0 +while [[ $i -lt ${#args[*]} ]]; do + log_mustnot zpool resilver ${args[i]} + + ((i = i + 1)) +done + +log_pass "Badly formed 'zpool resilver' parameters fail as expected." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh new file mode 100755 index 000000000..de9e5ecdf --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh @@ -0,0 +1,95 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 Datto Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib +. 
$STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg + +# +# DESCRIPTION: +# "Verify 'zpool resilver' restarts in-progress resilvers" +# +# STRATEGY: +# 1. Write some data and detatch the first drive so it has resilver +# work to do +# 2. Repeat the process with a second disk +# 3. Reattach the drives, causing the second drive's resilver to be +# deferred +# 4. Manually restart the resilver with all drives +# +# NOTES: +# Artificially limit the scrub speed by setting the zfs_scan_vdev_limit +# low and adding a 50ms zio delay in order to ensure that the resilver +# does not complete early. +# + +verify_runnable "global" + +function cleanup +{ + log_must zinject -c all + log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT + log_must rm -f $mntpnt/biggerfile1 + log_must rm -f $mntpnt/biggerfile2 +} + +log_onexit cleanup + +log_assert "Verify 'zpool resilver' restarts in-progress resilvers" + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +# 1. Write some data and detatch the first drive so it has resilver work to do +log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile1 +log_must sync +log_must zpool detach $TESTPOOL $DISK2 + +# 2. Repeat the process with a second disk +log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile2 +log_must sync +log_must zpool detach $TESTPOOL $DISK3 + +# 3. Reattach the drives, causing the second drive's resilver to be deferred +log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW + +log_must zpool attach $TESTPOOL $DISK1 $DISK2 +log_must zinject -d $DISK2 -D50:1 $TESTPOOL +log_must is_pool_resilvering $TESTPOOL true + +log_must zpool attach $TESTPOOL $DISK1 $DISK3 +log_must zinject -d $DISK3 -D50:1 $TESTPOOL +log_must is_pool_resilvering $TESTPOOL true + +# 4. 
Manually restart the resilver with all drives +log_must zpool resilver $TESTPOOL +log_must zinject -c all +log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT +log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT +log_must is_deferred_scan_started $TESTPOOL +log_must check_state $TESTPOOL "$DISK2" "online" +log_must check_state $TESTPOOL "$DISK3" "online" + +log_pass "Verified 'zpool resilver' restarts in-progress resilvers" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh index 3bc798d1a..fdf315dea 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh @@ -25,6 +25,7 @@ # . $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib . $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg # @@ -95,6 +96,7 @@ DISK1="$TEST_BASE_DIR/zpool_disk1.dat" DISK2="$TEST_BASE_DIR/zpool_disk2.dat" DISK3="$TEST_BASE_DIR/zpool_disk3.dat" DISK4="$TEST_BASE_DIR/zpool_disk4.dat" +RESILVER_TIMEOUT=40 # 1. Create the pool log_must truncate -s $DEVSIZE $DISK1 @@ -117,6 +119,7 @@ zpool_scrub_sync $TESTPOOL # 5. Online the first device and offline the second device zpool_do_sync 'online' $TESTPOOL $DISK1 zpool_do_sync 'offline' $TESTPOOL $DISK2 +log_must wait_for_resilver_end $TESTPOOL $RESILVER_TIMEOUT # 6. Scrub the pool again zpool_scrub_sync $TESTPOOL |