author     Brian Behlendorf <[email protected]>    2021-05-08 08:57:25 -0700
committer  Brian Behlendorf <[email protected]>    2021-05-10 12:18:36 -0700
commit     2085a5f992e686d6470cccefa7cdbc395f075125 (patch)
tree       bceef23ec48325ece8ad8b5ee96f30453979958e
parent     b1dd6351bb16f57f95dab79cfba36c3e05a82e76 (diff)
Fix dRAID self-healing short columns
When dRAID performs a normal read operation, only the data columns
in the raid map are read from disk. This is enough information to
calculate the checksum, verify it, and return the needed data to the
application. It's only in the event of a checksum failure that the
additional parity and any empty columns must be read since they are
required for parity reconstruction.
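
To make that flow concrete, here is a minimal, self-contained sketch of the
idea (illustrative only, not ZFS code; the model_* names and fields are
hypothetical stand-ins): data columns are read first, and the remaining
columns are fetched only when the checksum does not verify.

/* Illustrative model only -- hypothetical names, not the ZFS implementation. */
#include <stdbool.h>
#include <stddef.h>

typedef struct {
	bool tried;	/* column has already been read from disk */
	bool is_data;	/* data column, as opposed to parity or empty */
} model_col_t;

typedef struct {
	model_col_t cols[16];
	size_t ncols;
} model_row_t;

static void
read_column(model_col_t *c)
{
	c->tried = true;	/* stand-in for issuing the column I/O */
}

static bool
checksum_ok(const model_row_t *r)
{
	(void) r;		/* stand-in for verifying the block checksum */
	return (true);
}

static void
model_read(model_row_t *r)
{
	/* Normal read: only the data columns are fetched from disk. */
	for (size_t i = 0; i < r->ncols; i++) {
		if (r->cols[i].is_data)
			read_column(&r->cols[i]);
	}

	/*
	 * Only on a checksum failure are the remaining columns (parity,
	 * empty -- anything not yet tried) read, so that parity
	 * reconstruction has everything it needs.
	 */
	if (!checksum_ok(r)) {
		for (size_t i = 0; i < r->ncols; i++) {
			if (!r->cols[i].tried)
				read_column(&r->cols[i]);
		}
		/* ... reconstruct from parity and repair the bad copies ... */
	}
}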
Reading these additional columns is handled by vdev_raidz_read_all()
which calls vdev_draid_map_alloc_empty() to expand the raidz_map_t
and submit IOs for the missing columns. This all works correctly,
but it fails to account for any "short" columns. These are data
columns which are padded with an empty skip sector at the end.
Since that empty sector is not needed for a normal read, it's not
read when the column is first read from disk. However, like the parity
and empty columns the skip sector is needed to perform reconstruction.
The fix is to mark any "short" columns as never being read by clearing
the rc_tried flag when expanding the raidz_map_t. This will cause
the entire column to be re-read from disk in the event of a checksum
failure allowing the self-healing functionality to repair the block.
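
As a rough sketch of the fix, reusing the hypothetical model above (again
illustrative; the actual change is the one-line rc_tried assignment added to
vdev_draid_map_alloc_empty() in the diff below): when the row is expanded for
reconstruction, a short column gets its skip sector appended and its tried
flag cleared, so the checksum-failure path re-reads the whole column, skip
sector included.

/* Continues the hypothetical model above; not the ZFS implementation. */
static void
model_expand_short_column(model_col_t *c, bool is_short)
{
	if (is_short) {
		/* ... append the empty skip sector to the column's buffer ... */
		c->tried = false;	/* force the whole column to be re-read */
	}
}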
Note that this only affects the self-healing feature because when
scrubbing a pool the parity, data, and empty columns are all read
initially to verify their contents. Furthermore, only blocks which
contain "short" columns would be effected, and only when the memory
backing the skip sector wasn't already zeroed out.
This change extends the existing redundancy_raidz.ksh test case to
verify self-healing (as well as resilver and scrub), and then applies
the same test case to dRAID with a slightly modified version of
the test script called redundancy_draid.ksh. The unused variable
combrec was also removed from both test cases.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: Mark Maybee <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #12010
-rw-r--r--  module/zfs/vdev_draid.c                                            |   8
-rw-r--r--  tests/runfiles/common.run                                          |   4
-rw-r--r--  tests/zfs-tests/tests/functional/redundancy/Makefile.am            |   1
-rwxr-xr-x  tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh   | 248
-rwxr-xr-x  tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh   |  52
5 files changed, 309 insertions, 4 deletions
diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c
index fb2143e94..c65ce1cd6 100644
--- a/module/zfs/vdev_draid.c
+++ b/module/zfs/vdev_draid.c
@@ -812,7 +812,12 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
 			/* this is a "big column", nothing to add */
 			ASSERT3P(rc->rc_abd, !=, NULL);
 		} else {
-			/* short data column, add a skip sector */
+			/*
+			 * short data column, add a skip sector and clear
+			 * rc_tried to force the entire column to be re-read
+			 * thereby including the missing skip sector data
+			 * which is needed for reconstruction.
+			 */
 			ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
 			ASSERT3U(rr->rr_nempty, !=, 0);
 			ASSERT3P(rc->rc_abd, !=, NULL);
@@ -823,6 +828,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
 			abd_gang_add(rc->rc_abd, abd_get_offset_size(
 			    rr->rr_abd_empty, skip_off, skip_size), B_TRUE);
 			skip_off += skip_size;
+			rc->rc_tried = 0;
 		}
 
 		/*
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 07c816f52..f1aa649cb 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -741,8 +741,8 @@ tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos']
 tags = ['functional', 'raidz']
 
 [tests/functional/redundancy]
-tests = ['redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3',
-    'redundancy_draid_spare1', 'redundancy_draid_spare2',
+tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
+    'redundancy_draid3', 'redundancy_draid_spare1', 'redundancy_draid_spare2',
     'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz',
     'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3',
     'redundancy_stripe']
diff --git a/tests/zfs-tests/tests/functional/redundancy/Makefile.am b/tests/zfs-tests/tests/functional/redundancy/Makefile.am
index 7b85d6a1b..ac323c893 100644
--- a/tests/zfs-tests/tests/functional/redundancy/Makefile.am
+++ b/tests/zfs-tests/tests/functional/redundancy/Makefile.am
@@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/redundancy
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
+	redundancy_draid.ksh \
 	redundancy_draid1.ksh \
 	redundancy_draid2.ksh \
 	redundancy_draid3.ksh \
diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh b/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh
new file mode 100755
index 000000000..8015e682c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh
@@ -0,0 +1,248 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 by vStack. All rights reserved.
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+#	dRAID should provide redundancy
+#
+# STRATEGY:
+#	1. Create block device files for the test draid pool
+#	2. For each parity value [1..3]
+#	    - create draid pool
+#	    - fill it with some directories/files
+#	    - verify self-healing by overwriting devices
+#	    - verify resilver by replacing devices
+#	    - verify scrub by zeroing devices
+#	    - destroy the draid pool
+
+typeset -r devs=6
+typeset -r dev_size_mb=512
+
+typeset -a disks
+
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+
+function cleanup
+{
+	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+
+	for i in {0..$devs}; do
+		rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+
+	set_tunable32 PREFETCH_DISABLE $prefetch_disable
+}
+
+function test_selfheal # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	typeset mntpnt=$(get_prop mountpoint $pool/fs)
+	log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	#
+	# Scrub the pool because the find command will only self-heal blocks
+	# from the files which were read. Before overwriting additional
+	# devices we need to repair all of the blocks in the pool.
+	#
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	typeset mntpnt=$(get_prop mountpoint $pool/fs)
+	log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
+function test_resilver # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool offline $pool $dir/dev-$i
+	done
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool labelclear -f $dir/dev-$i
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must zpool replace -fw $pool $dir/dev-$i
+	done
+
+	log_must check_pool_status $pool "errors" "No known data errors"
+	resilver_cksum=$(cksum_pool $pool)
+	if [[ $resilver_cksum != 0 ]]; then
+		log_must zpool status -v $pool
+		log_fail "resilver cksum errors: $resilver_cksum"
+	fi
+
+	log_must zpool clear $pool
+}
+
+function test_scrub # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s ${dev_size_mb}M $device
+	disks[${#disks[*]}+1]=$device
+done
+
+# Disk file which will be attached
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+for nparity in 1 2 3; do
+	raid=draid$nparity
+	dir=$TEST_BASE_DIR
+
+	log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+	log_must zfs set primarycache=metadata $TESTPOOL
+
+	log_must zfs create $TESTPOOL/fs
+	log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R
+
+	log_must zfs create -o compress=on $TESTPOOL/fs2
+	log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R
+
+	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+	log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R
+
+	typeset pool_size=$(get_pool_prop size $TESTPOOL)
+
+	log_must zpool export $TESTPOOL
+	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+	test_selfheal $TESTPOOL $nparity $dir
+	test_resilver $TESTPOOL $nparity $dir
+	test_scrub $TESTPOOL $nparity $dir
+
+	log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid redundancy test succeeded."
diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh b/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
index 8d32e0603..d73688391 100755
--- a/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
+++ b/tests/zfs-tests/tests/functional/redundancy/redundancy_raidz.ksh
@@ -23,6 +23,7 @@
 #
 # Copyright (c) 2020 by vStack. All rights reserved.
 # Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -37,6 +38,7 @@
 #	2. For each parity value [1..3]
 #	    - create raidz pool
 #	    - fill it with some directories/files
+#	    - verify self-healing by overwriting devices
 #	    - verify resilver by replacing devices
 #	    - verify scrub by zeroing devices
 #	    - destroy the raidz pool
@@ -59,6 +61,54 @@ function cleanup
 	set_tunable32 PREFETCH_DISABLE $prefetch_disable
 }
 
+function test_selfheal # <pool> <parity> <dir>
+{
+	typeset pool=$1
+	typeset nparity=$2
+	typeset dir=$3
+
+	log_must zpool export $pool
+
+	for (( i=0; i<$nparity; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	typeset mntpnt=$(get_prop mountpoint $pool/fs)
+	log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	#
+	# Scrub the pool because the find command will only self-heal blocks
+	# from the files which were read. Before overwriting additional
+	# devices we need to repair all of the blocks in the pool.
+	#
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+
+	log_must zpool export $pool
+
+	for (( i=$nparity; i<$nparity*2; i=i+1 )); do
+		log_must dd conv=notrunc if=/dev/zero of=$dir/dev-$i \
+		    bs=1M seek=4 count=$(($dev_size_mb-4))
+	done
+
+	log_must zpool import -o cachefile=none -d $dir $pool
+
+	typeset mntpnt=$(get_prop mountpoint $pool/fs)
+	log_must find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool scrub -w $pool
+	log_must check_pool_status $pool "errors" "No known data errors"
+
+	log_must zpool clear $pool
+}
+
 function test_resilver # <pool> <parity> <dir>
 {
 	typeset pool=$1
@@ -121,7 +171,6 @@ function test_scrub # <pool> <parity> <dir>
 	typeset pool=$1
 	typeset nparity=$2
 	typeset dir=$3
-	typeset combrec=$4
 
 	log_must zpool export $pool
 
@@ -189,6 +238,7 @@ for nparity in 1 2 3; do
 
 	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
 
+	test_selfheal $TESTPOOL $nparity $dir
 	test_resilver $TESTPOOL $nparity $dir
 	test_scrub $TESTPOOL $nparity $dir
 