diff options
-rw-r--r-- | module/zfs/vdev.c | 11 | ||||
-rw-r--r-- | module/zfs/vdev_raidz.c | 9 | ||||
-rw-r--r-- | module/zfs/zio.c | 15 | ||||
-rw-r--r-- | tests/runfiles/linux.run | 2 | ||||
-rw-r--r-- | tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am | 3 | ||||
-rwxr-xr-x | tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh | 152 |
6 files changed, 176 insertions, 16 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 890bb1135..ae1c2bcec 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -4051,17 +4051,6 @@ vdev_stat_update(zio_t *zio, uint64_t psize) if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) return; - mutex_enter(&vd->vdev_stat_lock); - if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) { - if (zio->io_error == ECKSUM) - vs->vs_checksum_errors++; - else - vs->vs_read_errors++; - } - if (type == ZIO_TYPE_WRITE && !vdev_is_dead(vd)) - vs->vs_write_errors++; - mutex_exit(&vd->vdev_stat_lock); - if (spa->spa_load_state == SPA_LOAD_NONE && type == ZIO_TYPE_WRITE && txg != 0 && (!(flags & ZIO_FLAG_IO_REPAIR) || diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index d10d89f3e..d11287bdc 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2274,16 +2274,21 @@ vdev_raidz_io_done(zio_t *zio) if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { for (c = 0; c < rm->rm_cols; c++) { + vdev_t *cvd; rc = &rm->rm_col[c]; + cvd = vd->vdev_child[rc->rc_devidx]; if (rc->rc_error == 0) { zio_bad_cksum_t zbc; zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; + mutex_enter(&cvd->vdev_stat_lock); + cvd->vdev_stat.vs_checksum_errors++; + mutex_exit(&cvd->vdev_stat_lock); + zfs_ereport_start_checksum( - zio->io_spa, - vd->vdev_child[rc->rc_devidx], + zio->io_spa, cvd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, (void *)(uintptr_t)c, &zbc); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 7bb3c0825..0912f607f 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -4132,6 +4132,10 @@ zio_checksum_verify(zio_t *zio) zio->io_error = error; if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + zfs_ereport_start_checksum(zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, zio->io_offset, zio->io_size, NULL, &info); @@ -4467,9 
+4471,18 @@ zio_done(zio_t *zio) * device is currently unavailable. */ if (zio->io_error != ECKSUM && zio->io_vd != NULL && - !vdev_is_dead(zio->io_vd)) + !vdev_is_dead(zio->io_vd)) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + if (zio->io_type == ZIO_TYPE_READ) { + zio->io_vd->vdev_stat.vs_read_errors++; + } else if (zio->io_type == ZIO_TYPE_WRITE) { + zio->io_vd->vdev_stat.vs_write_errors++; + } + mutex_exit(&zio->io_vd->vdev_stat_lock); + zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0, 0); + } if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 698717335..8a3b4d4ee 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -344,7 +344,7 @@ tags = ['functional', 'cli_root', 'zpool_detach'] [tests/functional/cli_root/zpool_events] tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow', - 'zpool_events_poolname'] + 'zpool_events_poolname', 'zpool_events_errors'] tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_expand] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am index 0d4c3862b..7fb6e4f7a 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am @@ -5,7 +5,8 @@ dist_pkgdata_SCRIPTS = \ zpool_events_clear.ksh \ zpool_events_cliargs.ksh \ zpool_events_follow.ksh \ - zpool_events_poolname.ksh + zpool_events_poolname.ksh \ + zpool_events_errors.ksh dist_pkgdata_DATA = \ zpool_events.cfg \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh new file mode 100755 index 000000000..0dc551bbd --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_errors.ksh @@ -0,0 +1,152 @@ +#!/bin/ksh -p +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Verify the number of IO and checksum events match the error counters +# in zpool status. +# +# STRATEGY: +# 1. Create a raidz or mirror pool +# 2. Inject read/write IO errors or checksum errors +# 3. Verify the number of errors in zpool status match the corresponding +# number of error events. +# 4. Repeat for all combinations of raidz/mirror and io/checksum errors. +# + +. 
$STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+MOUNTDIR=$TEST_BASE_DIR/mount
+VDEV1=$TEST_BASE_DIR/file1
+VDEV2=$TEST_BASE_DIR/file2
+VDEV3=$TEST_BASE_DIR/file3
+POOL=error_pool
+FILESIZE=$((20 * 1024 * 1024))
+OLD_CHECKSUMS=$(get_tunable zfs_checksum_events_per_second)
+OLD_LEN_MAX=$(get_tunable zfs_zevent_len_max)
+# Restore the saved tunables, clear injections/events, remove pool and vdevs.
+function cleanup
+{
+	log_must set_tunable64 zfs_checksum_events_per_second $OLD_CHECKSUMS
+	log_must set_tunable64 zfs_zevent_len_max $OLD_LEN_MAX
+
+	log_must zinject -c all
+	log_must zpool events -c
+	if poolexists $POOL ; then
+		log_must destroy_pool $POOL
+	fi
+	log_must rm -f $VDEV1 $VDEV2 $VDEV3
+}
+
+log_assert "Check that the number of zpool errors match the number of events"
+
+log_onexit cleanup
+
+# Set our thresholds high so we never ratelimit or drop events.
+log_must set_tunable64 zfs_checksum_events_per_second 20000
+log_must set_tunable64 zfs_zevent_len_max 20000
+
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3
+log_must mkdir -p $MOUNTDIR
+
+# Run error test on a specific type of pool and fail unless the error
+# counter shown by zpool status for $VDEV1 equals the number of events.
+# $1: pool - raidz, mirror
+# $2: test type - corrupt (checksum error), io
+# $3: read, write
+function do_test
+{
+	POOLTYPE=$1
+	ERR=$2
+	RW=$3
+
+	log_note "Testing $ERR $RW on $POOLTYPE"
+	log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL $POOLTYPE $VDEV1 $VDEV2 $VDEV3
+	log_must zpool events -c
+	log_must zfs set compression=off $POOL
+
+	if [ "$RW" == "read" ] ; then
+		log_must mkfile $FILESIZE $MOUNTDIR/file
+	fi
+
+	log_must zinject -d $VDEV1 -e $ERR -T $RW -f 100 $POOL
+
+	if [ "$RW" == "write" ] ; then
+		log_must mkfile $FILESIZE $MOUNTDIR/file
+		log_must zpool sync $POOL
+	else
+		log_must zpool scrub $POOL
+		wait_scrubbed $POOL
+	fi
+
+	log_must zinject -c all
+
+	# Wait for the pool to settle down and finish resilvering (if
+	# necessary). We want the errors to stop incrementing before we
+	# check the error and event counts.
+	while is_pool_resilvering $POOL ; do
+		sleep 1
+	done
+
+	out="$(zpool status -p | grep $VDEV1)"
+
+	if [ "$ERR" == "corrupt" ] ; then
+		events=$(zpool events | grep -c checksum)
+		val=$(echo "$out" | awk '{print $5}')
+		str="checksum"
+	elif [ "$ERR" == "io" ] ; then
+		# grep -c prints 0 on no match; echo "" | wc -l would print 1.
+		events=$(zpool events | grep -c io)
+		if [ "$RW" == "read" ] ; then
+			str="read IO"
+			val=$(echo "$out" | awk '{print $3}')
+		else
+			str="write IO"
+			val=$(echo "$out" | awk '{print $4}')
+		fi
+	fi
+
+	if [ "${val:-0}" == "0" ] || [ "${events:-0}" == "0" ] ; then
+		log_fail "Didn't see any errors or events ($val/$events)"
+	fi
+
+	if [ "$val" != "$events" ] ; then
+		log_fail "$val $POOLTYPE $str errors != $events events"
+	else
+		log_note "$val $POOLTYPE $str errors == $events events"
+	fi
+
+	log_must zpool destroy $POOL
+}
+
+# Test all types of errors on mirror and raidz pools
+for pooltype in mirror raidz ; do
+	do_test $pooltype corrupt read
+	do_test $pooltype io read
+	do_test $pooltype io write
+done
+
+log_pass "The number of errors matched the number of events"