summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/sys/spa.h3
-rw-r--r--module/zfs/dsl_scan.c6
-rw-r--r--module/zfs/vdev.c5
-rw-r--r--module/zfs/vdev_indirect.c10
-rw-r--r--module/zfs/vdev_raidz.c20
-rw-r--r--module/zfs/vdev_rebuild.c3
-rw-r--r--module/zfs/zfs_fm.c46
-rw-r--r--module/zfs/zio.c11
-rw-r--r--tests/runfiles/common.run3
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh135
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh11
12 files changed, 214 insertions, 42 deletions
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 0762ae8a3..8391be832 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -1150,6 +1150,7 @@ extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
zio_t *zio);
extern void zfs_ereport_taskq_fini(void);
+extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 40adfbcee..a54cd6ca8 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
* Copyright 2016 Gary Mills
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
@@ -987,6 +987,10 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
(u_longlong_t)spa_get_errlog_size(spa));
spa_async_request(spa, SPA_ASYNC_RESILVER);
}
+
+ /* Clear recent error events (i.e. duplicate events tracking) */
+ if (complete)
+ zfs_ereport_clear(spa, NULL);
}
scn->scn_phys.scn_end_time = gethrestime_sec();
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 36001e0a6..64016311b 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2021 by Delphix. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <[email protected]>
@@ -4170,6 +4170,9 @@ vdev_clear(spa_t *spa, vdev_t *vd)
vd->vdev_parent->vdev_ops == &vdev_spare_ops &&
vd->vdev_parent->vdev_child[0] == vd)
vd->vdev_unspare = B_TRUE;
+
+ /* Clear recent error events cache (i.e. duplicate events tracking) */
+ zfs_ereport_clear(spa, vd);
}
boolean_t
diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c
index b26d09937..416f4c54d 100644
--- a/module/zfs/vdev_indirect.c
+++ b/module/zfs/vdev_indirect.c
@@ -1485,14 +1485,12 @@ vdev_indirect_all_checksum_errors(zio_t *zio)
vdev_t *vd = ic->ic_vdev;
- int ret = zfs_ereport_post_checksum(zio->io_spa, vd,
+ (void) zfs_ereport_post_checksum(zio->io_spa, vd,
NULL, zio, is->is_target_offset, is->is_size,
NULL, NULL, NULL);
- if (ret != EALREADY) {
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&vd->vdev_stat_lock);
- }
+ mutex_enter(&vd->vdev_stat_lock);
+ vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&vd->vdev_stat_lock);
}
}
}
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index f4812e612..57a594c80 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -1852,14 +1852,12 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
- int ret = zfs_ereport_post_checksum(zio->io_spa, vd,
+ (void) zfs_ereport_post_checksum(zio->io_spa, vd,
&zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
rc->rc_abd, bad_data, &zbc);
- if (ret != EALREADY) {
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&vd->vdev_stat_lock);
- }
+ mutex_enter(&vd->vdev_stat_lock);
+ vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&vd->vdev_stat_lock);
}
}
@@ -2453,14 +2451,12 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio)
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
- int ret = zfs_ereport_start_checksum(zio->io_spa,
+ (void) zfs_ereport_start_checksum(zio->io_spa,
cvd, &zio->io_bookmark, zio, rc->rc_offset,
rc->rc_size, (void *)(uintptr_t)c, &zbc);
- if (ret != EALREADY) {
- mutex_enter(&cvd->vdev_stat_lock);
- cvd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&cvd->vdev_stat_lock);
- }
+ mutex_enter(&cvd->vdev_stat_lock);
+ cvd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&cvd->vdev_stat_lock);
}
}
}
diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
index 784d1af15..037abc01f 100644
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@@ -338,6 +338,9 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
}
cv_broadcast(&vd->vdev_rebuild_cv);
+
+ /* Clear recent error events (i.e. duplicate events tracking) */
+ zfs_ereport_clear(spa, NULL);
}
/*
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index ea71ef325..9e9f4a80b 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2012,2020 by Delphix. All rights reserved.
+ * Copyright (c) 2012,2021 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@@ -248,6 +248,44 @@ zfs_ereport_schedule_cleaner(void)
}
/*
+ * Clear entries for a given vdev or all vdevs in a pool when vdev == NULL
+ */
+void
+zfs_ereport_clear(spa_t *spa, vdev_t *vd)
+{
+ uint64_t vdev_guid, pool_guid;
+ int cnt = 0;
+
+ ASSERT(vd != NULL || spa != NULL);
+ if (vd == NULL) {
+ vdev_guid = 0;
+ pool_guid = spa_guid(spa);
+ } else {
+ vdev_guid = vd->vdev_guid;
+ pool_guid = 0;
+ }
+
+ mutex_enter(&recent_events_lock);
+
+ recent_events_node_t *next = list_head(&recent_events_list);
+ while (next != NULL) {
+ recent_events_node_t *entry = next;
+
+ next = list_next(&recent_events_list, next);
+
+ if (entry->re_vdev_guid == vdev_guid ||
+ entry->re_pool_guid == pool_guid) {
+ avl_remove(&recent_events_tree, entry);
+ list_remove(&recent_events_list, entry);
+ kmem_free(entry, sizeof (*entry));
+ cnt++;
+ }
+ }
+
+ mutex_exit(&recent_events_lock);
+}
+
+/*
* Check if an ereport would be a duplicate of one recently posted.
*
* An ereport is considered a duplicate if the set of criteria in
@@ -951,6 +989,12 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
}
return (eip);
}
+#else
+/*ARGSUSED*/
+void
+zfs_ereport_clear(spa_t *spa, vdev_t *vd)
+{
+}
#endif
/*
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 538a2a2cd..74d1595a8 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -4255,15 +4255,12 @@ zio_checksum_verify(zio_t *zio)
zio->io_error = error;
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- int ret = zfs_ereport_start_checksum(zio->io_spa,
+ (void) zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio,
zio->io_offset, zio->io_size, NULL, &info);
-
- if (ret != EALREADY) {
- mutex_enter(&zio->io_vd->vdev_stat_lock);
- zio->io_vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&zio->io_vd->vdev_stat_lock);
- }
+ mutex_enter(&zio->io_vd->vdev_stat_lock);
+ zio->io_vd->vdev_stat.vs_checksum_errors++;
+ mutex_exit(&zio->io_vd->vdev_stat_lock);
}
}
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 9468ee06d..e656785d9 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -362,7 +362,8 @@ tags = ['functional', 'cli_root', 'zpool_detach']
[tests/functional/cli_root/zpool_events]
tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow',
- 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates']
+ 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates',
+ 'zpool_events_clear_retained']
tags = ['functional', 'cli_root', 'zpool_events']
[tests/functional/cli_root/zpool_export]
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
index 99c46f014..765df1022 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am
@@ -11,7 +11,8 @@ dist_pkgdata_SCRIPTS = \
zpool_events_follow.ksh \
zpool_events_poolname.ksh \
zpool_events_errors.ksh \
- zpool_events_duplicates.ksh
+ zpool_events_duplicates.ksh \
+ zpool_events_clear_retained.ksh
dist_pkgdata_DATA = \
zpool_events.cfg \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh
new file mode 100755
index 000000000..fdf56b2cf
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh
@@ -0,0 +1,135 @@
+#!/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION:
+# Verify that new errors after a pool scrub are considered a duplicate
+#
+# STRATEGY:
+# 1. Create a raidz pool with a file
+# 2. Inject garbage into one of the vdevs
+# 3. Scrub the pool
+# 4. Observe the checksum error counts
+# 5. Repeat inject and pool scrub
+# 6. Verify that second pass also produces similar errors (i.e. not
+# treated as a duplicate)
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+MOUNTDIR=$TEST_BASE_DIR/mount
+FILEPATH=$MOUNTDIR/target
+VDEV1=$TEST_BASE_DIR/vfile1
+VDEV2=$TEST_BASE_DIR/vfile2
+VDEV3=$TEST_BASE_DIR/vfile3
+SUPPLY=$TEST_BASE_DIR/supply
+POOL=test_pool
+FILESIZE="15M"
+DAMAGEBLKS=10
+
+OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
+RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
+OLD_CHECKSUMS=$(get_tunable CHECKSUM_EVENTS_PER_SECOND)
+
+EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports"
+
+function cleanup
+{
+ log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS
+ log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
+
+ zpool events -c
+ if poolexists $POOL ; then
+ zpool export $POOL
+ fi
+ log_must rm -f $VDEV1 $VDEV2 $VDEV3
+}
+
+function damage_and_repair
+{
+ log_must zpool clear $POOL $VDEV1
+ log_must zpool events -c
+
+ log_note injecting damage to $VDEV1
+ log_must dd conv=notrunc if=$SUPPLY of=$VDEV1 bs=1M seek=4 count=$DAMAGEBLKS
+ log_must zpool scrub $POOL
+ log_must zpool wait -t scrub $POOL
+ log_note "pass $1 observed $($EREPORTS | grep -c checksum) checksum ereports"
+
+ repaired=$(zpool status $POOL | grep "scan: scrub repaired" | awk '{print $4}')
+ if [ "$repaired" == "0B" ]; then
+ log_fail "INVALID TEST -- expected scrub to repair some blocks"
+ else
+ log_note "$repaired repaired during scrub"
+ fi
+}
+
+function checksum_error_count
+{
+ zpool status -p $POOL | grep $VDEV1 | awk '{print $5}'
+}
+
+assertion="Damage to recently repaired blocks should be reported/counted"
+log_assert "$assertion"
+log_note "zevent retain max setting: $RETAIN_MAX"
+
+log_onexit cleanup
+
+# Set our threshold high to avoid dropping events.
+set_tunable64 ZEVENT_LEN_MAX 20000
+set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000
+
+# Initialize resources for the test
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3
+log_must dd if=/dev/urandom of=$SUPPLY bs=1M count=$DAMAGEBLKS
+log_must mkdir -p $MOUNTDIR
+log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL raidz $VDEV1 $VDEV2 $VDEV3
+log_must zfs set compression=off recordsize=16k $POOL
+# create a file full of zeros
+log_must mkfile -v $FILESIZE $FILEPATH
+log_must zpool sync $POOL
+
+# run once and observe the checksum errors
+damage_and_repair 1
+errcnt=$(checksum_error_count)
+log_note "$errcnt errors observed"
+# set expectaton of at least 75% of what we observed in first pass
+(( expected = (errcnt * 75) / 100 ))
+
+# run again and we should observe new checksum errors
+damage_and_repair 2
+errcnt=$(checksum_error_count)
+
+log_must zpool destroy $POOL
+
+if (( errcnt < expected )); then
+ log_fail "FAILED -- expecting at least $expected checksum errors but only observed $errcnt"
+else
+ log_note observed $errcnt new checksum errors after a scrub
+ log_pass "$assertion"
+fi
+
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
index 1ba7b1b34..d4194a5b8 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
@@ -114,21 +114,10 @@ function do_dup_test
if [ "$RW" == "write" ] ; then
log_must mkfile $FILESIZE $FILEPATH
log_must zpool sync $POOL
- else
- # scrub twice to generate some duplicates
- log_must zpool scrub $POOL
- log_must zpool wait -t scrub $POOL
- log_must zpool scrub $POOL
- log_must zpool wait -t scrub $POOL
fi
log_must zinject -c all
- # Wait for the pool to settle down and finish resilvering (if
- # necessary). We want the errors to stop incrementing before we
- # check for duplicates.
- zpool wait -t resilver $POOL
-
ereports="$($EREPORTS | sort)"
actual=$(echo "$ereports" | wc -l)
unique=$(echo "$ereports" | uniq | wc -l)