summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ryan Moeller <[email protected]> 2021-04-07 19:23:57 -0400
committer Brian Behlendorf <[email protected]> 2021-04-14 13:19:49 -0700
commit 7822c01eb6b7d3d6a20e1a733747050ed9d7ddcb (patch)
tree d7430c06664d011ba5adf26ad371b3c5e6f94907
parent 96e15d29fab03e2876654c42702b81d3b0b14303 (diff)
Ratelimit deadman zevents as with delay zevents
Just as delay zevents can flood the zevent pipe when a vdev becomes unresponsive, so do the deadman zevents.

Ratelimit deadman zevents according to the same tunable as for delay zevents.

Enable deadman tests on FreeBSD and add a test for deadman event ratelimiting.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Don Brady <[email protected]>
Signed-off-by: Ryan Moeller <[email protected]>
Closes #11786
-rw-r--r-- include/sys/vdev_impl.h 3
-rw-r--r-- man/man5/zfs-module-parameters.5 3
-rw-r--r-- module/zfs/vdev.c 3
-rw-r--r-- module/zfs/zfs_fm.c 8
-rw-r--r-- tests/runfiles/common.run 6
-rw-r--r-- tests/runfiles/linux.run 6
-rw-r--r-- tests/zfs-tests/tests/functional/deadman/Makefile.am 1
-rwxr-xr-x tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh 78
-rwxr-xr-x tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh 6
9 files changed, 102 insertions, 12 deletions
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 6c3295353..3cfde40a7 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -458,10 +458,11 @@ struct vdev {
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
/*
- * We rate limit ZIO delay and ZIO checksum events, since they
+ * We rate limit ZIO delay, deadman, and checksum events, since they
* can flood ZED with tons of events when a drive is acting up.
*/
zfs_ratelimit_t vdev_delay_rl;
+ zfs_ratelimit_t vdev_deadman_rl;
zfs_ratelimit_t vdev_checksum_rl;
};
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index cda4661fe..935c8afb0 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1695,7 +1695,8 @@ Default value: \fB64\fR.
\fBzfs_slow_io_events_per_second\fR (int)
.ad
.RS 12n
-Rate limit delay zevents (which report slow I/Os) to this many per second.
+Rate limit delay and deadman zevents (which report slow I/Os) to this many per
+second.
.sp
Default value: 20
.RE
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 69e44b48e..d00782d93 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -625,6 +625,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
*/
zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second,
1);
+ zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second,
+ 1);
zfs_ratelimit_init(&vd->vdev_checksum_rl,
&zfs_checksum_events_per_second, 1);
@@ -1106,6 +1108,7 @@ vdev_free(vdev_t *vd)
cv_destroy(&vd->vdev_rebuild_cv);
zfs_ratelimit_fini(&vd->vdev_delay_rl);
+ zfs_ratelimit_fini(&vd->vdev_deadman_rl);
zfs_ratelimit_fini(&vd->vdev_checksum_rl);
if (vd == spa->spa_root_vdev)
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index f0f953405..60e631567 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -395,8 +395,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
}
/*
- * We want to rate limit ZIO delay and checksum events so as to not
- * flood ZED when a disk is acting up.
+ * We want to rate limit ZIO delay, deadman, and checksum events so as to not
+ * flood zevent consumers when a disk is acting up.
*
* Returns 1 if we're ratelimiting, 0 if not.
*/
@@ -405,11 +405,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
{
int rc = 0;
/*
- * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
+ * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
* are. Invert it to get our return value.
*/
if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
rc = !zfs_ratelimit(&vd->vdev_delay_rl);
+ } else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) {
+ rc = !zfs_ratelimit(&vd->vdev_deadman_rl);
} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
}
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index cc2414550..32be9ef05 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -577,6 +577,12 @@ tags = ['functional', 'cp_files']
tests = ['ctime_001_pos' ]
tags = ['functional', 'ctime']
+[tests/functional/deadman]
+tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio']
+pre =
+post =
+tags = ['functional', 'deadman']
+
[tests/functional/delegate]
tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 9f6bd856a..4952f3013 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -85,12 +85,6 @@ tags = ['functional', 'cli_root', 'zpool_split']
tests = ['compress_004_pos']
tags = ['functional', 'compression']
-[tests/functional/deadman:Linux]
-tests = ['deadman_sync', 'deadman_zio']
-pre =
-post =
-tags = ['functional', 'deadman']
-
[tests/functional/devices:Linux]
tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
tags = ['functional', 'devices']
diff --git a/tests/zfs-tests/tests/functional/deadman/Makefile.am b/tests/zfs-tests/tests/functional/deadman/Makefile.am
index 7b70ca09d..097f23e88 100644
--- a/tests/zfs-tests/tests/functional/deadman/Makefile.am
+++ b/tests/zfs-tests/tests/functional/deadman/Makefile.am
@@ -1,5 +1,6 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman
dist_pkgdata_SCRIPTS = \
+ deadman_ratelimit.ksh \
deadman_sync.ksh \
deadman_zio.ksh
diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh
new file mode 100755
index 000000000..469117a56
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+# DESCRIPTION:
+# Verify spa deadman events are rate limited
+#
+# STRATEGY:
+# 1. Reduce the zfs_slow_io_events_per_second to 1.
+# 2. Reduce the zfs_deadman_ziotime_ms to 1ms.
+# 3. Write data to a pool and read it back.
+# 4. Verify deadman events have been produced at a reasonable rate.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/deadman/deadman.cfg
+
+verify_runnable "both"
+
+function cleanup
+{
+ zinject -c all
+ default_cleanup_noexit
+
+ set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+ set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT
+}
+
+log_assert "Verify spa deadman events are rate limited"
+log_onexit cleanup
+
+OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
+log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1
+log_must set_tunable64 DEADMAN_ZIOTIME_MS 1
+
+# Create a new pool in order to use the updated deadman settings.
+default_setup_noexit $DISK1
+log_must zpool events -c
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+log_must zinject -d $DISK1 -D 5:1 $TESTPOOL
+log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync
+
+events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman)
+log_note "events=$events"
+if [ "$events" -lt 1 ]; then
+ log_fail "Expect >= 1 deadman events, $events found"
+fi
+if [ "$events" -gt 10 ]; then
+ log_fail "Expect <= 10 deadman events, $events found"
+fi
+
+log_pass "Verify spa deadman events are rate limited"
diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh
index 5d803af85..b0b03f5d5 100755
--- a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh
+++ b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh
@@ -73,7 +73,11 @@ log_must zinject -c all
log_must zpool sync
# Log txg sync times for reference and the zpool event summary.
-log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
+if is_freebsd; then
+ log_must sysctl -n kstat.zfs.$TESTPOOL.txgs
+else
+ log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
+fi
log_must zpool events
# Verify at least 5 deadman events were logged. The first after 5 seconds,