aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/zfs_fm.c
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2024-02-08 10:19:52 -0700
committerGitHub <[email protected]>2024-02-08 09:19:52 -0800
commitcbe882298e4ddc3917dfaf239eca475fe06d62d4 (patch)
treeff089153d8a4180df1dca1e7e47fd23584155f77 /module/zfs/zfs_fm.c
parent229b9f4ed05e6d14fb4d73fa04a71e99b01bb534 (diff)
Add slow disk diagnosis to ZED
Slow disk response times can be indicative of a failing drive. ZFS currently tracks slow I/Os (slower than zio_slow_io_ms) and generates events (ereport.fs.zfs.delay). However, ZED takes no action on them, as it does for checksum or I/O errors. This change adds slow disk diagnosis to ZED, which is opt-in using two new VDEV properties: VDEV_PROP_SLOW_IO_N and VDEV_PROP_SLOW_IO_T. If multiple VDEVs in a pool are undergoing slow I/Os, then the zpool_vdev_degrade() call is skipped. Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Allan Jude <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Co-authored-by: Rob Wing <[email protected]> Signed-off-by: Don Brady <[email protected]> Closes #15469
Diffstat (limited to 'module/zfs/zfs_fm.c')
-rw-r--r-- module/zfs/zfs_fm.c 26
1 files changed, 26 insertions, 0 deletions
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index c4eb74e87..481af2ba8 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -222,6 +222,12 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
+ case VDEV_PROP_SLOW_IO_N:
+ propval = vd->vdev_slow_io_n;
+ break;
+ case VDEV_PROP_SLOW_IO_T:
+ propval = vd->vdev_slow_io_t;
+ break;
default:
propval = propdef;
break;
@@ -741,6 +747,26 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
NULL);
}
+ if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
+ uint64_t slow_io_n, slow_io_t;
+
+ slow_io_n = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_N);
+ if (slow_io_n != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N))
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N,
+ DATA_TYPE_UINT64,
+ slow_io_n,
+ NULL);
+
+ slow_io_t = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_T);
+ if (slow_io_t != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T))
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T,
+ DATA_TYPE_UINT64,
+ slow_io_t,
+ NULL);
+ }
+
mutex_exit(&spa->spa_errlist_lock);
*ereport_out = ereport;