diff options
author | Don Brady <[email protected]> | 2024-02-08 10:19:52 -0700 |
---|---|---|
committer | GitHub <[email protected]> | 2024-02-08 09:19:52 -0800 |
commit | cbe882298e4ddc3917dfaf239eca475fe06d62d4 (patch) | |
tree | ff089153d8a4180df1dca1e7e47fd23584155f77 /module/zfs/zfs_fm.c | |
parent | 229b9f4ed05e6d14fb4d73fa04a71e99b01bb534 (diff) |
Add slow disk diagnosis to ZED
Slow disk response times can be indicative of a failing drive. ZFS
currently tracks slow I/Os (slower than zio_slow_io_ms) and generates
events (ereport.fs.zfs.delay). However, no action is taken by ZED,
like is done for checksum or I/O errors. This change adds slow disk
diagnosis to ZED which is opt-in using new VDEV properties:
VDEV_PROP_SLOW_IO_N
VDEV_PROP_SLOW_IO_T
If multiple VDEVs in a pool are undergoing slow I/Os, then it skips
the zpool_vdev_degrade().
Sponsored-By: OpenDrives Inc.
Sponsored-By: Klara Inc.
Reviewed-by: Tony Hutter <[email protected]>
Reviewed-by: Allan Jude <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Co-authored-by: Rob Wing <[email protected]>
Signed-off-by: Don Brady <[email protected]>
Closes #15469
Diffstat (limited to 'module/zfs/zfs_fm.c')
-rw-r--r-- | module/zfs/zfs_fm.c | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index c4eb74e87..481af2ba8 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -222,6 +222,12 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop) case VDEV_PROP_IO_T: propval = vd->vdev_io_t; break; + case VDEV_PROP_SLOW_IO_N: + propval = vd->vdev_slow_io_n; + break; + case VDEV_PROP_SLOW_IO_T: + propval = vd->vdev_slow_io_t; + break; default: propval = propdef; break; @@ -741,6 +747,26 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, NULL); } + if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) { + uint64_t slow_io_n, slow_io_t; + + slow_io_n = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_N); + if (slow_io_n != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N, + DATA_TYPE_UINT64, + slow_io_n, + NULL); + + slow_io_t = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_T); + if (slow_io_t != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T, + DATA_TYPE_UINT64, + slow_io_t, + NULL); + } + mutex_exit(&spa->spa_errlist_lock); *ereport_out = ereport; |