aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/fm.c
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2020-09-04 11:34:28 -0600
committerGitHub <[email protected]>2020-09-04 10:34:28 -0700
commit4f0728278615eb42fc5022b2817c082f578e225f (patch)
tree598cd2bb948dd3b0eb0469139a33269ae58fc40e /module/zfs/fm.c
parent3808032489f28c1f36b39c9a3274d5f4b6f9638a (diff)
Avoid posting duplicate zpool events
Duplicate io and checksum ereport events can misrepresent that things are worse than they seem. Ideally the zpool events and the corresponding vdev stat error counts in a zpool status should be for unique errors -- not the same error being counted over and over. This can be demonstrated in a simple example. With a single bad block in a datafile and just 5 reads of the file we end up with a degraded vdev, even though there is only one unique error in the pool. The proposed solution to the above issue, is to eliminate duplicates when posting events and when updating vdev error stats. We now save recent error events of interest when posting events so that we can easily check for duplicates when posting an error. Reviewed by: Brad Lewis <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Don Brady <[email protected]> Closes #10861
Diffstat (limited to 'module/zfs/fm.c')
-rw-r--r--module/zfs/fm.c14
1 files changed, 13 insertions, 1 deletions
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index c00e08b8d..a5003f85d 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -104,13 +104,15 @@ struct erpt_kstat {
kstat_named_t erpt_set_failed; /* num erpt set failures */
kstat_named_t fmri_set_failed; /* num fmri set failures */
kstat_named_t payload_set_failed; /* num payload set failures */
+ kstat_named_t erpt_duplicates; /* num duplicate erpts */
};
static struct erpt_kstat erpt_kstat_data = {
{ "erpt-dropped", KSTAT_DATA_UINT64 },
{ "erpt-set-failed", KSTAT_DATA_UINT64 },
{ "fmri-set-failed", KSTAT_DATA_UINT64 },
- { "payload-set-failed", KSTAT_DATA_UINT64 }
+ { "payload-set-failed", KSTAT_DATA_UINT64 },
+ { "erpt-duplicates", KSTAT_DATA_UINT64 }
};
kstat_t *fm_ksp;
@@ -568,6 +570,12 @@ out:
return (error);
}
+void
+zfs_zevent_track_duplicate(void)
+{
+ atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
+}
+
static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
@@ -1633,6 +1641,8 @@ fm_init(void)
list_create(&zevent_list, sizeof (zevent_t),
offsetof(zevent_t, ev_node));
cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
+
+ zfs_ereport_init();
}
void
@@ -1640,6 +1650,8 @@ fm_fini(void)
{
int count;
+ zfs_ereport_fini();
+
zfs_zevent_drain_all(&count);
mutex_enter(&zevent_lock);