From 2bbec1c910a24bf61c6f41e0762e50face4b8907 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Thu, 14 Mar 2019 18:21:53 -0700 Subject: Make zpool status counters match error events count The number of IO and checksum events should match the number of errors seen in zpool status. Previously there was a mismatch between the two counts because zpool status would only count unrecovered errors, while zpool events would get an event for *all* errors (recovered or not). This led to situations where disks could be faulted for "too many errors", while at the same time showing zero errors in zpool status. This fixes the zpool status error counters to increment at the same times we post the error events. Reviewed-by: Tom Caputi Reviewed-by: Brian Behlendorf Reviewed-by: Olaf Faaland Signed-off-by: Tony Hutter Closes #4851 Closes #7817 --- module/zfs/vdev.c | 11 ----------- module/zfs/vdev_raidz.c | 9 +++++++-- module/zfs/zio.c | 15 ++++++++++++++- 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'module') diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 890bb1135..ae1c2bcec 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -4051,17 +4051,6 @@ vdev_stat_update(zio_t *zio, uint64_t psize) if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) return; - mutex_enter(&vd->vdev_stat_lock); - if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) { - if (zio->io_error == ECKSUM) - vs->vs_checksum_errors++; - else - vs->vs_read_errors++; - } - if (type == ZIO_TYPE_WRITE && !vdev_is_dead(vd)) - vs->vs_write_errors++; - mutex_exit(&vd->vdev_stat_lock); - if (spa->spa_load_state == SPA_LOAD_NONE && type == ZIO_TYPE_WRITE && txg != 0 && (!(flags & ZIO_FLAG_IO_REPAIR) || diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index d10d89f3e..d11287bdc 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2274,16 +2274,21 @@ vdev_raidz_io_done(zio_t *zio) if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { for (c = 0; c < 
rm->rm_cols; c++) { + vdev_t *cvd; rc = &rm->rm_col[c]; + cvd = vd->vdev_child[rc->rc_devidx]; if (rc->rc_error == 0) { zio_bad_cksum_t zbc; zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; + mutex_enter(&cvd->vdev_stat_lock); + cvd->vdev_stat.vs_checksum_errors++; + mutex_exit(&cvd->vdev_stat_lock); + zfs_ereport_start_checksum( - zio->io_spa, - vd->vdev_child[rc->rc_devidx], + zio->io_spa, cvd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, (void *)(uintptr_t)c, &zbc); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 7bb3c0825..0912f607f 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -4132,6 +4132,10 @@ zio_checksum_verify(zio_t *zio) zio->io_error = error; if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + zfs_ereport_start_checksum(zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, zio->io_offset, zio->io_size, NULL, &info); @@ -4467,9 +4471,18 @@ zio_done(zio_t *zio) * device is currently unavailable. */ if (zio->io_error != ECKSUM && zio->io_vd != NULL && - !vdev_is_dead(zio->io_vd)) + !vdev_is_dead(zio->io_vd)) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + if (zio->io_type == ZIO_TYPE_READ) { + zio->io_vd->vdev_stat.vs_read_errors++; + } else if (zio->io_type == ZIO_TYPE_WRITE) { + zio->io_vd->vdev_stat.vs_write_errors++; + } + mutex_exit(&zio->io_vd->vdev_stat_lock); + zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0, 0); + } if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && -- cgit v1.2.3