summaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2012-10-25 13:02:31 -0700
committerBrian Behlendorf <[email protected]>2012-11-02 15:45:59 -0700
commit9dcb97198338ba2d8764dd5604b278118612f74d (patch)
tree4752e52379492cd45bdb5cd930496723dabed269 /module
parente95853a331529a6cb96fdf10476c53441e59f4e1 (diff)
Log I/Os longer than zio_delay_max (30s default)
There have been reports of ZFS deadlocking due to what appears to be a lost IO. This patch addes some debugging to determine the exact state of the IO which neither 1) completed, 2) failed, or 3) timed out after zio_delay_max (30) seconds. This information will be logged using the ZFS FMA infrastructure as a 'delay' event and posted to the internal zevent log. By default the last 64 events will be kept in the log but the limit is configurable via the zfs_zevent_len_max module option. To dump the contents of the log use the 'zpool events -v' command and look for the resource.fs.zfs.delay event. It will include various information about the pool, vdev, and zio which may shed some light on the issue. In the context of this change the 120 second kernel blocked thread watchdog has been disabled for synchronous IOs. Signed-off-by: Brian Behlendorf <[email protected]> Issue #930
Diffstat (limited to 'module')
-rw-r--r--module/zfs/zfs_fm.c4
-rw-r--r--module/zfs/zio.c32
2 files changed, 26 insertions, 10 deletions
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index 0b98231ec..db6a831d2 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -294,6 +294,10 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
DATA_TYPE_INT32, zio->io_error, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS,
DATA_TYPE_INT32, zio->io_flags, NULL);
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE,
+ DATA_TYPE_UINT32, zio->io_stage, NULL);
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE,
+ DATA_TYPE_UINT32, zio->io_pipeline, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY,
DATA_TYPE_UINT64, zio->io_delay, NULL);
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index ece3329d0..bcfc081d1 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1305,18 +1305,34 @@ __zio_execute(zio_t *zio)
int
zio_wait(zio_t *zio)
{
+ uint64_t timeout;
int error;
ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
ASSERT(zio->io_executor == NULL);
zio->io_waiter = curthread;
+ timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz);
__zio_execute(zio);
mutex_enter(&zio->io_lock);
- while (zio->io_executor != NULL)
- cv_wait(&zio->io_cv, &zio->io_lock);
+ while (zio->io_executor != NULL) {
+ /*
+ * Wake up periodically to prevent the kernel from complaining
+ * about a blocked task. However, check zio_delay_max to see
+ * if the I/O has exceeded the timeout and post an ereport.
+ */
+ cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock,
+ ddi_get_lbolt() + hz);
+
+ if (timeout && (ddi_get_lbolt() > timeout)) {
+ zio->io_delay = zio_delay_max;
+ zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
+ zio->io_spa, zio->io_vd, zio, 0, 0);
+ timeout = 0;
+ }
+ }
mutex_exit(&zio->io_lock);
error = zio->io_error;
@@ -2889,15 +2905,11 @@ zio_done(zio_t *zio)
vdev_stat_update(zio, zio->io_size);
/*
- * If this I/O is attached to a particular vdev is slow, exeeding
- * 30 seconds to complete, post an error described the I/O delay.
- * We ignore these errors if the device is currently unavailable.
+ * When an I/O completes but was slow post an ereport.
*/
- if (zio->io_delay >= zio_delay_max) {
- if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
- zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
- zio->io_vd, zio, 0, 0);
- }
+ if (zio->io_delay >= zio_delay_max)
+ zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
+ zio->io_vd, zio, 0, 0);
if (zio->io_error) {
/*