From 3eb3a13628d3d58b0294e4e0c0e3b47483a4053e Mon Sep 17 00:00:00 2001
From: Olaf Faaland
Date: Thu, 15 Mar 2018 10:56:55 -0700
Subject: Report pool suspended due to MMP

When the pool is suspended, record whether it was due to an I/O error or
due to MMP writes failing to succeed within the required time.

Change spa_suspended from uint8_t to zio_suspend_reason_t to store the
reason.

When userspace queries pool status via spa_tryimport(), report the reason
the pool was suspended in a new key, ZPOOL_CONFIG_SUSPENDED_REASON.

In libzfs, when interpreting the returned config nvlist, report
suspension due to MMP with a new pool status enum value,
ZPOOL_STATUS_IO_FAILURE_MMP.

In status_callback(), which generates and emits the message when 'zpool
status' is executed, add a case to print an appropriate message for the
new pool status enum value.

Reviewed-by: George Melikov
Reviewed-by: Giuseppe Di Natale
Reviewed-by: Brian Behlendorf
Reviewed-by: Tony Hutter
Signed-off-by: Olaf Faaland
Closes #7296
---
 module/zfs/mmp.c      | 2 +-
 module/zfs/spa.c      | 8 ++++++--
 module/zfs/spa_misc.c | 2 +-
 module/zfs/zio.c      | 8 ++++----
 4 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'module')

diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
index a08e0864e..fc8346dc3 100644
--- a/module/zfs/mmp.c
+++ b/module/zfs/mmp.c
@@ -519,7 +519,7 @@ mmp_thread(spa_t *spa)
 			    "succeeded in over %llus; suspending pool",
 			    spa_name(spa),
 			    NSEC2SEC(start - mmp->mmp_last_write));
-			zio_suspend(spa, NULL);
+			zio_suspend(spa, NULL, ZIO_SUSPEND_MMP);
 		}
 
 		if (multihost && !suspended)
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index a7a2f6281..561f4d04b 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -3778,10 +3778,14 @@ spa_get_stats(const char *name, nvlist_t **config,
 			    ZPOOL_CONFIG_ERRCOUNT,
 			    spa_get_errlog_size(spa)) == 0);
 
-			if (spa_suspended(spa))
+			if (spa_suspended(spa)) {
 				VERIFY(nvlist_add_uint64(*config,
 				    ZPOOL_CONFIG_SUSPENDED,
 				    spa->spa_failmode) == 0);
+				VERIFY(nvlist_add_uint64(*config,
+				    ZPOOL_CONFIG_SUSPENDED_REASON,
+				    spa->spa_suspended) == 0);
+			}
 
 			spa_add_spares(spa, *config);
 			spa_add_l2cache(spa, *config);
@@ -6969,7 +6973,7 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 		if (error == 0)
 			break;
-		zio_suspend(spa, NULL);
+		zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
 		zio_resume_wait(spa);
 	}
 	dmu_tx_commit(tx);
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 3787e010f..e92c39482 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1691,7 +1691,7 @@ spa_get_failmode(spa_t *spa)
 boolean_t
 spa_suspended(spa_t *spa)
 {
-	return (spa->spa_suspended);
+	return (spa->spa_suspended != ZIO_SUSPEND_NONE);
 }
 
 uint64_t
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index cd0a473e0..9a465e1be 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1864,7 +1864,7 @@ zio_reexecute(zio_t *pio)
 }
 
 void
-zio_suspend(spa_t *spa, zio_t *zio)
+zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
 {
 	if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
 		fm_panic("Pool '%s' has encountered an uncorrectable I/O "
@@ -1883,7 +1883,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
 		    ZIO_FLAG_GODFATHER);
 
-	spa->spa_suspended = B_TRUE;
+	spa->spa_suspended = reason;
 
 	if (zio != NULL) {
 		ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
@@ -1906,7 +1906,7 @@ zio_resume(spa_t *spa)
 	 * Reexecute all previously suspended i/o.
 	 */
 	mutex_enter(&spa->spa_suspend_lock);
-	spa->spa_suspended = B_FALSE;
+	spa->spa_suspended = ZIO_SUSPEND_NONE;
 	cv_broadcast(&spa->spa_suspend_cv);
 	pio = spa->spa_suspend_zio_root;
 	spa->spa_suspend_zio_root = NULL;
@@ -3975,7 +3975,7 @@ zio_done(zio_t *zio)
 			 * We'd fail again if we reexecuted now, so suspend
 			 * until conditions improve (e.g. device comes online).
 			 */
-			zio_suspend(zio->io_spa, zio);
+			zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
 		} else {
 			/*
 			 * Reexecution is potentially a huge amount of work.
-- 
cgit v1.2.3