about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Olaf Faaland <[email protected]> 2018-03-15 10:56:55 -0700
committer: Tony Hutter <[email protected]> 2018-05-07 17:19:56 -0700
commit: 3eb3a13628d3d58b0294e4e0c0e3b47483a4053e (patch)
tree: 1038e8b6c943ca75528b81aa96d8d0c9b1e2b30f
parent: c234706270a2e02736809c2cf9004aa0b7798276 (diff)
Report pool suspended due to MMP
When the pool is suspended, record whether it was due to an I/O error or due to MMP writes failing to succeed within the required time. Change spa_suspended from uint8_t to zio_suspend_reason_t to store the reason.

When userspace queries pool status via spa_tryimport(), report the reason the pool was suspended in a new key, ZPOOL_CONFIG_SUSPENDED_REASON.

In libzfs, when interpreting the returned config nvlist, report suspension due to MMP with a new pool status enum value, ZPOOL_STATUS_IO_FAILURE_MMP.

In status_callback(), which generates and emits the message when 'zpool status' is executed, add a case to print an appropriate message for the new pool status enum value.

Reviewed-by: George Melikov <[email protected]>
Reviewed-by: Giuseppe Di Natale <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tony Hutter <[email protected]>
Signed-off-by: Olaf Faaland <[email protected]>
Closes #7296
-rw-r--r--  cmd/zpool/zpool_main.c      9
-rw-r--r--  include/libzfs.h            1
-rw-r--r--  include/sys/fs/zfs.h        1
-rw-r--r--  include/sys/spa_impl.h      2
-rw-r--r--  include/sys/zio.h           8
-rw-r--r--  lib/libzfs/libzfs_status.c  8
-rw-r--r--  module/zfs/mmp.c            2
-rw-r--r--  module/zfs/spa.c            8
-rw-r--r--  module/zfs/spa_misc.c       2
-rw-r--r--  module/zfs/zio.c            8
10 files changed, 38 insertions, 11 deletions
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 1cfff3ade..b07569389 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -6395,6 +6395,15 @@ status_callback(zpool_handle_t *zhp, void *data)
"to be recovered.\n"));
break;
+ case ZPOOL_STATUS_IO_FAILURE_MMP:
+ (void) printf(gettext("status: The pool is suspended because "
+ "multihost writes failed or were delayed;\n\tanother "
+ "system could import the pool undetected.\n"));
+ (void) printf(gettext("action: Make sure the pool's devices "
+ "are connected, then reboot your system and\n\timport the "
+ "pool.\n"));
+ break;
+
case ZPOOL_STATUS_IO_FAILURE_WAIT:
case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
(void) printf(gettext("status: One or more devices are "
diff --git a/include/libzfs.h b/include/libzfs.h
index 147589bbf..945bd5b86 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -331,6 +331,7 @@ typedef enum {
ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
+ ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
ZPOOL_STATUS_ERRATA, /* informational errata available */
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 13b25a695..b7912313b 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -638,6 +638,7 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg"
#define ZPOOL_CONFIG_COMMENT "comment"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
+#define ZPOOL_CONFIG_SUSPENDED_REASON "suspended_reason" /* not stored */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 06de24421..73ad1c60c 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -233,7 +233,7 @@ struct spa {
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
kcondvar_t spa_suspend_cv; /* notification of resume */
- uint8_t spa_suspended; /* pool is suspended */
+ zio_suspend_reason_t spa_suspended; /* pool is suspended */
uint8_t spa_claiming; /* pool is doing zil_claim() */
boolean_t spa_debug; /* debug enabled? */
boolean_t spa_is_root; /* pool is root */
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 0d741f8e2..4b0eecc2e 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -144,6 +144,12 @@ enum zio_checksum {
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
+typedef enum zio_suspend_reason {
+ ZIO_SUSPEND_NONE = 0,
+ ZIO_SUSPEND_IOERR,
+ ZIO_SUSPEND_MMP,
+} zio_suspend_reason_t;
+
enum zio_flag {
/*
* Flags inherited by gang, ddt, and vdev children,
@@ -577,7 +583,7 @@ extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
extern enum zio_compress zio_compress_select(spa_t *spa,
enum zio_compress child, enum zio_compress parent);
-extern void zio_suspend(spa_t *spa, zio_t *zio);
+extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
extern int zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
index 05a9afce8..6cdcd3827 100644
--- a/lib/libzfs/libzfs_status.c
+++ b/lib/libzfs/libzfs_status.c
@@ -274,10 +274,16 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
return (ZPOOL_STATUS_BAD_GUID_SUM);
/*
- * Check whether the pool has suspended due to failed I/O.
+ * Check whether the pool has suspended.
*/
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
&suspended) == 0) {
+ uint64_t reason;
+
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
+ &reason) == 0 && reason == ZIO_SUSPEND_MMP)
+ return (ZPOOL_STATUS_IO_FAILURE_MMP);
+
if (suspended == ZIO_FAILURE_MODE_CONTINUE)
return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
return (ZPOOL_STATUS_IO_FAILURE_WAIT);
diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
index a08e0864e..fc8346dc3 100644
--- a/module/zfs/mmp.c
+++ b/module/zfs/mmp.c
@@ -519,7 +519,7 @@ mmp_thread(spa_t *spa)
"succeeded in over %llus; suspending pool",
spa_name(spa),
NSEC2SEC(start - mmp->mmp_last_write));
- zio_suspend(spa, NULL);
+ zio_suspend(spa, NULL, ZIO_SUSPEND_MMP);
}
if (multihost && !suspended)
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index a7a2f6281..561f4d04b 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -3778,10 +3778,14 @@ spa_get_stats(const char *name, nvlist_t **config,
ZPOOL_CONFIG_ERRCOUNT,
spa_get_errlog_size(spa)) == 0);
- if (spa_suspended(spa))
+ if (spa_suspended(spa)) {
VERIFY(nvlist_add_uint64(*config,
ZPOOL_CONFIG_SUSPENDED,
spa->spa_failmode) == 0);
+ VERIFY(nvlist_add_uint64(*config,
+ ZPOOL_CONFIG_SUSPENDED_REASON,
+ spa->spa_suspended) == 0);
+ }
spa_add_spares(spa, *config);
spa_add_l2cache(spa, *config);
@@ -6969,7 +6973,7 @@ spa_sync(spa_t *spa, uint64_t txg)
if (error == 0)
break;
- zio_suspend(spa, NULL);
+ zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
zio_resume_wait(spa);
}
dmu_tx_commit(tx);
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 3787e010f..e92c39482 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1691,7 +1691,7 @@ spa_get_failmode(spa_t *spa)
boolean_t
spa_suspended(spa_t *spa)
{
- return (spa->spa_suspended);
+ return (spa->spa_suspended != ZIO_SUSPEND_NONE);
}
uint64_t
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index cd0a473e0..9a465e1be 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1864,7 +1864,7 @@ zio_reexecute(zio_t *pio)
}
void
-zio_suspend(spa_t *spa, zio_t *zio)
+zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
{
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
@@ -1883,7 +1883,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_GODFATHER);
- spa->spa_suspended = B_TRUE;
+ spa->spa_suspended = reason;
if (zio != NULL) {
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
@@ -1906,7 +1906,7 @@ zio_resume(spa_t *spa)
* Reexecute all previously suspended i/o.
*/
mutex_enter(&spa->spa_suspend_lock);
- spa->spa_suspended = B_FALSE;
+ spa->spa_suspended = ZIO_SUSPEND_NONE;
cv_broadcast(&spa->spa_suspend_cv);
pio = spa->spa_suspend_zio_root;
spa->spa_suspend_zio_root = NULL;
@@ -3975,7 +3975,7 @@ zio_done(zio_t *zio)
* We'd fail again if we reexecuted now, so suspend
* until conditions improve (e.g. device comes online).
*/
- zio_suspend(zio->io_spa, zio);
+ zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
} else {
/*
* Reexecution is potentially a huge amount of work.