summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- man/man5/zfs-module-parameters.5 34
-rw-r--r-- module/zfs/spa_misc.c 16
2 files changed, 41 insertions, 9 deletions
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index cca583de9..8b9a4e8cc 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -765,9 +765,28 @@ Default value: \fB0\fR.
\fBzfs_deadman_enabled\fR (int)
.ad
.RS 12n
-Enable deadman timer. See description below.
+When a pool sync operation takes longer than \fBzfs_deadman_synctime_ms\fR
+milliseconds, a "slow spa_sync" message is logged to the debug log
+(see \fBzfs_dbgmsg_enable\fR). If \fBzfs_deadman_enabled\fR is set,
+all pending IO operations are also checked, and if any have not completed
+within \fBzfs_deadman_synctime_ms\fR milliseconds, a "SLOW IO" message
+is logged to the debug log and a "delay" system event with the details of
+the hung IO is posted.
.sp
-Use \fB1\fR for yes (default) and \fB0\fR to disable.
+Use \fB1\fR (default) to enable the slow IO check and \fB0\fR to disable.
+.RE
+
+.sp
+.ne 2
+.na
+\fBzfs_deadman_checktime_ms\fR (int)
+.ad
+.RS 12n
+Once a pool sync operation has taken longer than
+\fBzfs_deadman_synctime_ms\fR milliseconds, continue to check for slow
+operations every \fBzfs_deadman_checktime_ms\fR milliseconds.
+.sp
+Default value: \fB5,000\fR.
.RE
.sp
@@ -776,12 +795,11 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable.
\fBzfs_deadman_synctime_ms\fR (ulong)
.ad
.RS 12n
-Expiration time in milliseconds. This value has two meanings. First it is
-used to determine when the spa_deadman() logic should fire. By default the
-spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
-Secondly, the value determines if an I/O is considered "hung". Any I/O that
-has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
-in a zevent being logged.
+Interval in milliseconds after which the deadman is triggered. If
+\fBzfs_deadman_enabled\fR is set, this is also the interval after which
+an IO operation is considered to be "hung".
+
+See \fBzfs_deadman_enabled\fR.
.sp
Default value: \fB1,000,000\fR.
.RE
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index fa9bdd7b8..c39c137e6 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -298,6 +298,12 @@ int zfs_free_leak_on_eio = B_FALSE;
unsigned long zfs_deadman_synctime_ms = 1000000ULL;
/*
+ * Check time in milliseconds. This defines the interval at which we
+ * re-check for hung I/O once the deadman has fired.
+ */
+unsigned long zfs_deadman_checktime_ms = 5000ULL;
+
+/*
* By default the deadman is enabled.
*/
int zfs_deadman_enabled = 1;
@@ -524,6 +530,10 @@ spa_deadman(void *arg)
{
spa_t *spa = arg;
+ /* Disable the deadman if the pool is suspended. */
+ if (spa_suspended(spa))
+ return;
+
zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
(gethrtime() - spa->spa_sync_starttime) / NANOSEC,
++spa->spa_deadman_calls);
@@ -532,7 +542,7 @@ spa_deadman(void *arg)
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
- NSEC_TO_TICK(spa->spa_deadman_synctime));
+ MSEC_TO_TICK(zfs_deadman_checktime_ms));
}
/*
@@ -2114,6 +2124,10 @@ MODULE_PARM_DESC(zfs_free_leak_on_eio,
module_param(zfs_deadman_synctime_ms, ulong, 0644);
MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");
+module_param(zfs_deadman_checktime_ms, ulong, 0644);
+MODULE_PARM_DESC(zfs_deadman_checktime_ms,
+ "Dead I/O check interval in milliseconds");
+
module_param(zfs_deadman_enabled, int, 0644);
MODULE_PARM_DESC(zfs_deadman_enabled, "Enable deadman timer");