aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: LOLi <[email protected]> 2018-03-30 21:10:01 +0200
committer: Brian Behlendorf <[email protected]> 2018-03-30 12:10:01 -0700
commit: 77d8a0f1a4d0b2f59cee63088f7987cb38e66538 (patch)
tree: f06baa3bdb98f5a4740b66ae60fbbf687cb44ca7
parent: 2f291ebaed21090afd784f3b376dcb11465a37df (diff)
Fix hung z_zvol tasks during 'zfs receive'
During a receive operation zvol_create_minors_impl() can wait needlessly
for the prefetch thread because both share the same task queue. This
results in hung tasks:

    <3>INFO: task z_zvol:5541 blocked for more than 120 seconds.
    <3> Tainted: P O 3.16.0-4-amd64
    <3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.

The first z_zvol:5541 (zvol_task_cb) is waiting for the long running
traverse_prefetch_thread:260:

    root@linux:~# cat /proc/spl/taskq
    taskq                act nthr spwn maxt pri mina
    spl_system_taskq/0     1    2    0   64 100    1
        active: [260]traverse_prefetch_thread [zfs](0xffff88003347ae40)
        wait: 5541
    spl_delay_taskq/0      0    1    0    4 100    1
        delay: spa_deadman [zfs](0xffff880039924000)
    z_zvol/1               1    1    0    1 120    1
        active: [5541]zvol_task_cb [zfs](0xffff88001fde6400)
        pend: zvol_task_cb [zfs](0xffff88001fde6800)

This change adds a dedicated, per-pool, prefetch taskq to prevent the
traverse code from monopolizing the global (and limited) system_taskq by
inappropriately scheduling long running tasks on it.

Reviewed-by: Albert Lee <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: loli10K <[email protected]>
Closes #6330
Closes #6890
Closes #7343
-rw-r--r-- include/sys/spa_impl.h 1
-rw-r--r-- module/zfs/dmu_traverse.c 3
-rw-r--r-- module/zfs/spa.c 13
3 files changed, 16 insertions, 1 deletions
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index af1d6aef0..77625d4b0 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -280,6 +280,7 @@ struct spa {
spa_keystore_t spa_keystore; /* loaded crypto keys */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
+ taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index 5407e4817..cffcd2d00 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -31,6 +31,7 @@
#include <sys/dsl_pool.h>
#include <sys/dnode.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/dmu_impl.h>
#include <sys/sa.h>
@@ -661,7 +662,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
}
if (!(flags & TRAVERSE_PREFETCH_DATA) ||
- taskq_dispatch(system_taskq, traverse_prefetch_thread,
+ taskq_dispatch(spa->spa_prefetch_taskq, traverse_prefetch_thread,
td, TQ_NOQUEUE) == TASKQID_INVALID)
pd->pd_exited = B_TRUE;
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 1e9e7b013..4b6196cc3 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1187,6 +1187,14 @@ spa_activate(spa_t *spa, int mode)
1, INT_MAX, 0);
/*
+ * Taskq dedicated to prefetcher threads: this is used to prevent the
+ * pool traverse code from monopolizing the global (and limited)
+ * system_taskq by inappropriately scheduling long running tasks on it.
+ */
+ spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus,
+ defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
+
+ /*
* The taskq to upgrade datasets in this pool. Currently used by
* feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA.
*/
@@ -1213,6 +1221,11 @@ spa_deactivate(spa_t *spa)
spa->spa_zvol_taskq = NULL;
}
+ if (spa->spa_prefetch_taskq) {
+ taskq_destroy(spa->spa_prefetch_taskq);
+ spa->spa_prefetch_taskq = NULL;
+ }
+
if (spa->spa_upgrade_taskq) {
taskq_destroy(spa->spa_upgrade_taskq);
spa->spa_upgrade_taskq = NULL;