diff options
author | LOLi <[email protected]> | 2018-03-30 21:10:01 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-03-30 12:10:01 -0700 |
commit | 77d8a0f1a4d0b2f59cee63088f7987cb38e66538 (patch) | |
tree | f06baa3bdb98f5a4740b66ae60fbbf687cb44ca7 | |
parent | 2f291ebaed21090afd784f3b376dcb11465a37df (diff) |
Fix hung z_zvol tasks during 'zfs receive'
During a receive operation, zvol_create_minors_impl() can wait
needlessly for the prefetch thread because both share the same task
queue. This results in hung tasks:
<3>INFO: task z_zvol:5541 blocked for more than 120 seconds.
<3> Tainted: P O 3.16.0-4-amd64
<3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
The first z_zvol:5541 (zvol_task_cb) is waiting for the long-running
traverse_prefetch_thread:260:
root@linux:~# cat /proc/spl/taskq
taskq act nthr spwn maxt pri mina
spl_system_taskq/0 1 2 0 64 100 1
active: [260]traverse_prefetch_thread [zfs](0xffff88003347ae40)
wait: 5541
spl_delay_taskq/0 0 1 0 4 100 1
delay: spa_deadman [zfs](0xffff880039924000)
z_zvol/1 1 1 0 1 120 1
active: [5541]zvol_task_cb [zfs](0xffff88001fde6400)
pend: zvol_task_cb [zfs](0xffff88001fde6800)
This change adds a dedicated, per-pool prefetch taskq to prevent the
traverse code from monopolizing the global (and limited) system_taskq by
inappropriately scheduling long-running tasks on it.
Reviewed-by: Albert Lee <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: loli10K <[email protected]>
Closes #6330
Closes #6890
Closes #7343
-rw-r--r-- | include/sys/spa_impl.h | 1 | ||||
-rw-r--r-- | module/zfs/dmu_traverse.c | 3 | ||||
-rw-r--r-- | module/zfs/spa.c | 13 |
3 files changed, 16 insertions, 1 deletions
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index af1d6aef0..77625d4b0 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -280,6 +280,7 @@ struct spa { spa_keystore_t spa_keystore; /* loaded crypto keys */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ taskq_t *spa_zvol_taskq; /* Taskq for minor management */ + taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */ uint64_t spa_multihost; /* multihost aware (mmp) */ mmp_thread_t spa_mmp; /* multihost mmp thread */ diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 5407e4817..cffcd2d00 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -31,6 +31,7 @@ #include <sys/dsl_pool.h> #include <sys/dnode.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/zio.h> #include <sys/dmu_impl.h> #include <sys/sa.h> @@ -661,7 +662,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, } if (!(flags & TRAVERSE_PREFETCH_DATA) || - taskq_dispatch(system_taskq, traverse_prefetch_thread, + taskq_dispatch(spa->spa_prefetch_taskq, traverse_prefetch_thread, td, TQ_NOQUEUE) == TASKQID_INVALID) pd->pd_exited = B_TRUE; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 1e9e7b013..4b6196cc3 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1187,6 +1187,14 @@ spa_activate(spa_t *spa, int mode) 1, INT_MAX, 0); /* + * Taskq dedicated to prefetcher threads: this is used to prevent the + * pool traverse code from monopolizing the global (and limited) + * system_taskq by inappropriately scheduling long running tasks on it. + */ + spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); + + /* * The taskq to upgrade datasets in this pool. Currently used by * feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA. 
*/ @@ -1213,6 +1221,11 @@ spa_deactivate(spa_t *spa) spa->spa_zvol_taskq = NULL; } + if (spa->spa_prefetch_taskq) { + taskq_destroy(spa->spa_prefetch_taskq); + spa->spa_prefetch_taskq = NULL; + } + if (spa->spa_upgrade_taskq) { taskq_destroy(spa->spa_upgrade_taskq); spa->spa_upgrade_taskq = NULL; |