summaryrefslogtreecommitdiffstats
path: root/module/zfs/zio.c
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2015-12-16 11:28:15 -0800
committerBrian Behlendorf <[email protected]>2015-12-18 13:27:12 -0800
commit6fe53787f38f10956b8d375133ed4559f8ce847b (patch)
treecdeeba055e332d5164459982896e7da020769770 /module/zfs/zio.c
parenta8ad3bf02cace90c45bc25df1bff1089d19e79f1 (diff)
Fix vdev_queue_aggregate() deadlock
This deadlock may manifest itself in slightly different ways but at the core it is caused by a memory allocation blocking on file- system reclaim in the zio pipeline. This is normally impossible because zio_execute() disables filesystem reclaim by setting PF_FSTRANS on the thread. However, kmem cache allocations may still indirectly block on file system reclaim while holding the critical vq->vq_lock as shown below. To resolve this issue zio_buf_alloc_flags() is introduced which allocation flags to be passed. This can then be used in vdev_queue_aggregate() with KM_NOSLEEP when allocating the aggregate IO buffer. Since aggregating the IO is purely a performance optimization we want this to either succeed or fail quickly. Trying too hard to allocate this memory under the vq->vq_lock can negatively impact performance and result in this deadlock. * z_wr_iss zio_vdev_io_start vdev_queue_io -> Takes vq->vq_lock vdev_queue_io_to_issue vdev_queue_aggregate zio_buf_alloc -> Waiting on spl_kmem_cache process * z_wr_int zio_vdev_io_done vdev_queue_io_done mutex_lock -> Waiting on vq->vq_lock held by z_wr_iss * txg_sync spa_sync dsl_pool_sync zio_wait -> Waiting on zio being handled by z_wr_int * spl_kmem_cache spl_cache_grow_work kv_alloc spl_vmalloc ... evict zpl_evict_inode zfs_inactive dmu_tx_wait txg_wait_open -> Waiting on txg_sync Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Chunwei Chen <[email protected]> Signed-off-by: Tim Chase <[email protected]> Closes #3808 Closes #3867
Diffstat (limited to 'module/zfs/zio.c')
-rw-r--r--module/zfs/zio.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index a16266a40..2bc88c52c 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -248,6 +248,20 @@ zio_data_buf_alloc(size_t size)
return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
}
+/*
+ * Use zio_buf_alloc_flags when specific allocation flags are needed. e.g.
+ * passing KM_NOSLEEP when it is acceptable for an allocation to fail.
+ */
+void *
+zio_buf_alloc_flags(size_t size, int flags)
+{
+ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
+
+ VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+
+ return (kmem_cache_alloc(zio_buf_cache[c], flags));
+}
+
void
zio_buf_free(void *buf, size_t size)
{
@@ -3475,6 +3489,7 @@ zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_phys_t *zb1,
EXPORT_SYMBOL(zio_type_name);
EXPORT_SYMBOL(zio_buf_alloc);
EXPORT_SYMBOL(zio_data_buf_alloc);
+EXPORT_SYMBOL(zio_buf_alloc_flags);
EXPORT_SYMBOL(zio_buf_free);
EXPORT_SYMBOL(zio_data_buf_free);