diff options
author | Mark Roper <[email protected]> | 2020-03-12 13:24:43 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2020-03-12 10:24:43 -0700 |
commit | 1e9231ada893d43bdbfa8ef3a11f1e423296981d (patch) | |
tree | e330194b9dd17d2afd8051afed9283a3970e1397 /module | |
parent | 61871518dd6703a59a676042a152aa54a30aea10 (diff) |
Prevent deadlock in arc_read in Linux memory reclaim callback
Using zfs with Lustre, an arc_read can trigger kernel memory allocation
that in turn leads to a memory reclaim callback and a deadlock within a
single zfs process. This change uses spl_fstrans_mark and
spl_fstrans_unmark to prevent the reclaim attempt and the deadlock
(https://zfsonlinux.topicbox.com/groups/zfs-devel/T4db2c705ec1804ba).
The stack trace observed is:
__schedule at ffffffff81610f2e
schedule at ffffffff81611558
schedule_preempt_disabled at ffffffff8161184a
__mutex_lock at ffffffff816131e8
arc_buf_destroy at ffffffffa0bf37d7 [zfs]
dbuf_destroy at ffffffffa0bfa6fe [zfs]
dbuf_evict_one at ffffffffa0bfaa96 [zfs]
dbuf_rele_and_unlock at ffffffffa0bfa561 [zfs]
dbuf_rele_and_unlock at ffffffffa0bfa32b [zfs]
osd_object_delete at ffffffffa0b64ecc [osd_zfs]
lu_object_free at ffffffffa06d6a74 [obdclass]
lu_site_purge_objects at ffffffffa06d7fc1 [obdclass]
lu_cache_shrink_scan at ffffffffa06d81b8 [obdclass]
shrink_slab at ffffffff811ca9d8
shrink_node at ffffffff811cfd94
do_try_to_free_pages at ffffffff811cfe63
try_to_free_pages at ffffffff811d01c4
__alloc_pages_slowpath at ffffffff811be7f2
__alloc_pages_nodemask at ffffffff811bf3ed
new_slab at ffffffff81226304
___slab_alloc at ffffffff812272ab
__slab_alloc at ffffffff8122740c
kmem_cache_alloc at ffffffff81227578
spl_kmem_cache_alloc at ffffffffa048a1fd [spl]
arc_buf_alloc_impl at ffffffffa0befba2 [zfs]
arc_read at ffffffffa0bf0924 [zfs]
dbuf_read at ffffffffa0bf9083 [zfs]
dmu_buf_hold_by_dnode at ffffffffa0c04869 [zfs]
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Mark Roper <[email protected]>
Closes #9987
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/arc.c | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index d49d85db0..6c9164f76 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5540,6 +5540,17 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ASSERT(!BP_IS_HOLE(bp)); ASSERT(!BP_IS_REDACTED(bp)); + /* + * Normally SPL_FSTRANS will already be set since kernel threads which + * expect to call the DMU interfaces will set it when created. System + * calls are similarly handled by setting/cleaning the bit in the + * registered callback (module/os/.../zfs/zpl_*). + * + * External consumers such as Lustre which call the exported DMU + * interfaces may not have set SPL_FSTRANS. To avoid a deadlock + * on the hash_lock always set and clear the bit. + */ + fstrans_cookie_t cookie = spl_fstrans_mark(); top: if (!embedded_bp) { /* @@ -6014,6 +6025,7 @@ out: /* embedded bps don't actually go to disk */ if (!embedded_bp) spa_read_history_add(spa, zb, *arc_flags); + spl_fstrans_unmark(cookie); return (rc); } |