diff options
author | Alexander Motin <[email protected]> | 2023-07-24 16:41:11 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2023-07-24 13:41:11 -0700 |
commit | 2cb992a99ccadb78d97049b40bd442eb4fdc549d (patch) | |
tree | 715a68e9f93a731e2e0fc3a424662dc467c34742 | |
parent | fb344f5aeb7088238f9bd3c2a8507132b702cfed (diff) |
ZIL: Fix config lock deadlock.
When we have some LWBs closed and their ZIOs ready to be issued, we
can not afford sleeping on config lock if somebody else try to lock
it as writer, or it will cause a deadlock.
To solve it, move spa_config_enter() from zil_lwb_write_issue() to
zil_lwb_write_close() under zl_issuer_lock to enforce lock ordering
with other threads. Now if we can't immediately lock config, issue
all previously closed LWBs so that they could drop their config
locks after completion, and only then allow sleeping on our lock.
Reviewed-by: Mark Maybee <[email protected]>
Reviewed-by: Prakash Surya <[email protected]>
Reviewed-by: George Wilson <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by: iXsystems, Inc.
Closes #15078
Closes #15080
-rw-r--r-- | module/zfs/zil.c | 34 |
1 files changed, 27 insertions, 7 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 00d66a248..af7137faa 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache; static kmem_cache_t *zil_zcw_cache; static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx); +static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb); static itx_t *zil_itx_clone(itx_t *oitx); static int @@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE; * Has to be called under zl_issuer_lock to chain more lwbs. */ static lwb_t * -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs) { lwb_t *nlwb = NULL; zil_chain_t *zilc; @@ -1871,6 +1872,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) dmu_tx_commit(tx); /* + * We need to acquire the config lock for the lwb to issue it later. + * However, if we already have a queue of closed parent lwbs already + * holding the config lock (but not yet issued), we can't block here + * waiting on the lock or we will deadlock. In that case we must + * first issue to parent IOs before waiting on the lock. + */ + if (ilwbs && !list_is_empty(ilwbs)) { + if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) { + lwb_t *tlwb; + while ((tlwb = list_remove_head(ilwbs)) != NULL) + zil_lwb_write_issue(zilog, tlwb); + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); + } + } else { + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); + } + + if (ilwbs) + list_insert_tail(ilwbs, lwb); + + /* * If there was an allocation failure then nlwb will be null which * forces a txg_wait_synced(). */ @@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc, BP_GET_LSIZE(&lwb->lwb_blk)); } - spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER); + ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER)); zil_lwb_add_block(lwb, &lwb->lwb_blk); lwb->lwb_issued_timestamp = gethrtime(); zio_nowait(lwb->lwb_root_zio); @@ -2037,8 +2059,7 @@ cont: lwb_sp < zil_max_waste_space(zilog) && (dlen % max_log_data == 0 || lwb_sp < reclen + dlen % max_log_data))) { - list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb); + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); if (lwb == NULL) return (NULL); zil_lwb_write_open(zilog, lwb); @@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) zfs_commit_timeout_pct / 100; if (sleep < zil_min_commit_timeout || lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) { - list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb); + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); zilog->zl_cur_used = 0; if (lwb == NULL) { while ((lwb = list_remove_head(ilwbs)) @@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw) * since we've reached the commit waiter's timeout and it still * hasn't been issued. */ - lwb_t *nlwb = zil_lwb_write_close(zilog, lwb); + lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED); |