aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Motin <[email protected]>2023-07-24 16:41:11 -0400
committerGitHub <[email protected]>2023-07-24 13:41:11 -0700
commit2cb992a99ccadb78d97049b40bd442eb4fdc549d (patch)
tree715a68e9f93a731e2e0fc3a424662dc467c34742
parentfb344f5aeb7088238f9bd3c2a8507132b702cfed (diff)
ZIL: Fix config lock deadlock.
When we have some LWBs closed and their ZIOs ready to be issued, we cannot afford to sleep on the config lock if somebody else tries to lock it as writer, or it will cause a deadlock. To solve this, move spa_config_enter() from zil_lwb_write_issue() to zil_lwb_write_close() under zl_issuer_lock to enforce lock ordering with other threads. Now if we can't immediately take the config lock, issue all previously closed LWBs so that they can drop their config locks after completion, and only then allow sleeping on our lock. Reviewed-by: Mark Maybee <[email protected]> Reviewed-by: Prakash Surya <[email protected]> Reviewed-by: George Wilson <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Closes #15078 Closes #15080
-rw-r--r--module/zfs/zil.c34
1 files changed, 27 insertions, 7 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 00d66a248..af7137faa 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
+static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
static itx_t *zil_itx_clone(itx_t *oitx);
static int
@@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
* Has to be called under zl_issuer_lock to chain more lwbs.
*/
static lwb_t *
-zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
+zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
{
lwb_t *nlwb = NULL;
zil_chain_t *zilc;
@@ -1871,6 +1872,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
dmu_tx_commit(tx);
/*
+ * We need to acquire the config lock for the lwb to issue it later.
+ * However, if we have a queue of closed parent lwbs already
+ * holding the config lock (but not yet issued), we can't block here
+ * waiting on the lock or we will deadlock. In that case we must
+ * first issue the parent IOs before waiting on the lock.
+ */
+ if (ilwbs && !list_is_empty(ilwbs)) {
+ if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
+ lwb_t *tlwb;
+ while ((tlwb = list_remove_head(ilwbs)) != NULL)
+ zil_lwb_write_issue(zilog, tlwb);
+ spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
+ }
+ } else {
+ spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
+ }
+
+ if (ilwbs)
+ list_insert_tail(ilwbs, lwb);
+
+ /*
* If there was an allocation failure then nlwb will be null which
* forces a txg_wait_synced().
*/
@@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
BP_GET_LSIZE(&lwb->lwb_blk));
}
- spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
+ ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
zil_lwb_add_block(lwb, &lwb->lwb_blk);
lwb->lwb_issued_timestamp = gethrtime();
zio_nowait(lwb->lwb_root_zio);
@@ -2037,8 +2059,7 @@ cont:
lwb_sp < zil_max_waste_space(zilog) &&
(dlen % max_log_data == 0 ||
lwb_sp < reclen + dlen % max_log_data))) {
- list_insert_tail(ilwbs, lwb);
- lwb = zil_lwb_write_close(zilog, lwb);
+ lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
if (lwb == NULL)
return (NULL);
zil_lwb_write_open(zilog, lwb);
@@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
zfs_commit_timeout_pct / 100;
if (sleep < zil_min_commit_timeout ||
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
- list_insert_tail(ilwbs, lwb);
- lwb = zil_lwb_write_close(zilog, lwb);
+ lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
zilog->zl_cur_used = 0;
if (lwb == NULL) {
while ((lwb = list_remove_head(ilwbs))
@@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* since we've reached the commit waiter's timeout and it still
* hasn't been issued.
*/
- lwb_t *nlwb = zil_lwb_write_close(zilog, lwb);
+ lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);