aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
author: Richard Yao <[email protected]> 2023-03-07 19:12:28 -0500
committer: GitHub <[email protected]> 2023-03-07 16:12:28 -0800
commit: 7d638df09be7482935bcf6ec8e4ea2ac8a8be1a8 (patch)
tree: aac1fce6f5637b831f21bf9636f56b2118c167ee /module/zfs
parent: 1f196e31079295320359bb04f3ee16a54563a330 (diff)
Do not hold spa_config in ZIL while blocked on IO
Otherwise, we can get a deadlock that looks like this:

1. fsync() grabs spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER) as part of zil_lwb_write_issue(). It then blocks on the txg_sync when a flush fails from a drive power cycling.

2. The txg_sync then blocks on the pool suspending due to the loss of too many disks.

3. zpool clear then blocks on spa_config_enter(spa, SCL_STATE | SCL_L2ARC | SCL_ZIO, spa, RW_WRITER) because it is a writer.

The disks cannot be brought online due to fsync() holding that lock and the user gets upset since fsync() is uninterruptibly blocked inside the kernel.

We need to grab the lock for vdev_lookup_top(), but we do not need to hold it while there is outstanding IO.

This fixes a regression introduced by 1ce23dcaff6c3d777cb0d9a4a2cf02b43f777d78.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Sponsored-By: Wasabi Technology, Inc.
Closes #14519
Diffstat (limited to 'module/zfs')
-rw-r--r-- module/zfs/zil.c 8
1 files changed, 2 insertions, 6 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index ca578b311..fcf4e7357 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -1287,8 +1287,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
itx_t *itx;
uint64_t txg;
- spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
-
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
mutex_enter(&zilog->zl_lock);
@@ -1427,8 +1425,6 @@ zil_lwb_write_done(zio_t *zio)
zil_vdev_node_t *zv;
lwb_t *nlwb;
- ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0);
-
ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG);
ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
@@ -1490,6 +1486,7 @@ zil_lwb_write_done(zio_t *zio)
return;
}
+ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
if (vd != NULL) {
@@ -1505,6 +1502,7 @@ zil_lwb_write_done(zio_t *zio)
}
kmem_free(zv, sizeof (*zv));
}
+ spa_config_exit(spa, SCL_STATE, FTAG);
}
static void
@@ -1783,8 +1781,6 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
*/
memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
- spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
-
zil_lwb_add_block(lwb, &lwb->lwb_blk);
lwb->lwb_issued_timestamp = gethrtime();
lwb->lwb_state = LWB_STATE_ISSUED;