diff options
author | Alexander Motin <[email protected]> | 2023-10-25 18:22:25 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2023-10-25 15:22:25 -0700 |
commit | 3afdc97d91c24192db51b67762126a8d99d433db (patch) | |
tree | 9fece2b39d8953618a0a6808b3d71bfcbe584999 /module | |
parent | 05c4710e8958832afc2868102c9535a4f18115be (diff) |
ZIO: Remove READY pipeline stage from root ZIOs
zio_root() has no arguments for a ready callback or a parent ZIO. Except
for one recent case in ZIL code, if a root ZIO ever has a parent, that
parent is also a root ZIO. This means we do not need the READY pipeline
stage for them: it takes some time to process, and even more time to wait
for the children and be woken by them, both for no good reason.
The most visible effect of this change is that it avoids one taskq
wakeup per ZIL block written; that wakeup was previously used to run
zio_ready() for lwb_root_zio and is now skipped.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by: iXsystems, Inc.
Closes #15398
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/zil.c | 4 | ||||
-rw-r--r-- | module/zfs/zio.c | 51 |
2 files changed, 44 insertions, 11 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 218031a8a..ce2cb8b14 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -2155,8 +2155,8 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx) ZIL_STAT_INCR(zilog, zil_itx_indirect_bytes, lrw->lr_length); if (lwb->lwb_child_zio == NULL) { - lwb->lwb_child_zio = zio_root( - zilog->zl_spa, NULL, NULL, + lwb->lwb_child_zio = zio_null(NULL, + zilog->zl_spa, NULL, NULL, NULL, ZIO_FLAG_CANFAIL); } } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 3b3b40fa7..3eb472a9f 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -634,6 +634,11 @@ zio_add_child(zio_t *pio, zio_t *cio) */ ASSERT3S(cio->io_child_type, <=, pio->io_child_type); + /* Parent should not have READY stage if child doesn't have it. */ + IMPLY((cio->io_pipeline & ZIO_STAGE_READY) == 0 && + (cio->io_child_type != ZIO_CHILD_VDEV), + (pio->io_pipeline & ZIO_STAGE_READY) == 0); + zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); zl->zl_parent = pio; zl->zl_child = cio; @@ -665,6 +670,11 @@ zio_add_child_first(zio_t *pio, zio_t *cio) */ ASSERT3S(cio->io_child_type, <=, pio->io_child_type); + /* Parent should not have READY stage if child doesn't have it. */ + IMPLY((cio->io_pipeline & ZIO_STAGE_READY) == 0 && + (cio->io_child_type != ZIO_CHILD_VDEV), + (pio->io_pipeline & ZIO_STAGE_READY) == 0); + zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); zl->zl_parent = pio; zl->zl_child = cio; @@ -901,7 +911,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_orig_pipeline = zio->io_pipeline = pipeline; zio->io_pipeline_trace = ZIO_STAGE_OPEN; - zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); + zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY) || + (pipeline & ZIO_STAGE_READY) == 0; zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); if (zb != NULL) @@ -932,6 +943,10 @@ zio_destroy(zio_t *zio) kmem_cache_free(zio_cache, zio); } +/* + * ZIO intended to be between others. 
Provides synchronization at READY + * and DONE pipeline stages and calls the respective callbacks. + */ zio_t * zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, void *private, zio_flag_t flags) @@ -945,10 +960,22 @@ zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, return (zio); } +/* + * ZIO intended to be a root of a tree. Unlike null ZIO does not have a + * READY pipeline stage (is ready on creation), so it should not be used + * as child of any ZIO that may need waiting for grandchildren READY stage + * (any other ZIO type). + */ zio_t * zio_root(spa_t *spa, zio_done_func_t *done, void *private, zio_flag_t flags) { - return (zio_null(NULL, spa, NULL, done, private, flags)); + zio_t *zio; + + zio = zio_create(NULL, spa, 0, NULL, NULL, 0, 0, done, private, + ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, NULL, 0, NULL, + ZIO_STAGE_OPEN, ZIO_ROOT_PIPELINE); + + return (zio); } static int @@ -2396,13 +2423,14 @@ static void zio_reexecute(void *arg) { zio_t *pio = arg; - zio_t *cio, *cio_next; + zio_t *cio, *cio_next, *gio; ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN); ASSERT(pio->io_gang_leader == NULL); ASSERT(pio->io_gang_tree == NULL); + mutex_enter(&pio->io_lock); pio->io_flags = pio->io_orig_flags; pio->io_stage = pio->io_orig_stage; pio->io_pipeline = pio->io_orig_pipeline; @@ -2410,8 +2438,16 @@ zio_reexecute(void *arg) pio->io_flags |= ZIO_FLAG_REEXECUTED; pio->io_pipeline_trace = 0; pio->io_error = 0; - for (int w = 0; w < ZIO_WAIT_TYPES; w++) - pio->io_state[w] = 0; + pio->io_state[ZIO_WAIT_READY] = (pio->io_stage >= ZIO_STAGE_READY) || + (pio->io_pipeline & ZIO_STAGE_READY) == 0; + pio->io_state[ZIO_WAIT_DONE] = (pio->io_stage >= ZIO_STAGE_DONE); + zio_link_t *zl = NULL; + while ((gio = zio_walk_parents(pio, &zl)) != NULL) { + for (int w = 0; w < ZIO_WAIT_TYPES; w++) { + gio->io_children[pio->io_child_type][w] += + !pio->io_state[w]; + } + } for (int c = 0; c < 
ZIO_CHILD_TYPES; c++) pio->io_child_error[c] = 0; @@ -2425,12 +2461,9 @@ zio_reexecute(void *arg) * the remainder of pio's io_child_list, from 'cio_next' onward, * cannot be affected by any side effects of reexecuting 'cio'. */ - zio_link_t *zl = NULL; - mutex_enter(&pio->io_lock); + zl = NULL; for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) { cio_next = zio_walk_children(pio, &zl); - for (int w = 0; w < ZIO_WAIT_TYPES; w++) - pio->io_children[cio->io_child_type][w]++; mutex_exit(&pio->io_lock); zio_reexecute(cio); mutex_enter(&pio->io_lock); |