author		Alexander Motin <[email protected]>	2023-07-21 14:50:48 -0400
committer	Brian Behlendorf <[email protected]>	2023-07-21 16:35:12 -0700
commit		8a6fde8213797df1acd24d4afb9ada0d005f6b1d (patch)
tree		f64ec91390090105d7662145d979823ae19ecfe8 /module
parent		b6f618f8ffda435f5df9e185c999245842add93d (diff)
Add explicit prefetches to bpobj_iterate().
To simplify error handling, bpobj_iterate_blkptrs() iterates through the list
of block pointers backwards. Unfortunately, the speculative prefetcher is
currently unable to detect such patterns, which makes each block read there
synchronous and very slow on HDD pools.

According to my tests, the added explicit prefetch reduces the time needed to
asynchronously delete 8 snapshots of 4 million blocks each from 20 seconds to
less than one, which should free the sync thread for other useful work, such
as async writes, scrub, etc.

While there, plug one memory leak in case of bpobj_open() error and harmonize
some variable names.

Reviewed-by: Allan Jude <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by: iXsystems, Inc.
Closes #15071
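For illustration, here is a minimal standalone sketch of the sliding-window
prefetch pattern the patch introduces. RECORD_SIZE, BLOCK_SIZE, PREFETCH_MAX
and prefetch() are hypothetical stand-ins for sizeof (blkptr_t), the dbuf
size, dmu_prefetch_max and dmu_prefetch(); the window arithmetic mirrors
bpobj_iterate_blkptrs(), but this is a userland model, not ZFS code.

/* Sketch only: models the backward-iteration prefetch window in userland. */
#include <stdio.h>
#include <stdint.h>

#define	RECORD_SIZE	128	/* stands in for sizeof (blkptr_t) */
#define	BLOCK_SIZE	4096	/* stands in for the dbuf (block) size */
#define	PREFETCH_MAX	16384	/* stands in for dmu_prefetch_max */

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

/* Stand-in for dmu_prefetch(): just report the byte range requested. */
static void
prefetch(uint64_t off, uint64_t len)
{
	printf("prefetch [%llu, %llu)\n", (unsigned long long)off,
	    (unsigned long long)(off + len));
}

static void
iterate_backwards(uint64_t nrecs, uint64_t start)
{
	if (nrecs == 0)
		return;

	int64_t i = (int64_t)nrecs - 1;
	uint64_t held = UINT64_MAX;	/* start of currently "held" block */
	/*
	 * Window end: the start of the block holding the last record.  The
	 * first "hold" reads that block itself, so only bytes before it are
	 * worth prefetching (the P2ALIGN_TYPED() in the patch).
	 */
	uint64_t pe = (uint64_t)i * RECORD_SIZE / BLOCK_SIZE * BLOCK_SIZE;
	uint64_t ps = start * RECORD_SIZE;	/* never prefetch below this */
	uint64_t pb = MAX(pe > PREFETCH_MAX ? pe - PREFETCH_MAX : 0, ps);

	if (pe > pb)
		prefetch(pb, pe - pb);

	for (; i >= (int64_t)start; i--) {
		uint64_t offset = (uint64_t)i * RECORD_SIZE;

		if (offset < held) {
			/* Crossing into a new block, as dmu_buf_hold() would. */
			held = offset / BLOCK_SIZE * BLOCK_SIZE;
			/*
			 * Slide the window down so reads stay up to
			 * PREFETCH_MAX bytes ahead of the consumer.
			 */
			pe = pb;
			pb = MAX(held > PREFETCH_MAX ?
			    held - PREFETCH_MAX : 0, ps);
			if (pe > pb)
				prefetch(pb, pe - pb);
		}
		/* ... process the record at 'offset' here ... */
	}
}

int
main(void)
{
	iterate_backwards(256, 0);	/* 8 blocks of 32 records each */
	return (0);
}

With these toy sizes, the run issues one initial prefetch covering the four
blocks behind the last one, then one additional block per block consumed,
keeping reads up to PREFETCH_MAX bytes ahead of the backward scan.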
Diffstat (limited to 'module')
-rw-r--r--	module/zfs/bpobj.c	49
1 file changed, 37 insertions(+), 12 deletions(-)
diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c
index 211bab565..e772caead 100644
--- a/module/zfs/bpobj.c
+++ b/module/zfs/bpobj.c
@@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
 	dmu_buf_t *dbuf = NULL;
 	bpobj_t *bpo = bpi->bpi_bpo;
 
-	for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) {
+	int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1;
+	uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) *
+	    sizeof (blkptr_t);
+	uint64_t ps = start * sizeof (blkptr_t);
+	uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0,
+	    ps);
+	if (pe > pb) {
+		dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb,
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+	for (; i >= start; i--) {
 		uint64_t offset = i * sizeof (blkptr_t);
 		uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);
 
@@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
 			if (dbuf)
 				dmu_buf_rele(dbuf, FTAG);
 			err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
-			    offset, FTAG, &dbuf, 0);
+			    offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH);
 			if (err)
 				break;
+			pe = pb;
+			pb = MAX((dbuf->db_offset > dmu_prefetch_max) ?
+			    dbuf->db_offset - dmu_prefetch_max : 0, ps);
+			if (pe > pb) {
+				dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+				    pb, pe - pb, ZIO_PRIORITY_ASYNC_READ);
+			}
 		}
 
 		ASSERT3U(offset, >=, dbuf->db_offset);
@@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
 			int64_t i = bpi->bpi_unprocessed_subobjs - 1;
 			uint64_t offset = i * sizeof (uint64_t);
 
-			uint64_t obj_from_sublist;
+			uint64_t subobj;
 			err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
-			    offset, sizeof (uint64_t), &obj_from_sublist,
-			    DMU_READ_PREFETCH);
+			    offset, sizeof (uint64_t), &subobj,
+			    DMU_READ_NO_PREFETCH);
 			if (err)
 				break;
-			bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t),
-			    KM_SLEEP);
 
-			err = bpobj_open(sublist, bpo->bpo_os,
-			    obj_from_sublist);
-			if (err)
+			bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t),
+			    KM_SLEEP);
+			err = bpobj_open(subbpo, bpo->bpo_os, subobj);
+			if (err) {
+				kmem_free(subbpo, sizeof (bpobj_t));
 				break;
+			}
+
+			if (subbpo->bpo_havesubobj &&
+			    subbpo->bpo_phys->bpo_subobjs != 0) {
+				dmu_prefetch(subbpo->bpo_os,
+				    subbpo->bpo_phys->bpo_subobjs, 0, 0, 0,
+				    ZIO_PRIORITY_ASYNC_READ);
+			}
 
-			list_insert_head(&stack, bpi_alloc(sublist, bpi, i));
-			mutex_enter(&sublist->bpo_lock);
+			list_insert_head(&stack, bpi_alloc(subbpo, bpi, i));
+			mutex_enter(&subbpo->bpo_lock);
 			bpi->bpi_unprocessed_subobjs--;
 		}
 	}