diff options
author | Alexander Motin <[email protected]> | 2023-07-21 14:50:48 -0400 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2023-07-21 16:35:12 -0700 |
commit | 8a6fde8213797df1acd24d4afb9ada0d005f6b1d (patch) | |
tree | f64ec91390090105d7662145d979823ae19ecfe8 /module | |
parent | b6f618f8ffda435f5df9e185c999245842add93d (diff) |
Add explicit prefetches to bpobj_iterate().
To simplify error handling bpobj_iterate_blkptrs() iterates through
the list of block pointers backwards. Unfortunately speculative
prefetcher is currently unable to detect such patterns, that makes
each block read there synchronous and very slow on HDD pools.
According to my tests, added explicit prefetch reduces time needed
to asynchronously delete 8 snapshots of 4 million blocks each from
20 seconds to less than one, that should free sync thread for other
useful work, such as async writes, scrub, etc.
While there, plug one memory leak in case of bpobj_open() error and
harmonize some variable names.
Reviewed-by: Allan Jude <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by: iXsystems, Inc.
Closes #15071
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/bpobj.c | 49 |
1 files changed, 37 insertions, 12 deletions
diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c index 211bab565..e772caead 100644 --- a/module/zfs/bpobj.c +++ b/module/zfs/bpobj.c @@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg, dmu_buf_t *dbuf = NULL; bpobj_t *bpo = bpi->bpi_bpo; - for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) { + int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; + uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) * + sizeof (blkptr_t); + uint64_t ps = start * sizeof (blkptr_t); + uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0, + ps); + if (pe > pb) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb, + ZIO_PRIORITY_ASYNC_READ); + } + for (; i >= start; i--) { uint64_t offset = i * sizeof (blkptr_t); uint64_t blkoff = P2PHASE(i, bpo->bpo_epb); @@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg, if (dbuf) dmu_buf_rele(dbuf, FTAG); err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, - offset, FTAG, &dbuf, 0); + offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH); if (err) break; + pe = pb; + pb = MAX((dbuf->db_offset > dmu_prefetch_max) ? + dbuf->db_offset - dmu_prefetch_max : 0, ps); + if (pe > pb) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, + pb, pe - pb, ZIO_PRIORITY_ASYNC_READ); + } } ASSERT3U(offset, >=, dbuf->db_offset); @@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg, int64_t i = bpi->bpi_unprocessed_subobjs - 1; uint64_t offset = i * sizeof (uint64_t); - uint64_t obj_from_sublist; + uint64_t subobj; err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, - offset, sizeof (uint64_t), &obj_from_sublist, - DMU_READ_PREFETCH); + offset, sizeof (uint64_t), &subobj, + DMU_READ_NO_PREFETCH); if (err) break; - bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t), - KM_SLEEP); - err = bpobj_open(sublist, bpo->bpo_os, - obj_from_sublist); - if (err) + bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t), + KM_SLEEP); + err = bpobj_open(subbpo, bpo->bpo_os, subobj); + if (err) { + kmem_free(subbpo, sizeof (bpobj_t)); break; + } + + if (subbpo->bpo_havesubobj && + subbpo->bpo_phys->bpo_subobjs != 0) { + dmu_prefetch(subbpo->bpo_os, + subbpo->bpo_phys->bpo_subobjs, 0, 0, 0, + ZIO_PRIORITY_ASYNC_READ); + } - list_insert_head(&stack, bpi_alloc(sublist, bpi, i)); - mutex_enter(&sublist->bpo_lock); + list_insert_head(&stack, bpi_alloc(subbpo, bpi, i)); + mutex_enter(&subbpo->bpo_lock); bpi->bpi_unprocessed_subobjs--; } } |