-rw-r--r--  include/sys/bpobj.h       |  1
-rw-r--r--  module/zfs/bpobj.c        | 65
-rw-r--r--  module/zfs/dsl_deadlist.c | 71
3 files changed, 131 insertions(+), 6 deletions(-)
diff --git a/include/sys/bpobj.h b/include/sys/bpobj.h
index 84f0ee76c..f3384f526 100644
--- a/include/sys/bpobj.h
+++ b/include/sys/bpobj.h
@@ -87,6 +87,7 @@ int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func,
void *arg, int64_t start);
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
+void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx);
diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c
index f7fded565..fa99f5141 100644
--- a/module/zfs/bpobj.c
+++ b/module/zfs/bpobj.c
@@ -663,14 +663,13 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
}
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
- VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
-
if (bpobj_is_empty(&subbpo)) {
/* No point in having an empty subobj. */
bpobj_close(&subbpo);
bpobj_free(bpo->bpo_os, subobj, tx);
return;
}
+ VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
mutex_enter(&bpo->bpo_lock);
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
@@ -780,6 +779,68 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
}
+/*
+ * Prefetch metadata required for bpobj_enqueue_subobj().
+ */
+void
+bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj)
+{
+ dmu_object_info_t doi;
+ bpobj_t subbpo;
+ uint64_t subsubobjs;
+ boolean_t copy_subsub = B_TRUE;
+ boolean_t copy_bps = B_TRUE;
+
+ ASSERT(bpobj_is_open(bpo));
+ ASSERT(subobj != 0);
+
+ if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj)
+ return;
+
+ if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0)
+ return;
+ if (bpobj_is_empty(&subbpo)) {
+ bpobj_close(&subbpo);
+ return;
+ }
+ subsubobjs = subbpo.bpo_phys->bpo_subobjs;
+ bpobj_close(&subbpo);
+
+ if (subsubobjs != 0) {
+ if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0)
+ return;
+ if (doi.doi_max_offset > doi.doi_data_block_size)
+ copy_subsub = B_FALSE;
+ }
+
+ if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0)
+ return;
+ if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub)
+ copy_bps = B_FALSE;
+
+ if (copy_subsub && subsubobjs != 0) {
+ if (bpo->bpo_phys->bpo_subobjs) {
+ dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+ bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+ dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+
+ if (copy_bps) {
+ dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+ bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ } else if (bpo->bpo_phys->bpo_subobjs) {
+ dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+ bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+}
+
void
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
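
Note: the new bpobj_prefetch_subobj() is a read-only companion to bpobj_enqueue_subobj(). It mirrors the same decisions (skip empty sub-objects, check whether the sub-subobj list and the block pointers fit in a single block and so will be copied) and issues dmu_prefetch() for whatever bpobj_enqueue_subobj() will later read synchronously, so those reads mostly hit the ARC. A minimal caller-side sketch of the intended pairing follows; enqueue_pair() and its arguments are hypothetical, only the two bpobj functions come from this change.

    /* Hypothetical illustration, not part of the patch. */
    #include <sys/bpobj.h>

    static void
    enqueue_pair(bpobj_t *bpo, uint64_t cur, uint64_t next, dmu_tx_t *tx)
    {
        /* Start asynchronous metadata reads for the next sub-object... */
        bpobj_prefetch_subobj(bpo, next);
        /* ...while the current one is merged synchronously. */
        bpobj_enqueue_subobj(bpo, cur, tx);
    }
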
diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c
index 2b33446e6..d58820701 100644
--- a/module/zfs/dsl_deadlist.c
+++ b/module/zfs/dsl_deadlist.c
@@ -438,6 +438,18 @@ dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
}
}
+/*
+ * Prefetch metadata required for dle_enqueue_subobj().
+ */
+static void
+dle_prefetch_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
+ uint64_t obj)
+{
+ if (dle->dle_bpobj.bpo_object !=
+ dmu_objset_pool(dl->dl_os)->dp_empty_bpobj)
+ bpobj_prefetch_subobj(&dle->dle_bpobj, obj);
+}
+
void
dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
@@ -810,6 +822,27 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
dle_enqueue_subobj(dl, dle, obj, tx);
}
+/*
+ * Prefetch metadata required for dsl_deadlist_insert_bpobj().
+ */
+static void
+dsl_deadlist_prefetch_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth)
+{
+ dsl_deadlist_entry_t dle_tofind;
+ dsl_deadlist_entry_t *dle;
+ avl_index_t where;
+
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
+
+ dsl_deadlist_load_tree(dl);
+
+ dle_tofind.dle_mintxg = birth;
+ dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
+ if (dle == NULL)
+ dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
+ dle_prefetch_subobj(dl, dle, obj);
+}
+
static int
dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
@@ -826,12 +859,12 @@ dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
void
dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
{
- zap_cursor_t zc;
- zap_attribute_t za;
+ zap_cursor_t zc, pzc;
+ zap_attribute_t za, pza;
dmu_buf_t *bonus;
dsl_deadlist_phys_t *dlp;
dmu_object_info_t doi;
- int error;
+ int error, perror, i;
VERIFY0(dmu_object_info(dl->dl_os, obj, &doi));
if (doi.doi_type == DMU_OT_BPOBJ) {
@@ -843,15 +876,32 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
}
mutex_enter(&dl->dl_lock);
+ /*
+ * Prefetch up to 128 deadlists first and then more as we progress.
+ * The limit is a balance between ARC use and diminishing returns.
+ */
+ for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0;
+ (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128;
+ zap_cursor_advance(&pzc), i++) {
+ dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+ zfs_strtonum(pza.za_name, NULL));
+ }
for (zap_cursor_init(&zc, dl->dl_os, obj);
(error = zap_cursor_retrieve(&zc, &za)) == 0;
zap_cursor_advance(&zc)) {
uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
+ if (perror == 0) {
+ dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+ zfs_strtonum(pza.za_name, NULL));
+ zap_cursor_advance(&pzc);
+ perror = zap_cursor_retrieve(&pzc, &pza);
+ }
}
VERIFY3U(error, ==, ENOENT);
zap_cursor_fini(&zc);
+ zap_cursor_fini(&pzc);
VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
dlp = bonus->db_data;
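
Note: the merge loop above keeps a second ZAP cursor (pzc/pza) running up to 128 entries ahead of the working cursor (zc/za); the first loop fills the window and each processed entry then tops it up by one, with perror tracking whether the prefetch cursor is still valid. The same sliding-window idiom, reduced to a self-contained userspace sketch (items[], prefetch() and process() are hypothetical stand-ins for the ZAP cursor and the bpobj work), looks like this:

    #include <stddef.h>
    #include <stdio.h>

    #define PREFETCH_AHEAD 128  /* same window as the deadlist code above */

    static void prefetch(int item) { (void) item; /* would start an async read */ }
    static void process(int item) { printf("process %d\n", item); }

    static void
    merge_items(const int *items, size_t n)
    {
        size_t i, pi;

        /* Fill the initial window of in-flight prefetches. */
        for (pi = 0; pi < n && pi < PREFETCH_AHEAD; pi++)
            prefetch(items[pi]);

        for (i = 0; i < n; i++) {
            process(items[i]);
            /* Keep the window full: one new prefetch per item consumed. */
            if (pi < n)
                prefetch(items[pi++]);
        }
    }

    int
    main(void)
    {
        int items[] = { 1, 2, 3, 4, 5 };
        merge_items(items, sizeof (items) / sizeof (items[0]));
        return (0);
    }

The fixed 128-entry window reflects the comment in the patch: large enough to hide read latency, small enough not to flood the ARC with prefetched deadlist blocks.
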
@@ -869,8 +919,9 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dmu_tx_t *tx)
{
dsl_deadlist_entry_t dle_tofind;
- dsl_deadlist_entry_t *dle;
+ dsl_deadlist_entry_t *dle, *pdle;
avl_index_t where;
+ int i;
ASSERT(!dl->dl_oldfmt);
@@ -882,11 +933,23 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
+ /*
+ * Prefetch up to 128 deadlists first and then more as we progress.
+ * The limit is a balance between ARC use and diminishing returns.
+ */
+ for (pdle = dle, i = 0; pdle && i < 128; i++) {
+ bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+ pdle = AVL_NEXT(&dl->dl_tree, pdle);
+ }
while (dle) {
uint64_t used, comp, uncomp;
dsl_deadlist_entry_t *dle_next;
bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
+ if (pdle) {
+ bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+ pdle = AVL_NEXT(&dl->dl_tree, pdle);
+ }
VERIFY0(bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));