aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/metaslab.c
diff options
context:
space:
mode:
author Tom Caputi <[email protected]> 2017-11-15 20:27:01 -0500
committer Brian Behlendorf <[email protected]> 2017-11-15 17:27:01 -0800
commit d4a72f23863382bdf6d0ae33196f5b5decbc48fd (patch)
tree 1084ea930b9a1ef46e58d1757943ab3ad66c22c4 /module/zfs/metaslab.c
parent e301113c17673a290098850830cf2e6d1a1fcbe3 (diff)
Sequential scrub and resilvers
Currently, scrubs and resilvers can take an extremely long time to complete. This is largely due to the fact that zfs scans process pools in logical order, as determined by each block's bookmark. This makes sense from a simplicity perspective, but blocks in zfs are often scattered randomly across disks, particularly due to zfs's copy-on-write mechanisms.

This patch improves performance by splitting scrubs and resilvers into a metadata scanning phase and an IO issuing phase. The metadata scan reads through the structure of the pool and gathers an in-memory queue of I/Os, sorted by size and offset on disk. The issuing phase will then issue the scrub I/Os as sequentially as possible, greatly improving performance.

This patch also updates and cleans up some of the scan code which has not been updated in several years.

Reviewed-by: Brian Behlendorf <[email protected]>
Authored-by: Saso Kiselkov <[email protected]>
Authored-by: Alek Pinchuk <[email protected]>
Authored-by: Tom Caputi <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #3625
Closes #6256
Diffstat (limited to 'module/zfs/metaslab.c')
-rw-r--r-- module/zfs/metaslab.c 82
1 files changed, 2 insertions, 80 deletions
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 5dc9ed60d..6320fd388 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -972,85 +972,6 @@ metaslab_rangesize_compare(const void *x1, const void *x2)
}
/*
- * Create any block allocator specific components. The current allocators
- * rely on using both a size-ordered range_tree_t and an array of uint64_t's.
- */
-static void
-metaslab_rt_create(range_tree_t *rt, void *arg)
-{
- metaslab_t *msp = arg;
-
- ASSERT3P(rt->rt_arg, ==, msp);
- ASSERT(msp->ms_tree == NULL);
-
- avl_create(&msp->ms_size_tree, metaslab_rangesize_compare,
- sizeof (range_seg_t), offsetof(range_seg_t, rs_pp_node));
-}
-
-/*
- * Destroy the block allocator specific components.
- */
-static void
-metaslab_rt_destroy(range_tree_t *rt, void *arg)
-{
- metaslab_t *msp = arg;
-
- ASSERT3P(rt->rt_arg, ==, msp);
- ASSERT3P(msp->ms_tree, ==, rt);
- ASSERT0(avl_numnodes(&msp->ms_size_tree));
-
- avl_destroy(&msp->ms_size_tree);
-}
-
-static void
-metaslab_rt_add(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- metaslab_t *msp = arg;
-
- ASSERT3P(rt->rt_arg, ==, msp);
- ASSERT3P(msp->ms_tree, ==, rt);
- VERIFY(!msp->ms_condensing);
- avl_add(&msp->ms_size_tree, rs);
-}
-
-static void
-metaslab_rt_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- metaslab_t *msp = arg;
-
- ASSERT3P(rt->rt_arg, ==, msp);
- ASSERT3P(msp->ms_tree, ==, rt);
- VERIFY(!msp->ms_condensing);
- avl_remove(&msp->ms_size_tree, rs);
-}
-
-static void
-metaslab_rt_vacate(range_tree_t *rt, void *arg)
-{
- metaslab_t *msp = arg;
-
- ASSERT3P(rt->rt_arg, ==, msp);
- ASSERT3P(msp->ms_tree, ==, rt);
-
- /*
- * Normally one would walk the tree freeing nodes along the way.
- * Since the nodes are shared with the range trees we can avoid
- * walking all nodes and just reinitialize the avl tree. The nodes
- * will be freed by the range tree, so we don't want to free them here.
- */
- avl_create(&msp->ms_size_tree, metaslab_rangesize_compare,
- sizeof (range_seg_t), offsetof(range_seg_t, rs_pp_node));
-}
-
-static range_tree_ops_t metaslab_rt_ops = {
- metaslab_rt_create,
- metaslab_rt_destroy,
- metaslab_rt_add,
- metaslab_rt_remove,
- metaslab_rt_vacate
-};
-
-/*
* ==========================================================================
* Common allocator routines
* ==========================================================================
@@ -1425,7 +1346,8 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
* addition of new space; and for debugging, it ensures that we'd
* data fault on any attempt to use this metaslab before it's ready.
*/
- ms->ms_tree = range_tree_create(&metaslab_rt_ops, ms, &ms->ms_lock);
+ ms->ms_tree = range_tree_create_impl(&rt_avl_ops, &ms->ms_size_tree,
+ metaslab_rangesize_compare, &ms->ms_lock, 0);
metaslab_group_add(mg, ms);
metaslab_set_fragmentation(ms);