path: root/module/zfs/range_tree.c
Diffstat (limited to 'module/zfs/range_tree.c')
-rw-r--r--  module/zfs/range_tree.c  564
1 file changed, 348 insertions(+), 216 deletions(-)
diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c
index 0e1297214..0b369a438 100644
--- a/module/zfs/range_tree.c
+++ b/module/zfs/range_tree.c
@@ -74,42 +74,38 @@
* support removing complete segments.
*/
-kmem_cache_t *range_seg_cache;
-
-/* Generic ops for managing an AVL tree alongside a range tree */
-struct range_tree_ops rt_avl_ops = {
- .rtop_create = rt_avl_create,
- .rtop_destroy = rt_avl_destroy,
- .rtop_add = rt_avl_add,
- .rtop_remove = rt_avl_remove,
- .rtop_vacate = rt_avl_vacate,
-};
-
-void
-range_tree_init(void)
-{
- ASSERT(range_seg_cache == NULL);
- range_seg_cache = kmem_cache_create("range_seg_cache",
- sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
-}
-
-void
-range_tree_fini(void)
-{
- kmem_cache_destroy(range_seg_cache);
- range_seg_cache = NULL;
+static inline void
+rs_copy(range_seg_t *src, range_seg_t *dest, range_tree_t *rt)
+{
+ ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
+ size_t size = 0;
+ switch (rt->rt_type) {
+ case RANGE_SEG32:
+ size = sizeof (range_seg32_t);
+ break;
+ case RANGE_SEG64:
+ size = sizeof (range_seg64_t);
+ break;
+ case RANGE_SEG_GAP:
+ size = sizeof (range_seg_gap_t);
+ break;
+ default:
+ VERIFY(0);
+ }
+ bcopy(src, dest, size);
}
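
The new rs_copy() dispatches on the tree's segment type because the btree now stores fixed-width records of three different sizes. Below is a minimal standalone sketch of the same idea, using simplified, hypothetical struct names rather than the real range_seg32_t/range_seg64_t definitions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef enum { SEG32, SEG64 } seg_type_t;

typedef struct { uint32_t start, end; } seg32_t;	/* compact, shifted offsets */
typedef struct { uint64_t start, end; } seg64_t;	/* full 64-bit offsets */
typedef union { seg32_t s32; seg64_t s64; } seg_max_t;	/* large enough for any type */

static void
seg_copy(const void *src, void *dst, seg_type_t type)
{
	/* Copy only the bytes a record of this width actually uses. */
	size_t size = (type == SEG32) ? sizeof (seg32_t) : sizeof (seg64_t);
	memcpy(dst, src, size);
}

int
main(void)
{
	seg64_t a = { .start = 4096, .end = 8192 };
	seg_max_t tmp;

	seg_copy(&a, &tmp, SEG64);
	printf("copied [%llu, %llu)\n",
	    (unsigned long long)tmp.s64.start,
	    (unsigned long long)tmp.s64.end);
	return (0);
}
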
void
range_tree_stat_verify(range_tree_t *rt)
{
range_seg_t *rs;
+ zfs_btree_index_t where;
uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
int i;
- for (rs = avl_first(&rt->rt_root); rs != NULL;
- rs = AVL_NEXT(&rt->rt_root, rs)) {
- uint64_t size = rs->rs_end - rs->rs_start;
+ for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL;
+ rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
+ uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
int idx = highbit64(size) - 1;
hist[idx]++;
@@ -128,7 +124,7 @@ range_tree_stat_verify(range_tree_t *rt)
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
- uint64_t size = rs->rs_end - rs->rs_start;
+ uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
int idx = highbit64(size) - 1;
ASSERT(size != 0);
@@ -142,7 +138,7 @@ range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
- uint64_t size = rs->rs_end - rs->rs_start;
+ uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
int idx = highbit64(size) - 1;
ASSERT(size != 0);
@@ -153,14 +149,35 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
rt->rt_histogram[idx]--;
}
-/*
- * NOTE: caller is responsible for all locking.
- */
static int
-range_tree_seg_compare(const void *x1, const void *x2)
+range_tree_seg32_compare(const void *x1, const void *x2)
+{
+ const range_seg32_t *r1 = x1;
+ const range_seg32_t *r2 = x2;
+
+ ASSERT3U(r1->rs_start, <=, r1->rs_end);
+ ASSERT3U(r2->rs_start, <=, r2->rs_end);
+
+ return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
+}
+
+static int
+range_tree_seg64_compare(const void *x1, const void *x2)
{
- const range_seg_t *r1 = (const range_seg_t *)x1;
- const range_seg_t *r2 = (const range_seg_t *)x2;
+ const range_seg64_t *r1 = x1;
+ const range_seg64_t *r2 = x2;
+
+ ASSERT3U(r1->rs_start, <=, r1->rs_end);
+ ASSERT3U(r2->rs_start, <=, r2->rs_end);
+
+ return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
+}
+
+static int
+range_tree_seg_gap_compare(const void *x1, const void *x2)
+{
+ const range_seg_gap_t *r1 = x1;
+ const range_seg_gap_t *r2 = x2;
ASSERT3U(r1->rs_start, <=, r1->rs_end);
ASSERT3U(r2->rs_start, <=, r2->rs_end);
@@ -169,18 +186,42 @@ range_tree_seg_compare(const void *x1, const void *x2)
}
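
All three comparators use the same overlap trick: the subtraction of two boolean tests yields -1 when the first segment lies wholly before the second, 1 when it lies wholly after, and 0 whenever the two overlap, so a btree lookup with any overlapping search key lands on the stored segment it intersects. A small self-contained illustration (hypothetical seg_t, not the ZFS types):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t start, end; } seg_t;	/* half-open [start, end) */

static int
seg_compare(const seg_t *r1, const seg_t *r2)
{
	/* (r1 entirely after r2) - (r1 entirely before r2): -1, 0, or 1 */
	return ((r1->start >= r2->end) - (r1->end <= r2->start));
}

int
main(void)
{
	seg_t stored = { 100, 200 };
	seg_t before = { 10, 50 }, overlap = { 150, 160 }, after = { 300, 400 };

	printf("%d %d %d\n",
	    seg_compare(&before, &stored),	/* -1: entirely to the left */
	    seg_compare(&overlap, &stored),	/*  0: overlaps, treated as equal */
	    seg_compare(&after, &stored));	/*  1: entirely to the right */
	return (0);
}
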
range_tree_t *
-range_tree_create_impl(range_tree_ops_t *ops, void *arg,
- int (*avl_compare) (const void *, const void *), uint64_t gap)
+range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
+ uint64_t start, uint64_t shift,
+ int (*zfs_btree_compare) (const void *, const void *),
+ uint64_t gap)
{
range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
- avl_create(&rt->rt_root, range_tree_seg_compare,
- sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
+ ASSERT3U(shift, <, 64);
+ ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
+ size_t size;
+ int (*compare) (const void *, const void *);
+ switch (type) {
+ case RANGE_SEG32:
+ size = sizeof (range_seg32_t);
+ compare = range_tree_seg32_compare;
+ break;
+ case RANGE_SEG64:
+ size = sizeof (range_seg64_t);
+ compare = range_tree_seg64_compare;
+ break;
+ case RANGE_SEG_GAP:
+ size = sizeof (range_seg_gap_t);
+ compare = range_tree_seg_gap_compare;
+ break;
+ default:
+ panic("Invalid range seg type %d", type);
+ }
+ zfs_btree_create(&rt->rt_root, compare, size);
rt->rt_ops = ops;
rt->rt_gap = gap;
rt->rt_arg = arg;
- rt->rt_avl_compare = avl_compare;
+ rt->rt_type = type;
+ rt->rt_start = start;
+ rt->rt_shift = shift;
+ rt->rt_btree_compare = zfs_btree_compare;
if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
rt->rt_ops->rtop_create(rt, rt->rt_arg);
@@ -189,9 +230,10 @@ range_tree_create_impl(range_tree_ops_t *ops, void *arg,
}
range_tree_t *
-range_tree_create(range_tree_ops_t *ops, void *arg)
+range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
+ void *arg, uint64_t start, uint64_t shift)
{
- return (range_tree_create_impl(ops, arg, NULL, 0));
+ return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
}
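
The new start/shift parameters are what make the compact 32-bit segment type workable: offsets are stored relative to rt_start and scaled down by rt_shift, then widened back on read. A hedged, standalone sketch of that round trip (the helper names here are illustrative, not the actual rs_get/rs_set accessors):

#include <stdint.h>
#include <assert.h>
#include <stdio.h>

static uint32_t
to_raw(uint64_t offset, uint64_t start, uint64_t shift)
{
	/* store the offset relative to "start", in units of 1 << shift */
	return ((uint32_t)((offset - start) >> shift));
}

static uint64_t
from_raw(uint32_t raw, uint64_t start, uint64_t shift)
{
	/* widen the compact value back into an absolute byte offset */
	return (((uint64_t)raw << shift) + start);
}

int
main(void)
{
	uint64_t start = 1ULL << 40;	/* hypothetical region base */
	uint64_t shift = 9;		/* hypothetical 512-byte units */
	uint64_t offset = start + (123456ULL << shift);

	uint32_t raw = to_raw(offset, start, shift);
	assert(from_raw(raw, start, shift) == offset);
	printf("offset %llu stored as 32-bit raw value %u\n",
	    (unsigned long long)offset, raw);
	return (0);
}
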
void
@@ -202,19 +244,20 @@ range_tree_destroy(range_tree_t *rt)
if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
- avl_destroy(&rt->rt_root);
+ zfs_btree_destroy(&rt->rt_root);
kmem_free(rt, sizeof (*rt));
}
void
range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta)
{
- ASSERT3U(rs->rs_fill + delta, !=, 0);
- ASSERT3U(rs->rs_fill + delta, <=, rs->rs_end - rs->rs_start);
+ ASSERT3U(rs_get_fill(rs, rt) + delta, !=, 0);
+ ASSERT3U(rs_get_fill(rs, rt) + delta, <=, rs_get_end(rs, rt) -
+ rs_get_start(rs, rt));
if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
- rs->rs_fill += delta;
+ rs_set_fill(rs, rt, rs_get_fill(rs, rt) + delta);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
}
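
For gap-supporting trees, rs_fill records how many bytes of a segment are actually in use, and an adjustment must keep it nonzero and within the segment's size, which is what the two asserts enforce. A tiny standalone sketch of that bookkeeping (plain variables, not the real accessors):

#include <stdint.h>
#include <assert.h>
#include <stdio.h>

int
main(void)
{
	uint64_t seg_start = 0, seg_end = 128;	/* 128-byte segment */
	uint64_t fill = 100;			/* bytes currently in use */
	int64_t delta = -25;			/* release 25 of them */

	int64_t new_fill = (int64_t)fill + delta;
	assert(new_fill != 0);			/* a fully drained segment is removed instead */
	assert(new_fill <= (int64_t)(seg_end - seg_start));
	fill = (uint64_t)new_fill;

	printf("fill is now %llu of %llu\n",
	    (unsigned long long)fill,
	    (unsigned long long)(seg_end - seg_start));
	return (0);
}
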
@@ -223,28 +266,20 @@ static void
range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
range_tree_t *rt = arg;
- avl_index_t where;
- range_seg_t rsearch, *rs_before, *rs_after, *rs;
+ zfs_btree_index_t where;
+ range_seg_t *rs_before, *rs_after, *rs;
+ range_seg_max_t tmp, rsearch;
uint64_t end = start + size, gap = rt->rt_gap;
uint64_t bridge_size = 0;
boolean_t merge_before, merge_after;
ASSERT3U(size, !=, 0);
ASSERT3U(fill, <=, size);
+ ASSERT3U(start + size, >, start);
- rsearch.rs_start = start;
- rsearch.rs_end = end;
- rs = avl_find(&rt->rt_root, &rsearch, &where);
-
- if (gap == 0 && rs != NULL &&
- rs->rs_start <= start && rs->rs_end >= end) {
- zfs_panic_recover("zfs: allocating allocated segment"
- "(offset=%llu size=%llu) of (offset=%llu size=%llu)\n",
- (longlong_t)start, (longlong_t)size,
- (longlong_t)rs->rs_start,
- (longlong_t)rs->rs_end - rs->rs_start);
- return;
- }
+ rs_set_start(&rsearch, rt, start);
+ rs_set_end(&rsearch, rt, end);
+ rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
/*
* If this is a gap-supporting range tree, it is possible that we
@@ -255,27 +290,28 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
* the normal code paths.
*/
if (rs != NULL) {
+ ASSERT3U(rt->rt_gap, !=, 0);
+ uint64_t rstart = rs_get_start(rs, rt);
+ uint64_t rend = rs_get_end(rs, rt);
ASSERT3U(gap, !=, 0);
- if (rs->rs_start <= start && rs->rs_end >= end) {
+ if (rstart <= start && rend >= end) {
range_tree_adjust_fill(rt, rs, fill);
return;
}
- avl_remove(&rt->rt_root, rs);
+ zfs_btree_remove(&rt->rt_root, rs);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
range_tree_stat_decr(rt, rs);
- rt->rt_space -= rs->rs_end - rs->rs_start;
+ rt->rt_space -= rend - rstart;
- fill += rs->rs_fill;
- start = MIN(start, rs->rs_start);
- end = MAX(end, rs->rs_end);
+ fill += rs_get_fill(rs, rt);
+ start = MIN(start, rstart);
+ end = MAX(end, rend);
size = end - start;
range_tree_add_impl(rt, start, size, fill);
-
- kmem_cache_free(range_seg_cache, rs);
return;
}
@@ -286,19 +322,21 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
* If gap != 0, we might need to merge with our neighbors even if we
* aren't directly touching.
*/
- rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
- rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
+ zfs_btree_index_t where_before, where_after;
+ rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before);
+ rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after);
- merge_before = (rs_before != NULL && rs_before->rs_end >= start - gap);
- merge_after = (rs_after != NULL && rs_after->rs_start <= end + gap);
+ merge_before = (rs_before != NULL && rs_get_end(rs_before, rt) >=
+ start - gap);
+ merge_after = (rs_after != NULL && rs_get_start(rs_after, rt) <= end +
+ gap);
if (merge_before && gap != 0)
- bridge_size += start - rs_before->rs_end;
+ bridge_size += start - rs_get_end(rs_before, rt);
if (merge_after && gap != 0)
- bridge_size += rs_after->rs_start - end;
+ bridge_size += rs_get_start(rs_after, rt) - end;
if (merge_before && merge_after) {
- avl_remove(&rt->rt_root, rs_before);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
@@ -307,9 +345,19 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
range_tree_stat_decr(rt, rs_before);
range_tree_stat_decr(rt, rs_after);
- rs_after->rs_fill += rs_before->rs_fill + fill;
- rs_after->rs_start = rs_before->rs_start;
- kmem_cache_free(range_seg_cache, rs_before);
+ rs_copy(rs_after, &tmp, rt);
+ uint64_t before_start = rs_get_start_raw(rs_before, rt);
+ uint64_t before_fill = rs_get_fill(rs_before, rt);
+ uint64_t after_fill = rs_get_fill(rs_after, rt);
+ zfs_btree_remove_from(&rt->rt_root, &where_before);
+
+ /*
+ * We have to re-find the node because our old reference is
+ * invalid as soon as we do any mutating btree operations.
+ */
+ rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
+ rs_set_start_raw(rs_after, rt, before_start);
+ rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
rs = rs_after;
} else if (merge_before) {
if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
@@ -317,8 +365,9 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
range_tree_stat_decr(rt, rs_before);
- rs_before->rs_fill += fill;
- rs_before->rs_end = end;
+ uint64_t before_fill = rs_get_fill(rs_before, rt);
+ rs_set_end(rs_before, rt, end);
+ rs_set_fill(rs_before, rt, before_fill + fill);
rs = rs_before;
} else if (merge_after) {
if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
@@ -326,22 +375,26 @@ range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
range_tree_stat_decr(rt, rs_after);
- rs_after->rs_fill += fill;
- rs_after->rs_start = start;
+ uint64_t after_fill = rs_get_fill(rs_after, rt);
+ rs_set_start(rs_after, rt, start);
+ rs_set_fill(rs_after, rt, after_fill + fill);
rs = rs_after;
} else {
- rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
+ rs = &tmp;
- rs->rs_fill = fill;
- rs->rs_start = start;
- rs->rs_end = end;
- avl_insert(&rt->rt_root, rs, where);
+ rs_set_start(rs, rt, start);
+ rs_set_end(rs, rt, end);
+ rs_set_fill(rs, rt, fill);
+ zfs_btree_insert(&rt->rt_root, rs, &where);
}
- if (gap != 0)
- ASSERT3U(rs->rs_fill, <=, rs->rs_end - rs->rs_start);
- else
- ASSERT3U(rs->rs_fill, ==, rs->rs_end - rs->rs_start);
+ if (gap != 0) {
+ ASSERT3U(rs_get_fill(rs, rt), <=, rs_get_end(rs, rt) -
+ rs_get_start(rs, rt));
+ } else {
+ ASSERT3U(rs_get_fill(rs, rt), ==, rs_get_end(rs, rt) -
+ rs_get_start(rs, rt));
+ }
if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
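
The merge decision above hinges on whether each neighbor falls within rt_gap bytes of the new segment; the bridged bytes are counted separately so rt_space still reflects real data rather than gap filler. A standalone sketch of just that arithmetic (illustrative values):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t start, end; } seg_t;

int
main(void)
{
	uint64_t gap = 16;
	uint64_t start = 100, end = 150;	/* new segment being added */
	seg_t before = { 40, 90 };		/* ends 10 bytes before it */
	seg_t after = { 170, 220 };		/* starts 20 bytes after it */

	int merge_before = (before.end >= start - gap);	/* 90 >= 84 -> merge */
	int merge_after = (after.start <= end + gap);	/* 170 <= 166 -> no merge */

	uint64_t bridge = 0;
	if (merge_before)
		bridge += start - before.end;	/* 10 bridged bytes */
	if (merge_after)
		bridge += after.start - end;

	printf("merge_before=%d merge_after=%d bridge=%llu\n",
	    merge_before, merge_after, (unsigned long long)bridge);
	return (0);
}
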
@@ -360,22 +413,25 @@ static void
range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
boolean_t do_fill)
{
- avl_index_t where;
- range_seg_t rsearch, *rs, *newseg;
+ zfs_btree_index_t where;
+ range_seg_t *rs;
+ range_seg_max_t rsearch, rs_tmp;
uint64_t end = start + size;
boolean_t left_over, right_over;
VERIFY3U(size, !=, 0);
VERIFY3U(size, <=, rt->rt_space);
+ if (rt->rt_type == RANGE_SEG64)
+ ASSERT3U(start + size, >, start);
- rsearch.rs_start = start;
- rsearch.rs_end = end;
- rs = avl_find(&rt->rt_root, &rsearch, &where);
+ rs_set_start(&rsearch, rt, start);
+ rs_set_end(&rsearch, rt, end);
+ rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
/* Make sure we completely overlap with someone */
if (rs == NULL) {
- zfs_panic_recover("zfs: freeing free segment "
- "(offset=%llu size=%llu)",
+ zfs_panic_recover("zfs: removing nonexistent segment from "
+ "range tree (offset=%llu size=%llu)",
(longlong_t)start, (longlong_t)size);
return;
}
@@ -388,30 +444,32 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
*/
if (rt->rt_gap != 0) {
if (do_fill) {
- if (rs->rs_fill == size) {
- start = rs->rs_start;
- end = rs->rs_end;
+ if (rs_get_fill(rs, rt) == size) {
+ start = rs_get_start(rs, rt);
+ end = rs_get_end(rs, rt);
size = end - start;
} else {
range_tree_adjust_fill(rt, rs, -size);
return;
}
- } else if (rs->rs_start != start || rs->rs_end != end) {
+ } else if (rs_get_start(rs, rt) != start ||
+ rs_get_end(rs, rt) != end) {
zfs_panic_recover("zfs: freeing partial segment of "
"gap tree (offset=%llu size=%llu) of "
"(offset=%llu size=%llu)",
(longlong_t)start, (longlong_t)size,
- (longlong_t)rs->rs_start,
- (longlong_t)rs->rs_end - rs->rs_start);
+ (longlong_t)rs_get_start(rs, rt),
+ (longlong_t)rs_get_end(rs, rt) - rs_get_start(rs,
+ rt));
return;
}
}
- VERIFY3U(rs->rs_start, <=, start);
- VERIFY3U(rs->rs_end, >=, end);
+ VERIFY3U(rs_get_start(rs, rt), <=, start);
+ VERIFY3U(rs_get_end(rs, rt), >=, end);
- left_over = (rs->rs_start != start);
- right_over = (rs->rs_end != end);
+ left_over = (rs_get_start(rs, rt) != start);
+ right_over = (rs_get_end(rs, rt) != end);
range_tree_stat_decr(rt, rs);
@@ -419,24 +477,33 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
if (left_over && right_over) {
- newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
- newseg->rs_start = end;
- newseg->rs_end = rs->rs_end;
- newseg->rs_fill = newseg->rs_end - newseg->rs_start;
- range_tree_stat_incr(rt, newseg);
+ range_seg_max_t newseg;
+ rs_set_start(&newseg, rt, end);
+ rs_set_end_raw(&newseg, rt, rs_get_end_raw(rs, rt));
+ rs_set_fill(&newseg, rt, rs_get_end(rs, rt) - end);
+ range_tree_stat_incr(rt, &newseg);
- rs->rs_end = start;
+ // This modifies the buffer already inside the range tree
+ rs_set_end(rs, rt, start);
+
+ rs_copy(rs, &rs_tmp, rt);
+ if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL)
+ zfs_btree_insert(&rt->rt_root, &newseg, &where);
+ else
+ zfs_btree_add(&rt->rt_root, &newseg);
- avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
- rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
+ rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg);
} else if (left_over) {
- rs->rs_end = start;
+ // This modifies the buffer already inside the range tree
+ rs_set_end(rs, rt, start);
+ rs_copy(rs, &rs_tmp, rt);
} else if (right_over) {
- rs->rs_start = end;
+ // This modifies the buffer already inside the range tree
+ rs_set_start(rs, rt, end);
+ rs_copy(rs, &rs_tmp, rt);
} else {
- avl_remove(&rt->rt_root, rs);
- kmem_cache_free(range_seg_cache, rs);
+ zfs_btree_remove_from(&rt->rt_root, &where);
rs = NULL;
}
@@ -446,11 +513,12 @@ range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
* the size, since we do not support removing partial segments
* of range trees with gaps.
*/
- rs->rs_fill = rs->rs_end - rs->rs_start;
- range_tree_stat_incr(rt, rs);
+ rs_set_fill_raw(rs, rt, rs_get_end_raw(rs, rt) -
+ rs_get_start_raw(rs, rt));
+ range_tree_stat_incr(rt, &rs_tmp);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
- rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
+ rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg);
}
rt->rt_space -= size;
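
The left_over/right_over case analysis covers the four ways a removal range can sit inside an existing segment: trimming the front, trimming the back, splitting it in two, or deleting it outright. A standalone sketch of the split arithmetic (illustrative values):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t rs_start = 0, rs_end = 1000;	/* segment in the tree */
	uint64_t start = 200, end = 300;	/* range being removed */

	int left_over = (rs_start != start);
	int right_over = (rs_end != end);

	if (left_over)
		printf("left remainder  [%llu, %llu)\n",
		    (unsigned long long)rs_start, (unsigned long long)start);
	if (right_over)
		printf("right remainder [%llu, %llu)\n",
		    (unsigned long long)end, (unsigned long long)rs_end);
	if (!left_over && !right_over)
		printf("exact match: segment removed entirely\n");
	return (0);
}
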
@@ -472,14 +540,14 @@ void
range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
uint64_t newstart, uint64_t newsize)
{
- int64_t delta = newsize - (rs->rs_end - rs->rs_start);
+ int64_t delta = newsize - (rs_get_end(rs, rt) - rs_get_start(rs, rt));
range_tree_stat_decr(rt, rs);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
- rs->rs_start = newstart;
- rs->rs_end = newstart + newsize;
+ rs_set_start(rs, rt, newstart);
+ rs_set_end(rs, rt, newstart + newsize);
range_tree_stat_incr(rt, rs);
if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
@@ -491,22 +559,27 @@ range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
- range_seg_t rsearch;
+ range_seg_max_t rsearch;
uint64_t end = start + size;
VERIFY(size != 0);
- rsearch.rs_start = start;
- rsearch.rs_end = end;
- return (avl_find(&rt->rt_root, &rsearch, NULL));
+ rs_set_start(&rsearch, rt, start);
+ rs_set_end(&rsearch, rt, end);
+ return (zfs_btree_find(&rt->rt_root, &rsearch, NULL));
}
range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
+ if (rt->rt_type == RANGE_SEG64)
+ ASSERT3U(start + size, >, start);
+
range_seg_t *rs = range_tree_find_impl(rt, start, size);
- if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
+ if (rs != NULL && rs_get_start(rs, rt) <= start &&
+ rs_get_end(rs, rt) >= start + size) {
return (rs);
+ }
return (NULL);
}
@@ -533,24 +606,28 @@ boolean_t
range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
uint64_t *ostart, uint64_t *osize)
{
- range_seg_t rsearch;
- rsearch.rs_start = start;
- rsearch.rs_end = start + 1;
+ if (rt->rt_type == RANGE_SEG64)
+ ASSERT3U(start + size, >, start);
- avl_index_t where;
- range_seg_t *rs = avl_find(&rt->rt_root, &rsearch, &where);
+ range_seg_max_t rsearch;
+ rs_set_start(&rsearch, rt, start);
+ rs_set_end_raw(&rsearch, rt, rs_get_start_raw(&rsearch, rt) + 1);
+
+ zfs_btree_index_t where;
+ range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
if (rs != NULL) {
*ostart = start;
- *osize = MIN(size, rs->rs_end - start);
+ *osize = MIN(size, rs_get_end(rs, rt) - start);
return (B_TRUE);
}
- rs = avl_nearest(&rt->rt_root, where, AVL_AFTER);
- if (rs == NULL || rs->rs_start > start + size)
+ rs = zfs_btree_next(&rt->rt_root, &where, &where);
+ if (rs == NULL || rs_get_start(rs, rt) > start + size)
return (B_FALSE);
- *ostart = rs->rs_start;
- *osize = MIN(start + size, rs->rs_end) - rs->rs_start;
+ *ostart = rs_get_start(rs, rt);
+ *osize = MIN(start + size, rs_get_end(rs, rt)) -
+ rs_get_start(rs, rt);
return (B_TRUE);
}
@@ -566,9 +643,12 @@ range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
if (size == 0)
return;
+ if (rt->rt_type == RANGE_SEG64)
+ ASSERT3U(start + size, >, start);
+
while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
- uint64_t free_start = MAX(rs->rs_start, start);
- uint64_t free_end = MIN(rs->rs_end, start + size);
+ uint64_t free_start = MAX(rs_get_start(rs, rt), start);
+ uint64_t free_end = MIN(rs_get_end(rs, rt), start + size);
range_tree_remove(rt, free_start, free_end - free_start);
}
}
@@ -579,7 +659,7 @@ range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
range_tree_t *rt;
ASSERT0(range_tree_space(*rtdst));
- ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
+ ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root));
rt = *rtsrc;
*rtsrc = *rtdst;
@@ -589,16 +669,20 @@ range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
- range_seg_t *rs;
- void *cookie = NULL;
-
if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
- while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
- if (func != NULL)
- func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
- kmem_cache_free(range_seg_cache, rs);
+ if (func != NULL) {
+ range_seg_t *rs;
+ zfs_btree_index_t *cookie = NULL;
+
+ while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) !=
+ NULL) {
+ func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
+ rs_get_start(rs, rt));
+ }
+ } else {
+ zfs_btree_clear(&rt->rt_root);
}
bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
@@ -608,16 +692,18 @@ range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
- for (range_seg_t *rs = avl_first(&rt->rt_root); rs;
- rs = AVL_NEXT(&rt->rt_root, rs)) {
- func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
+ zfs_btree_index_t where;
+ for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where);
+ rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
+ func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
+ rs_get_start(rs, rt));
}
}
range_seg_t *
range_tree_first(range_tree_t *rt)
{
- return (avl_first(&rt->rt_root));
+ return (zfs_btree_first(&rt->rt_root, NULL));
}
uint64_t
@@ -629,7 +715,7 @@ range_tree_space(range_tree_t *rt)
uint64_t
range_tree_numsegs(range_tree_t *rt)
{
- return ((rt == NULL) ? 0 : avl_numnodes(&rt->rt_root));
+ return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root));
}
boolean_t
@@ -639,71 +725,76 @@ range_tree_is_empty(range_tree_t *rt)
return (range_tree_space(rt) == 0);
}
-/* Generic range tree functions for maintaining segments in an AVL tree. */
+/* ARGSUSED */
void
-rt_avl_create(range_tree_t *rt, void *arg)
-{
- avl_tree_t *tree = arg;
-
- avl_create(tree, rt->rt_avl_compare, sizeof (range_seg_t),
- offsetof(range_seg_t, rs_pp_node));
+rt_btree_create(range_tree_t *rt, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+
+ size_t size;
+ switch (rt->rt_type) {
+ case RANGE_SEG32:
+ size = sizeof (range_seg32_t);
+ break;
+ case RANGE_SEG64:
+ size = sizeof (range_seg64_t);
+ break;
+ case RANGE_SEG_GAP:
+ size = sizeof (range_seg_gap_t);
+ break;
+ default:
+ panic("Invalid range seg type %d", rt->rt_type);
+ }
+ zfs_btree_create(size_tree, rt->rt_btree_compare, size);
}
+/* ARGSUSED */
void
-rt_avl_destroy(range_tree_t *rt, void *arg)
+rt_btree_destroy(range_tree_t *rt, void *arg)
{
- avl_tree_t *tree = arg;
+ zfs_btree_t *size_tree = arg;
+ ASSERT0(zfs_btree_numnodes(size_tree));
- ASSERT0(avl_numnodes(tree));
- avl_destroy(tree);
+ zfs_btree_destroy(size_tree);
}
+/* ARGSUSED */
void
-rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg)
+rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
{
- avl_tree_t *tree = arg;
- avl_add(tree, rs);
-}
+ zfs_btree_t *size_tree = arg;
-void
-rt_avl_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- avl_tree_t *tree = arg;
- avl_remove(tree, rs);
+ zfs_btree_add(size_tree, rs);
}
+/* ARGSUSED */
void
-rt_avl_vacate(range_tree_t *rt, void *arg)
+rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
{
- /*
- * Normally one would walk the tree freeing nodes along the way.
- * Since the nodes are shared with the range trees we can avoid
- * walking all nodes and just reinitialize the avl tree. The nodes
- * will be freed by the range tree, so we don't want to free them here.
- */
- rt_avl_create(rt, arg);
-}
+ zfs_btree_t *size_tree = arg;
-uint64_t
-range_tree_min(range_tree_t *rt)
-{
- range_seg_t *rs = avl_first(&rt->rt_root);
- return (rs != NULL ? rs->rs_start : 0);
+ zfs_btree_remove(size_tree, rs);
}
-uint64_t
-range_tree_max(range_tree_t *rt)
+/* ARGSUSED */
+void
+rt_btree_vacate(range_tree_t *rt, void *arg)
{
- range_seg_t *rs = avl_last(&rt->rt_root);
- return (rs != NULL ? rs->rs_end : 0);
-}
+ zfs_btree_t *size_tree = arg;
+ zfs_btree_clear(size_tree);
+ zfs_btree_destroy(size_tree);
-uint64_t
-range_tree_span(range_tree_t *rt)
-{
- return (range_tree_max(rt) - range_tree_min(rt));
+ rt_btree_create(rt, arg);
}
+range_tree_ops_t rt_btree_ops = {
+ .rtop_create = rt_btree_create,
+ .rtop_destroy = rt_btree_destroy,
+ .rtop_add = rt_btree_add,
+ .rtop_remove = rt_btree_remove,
+ .rtop_vacate = rt_btree_vacate
+};
+
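rt_btree_ops is the same callback pattern the old rt_avl_ops used: every add and remove on the range tree is mirrored into a caller-supplied structure, here a second btree typically sorted by segment size. A miniature standalone version of that pattern, with a simple byte counter standing in for the shadow btree:

#include <stdint.h>
#include <stdio.h>

typedef struct {
	void (*op_add)(void *arg, uint64_t start, uint64_t size);
	void (*op_remove)(void *arg, uint64_t start, uint64_t size);
} shadow_ops_t;

static void
count_add(void *arg, uint64_t start, uint64_t size)
{
	(void) start;
	*(uint64_t *)arg += size;	/* shadow structure: total bytes tracked */
}

static void
count_remove(void *arg, uint64_t start, uint64_t size)
{
	(void) start;
	*(uint64_t *)arg -= size;
}

static const shadow_ops_t count_ops = {
	.op_add = count_add,
	.op_remove = count_remove,
};

int
main(void)
{
	uint64_t total = 0;

	/* every mutation of the "range tree" is mirrored through the ops */
	count_ops.op_add(&total, 0, 4096);
	count_ops.op_add(&total, 8192, 512);
	count_ops.op_remove(&total, 0, 4096);
	printf("shadow total = %llu\n", (unsigned long long)total);
	return (0);
}
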
/*
* Remove any overlapping ranges between the given segment [start, end)
* from removefrom. Add non-overlapping leftovers to addto.
@@ -712,42 +803,62 @@ void
range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
range_tree_t *removefrom, range_tree_t *addto)
{
- avl_index_t where;
- range_seg_t starting_rs = {
- .rs_start = start,
- .rs_end = start + 1
- };
+ zfs_btree_index_t where;
+ range_seg_max_t starting_rs;
+ rs_set_start(&starting_rs, removefrom, start);
+ rs_set_end_raw(&starting_rs, removefrom, rs_get_start_raw(&starting_rs,
+ removefrom) + 1);
- range_seg_t *curr = avl_find(&removefrom->rt_root,
+ range_seg_t *curr = zfs_btree_find(&removefrom->rt_root,
&starting_rs, &where);
if (curr == NULL)
- curr = avl_nearest(&removefrom->rt_root, where, AVL_AFTER);
+ curr = zfs_btree_next(&removefrom->rt_root, &where, &where);
range_seg_t *next;
for (; curr != NULL; curr = next) {
- next = AVL_NEXT(&removefrom->rt_root, curr);
-
if (start == end)
return;
VERIFY3U(start, <, end);
/* there is no overlap */
- if (end <= curr->rs_start) {
+ if (end <= rs_get_start(curr, removefrom)) {
range_tree_add(addto, start, end - start);
return;
}
- uint64_t overlap_start = MAX(curr->rs_start, start);
- uint64_t overlap_end = MIN(curr->rs_end, end);
+ uint64_t overlap_start = MAX(rs_get_start(curr, removefrom),
+ start);
+ uint64_t overlap_end = MIN(rs_get_end(curr, removefrom),
+ end);
uint64_t overlap_size = overlap_end - overlap_start;
ASSERT3S(overlap_size, >, 0);
+ range_seg_max_t rs;
+ rs_copy(curr, &rs, removefrom);
+
range_tree_remove(removefrom, overlap_start, overlap_size);
if (start < overlap_start)
range_tree_add(addto, start, overlap_start - start);
start = overlap_end;
+ next = zfs_btree_find(&removefrom->rt_root, &rs, &where);
+ /*
+ * If we find something here, we only removed part of the
+ * curr segment. Either there's some left at the end
+ * because we've reached the end of the range we're removing,
+ * or there's some left at the start because we started
+ * partway through the range. Either way, we continue with
+ * the loop. If it's the former, we'll return at the start of
+ * the loop, and if it's the latter we'll see if there is more
+ * area to process.
+ */
+ if (next != NULL) {
+ ASSERT(start == end || start == rs_get_end(&rs,
+ removefrom));
+ }
+
+ next = zfs_btree_next(&removefrom->rt_root, &where, &where);
}
VERIFY3P(curr, ==, NULL);
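
Each iteration trims one overlap out of removefrom and hands any non-overlapping prefix of [start, end) to addto, then resumes from the end of the overlap; the re-find through the saved copy is needed because range_tree_remove() may have split or deleted the node that curr pointed into. A standalone sketch of the per-segment arithmetic (illustrative values):

#include <stdint.h>
#include <stdio.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))
#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	uint64_t seg_start = 100, seg_end = 200;	/* segment in removefrom */
	uint64_t start = 50, end = 150;			/* range being processed */

	uint64_t overlap_start = MAX(seg_start, start);	/* 100 */
	uint64_t overlap_end = MIN(seg_end, end);	/* 150 */

	printf("remove [%llu, %llu) from removefrom\n",
	    (unsigned long long)overlap_start, (unsigned long long)overlap_end);
	if (start < overlap_start)
		printf("add    [%llu, %llu) to addto\n",
		    (unsigned long long)start, (unsigned long long)overlap_start);
	/* the walk continues from the end of the overlap */
	printf("next start = %llu\n", (unsigned long long)overlap_end);
	return (0);
}
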
@@ -767,9 +878,30 @@ void
range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
range_tree_t *addto)
{
- for (range_seg_t *rs = avl_first(&rt->rt_root); rs;
- rs = AVL_NEXT(&rt->rt_root, rs)) {
- range_tree_remove_xor_add_segment(rs->rs_start, rs->rs_end,
- removefrom, addto);
+ zfs_btree_index_t where;
+ for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs;
+ rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
+ range_tree_remove_xor_add_segment(rs_get_start(rs, rt),
+ rs_get_end(rs, rt), removefrom, addto);
}
}
+
+uint64_t
+range_tree_min(range_tree_t *rt)
+{
+ range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL);
+ return (rs != NULL ? rs_get_start(rs, rt) : 0);
+}
+
+uint64_t
+range_tree_max(range_tree_t *rt)
+{
+ range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL);
+ return (rs != NULL ? rs_get_end(rs, rt) : 0);
+}
+
+uint64_t
+range_tree_span(range_tree_t *rt)
+{
+ return (range_tree_max(rt) - range_tree_min(rt));
+}