-rw-r--r--  include/sys/dsl_scan.h    |   2
-rw-r--r--  include/sys/range_tree.h  |  21
-rw-r--r--  module/zfs/dsl_scan.c     | 261
-rw-r--r--  module/zfs/range_tree.c   |  79
4 files changed, 158 insertions(+), 205 deletions(-)
diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h
index fb1f1d65b..d716510f8 100644
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@@ -155,7 +155,7 @@ typedef struct dsl_scan {
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
- uint64_t scn_bytes_pending; /* outstanding data to issue */
+ uint64_t scn_queues_pending; /* outstanding data to issue */
} dsl_scan_t;
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
diff --git a/include/sys/range_tree.h b/include/sys/range_tree.h
index fef3d4d7b..daa39e20d 100644
--- a/include/sys/range_tree.h
+++ b/include/sys/range_tree.h
@@ -63,12 +63,8 @@ typedef struct range_tree {
*/
uint8_t rt_shift;
uint64_t rt_start;
- range_tree_ops_t *rt_ops;
-
- /* rt_btree_compare should only be set if rt_arg is a b-tree */
+ const range_tree_ops_t *rt_ops;
void *rt_arg;
- int (*rt_btree_compare)(const void *, const void *);
-
uint64_t rt_gap; /* allowable inter-segment gap */
/*
@@ -278,11 +274,11 @@ rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
-range_tree_t *range_tree_create_impl(range_tree_ops_t *ops,
+range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops,
range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
- int (*zfs_btree_compare) (const void *, const void *), uint64_t gap);
-range_tree_t *range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
- void *arg, uint64_t start, uint64_t shift);
+ uint64_t gap);
+range_tree_t *range_tree_create(const range_tree_ops_t *ops,
+ range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
void range_tree_destroy(range_tree_t *rt);
boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
@@ -316,13 +312,6 @@ void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
range_tree_t *addto);
-void rt_btree_create(range_tree_t *rt, void *arg);
-void rt_btree_destroy(range_tree_t *rt, void *arg);
-void rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_btree_vacate(range_tree_t *rt, void *arg);
-extern range_tree_ops_t rt_btree_ops;
-
#ifdef __cplusplus
}
#endif
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 716c0bfbb..c1a380d8f 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -219,9 +219,9 @@ typedef struct {
/*
* This controls what conditions are placed on dsl_scan_sync_state():
- * SYNC_OPTIONAL) write out scn_phys iff scn_bytes_pending == 0
- * SYNC_MANDATORY) write out scn_phys always. scn_bytes_pending must be 0.
- * SYNC_CACHED) if scn_bytes_pending == 0, write out scn_phys. Otherwise
+ * SYNC_OPTIONAL) write out scn_phys iff scn_queues_pending == 0
+ * SYNC_MANDATORY) write out scn_phys always. scn_queues_pending must be 0.
+ * SYNC_CACHED) if scn_queues_pending == 0, write out scn_phys. Otherwise
* write out the scn_phys_cached version.
* See dsl_scan_sync_state for details.
*/
@@ -283,9 +283,10 @@ struct dsl_scan_io_queue {
/* trees used for sorting I/Os and extents of I/Os */
range_tree_t *q_exts_by_addr;
- zfs_btree_t q_exts_by_size;
+ zfs_btree_t q_exts_by_size;
avl_tree_t q_sios_by_addr;
uint64_t q_sio_memused;
+ uint64_t q_last_ext_addr;
/* members for zio rate limiting */
uint64_t q_maxinflight_bytes;
@@ -639,7 +640,7 @@ dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
* Because we can be running in the block sorting algorithm, we do not always
* want to write out the record, only when it is "safe" to do so. This safety
* condition is achieved by making sure that the sorting queues are empty
- * (scn_bytes_pending == 0). When this condition is not true, the sync'd state
+ * (scn_queues_pending == 0). When this condition is not true, the sync'd state
* is inconsistent with how much actual scanning progress has been made. The
* kind of sync to be performed is specified by the sync_type argument. If the
* sync is optional, we only sync if the queues are empty. If the sync is
@@ -662,8 +663,8 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type)
int i;
spa_t *spa = scn->scn_dp->dp_spa;
- ASSERT(sync_type != SYNC_MANDATORY || scn->scn_bytes_pending == 0);
- if (scn->scn_bytes_pending == 0) {
+ ASSERT(sync_type != SYNC_MANDATORY || scn->scn_queues_pending == 0);
+ if (scn->scn_queues_pending == 0) {
for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
dsl_scan_io_queue_t *q = vd->vdev_scan_io_queue;
@@ -1198,7 +1199,7 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx)
dmu_object_type_t ot = (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ?
DMU_OT_SCAN_QUEUE : DMU_OT_ZAP_OTHER;
- ASSERT0(scn->scn_bytes_pending);
+ ASSERT0(scn->scn_queues_pending);
ASSERT(scn->scn_phys.scn_queue_obj != 0);
VERIFY0(dmu_object_free(dp->dp_meta_objset,
@@ -1270,11 +1271,12 @@ dsl_scan_should_clear(dsl_scan_t *scn)
queue = tvd->vdev_scan_io_queue;
if (queue != NULL) {
/*
- * # of extents in exts_by_size = # in exts_by_addr.
+ * # of extents in exts_by_addr = # in exts_by_size.
* B-tree efficiency is ~75%, but can be as low as 50%.
*/
mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
- 3 * sizeof (range_seg_gap_t) + queue->q_sio_memused;
+ ((sizeof (range_seg_gap_t) + sizeof (uint64_t)) *
+ 3 / 2) + queue->q_sio_memused;
}
mutex_exit(&tvd->vdev_scan_io_queue_lock);
}
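A rough worked version of the memory estimate above, assuming range_seg_gap_t is the usual 24-byte {rs_start, rs_end, rs_fill} triple and reading the multipliers as a ~66% B-tree occupancy factor (3/2, between the ~75% typical and 50% worst case noted in the comment) applied to each tree:

	old (two trees of range_seg_gap_t):   2 * 24 * 3/2 = 3 * 24  = 72 bytes/extent
	new (range_seg_gap_t + 8-byte key):   (24 + 8) * 3/2         = 48 bytes/extent

The saving comes from q_exts_by_size now holding packed 64-bit keys (see ext_size_value() further down in this diff) instead of full range_seg_gap_t elements.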
@@ -1282,7 +1284,7 @@ dsl_scan_should_clear(dsl_scan_t *scn)
dprintf("current scan memory usage: %llu bytes\n", (longlong_t)mused);
if (mused == 0)
- ASSERT0(scn->scn_bytes_pending);
+ ASSERT0(scn->scn_queues_pending);
/*
* If we are above our hard limit, we need to clear out memory.
@@ -2840,7 +2842,6 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
{
dsl_scan_t *scn = queue->q_scn;
scan_io_t *sio;
- int64_t bytes_issued = 0;
boolean_t suspended = B_FALSE;
while ((sio = list_head(io_list)) != NULL) {
@@ -2852,16 +2853,12 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
}
sio2bp(sio, &bp);
- bytes_issued += SIO_GET_ASIZE(sio);
scan_exec_io(scn->scn_dp, &bp, sio->sio_flags,
&sio->sio_zb, queue);
(void) list_remove_head(io_list);
scan_io_queues_update_zio_stats(queue, &bp);
sio_free(sio);
}
-
- atomic_add_64(&scn->scn_bytes_pending, -bytes_issued);
-
return (suspended);
}
@@ -2906,6 +2903,8 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list)
next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio);
avl_remove(&queue->q_sios_by_addr, sio);
+ if (avl_is_empty(&queue->q_sios_by_addr))
+ atomic_add_64(&queue->q_scn->scn_queues_pending, -1);
queue->q_sio_memused -= SIO_GET_MUSED(sio);
bytes_issued += SIO_GET_ASIZE(sio);
@@ -2927,12 +2926,13 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list)
range_tree_resize_segment(queue->q_exts_by_addr, rs,
SIO_GET_OFFSET(sio), rs_get_end(rs,
queue->q_exts_by_addr) - SIO_GET_OFFSET(sio));
-
+ queue->q_last_ext_addr = SIO_GET_OFFSET(sio);
return (B_TRUE);
} else {
uint64_t rstart = rs_get_start(rs, queue->q_exts_by_addr);
uint64_t rend = rs_get_end(rs, queue->q_exts_by_addr);
range_tree_remove(queue->q_exts_by_addr, rstart, rend - rstart);
+ queue->q_last_ext_addr = -1;
return (B_FALSE);
}
}
@@ -2957,31 +2957,8 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
ASSERT(scn->scn_is_sorted);
- /* handle tunable overrides */
- if (scn->scn_checkpointing || scn->scn_clearing) {
- if (zfs_scan_issue_strategy == 1) {
- return (range_tree_first(rt));
- } else if (zfs_scan_issue_strategy == 2) {
- /*
- * We need to get the original entry in the by_addr
- * tree so we can modify it.
- */
- range_seg_t *size_rs =
- zfs_btree_first(&queue->q_exts_by_size, NULL);
- if (size_rs == NULL)
- return (NULL);
- uint64_t start = rs_get_start(size_rs, rt);
- uint64_t size = rs_get_end(size_rs, rt) - start;
- range_seg_t *addr_rs = range_tree_find(rt, start,
- size);
- ASSERT3P(addr_rs, !=, NULL);
- ASSERT3U(rs_get_start(size_rs, rt), ==,
- rs_get_start(addr_rs, rt));
- ASSERT3U(rs_get_end(size_rs, rt), ==,
- rs_get_end(addr_rs, rt));
- return (addr_rs);
- }
- }
+ if (!scn->scn_checkpointing && !scn->scn_clearing)
+ return (NULL);
/*
* During normal clearing, we want to issue our largest segments
@@ -2992,28 +2969,42 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
* so the way we are sorted now is as good as it will ever get.
* In this case, we instead switch to issuing extents in LBA order.
*/
- if (scn->scn_checkpointing) {
+ if ((zfs_scan_issue_strategy < 1 && scn->scn_checkpointing) ||
+ zfs_scan_issue_strategy == 1)
return (range_tree_first(rt));
- } else if (scn->scn_clearing) {
- /*
- * We need to get the original entry in the by_addr
- * tree so we can modify it.
- */
- range_seg_t *size_rs = zfs_btree_first(&queue->q_exts_by_size,
- NULL);
- if (size_rs == NULL)
- return (NULL);
- uint64_t start = rs_get_start(size_rs, rt);
- uint64_t size = rs_get_end(size_rs, rt) - start;
- range_seg_t *addr_rs = range_tree_find(rt, start, size);
- ASSERT3P(addr_rs, !=, NULL);
- ASSERT3U(rs_get_start(size_rs, rt), ==, rs_get_start(addr_rs,
- rt));
- ASSERT3U(rs_get_end(size_rs, rt), ==, rs_get_end(addr_rs, rt));
- return (addr_rs);
- } else {
- return (NULL);
+
+ /*
+ * Try to continue the previous extent if it has not been completed yet. After
+ * shrinking in scan_io_queue_gather() it may no longer be the best choice,
+ * but otherwise we would leave a shorter remnant every txg.
+ */
+ uint64_t start;
+ uint64_t size = 1 << rt->rt_shift;
+ range_seg_t *addr_rs;
+ if (queue->q_last_ext_addr != -1) {
+ start = queue->q_last_ext_addr;
+ addr_rs = range_tree_find(rt, start, size);
+ if (addr_rs != NULL)
+ return (addr_rs);
}
+
+ /*
+ * Nothing to continue, so find a new best extent.
+ */
+ uint64_t *v = zfs_btree_first(&queue->q_exts_by_size, NULL);
+ if (v == NULL)
+ return (NULL);
+ queue->q_last_ext_addr = start = *v << rt->rt_shift;
+
+ /*
+ * We need to get the original entry in the by_addr tree so we can
+ * modify it.
+ */
+ addr_rs = range_tree_find(rt, start, size);
+ ASSERT3P(addr_rs, !=, NULL);
+ ASSERT3U(rs_get_start(addr_rs, rt), ==, start);
+ ASSERT3U(rs_get_end(addr_rs, rt), >, start);
+ return (addr_rs);
}
static void
@@ -3049,12 +3040,12 @@ scan_io_queues_run_one(void *arg)
/* loop until we run out of time or sios */
while ((rs = scan_io_queue_fetch_ext(queue)) != NULL) {
uint64_t seg_start = 0, seg_end = 0;
- boolean_t more_left = B_TRUE;
+ boolean_t more_left;
ASSERT(list_is_empty(&sio_list));
/* loop while we still have sios left to process in this rs */
- while (more_left) {
+ do {
scan_io_t *first_sio, *last_sio;
/*
@@ -3083,7 +3074,7 @@ scan_io_queues_run_one(void *arg)
if (suspended)
break;
- }
+ } while (more_left);
/* update statistics for debugging purposes */
scan_io_queues_update_seg_stats(queue, seg_start, seg_end);
@@ -3124,7 +3115,7 @@ scan_io_queues_run(dsl_scan_t *scn)
ASSERT(scn->scn_is_sorted);
ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
- if (scn->scn_bytes_pending == 0)
+ if (scn->scn_queues_pending == 0)
return;
if (scn->scn_taskq == NULL) {
@@ -3749,7 +3740,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
zfs_dbgmsg("scan complete txg %llu",
(longlong_t)tx->tx_txg);
}
- } else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) {
+ } else if (scn->scn_is_sorted && scn->scn_queues_pending != 0) {
ASSERT(scn->scn_clearing);
/* need to issue scrubbing IOs from per-vdev queues */
@@ -3777,7 +3768,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)tx->tx_txg);
ASSERT3U(scn->scn_done_txg, !=, 0);
ASSERT0(spa->spa_scrub_inflight);
- ASSERT0(scn->scn_bytes_pending);
+ ASSERT0(scn->scn_queues_pending);
dsl_scan_done(scn, B_TRUE, tx);
sync_type = SYNC_MANDATORY;
}
@@ -3868,20 +3859,21 @@ static void
scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio)
{
avl_index_t idx;
- int64_t asize = SIO_GET_ASIZE(sio);
dsl_scan_t *scn = queue->q_scn;
ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+ if (unlikely(avl_is_empty(&queue->q_sios_by_addr)))
+ atomic_add_64(&scn->scn_queues_pending, 1);
if (avl_find(&queue->q_sios_by_addr, sio, &idx) != NULL) {
/* block is already scheduled for reading */
- atomic_add_64(&scn->scn_bytes_pending, -asize);
sio_free(sio);
return;
}
avl_insert(&queue->q_sios_by_addr, sio, idx);
queue->q_sio_memused += SIO_GET_MUSED(sio);
- range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), asize);
+ range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio),
+ SIO_GET_ASIZE(sio));
}
/*
@@ -3894,7 +3886,6 @@ static void
scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
int zio_flags, const zbookmark_phys_t *zb)
{
- dsl_scan_t *scn = queue->q_scn;
scan_io_t *sio = sio_alloc(BP_GET_NDVAS(bp));
ASSERT0(BP_IS_GANG(bp));
@@ -3904,13 +3895,7 @@ scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
sio->sio_flags = zio_flags;
sio->sio_zb = *zb;
- /*
- * Increment the bytes pending counter now so that we can't
- * get an integer underflow in case the worker processes the
- * zio before we get to incrementing this counter.
- */
- atomic_add_64(&scn->scn_bytes_pending, SIO_GET_ASIZE(sio));
-
+ queue->q_last_ext_addr = -1;
scan_io_queue_insert_impl(queue, sio);
}
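With the per-byte accounting gone, scn_queues_pending simply counts non-empty per-vdev queues, so it only changes on empty<->non-empty transitions: the increment in scan_io_queue_insert_impl() above, and the decrements where q_sios_by_addr empties (scan_io_queue_gather(), dsl_scan_freed_dva()) or the queue is destroyed. A minimal standalone model of that accounting, with hypothetical toy_* names (the real code does this under the per-queue lock and uses atomic_add_64()):

	#include <assert.h>
	#include <stdint.h>

	static uint64_t queues_pending;		/* stands in for scn_queues_pending */

	typedef struct toy_queue {
		uint64_t nsios;			/* stands in for q_sios_by_addr */
	} toy_queue_t;

	static void
	toy_insert(toy_queue_t *q)
	{
		if (q->nsios++ == 0)		/* empty -> non-empty */
			queues_pending++;
	}

	static void
	toy_remove(toy_queue_t *q)
	{
		assert(q->nsios > 0);
		if (--q->nsios == 0)		/* non-empty -> empty */
			queues_pending--;
	}

	int
	main(void)
	{
		toy_queue_t q = { 0 };
		toy_insert(&q); toy_insert(&q);
		assert(queues_pending == 1);	/* counted once, not per byte */
		toy_remove(&q); toy_remove(&q);
		assert(queues_pending == 0);
		return (0);
	}

Because the counter now moves in the same critical section as the AVL insert/remove, the "increment early to avoid an underflow if the worker issues the zio first" ordering that the removed comment above guarded against no longer applies.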
@@ -3996,8 +3981,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
* Keep track of how much data we've examined so that
* zpool(8) status can make useful progress reports.
*/
- scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva);
- spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva);
+ uint64_t asize = DVA_GET_ASIZE(dva);
+ scn->scn_phys.scn_examined += asize;
+ spa->spa_scan_pass_exam += asize;
/* if it's a resilver, this may not be in the target range */
if (!needs_io)
@@ -4118,33 +4104,88 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
* extents that are more completely filled (in a 3:2 ratio) vs just larger.
* Note that as an optimization, we replace multiplication and division by
* 100 with bitshifting by 7 (which effectively multiplies and divides by 128).
+ *
+ * Since we do not care whether one extent is only a few percent better than
+ * another, compress the score into 6 bits via binary logarithm (highbit64())
+ * and store it in the high bits of the offset, which are otherwise unused
+ * thanks to ashift. This reduces q_exts_by_size B-tree elements to 64 bits,
+ * so they can be compared with a single operation. It also makes scrubs more
+ * sequential and reduces the chance that a minor change to an extent moves
+ * it within the B-tree.
*/
static int
ext_size_compare(const void *x, const void *y)
{
- const range_seg_gap_t *rsa = x, *rsb = y;
+ const uint64_t *a = x, *b = y;
+
+ return (TREE_CMP(*a, *b));
+}
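A quick worked comparison of the score described in the comment above, using the default fill_weight of 3 (zfs_scan_fill_weight); the exact numbers are only illustrative:

	1 MiB extent, fully filled (size = fill = 2^20):
	    score = fill + (((fill << 7) / size) * 3 * fill) >> 7
	          = 2^20 + (128/128) * 3 * 2^20 = 4.0 MiB-equivalent
	2 MiB extent, half filled (size = 2^21, fill = 2^20):
	          = 2^20 + (64/128) * 3 * 2^20  = 2.5 MiB-equivalent

The denser extent wins even though both hold the same 1 MiB of queued data, and after the highbit64() compression (ext_size_value() below) the two scores still fall into different log2 buckets, so this ordering survives the 6-bit quantization.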
+
+static void
+ext_size_create(range_tree_t *rt, void *arg)
+{
+ (void) rt;
+ zfs_btree_t *size_tree = arg;
- uint64_t sa = rsa->rs_end - rsa->rs_start;
- uint64_t sb = rsb->rs_end - rsb->rs_start;
- uint64_t score_a, score_b;
+ zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
+}
- score_a = rsa->rs_fill + ((((rsa->rs_fill << 7) / sa) *
- fill_weight * rsa->rs_fill) >> 7);
- score_b = rsb->rs_fill + ((((rsb->rs_fill << 7) / sb) *
- fill_weight * rsb->rs_fill) >> 7);
+static void
+ext_size_destroy(range_tree_t *rt, void *arg)
+{
+ (void) rt;
+ zfs_btree_t *size_tree = arg;
+ ASSERT0(zfs_btree_numnodes(size_tree));
- if (score_a > score_b)
- return (-1);
- if (score_a == score_b) {
- if (rsa->rs_start < rsb->rs_start)
- return (-1);
- if (rsa->rs_start == rsb->rs_start)
- return (0);
- return (1);
- }
- return (1);
+ zfs_btree_destroy(size_tree);
+}
+
+static uint64_t
+ext_size_value(range_tree_t *rt, range_seg_gap_t *rsg)
+{
+ (void) rt;
+ uint64_t size = rsg->rs_end - rsg->rs_start;
+ uint64_t score = rsg->rs_fill + ((((rsg->rs_fill << 7) / size) *
+ fill_weight * rsg->rs_fill) >> 7);
+ ASSERT3U(rt->rt_shift, >=, 8);
+ return (((uint64_t)(64 - highbit64(score)) << 56) | rsg->rs_start);
}
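A standalone sketch of the pack/unpack round trip implied by ext_size_value() above and the "start = *v << rt->rt_shift" decode in scan_io_queue_fetch_ext() earlier in this diff, using __builtin_clzll() in place of the kernel's highbit64() and made-up shift/score values; the rt_shift >= 8 assertion is what lets a single left shift both restore the byte offset and push the score bits off the top:

	#include <assert.h>
	#include <stdint.h>

	/* 1-based index of the highest set bit, like the kernel's highbit64() */
	static int
	highbit64_sketch(uint64_t v)
	{
		return (v == 0 ? 0 : 64 - __builtin_clzll(v));
	}

	int
	main(void)
	{
		uint64_t shift = 12;		/* e.g. the ashift of a 4K-sector vdev */
		uint64_t raw_start = 0x1000;	/* rs_start, already (offset >> shift) */
		uint64_t score = 300;		/* fill-weighted score, as above */

		/* pack: log-scaled score in the top byte, raw start below it */
		uint64_t key =
		    ((uint64_t)(64 - highbit64_sketch(score)) << 56) | raw_start;

		/* unpack as scan_io_queue_fetch_ext() does: the score bits are
		 * shifted out while the raw start becomes a byte offset again */
		uint64_t start = key << shift;

		assert(start == (raw_start << shift));	/* byte offset 0x1000000 */
		return (0);
	}

Sorting the packed keys in ascending order puts the highest-scoring extents first (a larger score gives a smaller 64 - highbit64(score) field), with ties broken by LBA, which is what keeps the scrub sequential within a score bucket.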
+static void
+ext_size_add(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+ ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+ uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+ zfs_btree_add(size_tree, &v);
+}
+
+static void
+ext_size_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+ ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+ uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+ zfs_btree_remove(size_tree, &v);
+}
+
+static void
+ext_size_vacate(range_tree_t *rt, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+ zfs_btree_clear(size_tree);
+ zfs_btree_destroy(size_tree);
+
+ ext_size_create(rt, arg);
+}
+
+static const range_tree_ops_t ext_size_ops = {
+ .rtop_create = ext_size_create,
+ .rtop_destroy = ext_size_destroy,
+ .rtop_add = ext_size_add,
+ .rtop_remove = ext_size_remove,
+ .rtop_vacate = ext_size_vacate
+};
+
/*
* Comparator for the q_sios_by_addr tree. Sorting is simply performed
* based on LBA-order (from lowest to highest).
@@ -4167,9 +4208,10 @@ scan_io_queue_create(vdev_t *vd)
q->q_scn = scn;
q->q_vd = vd;
q->q_sio_memused = 0;
+ q->q_last_ext_addr = -1;
cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL);
- q->q_exts_by_addr = range_tree_create_impl(&rt_btree_ops, RANGE_SEG_GAP,
- &q->q_exts_by_size, 0, 0, ext_size_compare, zfs_scan_max_ext_gap);
+ q->q_exts_by_addr = range_tree_create_gap(&ext_size_ops, RANGE_SEG_GAP,
+ &q->q_exts_by_size, 0, vd->vdev_ashift, zfs_scan_max_ext_gap);
avl_create(&q->q_sios_by_addr, sio_addr_compare,
sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node));
@@ -4187,21 +4229,20 @@ dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue)
dsl_scan_t *scn = queue->q_scn;
scan_io_t *sio;
void *cookie = NULL;
- int64_t bytes_dequeued = 0;
ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+ if (!avl_is_empty(&queue->q_sios_by_addr))
+ atomic_add_64(&scn->scn_queues_pending, -1);
while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) !=
NULL) {
ASSERT(range_tree_contains(queue->q_exts_by_addr,
SIO_GET_OFFSET(sio), SIO_GET_ASIZE(sio)));
- bytes_dequeued += SIO_GET_ASIZE(sio);
queue->q_sio_memused -= SIO_GET_MUSED(sio);
sio_free(sio);
}
ASSERT0(queue->q_sio_memused);
- atomic_add_64(&scn->scn_bytes_pending, -bytes_dequeued);
range_tree_vacate(queue->q_exts_by_addr, NULL, queue);
range_tree_destroy(queue->q_exts_by_addr);
avl_destroy(&queue->q_sios_by_addr);
@@ -4297,25 +4338,19 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i)
sio_free(srch_sio);
if (sio != NULL) {
- int64_t asize = SIO_GET_ASIZE(sio);
blkptr_t tmpbp;
/* Got it while it was cold in the queue */
ASSERT3U(start, ==, SIO_GET_OFFSET(sio));
- ASSERT3U(size, ==, asize);
+ ASSERT3U(size, ==, SIO_GET_ASIZE(sio));
avl_remove(&queue->q_sios_by_addr, sio);
+ if (avl_is_empty(&queue->q_sios_by_addr))
+ atomic_add_64(&scn->scn_queues_pending, -1);
queue->q_sio_memused -= SIO_GET_MUSED(sio);
ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
range_tree_remove_fill(queue->q_exts_by_addr, start, size);
- /*
- * We only update scn_bytes_pending in the cold path,
- * otherwise it will already have been accounted for as
- * part of the zio's execution.
- */
- atomic_add_64(&scn->scn_bytes_pending, -asize);
-
/* count the block as though we issued it */
sio2bp(sio, &tmpbp);
count_block(scn, dp->dp_blkstats, &tmpbp);
diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c
index 67910f9ff..a1a5f7985 100644
--- a/module/zfs/range_tree.c
+++ b/module/zfs/range_tree.c
@@ -188,10 +188,8 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
}
range_tree_t *
-range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
- uint64_t start, uint64_t shift,
- int (*zfs_btree_compare) (const void *, const void *),
- uint64_t gap)
+range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
+ void *arg, uint64_t start, uint64_t shift, uint64_t gap)
{
range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
@@ -223,7 +221,6 @@ range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
rt->rt_type = type;
rt->rt_start = start;
rt->rt_shift = shift;
- rt->rt_btree_compare = zfs_btree_compare;
if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
rt->rt_ops->rtop_create(rt, rt->rt_arg);
@@ -232,10 +229,10 @@ range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
}
range_tree_t *
-range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
+range_tree_create(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift)
{
- return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
+ return (range_tree_create_gap(ops, type, arg, start, shift, 0));
}
void
@@ -741,74 +738,6 @@ range_tree_is_empty(range_tree_t *rt)
return (range_tree_space(rt) == 0);
}
-void
-rt_btree_create(range_tree_t *rt, void *arg)
-{
- zfs_btree_t *size_tree = arg;
-
- size_t size;
- switch (rt->rt_type) {
- case RANGE_SEG32:
- size = sizeof (range_seg32_t);
- break;
- case RANGE_SEG64:
- size = sizeof (range_seg64_t);
- break;
- case RANGE_SEG_GAP:
- size = sizeof (range_seg_gap_t);
- break;
- default:
- panic("Invalid range seg type %d", rt->rt_type);
- }
- zfs_btree_create(size_tree, rt->rt_btree_compare, size);
-}
-
-void
-rt_btree_destroy(range_tree_t *rt, void *arg)
-{
- (void) rt;
- zfs_btree_t *size_tree = arg;
- ASSERT0(zfs_btree_numnodes(size_tree));
-
- zfs_btree_destroy(size_tree);
-}
-
-void
-rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- (void) rt;
- zfs_btree_t *size_tree = arg;
-
- zfs_btree_add(size_tree, rs);
-}
-
-void
-rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- (void) rt;
- zfs_btree_t *size_tree = arg;
-
- zfs_btree_remove(size_tree, rs);
-}
-
-void
-rt_btree_vacate(range_tree_t *rt, void *arg)
-{
- zfs_btree_t *size_tree = arg;
- zfs_btree_clear(size_tree);
- zfs_btree_destroy(size_tree);
-
- rt_btree_create(rt, arg);
-}
-
-range_tree_ops_t rt_btree_ops = {
- .rtop_create = rt_btree_create,
- .rtop_destroy = rt_btree_destroy,
- .rtop_add = rt_btree_add,
- .rtop_remove = rt_btree_remove,
- .rtop_vacate = rt_btree_vacate
-};
-
/*
* Remove any overlapping ranges between the given segment [start, end)
* from removefrom. Add non-overlapping leftovers to addto.