diff options
Diffstat (limited to 'module/zfs/dsl_scan.c')
-rw-r--r-- | module/zfs/dsl_scan.c | 87 |
1 files changed, 64 insertions, 23 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 90534b4fa..53953a6c5 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2017 Datto Inc. * Copyright 2017 Joyent, Inc. @@ -165,6 +165,7 @@ int zfs_scan_mem_lim_fact = 20; /* fraction of physmem */ int zfs_scan_mem_lim_soft_fact = 20; /* fraction of mem lim above */ int zfs_scrub_min_time_ms = 1000; /* min millisecs to scrub per txg */ +int zfs_obsolete_min_time_ms = 500; /* min millisecs to obsolete per txg */ int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */ int zfs_scan_checkpoint_intval = 7200; /* in seconds */ @@ -172,7 +173,7 @@ int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */ enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; /* max number of blocks to free in a single TXG */ -unsigned long zfs_free_max_blocks = 100000; +unsigned long zfs_async_block_max_blocks = 100000; /* * We wait a few txgs after importing a pool to begin scanning so that @@ -2112,7 +2113,6 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; dsl_dataset_t *ds; - objset_t *os; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); @@ -2156,18 +2156,23 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) goto out; } - if (dmu_objset_from_ds(ds, &os)) - goto out; - /* - * Only the ZIL in the head (non-snapshot) is valid. Even though + * Only the ZIL in the head (non-snapshot) is valid. Even though * snapshots can have ZIL block pointers (which may be the same - * BP as in the head), they must be ignored. So we traverse the - * ZIL here, rather than in scan_recurse(), because the regular - * snapshot block-sharing rules don't apply to it. + * BP as in the head), they must be ignored. In addition, $ORIGIN + * doesn't have a objset (i.e. its ds_bp is a hole) so we don't + * need to look for a ZIL in it either. So we traverse the ZIL here, + * rather than in scan_recurse(), because the regular snapshot + * block-sharing rules don't apply to it. */ - if (!ds->ds_is_snapshot) + if (!dsl_dataset_is_snapshot(ds) && + ds->ds_dir != dp->dp_origin_snap->ds_dir) { + objset_t *os; + if (dmu_objset_from_ds(ds, &os) != 0) { + goto out; + } dsl_scan_zil(dp, &os->os_zil_header); + } /* * Iterate over the bps in this ds. @@ -2839,19 +2844,19 @@ scan_io_queues_run(dsl_scan_t *scn) } static boolean_t -dsl_scan_free_should_suspend(dsl_scan_t *scn) +dsl_scan_async_block_should_pause(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; if (zfs_recover) return (B_FALSE); - if (scn->scn_visited_this_txg >= zfs_free_max_blocks) + if (scn->scn_visited_this_txg >= zfs_async_block_max_blocks) return (B_TRUE); elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || - (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms && + (NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)); } @@ -2863,7 +2868,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) if (!scn->scn_is_bptree || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { - if (dsl_scan_free_should_suspend(scn)) + if (dsl_scan_async_block_should_pause(scn)) return (SET_ERROR(ERESTART)); } @@ -2911,6 +2916,22 @@ dsl_scan_update_stats(dsl_scan_t *scn) scn->scn_zios_this_txg = zio_count_total; } +static int +dsl_scan_obsolete_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + dsl_scan_t *scn = arg; + const dva_t *dva = &bp->blk_dva[0]; + + if (dsl_scan_async_block_should_pause(scn)) + return (SET_ERROR(ERESTART)); + + spa_vdev_indirect_mark_obsolete(scn->scn_dp->dp_spa, + DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), + DVA_GET_ASIZE(dva), tx); + scn->scn_visited_this_txg++; + return (0); +} + boolean_t dsl_scan_active(dsl_scan_t *scn) { @@ -3047,6 +3068,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (zfs_free_bpobj_enabled && spa_version(spa) >= SPA_VERSION_DEADLISTS) { scn->scn_is_bptree = B_FALSE; + scn->scn_async_block_min_time_ms = zfs_free_min_time_ms; scn->scn_zio_root = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); err = bpobj_iterate(&dp->dp_free_bpobj, @@ -3146,6 +3168,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) -dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, -dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); } + if (dp->dp_free_dir != NULL && !scn->scn_async_destroying) { /* finished; verify that space accounting went to zero */ ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes); @@ -3153,6 +3176,24 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes); } + EQUIV(bpobj_is_open(&dp->dp_obsolete_bpobj), + 0 == zap_contains(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_OBSOLETE_BPOBJ)); + if (err == 0 && bpobj_is_open(&dp->dp_obsolete_bpobj)) { + ASSERT(spa_feature_is_active(dp->dp_spa, + SPA_FEATURE_OBSOLETE_COUNTS)); + + scn->scn_is_bptree = B_FALSE; + scn->scn_async_block_min_time_ms = zfs_obsolete_min_time_ms; + err = bpobj_iterate(&dp->dp_obsolete_bpobj, + dsl_scan_obsolete_block_cb, scn, tx); + if (err != 0 && err != ERESTART) + zfs_panic_recover("error %u from bpobj_iterate()", err); + + if (bpobj_is_empty(&dp->dp_obsolete_bpobj)) + dsl_pool_destroy_obsolete_bpobj(dp, tx); + } + if (!dsl_scan_is_running(scn) || dsl_scan_is_paused_scrub(scn)) return; @@ -3685,8 +3726,7 @@ scan_io_queue_create(vdev_t *vd) q->q_vd = vd; cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL); q->q_exts_by_addr = range_tree_create_impl(&rt_avl_ops, - &q->q_exts_by_size, ext_size_compare, - &q->q_vd->vdev_scan_io_queue_lock, zfs_scan_max_ext_gap); + &q->q_exts_by_size, ext_size_compare, zfs_scan_max_ext_gap); avl_create(&q->q_sios_by_addr, sio_addr_compare, sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node)); @@ -3739,11 +3779,8 @@ dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd) VERIFY3P(tvd->vdev_scan_io_queue, ==, NULL); tvd->vdev_scan_io_queue = svd->vdev_scan_io_queue; svd->vdev_scan_io_queue = NULL; - if (tvd->vdev_scan_io_queue != NULL) { + if (tvd->vdev_scan_io_queue != NULL) tvd->vdev_scan_io_queue->q_vd = tvd; - range_tree_set_lock(tvd->vdev_scan_io_queue->q_exts_by_addr, - &tvd->vdev_scan_io_queue_lock); - } mutex_exit(&tvd->vdev_scan_io_queue_lock); mutex_exit(&svd->vdev_scan_io_queue_lock); @@ -3869,6 +3906,9 @@ MODULE_PARM_DESC(zfs_scan_vdev_limit, module_param(zfs_scrub_min_time_ms, int, 0644); MODULE_PARM_DESC(zfs_scrub_min_time_ms, "Min millisecs to scrub per txg"); +module_param(zfs_obsolete_min_time_ms, int, 0644); +MODULE_PARM_DESC(zfs_obsolete_min_time_ms, "Min millisecs to obsolete per txg"); + module_param(zfs_free_min_time_ms, int, 0644); MODULE_PARM_DESC(zfs_free_min_time_ms, "Min millisecs to free per txg"); @@ -3882,8 +3922,9 @@ module_param(zfs_no_scrub_prefetch, int, 0644); MODULE_PARM_DESC(zfs_no_scrub_prefetch, "Set to disable scrub prefetching"); /* CSTYLED */ -module_param(zfs_free_max_blocks, ulong, 0644); -MODULE_PARM_DESC(zfs_free_max_blocks, "Max number of blocks freed in one txg"); +module_param(zfs_async_block_max_blocks, ulong, 0644); +MODULE_PARM_DESC(zfs_async_block_max_blocks, + "Max number of blocks freed in one txg"); module_param(zfs_free_bpobj_enabled, int, 0644); MODULE_PARM_DESC(zfs_free_bpobj_enabled, "Enable processing of the free_bpobj"); |