Diffstat (limited to 'module/zfs/dsl_scan.c')
 module/zfs/dsl_scan.c | 142 ++++++++++++++++++++++++++++++++------------
 1 file changed, 95 insertions(+), 47 deletions(-)
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index f03f1f54b..0f0243b31 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -65,7 +65,7 @@
int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
-int zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */
+int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */
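For context: the *_min_time_ms tunables above put a lower bound on how long each txg sync works on scrubbing, freeing, or resilvering before the scan is allowed to pause. A minimal sketch of that pattern, with hypothetical parameter names; the actual check lives in dsl_scan's pause logic (dsl_scan_check_pause() in this era of the code):

	#include <stdint.h>

	/*
	 * Sketch only: keep working until at least min_time_ms of
	 * wall-clock time has elapsed in this sync pass. start_ns and
	 * now_ns would come from gethrtime() in the real code.
	 */
	static int
	min_time_met(uint64_t start_ns, uint64_t now_ns, uint64_t min_time_ms)
	{
		return ((now_ns - start_ns) / 1000000 >= min_time_ms);
	}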
@@ -1417,7 +1417,7 @@ dsl_scan_active(dsl_scan_t *scn)
if (spa_shutting_down(spa))
return (B_FALSE);
if (scn->scn_phys.scn_state == DSS_SCANNING ||
- scn->scn_async_destroying)
+ (scn->scn_async_destroying && !scn->scn_async_stalled))
return (B_TRUE);
if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
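The change to dsl_scan_active() means a stalled async destroy no longer reports the pool as scan-active, so callers stop initiating txg syncs on its behalf. A standalone sketch of the new predicate, with the scan-state fields flattened into plain ints for illustration:

	/*
	 * Illustrative only: "active" now requires either a running
	 * scrub/resilver, or an async destroy that is still making
	 * progress (i.e., not stalled).
	 */
	static int
	scan_active(int scanning, int async_destroying, int async_stalled)
	{
		return (scanning || (async_destroying && !async_stalled));
	}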
@@ -1432,7 +1432,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
{
dsl_scan_t *scn = dp->dp_scan;
spa_t *spa = dp->dp_spa;
- int err;
+ int err = 0;
/*
* Check for scn_restart_txg before checking spa_load_state, so
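Initializing err to 0 matters because, in the restructured code below, err is consulted even when the SPA_VERSION_DEADLISTS branch that assigns it never runs. A reduced sketch of the hazard; process_free_bpobj(), process_bptree(), and async_destroy_active() are hypothetical stand-ins for the bpobj_iterate()/bptree_iterate() passes:

	/* Hypothetical stand-ins for the two free passes below. */
	extern int process_free_bpobj(void);
	extern int process_bptree(void);
	extern int async_destroy_active(void);

	static int
	sync_frees(int pool_has_deadlists)
	{
		int err = 0;	/* previously uninitialized */

		if (pool_has_deadlists)
			err = process_free_bpobj();	/* may never run */

		/* err is read even when the branch above was skipped: */
		if (err == 0 && async_destroy_active())
			err = process_bptree();

		return (err);
	}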
@@ -1450,7 +1450,10 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_scan_setup_sync(&func, tx);
}
- if (!dsl_scan_active(scn) ||
+ /*
+ * If the scan is inactive due to a stalled async destroy, try again.
+ */
+ if ((!scn->scn_async_stalled && !dsl_scan_active(scn)) ||
spa_sync_pass(dp->dp_spa) > 1)
return;
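Without the scn_async_stalled clause, a stalled async destroy would make dsl_scan_active() return B_FALSE and this function would bail out here, so the destroy would never be retried. The gate reduces to the following; an illustrative restatement, not the actual code:

	/*
	 * Proceed with this sync pass iff it is the first sync pass and
	 * either the scan is active or a previously stalled async
	 * destroy deserves another attempt.
	 */
	static int
	should_process(int active, int async_stalled, int sync_pass)
	{
		if (sync_pass > 1)
			return (0);
		return (active || async_stalled);
	}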
@@ -1460,10 +1463,11 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
spa->spa_scrub_active = B_TRUE;
/*
- * First process the free list. If we pause the free, don't do
- * any scanning. This ensures that there is no free list when
- * we are scanning, so the scan code doesn't have to worry about
- * traversing it.
+ * First process the async destroys. If we pause, don't do
+ * any scrubbing or resilvering. This ensures that there are no
+ * async destroys while we are scanning, so the scan code doesn't
+ * have to worry about traversing it. It is also faster to free the
+ * blocks than to scrub them.
*/
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
scn->scn_is_bptree = B_FALSE;
@@ -1473,48 +1477,92 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_scan_free_block_cb, scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
- if (err == 0 && spa_feature_is_active(spa,
- SPA_FEATURE_ASYNC_DESTROY)) {
- ASSERT(scn->scn_async_destroying);
- scn->scn_is_bptree = B_TRUE;
- scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
- NULL, ZIO_FLAG_MUSTSUCCEED);
- err = bptree_iterate(dp->dp_meta_objset,
- dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
- scn, tx);
- VERIFY0(zio_wait(scn->scn_zio_root));
+ if (err != 0 && err != ERESTART)
+ zfs_panic_recover("error %u from bpobj_iterate()", err);
+ }
- if (err == 0) {
- /* finished; deactivate async destroy feature */
- spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY,
- tx);
- ASSERT(!spa_feature_is_active(spa,
- SPA_FEATURE_ASYNC_DESTROY));
- VERIFY0(zap_remove(dp->dp_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, tx));
- VERIFY0(bptree_free(dp->dp_meta_objset,
- dp->dp_bptree_obj, tx));
- dp->dp_bptree_obj = 0;
- scn->scn_async_destroying = B_FALSE;
- }
+ if (err == 0 && spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
+ ASSERT(scn->scn_async_destroying);
+ scn->scn_is_bptree = B_TRUE;
+ scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
+ NULL, ZIO_FLAG_MUSTSUCCEED);
+ err = bptree_iterate(dp->dp_meta_objset,
+ dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx);
+ VERIFY0(zio_wait(scn->scn_zio_root));
+
+ if (err == EIO || err == ECKSUM) {
+ err = 0;
+ } else if (err != 0 && err != ERESTART) {
+ zfs_panic_recover("error %u from "
+ "traverse_dataset_destroyed()", err);
}
- if (scn->scn_visited_this_txg) {
- zfs_dbgmsg("freed %llu blocks in %llums from "
- "free_bpobj/bptree txg %llu",
- (longlong_t)scn->scn_visited_this_txg,
- (longlong_t)
- NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
- (longlong_t)tx->tx_txg);
- scn->scn_visited_this_txg = 0;
- /*
- * Re-sync the ddt so that we can further modify
- * it when doing bprewrite.
- */
- ddt_sync(spa, tx->tx_txg);
+
+ /*
+ * If we didn't make progress, mark the async destroy as
+ * stalled, so that we will not initiate a spa_sync() on
+ * its behalf.
+ */
+ scn->scn_async_stalled = (scn->scn_visited_this_txg == 0);
+
+ if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) {
+ /* finished; deactivate async destroy feature */
+ spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx);
+ ASSERT(!spa_feature_is_active(spa,
+ SPA_FEATURE_ASYNC_DESTROY));
+ VERIFY0(zap_remove(dp->dp_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, tx));
+ VERIFY0(bptree_free(dp->dp_meta_objset,
+ dp->dp_bptree_obj, tx));
+ dp->dp_bptree_obj = 0;
+ scn->scn_async_destroying = B_FALSE;
}
- if (err == ERESTART)
- return;
+ }
+ if (scn->scn_visited_this_txg) {
+ zfs_dbgmsg("freed %llu blocks in %llums from "
+ "free_bpobj/bptree txg %llu; err=%u",
+ (longlong_t)scn->scn_visited_this_txg,
+ (longlong_t)
+ NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
+ (longlong_t)tx->tx_txg, err);
+ scn->scn_visited_this_txg = 0;
+
+ /*
+ * Write out changes to the DDT that may be required as a
+ * result of the blocks freed. This ensures that the DDT
+ * is clean when a scrub/resilver runs.
+ */
+ ddt_sync(spa, tx->tx_txg);
+ }
+ if (err != 0)
+ return;
+ if (!scn->scn_async_destroying && zfs_free_leak_on_eio &&
+ (dp->dp_free_dir->dd_phys->dd_used_bytes != 0 ||
+ dp->dp_free_dir->dd_phys->dd_compressed_bytes != 0 ||
+ dp->dp_free_dir->dd_phys->dd_uncompressed_bytes != 0)) {
+ /*
+ * We have finished background destroying, but there is still
+ * some space left in the dp_free_dir. Transfer this leaked
+ * space to the dp_leak_dir.
+ */
+ if (dp->dp_leak_dir == NULL) {
+ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+ (void) dsl_dir_create_sync(dp, dp->dp_root_dir,
+ LEAK_DIR_NAME, tx);
+ VERIFY0(dsl_pool_open_special_dir(dp,
+ LEAK_DIR_NAME, &dp->dp_leak_dir));
+ rrw_exit(&dp->dp_config_rwlock, FTAG);
+ }
+ dsl_dir_diduse_space(dp->dp_leak_dir, DD_USED_HEAD,
+ dp->dp_free_dir->dd_phys->dd_used_bytes,
+ dp->dp_free_dir->dd_phys->dd_compressed_bytes,
+ dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx);
+ dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+ -dp->dp_free_dir->dd_phys->dd_used_bytes,
+ -dp->dp_free_dir->dd_phys->dd_compressed_bytes,
+ -dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx);
+ }
+ if (!scn->scn_async_destroying) {
/* finished; verify that space accounting went to zero */
ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes);
ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes);
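The leak-directory transfer above works by paired accounting updates: dsl_dir_diduse_space() adds the (used, compressed, uncompressed) byte counts to dp_leak_dir and subtracts the same amounts from dp_free_dir, leaving pool-wide totals unchanged while draining dp_free_dir to zero, which the ASSERT0()s then verify. A reduced model of that invariant; the struct and function are illustrative, not the dsl_dir API:

	#include <assert.h>
	#include <stdint.h>

	struct dir_usage {
		int64_t used, comp, uncomp;
	};

	/* Move all accounted space from one dir to another, net zero. */
	static void
	transfer_usage(struct dir_usage *from, struct dir_usage *to)
	{
		to->used += from->used;
		to->comp += from->comp;
		to->uncomp += from->uncomp;
		from->used = from->comp = from->uncomp = 0;

		/* mirrors the ASSERT0()s: the source is fully drained */
		assert(from->used == 0 && from->comp == 0 &&
		    from->uncomp == 0);
	}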