diff options
Diffstat (limited to 'module/zfs/spa.c')
-rw-r--r-- | module/zfs/spa.c | 451 |
1 files changed, 411 insertions, 40 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c index cdc03e66c..8ab7c3428 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -153,8 +153,7 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { static void spa_sync_version(void *arg, dmu_tx_t *tx); static void spa_sync_props(void *arg, dmu_tx_t *tx); static boolean_t spa_has_active_shared_spare(spa_t *spa); -static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, - boolean_t reloading); +static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ @@ -216,6 +215,7 @@ unsigned long zfs_max_missing_tvds = 0; * and we get a chance to retrieve the trusted config. */ uint64_t zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1; + /* * In the case where config was assembled by scanning device paths (/dev/dsks * by default) we are less tolerant since all the existing devices should have @@ -224,6 +224,11 @@ uint64_t zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1; uint64_t zfs_max_missing_tvds_scan = 0; /* + * Debugging aid that pauses spa_sync() towards the end. + */ +boolean_t zfs_pause_spa_sync = B_FALSE; + +/* * ========================================================================== * SPA properties routines * ========================================================================== @@ -274,6 +279,8 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, size - alloc, src); + spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL, + spa->spa_checkpoint_info.sci_dspace, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, metaslab_class_fragmentation(mc), src); @@ -811,6 +818,12 @@ spa_change_guid_check(void *arg, dmu_tx_t *tx) vdev_t *rvd = spa->spa_root_vdev; uint64_t vdev_state; + if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + int error = (spa_has_checkpoint(spa)) ? + ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (SET_ERROR(error)); + } + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vdev_state = rvd->vdev_state; spa_config_exit(spa, SCL_STATE, FTAG); @@ -1452,6 +1465,12 @@ spa_unload(spa_t *spa) spa->spa_condense_zthr = NULL; } + if (spa->spa_checkpoint_discard_zthr != NULL) { + ASSERT(!zthr_isrunning(spa->spa_checkpoint_discard_zthr)); + zthr_destroy(spa->spa_checkpoint_discard_zthr); + spa->spa_checkpoint_discard_zthr = NULL; + } + spa_condense_fini(spa); bpobj_close(&spa->spa_deferred_bpobj); @@ -1535,6 +1554,18 @@ spa_load_spares(spa_t *spa) int i; vdev_t *vd, *tvd; +#ifndef _KERNEL + /* + * zdb opens both the current state of the pool and the + * checkpointed state (if present), with a different spa_t. + * + * As spare vdevs are shared among open pools, we skip loading + * them when we load the checkpointed state of the pool. + */ + if (!spa_writeable(spa)) + return; +#endif + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* @@ -1654,6 +1685,19 @@ spa_load_l2cache(spa_t *spa) vdev_t *vd, **oldvdevs, **newvdevs; spa_aux_vdev_t *sav = &spa->spa_l2cache; +#ifndef _KERNEL + /* + * zdb opens both the current state of the pool and the + * checkpointed state (if present), with a different spa_t. + * + * As L2 caches are part of the ARC which is shared among open + * pools, we skip loading them when we load the checkpointed + * state of the pool. + */ + if (!spa_writeable(spa)) + return; +#endif + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); oldvdevs = sav->sav_vdevs; @@ -2206,6 +2250,11 @@ spa_spawn_aux_threads(spa_t *spa) ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_start_indirect_condensing_thread(spa); + + ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); + spa->spa_checkpoint_discard_zthr = + zthr_create(spa_checkpoint_discard_thread_check, + spa_checkpoint_discard_thread, spa); } /* @@ -2299,7 +2348,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_load_state = state; gethrestime(&spa->spa_loaded_ts); - error = spa_load_impl(spa, type, &ereport, B_FALSE); + error = spa_load_impl(spa, type, &ereport); /* * Don't count references from objsets that are already closed @@ -2606,8 +2655,25 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type) return (SET_ERROR(EINVAL)); } - if ((spa->spa_load_state == SPA_LOAD_IMPORT || spa->spa_load_state == - SPA_LOAD_TRYIMPORT) && spa_guid_exists(pool_guid, 0)) { + /* + * If we are doing an import, ensure that the pool is not already + * imported by checking if its pool guid already exists in the + * spa namespace. + * + * The only case that we allow an already imported pool to be + * imported again, is when the pool is checkpointed and we want to + * look at its checkpointed state from userland tools like zdb. + */ +#ifdef _KERNEL + if ((spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0)) { +#else + if ((spa->spa_load_state == SPA_LOAD_IMPORT || + spa->spa_load_state == SPA_LOAD_TRYIMPORT) && + spa_guid_exists(pool_guid, 0) && + !spa_importing_readonly_checkpoint(spa)) { +#endif spa_load_failed(spa, "a pool with guid %llu is already open", (u_longlong_t)pool_guid); return (SET_ERROR(EEXIST)); @@ -2766,6 +2832,19 @@ spa_ld_validate_vdevs(spa_t *spa) return (0); } +static void +spa_ld_select_uberblock_done(spa_t *spa, uberblock_t *ub) +{ + spa->spa_state = POOL_STATE_ACTIVE; + spa->spa_ubsync = spa->spa_uberblock; + spa->spa_verify_min_txg = spa->spa_extreme_rewind ? + TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; + spa->spa_first_txg = spa->spa_last_ubsync_txg ? + spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; + spa->spa_claim_max_txg = spa->spa_first_txg; + spa->spa_prev_software_version = ub->ub_software_version; +} + static int spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) { @@ -2775,6 +2854,29 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) boolean_t activity_check = B_FALSE; /* + * If we are opening the checkpointed state of the pool by + * rewinding to it, at this point we will have written the + * checkpointed uberblock to the vdev labels, so searching + * the labels will find the right uberblock. However, if + * we are opening the checkpointed state read-only, we have + * not modified the labels. Therefore, we must ignore the + * labels and continue using the spa_uberblock that was set + * by spa_ld_checkpoint_rewind. + * + * Note that it would be fine to ignore the labels when + * rewinding (opening writeable) as well. However, if we + * crash just after writing the labels, we will end up + * searching the labels. Doing so in the common case means + * that this code path gets exercised normally, rather than + * just in the edge case. + */ + if (ub->ub_checkpoint_txg != 0 && + spa_importing_readonly_checkpoint(spa)) { + spa_ld_select_uberblock_done(spa, ub); + return (0); + } + + /* * Find the best uberblock. */ vdev_uberblock_load(rvd, ub, &label); @@ -2905,14 +3007,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) /* * Initialize internal SPA structures. */ - spa->spa_state = POOL_STATE_ACTIVE; - spa->spa_ubsync = spa->spa_uberblock; - spa->spa_verify_min_txg = spa->spa_extreme_rewind ? - TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; - spa->spa_first_txg = spa->spa_last_ubsync_txg ? - spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; - spa->spa_claim_max_txg = spa->spa_first_txg; - spa->spa_prev_software_version = ub->ub_software_version; + spa_ld_select_uberblock_done(spa, ub); return (0); } @@ -2935,7 +3030,7 @@ spa_ld_open_rootbp(spa_t *spa) } static int -spa_ld_load_trusted_config(spa_t *spa, spa_import_type_t type, +spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, boolean_t reloading) { vdev_t *mrvd, *rvd = spa->spa_root_vdev; @@ -3609,7 +3704,7 @@ spa_ld_claim_log_blocks(spa_t *spa) static void spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg, - boolean_t reloading) + boolean_t update_config_cache) { vdev_t *rvd = spa->spa_root_vdev; int need_update = B_FALSE; @@ -3621,7 +3716,7 @@ spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg, * If this is a verbatim import, trust the current * in-core spa_config and update the disk labels. */ - if (reloading || config_cache_txg != spa->spa_config_txg || + if (update_config_cache || config_cache_txg != spa->spa_config_txg || spa->spa_load_state == SPA_LOAD_IMPORT || spa->spa_load_state == SPA_LOAD_RECOVER || (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) @@ -3657,18 +3752,38 @@ spa_ld_prepare_for_reload(spa_t *spa) spa->spa_async_suspended = async_suspended; } -/* - * Load an existing storage pool, using the config provided. This config - * describes which vdevs are part of the pool and is later validated against - * partial configs present in each vdev's label and an entire copy of the - * config stored in the MOS. - */ static int -spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, - boolean_t reloading) +spa_ld_read_checkpoint_txg(spa_t *spa) +{ + uberblock_t checkpoint; + int error = 0; + + ASSERT0(spa->spa_checkpoint_txg); + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), + sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); + + if (error == ENOENT) + return (0); + + if (error != 0) + return (error); + + ASSERT3U(checkpoint.ub_txg, !=, 0); + ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0); + ASSERT3U(checkpoint.ub_timestamp, !=, 0); + spa->spa_checkpoint_txg = checkpoint.ub_txg; + spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; + + return (0); +} + +static int +spa_ld_mos_init(spa_t *spa, spa_import_type_t type) { int error = 0; - boolean_t missing_feat_write = B_FALSE; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); @@ -3684,11 +3799,6 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, if (type != SPA_IMPORT_ASSEMBLE) spa->spa_trust_config = B_FALSE; - if (reloading) - spa_load_note(spa, "RELOADING"); - else - spa_load_note(spa, "LOADING"); - /* * Parse the config provided to create a vdev tree. */ @@ -3721,11 +3831,11 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, } /* - * Read vdev labels to find the best uberblock (i.e. latest, unless - * spa_load_max_txg is set) and store it in spa_uberblock. We get the - * list of features required to read blkptrs in the MOS from the vdev - * label with the best uberblock and verify that our version of zfs - * supports them all. + * Read all vdev labels to find the best uberblock (i.e. latest, + * unless spa_load_max_txg is set) and store it in spa_uberblock. We + * get the list of features required to read blkptrs in the MOS from + * the vdev label with the best uberblock and verify that our version + * of zfs supports them all. */ error = spa_ld_select_uberblock(spa, type); if (error != 0) @@ -3740,23 +3850,211 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, if (error != 0) return (error); + return (0); +} + +static int +spa_ld_checkpoint_rewind(spa_t *spa) +{ + uberblock_t checkpoint; + int error = 0; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), + sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); + + if (error != 0) { + spa_load_failed(spa, "unable to retrieve checkpointed " + "uberblock from the MOS config [error=%d]", error); + + if (error == ENOENT) + error = ZFS_ERR_NO_CHECKPOINT; + + return (error); + } + + ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg); + ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg); + + /* + * We need to update the txg and timestamp of the checkpointed + * uberblock to be higher than the latest one. This ensures that + * the checkpointed uberblock is selected if we were to close and + * reopen the pool right after we've written it in the vdev labels. + * (also see block comment in vdev_uberblock_compare) + */ + checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1; + checkpoint.ub_timestamp = gethrestime_sec(); + + /* + * Set current uberblock to be the checkpointed uberblock. + */ + spa->spa_uberblock = checkpoint; + + /* + * If we are doing a normal rewind, then the pool is open for + * writing and we sync the "updated" checkpointed uberblock to + * disk. Once this is done, we've basically rewound the whole + * pool and there is no way back. + * + * There are cases when we don't want to attempt and sync the + * checkpointed uberblock to disk because we are opening a + * pool as read-only. Specifically, verifying the checkpointed + * state with zdb, and importing the checkpointed state to get + * a "preview" of its content. + */ + if (spa_writeable(spa)) { + vdev_t *rvd = spa->spa_root_vdev; + + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; + int svdcount = 0; + int children = rvd->vdev_children; + int c0 = spa_get_random(children); + + for (int c = 0; c < children; c++) { + vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; + + /* Stop when revisiting the first vdev */ + if (c > 0 && svd[0] == vd) + break; + + if (vd->vdev_ms_array == 0 || vd->vdev_islog || + !vdev_is_concrete(vd)) + continue; + + svd[svdcount++] = vd; + if (svdcount == SPA_SYNC_MIN_VDEVS) + break; + } + error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); + if (error == 0) + spa->spa_last_synced_guid = rvd->vdev_guid; + spa_config_exit(spa, SCL_ALL, FTAG); + + if (error != 0) { + spa_load_failed(spa, "failed to write checkpointed " + "uberblock to the vdev labels [error=%d]", error); + return (error); + } + } + + return (0); +} + +static int +spa_ld_mos_with_trusted_config(spa_t *spa, spa_import_type_t type, + boolean_t *update_config_cache) +{ + int error; + + /* + * Parse the config for pool, open and validate vdevs, + * select an uberblock, and use that uberblock to open + * the MOS. + */ + error = spa_ld_mos_init(spa, type); + if (error != 0) + return (error); + /* * Retrieve the trusted config stored in the MOS and use it to create * a new, exact version of the vdev tree, then reopen all vdevs. */ - error = spa_ld_load_trusted_config(spa, type, reloading); + error = spa_ld_trusted_config(spa, type, B_FALSE); if (error == EAGAIN) { - VERIFY(!reloading); + if (update_config_cache != NULL) + *update_config_cache = B_TRUE; + /* * Redo the loading process with the trusted config if it is * too different from the untrusted config. */ spa_ld_prepare_for_reload(spa); - return (spa_load_impl(spa, type, ereport, B_TRUE)); + spa_load_note(spa, "RELOADING"); + error = spa_ld_mos_init(spa, type); + if (error != 0) + return (error); + + error = spa_ld_trusted_config(spa, type, B_TRUE); + if (error != 0) + return (error); + } else if (error != 0) { return (error); } + return (0); +} + +/* + * Load an existing storage pool, using the config provided. This config + * describes which vdevs are part of the pool and is later validated against + * partial configs present in each vdev's label and an entire copy of the + * config stored in the MOS. + */ +static int +spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) +{ + int error = 0; + boolean_t missing_feat_write = B_FALSE; + boolean_t checkpoint_rewind = + (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); + boolean_t update_config_cache = B_FALSE; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); + + spa_load_note(spa, "LOADING"); + + error = spa_ld_mos_with_trusted_config(spa, type, &update_config_cache); + if (error != 0) + return (error); + + /* + * If we are rewinding to the checkpoint then we need to repeat + * everything we've done so far in this function but this time + * selecting the checkpointed uberblock and using that to open + * the MOS. + */ + if (checkpoint_rewind) { + /* + * If we are rewinding to the checkpoint update config cache + * anyway. + */ + update_config_cache = B_TRUE; + + /* + * Extract the checkpointed uberblock from the current MOS + * and use this as the pool's uberblock from now on. If the + * pool is imported as writeable we also write the checkpoint + * uberblock to the labels, making the rewind permanent. + */ + error = spa_ld_checkpoint_rewind(spa); + if (error != 0) + return (error); + + /* + * Redo the loading process process again with the + * checkpointed uberblock. + */ + spa_ld_prepare_for_reload(spa); + spa_load_note(spa, "LOADING checkpointed uberblock"); + error = spa_ld_mos_with_trusted_config(spa, type, NULL); + if (error != 0) + return (error); + } + + /* + * Retrieve the checkpoint txg if the pool has a checkpoint. + */ + error = spa_ld_read_checkpoint_txg(spa); + if (error != 0) + return (error); + /* * Retrieve the mapping of indirect vdevs. Those vdevs were removed * from the pool and their contents were re-mapped to other vdevs. Note @@ -3860,6 +4158,16 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT); /* + * In case of a checkpoint rewind, log the original txg + * of the checkpointed uberblock. + */ + if (checkpoint_rewind) { + spa_history_log_internal(spa, "checkpoint rewind", + NULL, "rewound state to txg=%llu", + (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); + } + + /* * Traverse the ZIL and claim all blocks. */ spa_ld_claim_log_blocks(spa); @@ -3886,7 +4194,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport, * and the cachefile (by default /etc/zfs/zpool.cache). */ spa_ld_check_for_config_update(spa, config_cache_txg, - reloading); + update_config_cache); /* * Check all DTLs to see if anything needs resilvering. @@ -3970,6 +4278,15 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request, load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING); if (load_error == 0) return (0); + if (load_error == ZFS_ERR_NO_CHECKPOINT) { + /* + * When attempting checkpoint-rewind on a pool with no + * checkpoint, we should not attempt to load uberblocks + * from previous txgs when spa_load fails. + */ + ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); + return (load_error); + } if (spa->spa_root_vdev != NULL) config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); @@ -5564,6 +5881,13 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + error = (spa_has_checkpoint(spa)) ? + ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (spa_vdev_exit(spa, NULL, txg, error)); + } + if (spa->spa_vdev_removal != NULL) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); @@ -5776,6 +6100,27 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) vd = spa_lookup_by_guid(spa, guid, B_FALSE); + /* + * Besides being called directly from the userland through the + * ioctl interface, spa_vdev_detach() can be potentially called + * at the end of spa_vdev_resilver_done(). + * + * In the regular case, when we have a checkpoint this shouldn't + * happen as we never empty the DTLs of a vdev during the scrub + * [see comment in dsl_scan_done()]. Thus spa_vdev_resilvering_done() + * should never get here when we have a checkpoint. + * + * That said, even in a case when we checkpoint the pool exactly + * as spa_vdev_resilver_done() calls this function everything + * should be fine as the resilver will return right away. + */ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + error = (spa_has_checkpoint(spa)) ? + ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (spa_vdev_exit(spa, NULL, txg, error)); + } + if (vd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); @@ -6014,6 +6359,13 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, txg = spa_vdev_enter(spa); + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { + error = (spa_has_checkpoint(spa)) ? + ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; + return (spa_vdev_exit(spa, NULL, txg, error)); + } + /* clear the log and flush everything up to now */ activate_slog = spa_passivate_log(spa); (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); @@ -6665,6 +7017,10 @@ spa_async_suspend(spa_t *spa) zthr_t *condense_thread = spa->spa_condense_zthr; if (condense_thread != NULL && zthr_isrunning(condense_thread)) VERIFY0(zthr_cancel(condense_thread)); + + zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; + if (discard_thread != NULL && zthr_isrunning(discard_thread)) + VERIFY0(zthr_cancel(discard_thread)); } void @@ -6679,6 +7035,10 @@ spa_async_resume(spa_t *spa) zthr_t *condense_thread = spa->spa_condense_zthr; if (condense_thread != NULL && !zthr_isrunning(condense_thread)) zthr_resume(condense_thread); + + zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; + if (discard_thread != NULL && !zthr_isrunning(discard_thread)) + zthr_resume(discard_thread); } static boolean_t @@ -7454,6 +7814,8 @@ spa_sync(spa_t *spa, uint64_t txg) txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); + ASSERT(txg_list_empty(&dp->dp_early_sync_tasks, + txg)); break; } spa_sync_deferred_frees(spa, tx); @@ -7499,16 +7861,22 @@ spa_sync(spa_t *spa, uint64_t txg) spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); if (list_is_empty(&spa->spa_config_dirty_list)) { - vdev_t *svd[SPA_SYNC_MIN_VDEVS]; + vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; int c0 = spa_get_random(children); for (int c = 0; c < children; c++) { vd = rvd->vdev_child[(c0 + c) % children]; + + /* Stop when revisiting the first vdev */ + if (c > 0 && svd[0] == vd) + break; + if (vd->vdev_ms_array == 0 || vd->vdev_islog || !vdev_is_concrete(vd)) continue; + svd[svdcount++] = vd; if (svdcount == SPA_SYNC_MIN_VDEVS) break; @@ -7572,6 +7940,9 @@ spa_sync(spa_t *spa, uint64_t txg) ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); + while (zfs_pause_spa_sync) + delay(1); + spa->spa_sync_pass = 0; /* |