Diffstat (limited to 'module/zfs/spa.c')
 module/zfs/spa.c | 153 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 128 insertions(+), 25 deletions(-)
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 23cf17a58..3ad8fc6e4 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1420,20 +1420,89 @@ spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
 	return (0);
 }
 
+static boolean_t
+spa_should_flush_logs_on_unload(spa_t *spa)
+{
+	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+		return (B_FALSE);
+
+	if (!spa_writeable(spa))
+		return (B_FALSE);
+
+	if (!spa->spa_sync_on)
+		return (B_FALSE);
+
+	if (spa_state(spa) != POOL_STATE_EXPORTED)
+		return (B_FALSE);
+
+	if (zfs_keep_log_spacemaps_at_export)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Opens a transaction that will set the flag that will instruct
+ * spa_sync to attempt to flush all the metaslabs for that txg.
+ */
+static void
+spa_unload_log_sm_flush_all(spa_t *spa)
+{
+	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+
+	ASSERT3U(spa->spa_log_flushall_txg, ==, 0);
+	spa->spa_log_flushall_txg = dmu_tx_get_txg(tx);
+
+	dmu_tx_commit(tx);
+	txg_wait_synced(spa_get_dsl(spa), spa->spa_log_flushall_txg);
+}
+
+static void
+spa_unload_log_sm_metadata(spa_t *spa)
+{
+	void *cookie = NULL;
+	spa_log_sm_t *sls;
+	while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
+	    &cookie)) != NULL) {
+		VERIFY0(sls->sls_mscount);
+		kmem_free(sls, sizeof (spa_log_sm_t));
+	}
+
+	for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+	    e != NULL; e = list_head(&spa->spa_log_summary)) {
+		VERIFY0(e->lse_mscount);
+		list_remove(&spa->spa_log_summary, e);
+		kmem_free(e, sizeof (log_summary_entry_t));
+	}
+
+	spa->spa_unflushed_stats.sus_nblocks = 0;
+	spa->spa_unflushed_stats.sus_memused = 0;
+	spa->spa_unflushed_stats.sus_blocklimit = 0;
+}
+
 /*
  * Opposite of spa_load().
  */
 static void
 spa_unload(spa_t *spa)
 {
-	int i;
-
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+	ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);
 
 	spa_import_progress_remove(spa_guid(spa));
 	spa_load_note(spa, "UNLOADING");
 
 	/*
+	 * If the log space map feature is enabled and the pool is getting
+	 * exported (but not destroyed), we want to spend some time flushing
+	 * as many metaslabs as we can in an attempt to destroy log space
+	 * maps and save import time.
+	 */
+	if (spa_should_flush_logs_on_unload(spa))
+		spa_unload_log_sm_flush_all(spa);
+
+	/*
 	 * Stop async tasks.
 	 */
 	spa_async_suspend(spa);
@@ -1454,16 +1523,15 @@ spa_unload(spa_t *spa)
 	}
 
 	/*
-	 * Even though vdev_free() also calls vdev_metaslab_fini, we need
-	 * to call it earlier, before we wait for async i/o to complete.
-	 * This ensures that there is no async metaslab prefetching, by
-	 * calling taskq_wait(mg_taskq).
+	 * This ensures that there is no async metaslab prefetching
+	 * while we attempt to unload the spa.
 	 */
 	if (spa->spa_root_vdev != NULL) {
-		spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
-		for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++)
-			vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
-		spa_config_exit(spa, SCL_ALL, spa);
+		for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
+			vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
+			if (vc->vdev_mg != NULL)
+				taskq_wait(vc->vdev_mg->mg_taskq);
+		}
 	}
 
 	if (spa->spa_mmp.mmp_thread)
@@ -1517,13 +1585,14 @@ spa_unload(spa_t *spa)
 	}
 
 	ddt_unload(spa);
+	spa_unload_log_sm_metadata(spa);
 
 	/*
 	 * Drop and purge level 2 cache
 	 */
 	spa_l2cache_drop(spa);
 
-	for (i = 0; i < spa->spa_spares.sav_count; i++)
+	for (int i = 0; i < spa->spa_spares.sav_count; i++)
 		vdev_free(spa->spa_spares.sav_vdevs[i]);
 	if (spa->spa_spares.sav_vdevs) {
 		kmem_free(spa->spa_spares.sav_vdevs,
@@ -1536,7 +1605,7 @@ spa_unload(spa_t *spa)
 	}
 	spa->spa_spares.sav_count = 0;
 
-	for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
+	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
 		vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
 		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
 	}
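The trickiest piece above is the handshake in spa_unload_log_sm_flush_all(): the exporting thread assigns itself a transaction, stamps that transaction's txg into spa->spa_log_flushall_txg, commits, and then blocks in txg_wait_synced() until the sync thread has pushed that txg out; per the comment, the stamp tells spa_sync to attempt to flush all metaslabs for that txg. Below is a minimal user-space sketch of the same stamp-then-wait pattern using POSIX threads. Every name in it (open_txg, synced_txg, flushall_txg, sync_thread) is invented for the illustration and does not exist in ZFS; only the shape of the handshake mirrors the diff.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static uint64_t open_txg = 1;		/* txg currently accepting changes */
static uint64_t synced_txg = 0;		/* last txg fully "synced" */
static uint64_t flushall_txg = 0;	/* 0 means no flush-all requested */

/*
 * Stand-in for the txg sync thread; the real one is timer-driven,
 * here it just "syncs" one txg per loop until the request is honored.
 */
static void *
sync_thread(void *arg)
{
	(void) arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		uint64_t txg = open_txg++;
		if (flushall_txg != 0 && txg >= flushall_txg)
			printf("txg %llu: flush all metaslabs\n",
			    (unsigned long long)txg);
		synced_txg = txg;
		pthread_cond_broadcast(&cv);
		int done = (flushall_txg != 0 && synced_txg >= flushall_txg);
		pthread_mutex_unlock(&lock);
		if (done)
			return (NULL);
	}
}

/* Stand-in for spa_unload_log_sm_flush_all(): stamp, then wait. */
static void
flush_all_and_wait(void)
{
	pthread_mutex_lock(&lock);
	uint64_t wait_for = open_txg;	/* the txg our "tx" lands in */
	flushall_txg = wait_for;	/* spa_log_flushall_txg analog */
	while (synced_txg < wait_for)	/* txg_wait_synced() analog */
		pthread_cond_wait(&cv, &lock);
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, sync_thread, NULL);
	flush_all_and_wait();
	printf("log spacemaps flushed; safe to unload\n");
	pthread_join(&tid, NULL);
	return (0);
}

The property worth noticing, here as in the diff, is that the requester never flushes anything itself: it publishes a ticket and waits for the worker to report that the ticketed txg has been processed.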
@@ -3723,6 +3792,13 @@ spa_ld_load_vdev_metadata(spa_t *spa)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
 	}
 
+	error = spa_ld_log_spacemaps(spa);
+	if (error != 0) {
+		spa_load_failed(spa, "spa_ld_log_sm_data failed [error=%d]",
+		    error);
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
+	}
+
 	/*
 	 * Propagate the leaf DTLs we just loaded all the way up the vdev tree.
 	 */
@@ -5864,7 +5940,7 @@ spa_reset(char *pool)
 int
 spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 {
-	uint64_t txg, id;
+	uint64_t txg;
 	int error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *tvd;
@@ -5939,19 +6015,9 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 	}
 
 	for (int c = 0; c < vd->vdev_children; c++) {
-
-		/*
-		 * Set the vdev id to the first hole, if one exists.
-		 */
-		for (id = 0; id < rvd->vdev_children; id++) {
-			if (rvd->vdev_child[id]->vdev_ishole) {
-				vdev_free(rvd->vdev_child[id]);
-				break;
-			}
-		}
 		tvd = vd->vdev_child[c];
 		vdev_remove_child(vd, tvd);
-		tvd->vdev_id = id;
+		tvd->vdev_id = rvd->vdev_children;
 		vdev_add_child(rvd, tvd);
 		vdev_config_dirty(tvd);
 	}
@@ -7597,6 +7663,18 @@ spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
 	if (spa_sync_pass(spa) != 1)
 		return;
 
+	/*
+	 * Note:
+	 * If the log space map feature is active, we stop deferring
+	 * frees to the next TXG and therefore running this function
+	 * would be considered a no-op as spa_deferred_bpobj should
+	 * not have any entries.
+	 *
+	 * That said we run this function anyway (instead of returning
+	 * immediately) for the edge-case scenario where we just
+	 * activated the log space map feature in this TXG but we have
+	 * deferred frees from the previous TXG.
+	 */
 	zio_t *zio = zio_root(spa, NULL, NULL, 0);
 	VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
 	    spa_free_sync_cb, zio, tx), ==, 0);
@@ -8187,7 +8265,14 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
 		spa_errlog_sync(spa, txg);
 		dsl_pool_sync(dp, txg);
 
-		if (pass < zfs_sync_pass_deferred_free) {
+		if (pass < zfs_sync_pass_deferred_free ||
+		    spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
+			/*
+			 * If the log space map feature is active we don't
+			 * care about deferred frees and the deferred bpobj
+			 * as the log space map should effectively have the
+			 * same results (i.e. appending only to one object).
+			 */
 			spa_sync_frees(spa, free_bpl, tx);
 		} else {
 			/*
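A subtle behavioral change hides in the spa_vdev_add() hunk above: the old code scanned the root vdev for the first hole child, freed it, and gave the new top-level vdev that slot's id, while the new code always appends at rvd->vdev_children, leaving holes in place so the ids of the existing top-level vdevs never shift. A minimal stand-alone C sketch of the two id-assignment policies; the data and names (ishole, NCHILDREN) are mock values invented for the illustration:

#include <stdio.h>
#include <stdbool.h>

#define	NCHILDREN 4

int
main(void)
{
	/* index 1 is a hole, e.g. left behind by a removed device */
	bool ishole[NCHILDREN] = { false, true, false, false };
	unsigned children = NCHILDREN;

	/* Old policy: reuse the first hole's slot (the hole is freed). */
	unsigned old_id = children;
	for (unsigned id = 0; id < children; id++) {
		if (ishole[id]) {
			old_id = id;
			break;
		}
	}

	/* New policy: always append past the last existing child. */
	unsigned new_id = children;

	printf("old id = %u, new id = %u\n", old_id, new_id);
	return (0);
}

Running it prints "old id = 1, new id = 4": under the old policy the added vdev would have silently taken over the hole's slot rather than getting a fresh id at the end.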
@@ -8204,6 +8289,8 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
 		svr_sync(spa, tx);
 		spa_sync_upgrades(spa, tx);
 
+		spa_flush_metaslabs(spa, tx);
+
 		vdev_t *vd = NULL;
 		while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
 		    != NULL)
@@ -8453,6 +8540,7 @@ spa_sync(spa_t *spa, uint64_t txg)
 	while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
 	    != NULL)
 		vdev_sync_done(vd, txg);
+	spa_sync_close_syncing_log_sm(spa);
 
 	spa_update_dspace(spa);
 
@@ -8639,6 +8727,21 @@ spa_has_active_shared_spare(spa_t *spa)
 	return (B_FALSE);
 }
 
+uint64_t
+spa_total_metaslabs(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+
+	uint64_t m = 0;
+	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *vd = rvd->vdev_child[c];
+		if (!vdev_is_concrete(vd))
+			continue;
+		m += vd->vdev_ms_count;
+	}
+	return (m);
+}
+
 sysevent_t *
 spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
 {
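The new spa_total_metaslabs() helper at the end sums vdev_ms_count over the concrete top-level children only, skipping holes and indirect vdevs left behind by device removal. A user-space sketch of the same walk with mock types; the struct, field names, and counts below are illustrative, not from the ZFS sources:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef struct mock_vdev {
	bool		concrete;	/* vdev_is_concrete() analog */
	uint64_t	ms_count;	/* vdev_ms_count analog */
} mock_vdev_t;

int
main(void)
{
	/* a pool with two disks and one hole from a removed device */
	mock_vdev_t children[] = {
		{ true, 116 },
		{ false, 0 },	/* hole/indirect: not counted */
		{ true, 232 },
	};

	uint64_t m = 0;
	for (uint64_t c = 0;
	    c < sizeof (children) / sizeof (children[0]); c++) {
		if (!children[c].concrete)
			continue;
		m += children[c].ms_count;
	}
	printf("total metaslabs: %llu\n", (unsigned long long)m);
	return (0);
}

Skipping non-concrete vdevs matters because holes and indirect vdevs have no metaslab groups of their own; counting them would overstate how many metaslabs the flushing machinery has to track.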