Diffstat (limited to 'module/zfs/spa.c')
-rw-r--r--  module/zfs/spa.c  |  153
1 file changed, 128 insertions(+), 25 deletions(-)
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 23cf17a58..3ad8fc6e4 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1420,20 +1420,89 @@ spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
return (0);
}
+static boolean_t
+spa_should_flush_logs_on_unload(spa_t *spa)
+{
+ if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+ return (B_FALSE);
+
+ if (!spa_writeable(spa))
+ return (B_FALSE);
+
+ if (!spa->spa_sync_on)
+ return (B_FALSE);
+
+ if (spa_state(spa) != POOL_STATE_EXPORTED)
+ return (B_FALSE);
+
+ if (zfs_keep_log_spacemaps_at_export)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
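The zfs_keep_log_spacemaps_at_export tunable tested above is defined outside this file; a minimal sketch of what its definition presumably looks like (only the name comes from the check above — the default value and the module-parameter plumbing are assumptions):

/*
 * Assumed definition: an administrator escape hatch to skip the
 * export-time flush and keep the log space maps on disk.
 */
int zfs_keep_log_spacemaps_at_export = 0;

module_param(zfs_keep_log_spacemaps_at_export, int, 0644);
MODULE_PARM_DESC(zfs_keep_log_spacemaps_at_export,
	"Prevent log spacemaps from being flushed and destroyed "
	"during pool export/destroy");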
+
+/*
+ * Opens a transaction that sets the flag instructing spa_sync()
+ * to attempt to flush all the metaslabs for that txg, then waits
+ * for that txg to sync.
+ */
+static void
+spa_unload_log_sm_flush_all(spa_t *spa)
+{
+ dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+ VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+
+ ASSERT3U(spa->spa_log_flushall_txg, ==, 0);
+ spa->spa_log_flushall_txg = dmu_tx_get_txg(tx);
+
+ dmu_tx_commit(tx);
+ txg_wait_synced(spa_get_dsl(spa), spa->spa_log_flushall_txg);
+}
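On the consuming side, spa_log_flushall_txg is presumably checked from the sync path (e.g. by spa_flush_metaslabs(), added elsewhere in this change) to force flushing until the marked txg has synced. A hedged sketch; the helper name is hypothetical, the field is the one set above:

/*
 * Hypothetical predicate: while an export-time flush is pending,
 * the sync path should flush every dirty metaslab rather than
 * stopping at its usual incremental quota.
 */
static boolean_t
spa_flush_all_logs_requested(spa_t *spa, dmu_tx_t *tx)
{
	return (spa->spa_log_flushall_txg != 0 &&
	    dmu_tx_get_txg(tx) <= spa->spa_log_flushall_txg);
}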
+
+static void
+spa_unload_log_sm_metadata(spa_t *spa)
+{
+ void *cookie = NULL;
+ spa_log_sm_t *sls;
+ while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
+ &cookie)) != NULL) {
+ VERIFY0(sls->sls_mscount);
+ kmem_free(sls, sizeof (spa_log_sm_t));
+ }
+
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e != NULL; e = list_head(&spa->spa_log_summary)) {
+ VERIFY0(e->lse_mscount);
+ list_remove(&spa->spa_log_summary, e);
+ kmem_free(e, sizeof (log_summary_entry_t));
+ }
+
+ spa->spa_unflushed_stats.sus_nblocks = 0;
+ spa->spa_unflushed_stats.sus_memused = 0;
+ spa->spa_unflushed_stats.sus_blocklimit = 0;
+}
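For context, the records torn down here presumably have roughly the following shape; the field names follow the uses above (sls_mscount, lse_mscount), but the exact layouts are assumptions, as the definitions live in a header outside this diff:

typedef struct spa_log_sm {
	uint64_t	sls_sm_obj;	/* space map object in the MOS */
	uint64_t	sls_txg;	/* txg this log covers */
	uint64_t	sls_nblocks;	/* blocks used by this log */
	uint64_t	sls_mscount;	/* metaslabs with unflushed changes here */
	avl_node_t	sls_node;	/* node in spa_sm_logs_by_txg */
} spa_log_sm_t;

typedef struct log_summary_entry {
	uint64_t	lse_start;	/* first txg in the summarized range */
	uint64_t	lse_mscount;	/* metaslabs to flush for this range */
	uint64_t	lse_blkcount;	/* log blocks held by this range */
	list_node_t	lse_node;	/* node in spa_log_summary */
} log_summary_entry_t;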
+
/*
* Opposite of spa_load().
*/
static void
spa_unload(spa_t *spa)
{
- int i;
-
ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);
spa_import_progress_remove(spa_guid(spa));
spa_load_note(spa, "UNLOADING");
/*
+ * If the log space map feature is enabled and the pool is getting
+ * exported (but not destroyed), we want to spend some time flushing
+ * as many metaslabs as we can in an attempt to destroy log space
+ * maps and save import time.
+ */
+ if (spa_should_flush_logs_on_unload(spa))
+ spa_unload_log_sm_flush_all(spa);
+
+ /*
* Stop async tasks.
*/
spa_async_suspend(spa);
@@ -1454,16 +1523,15 @@ spa_unload(spa_t *spa)
}
/*
- * Even though vdev_free() also calls vdev_metaslab_fini, we need
- * to call it earlier, before we wait for async i/o to complete.
- * This ensures that there is no async metaslab prefetching, by
- * calling taskq_wait(mg_taskq).
+ * This ensures that there is no async metaslab prefetching
+ * while we attempt to unload the spa.
*/
if (spa->spa_root_vdev != NULL) {
- spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
- for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++)
- vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
- spa_config_exit(spa, SCL_ALL, spa);
+ for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
+ vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
+ if (vc->vdev_mg != NULL)
+ taskq_wait(vc->vdev_mg->mg_taskq);
+ }
}
if (spa->spa_mmp.mmp_thread)
@@ -1517,13 +1585,14 @@ spa_unload(spa_t *spa)
}
ddt_unload(spa);
+ spa_unload_log_sm_metadata(spa);
/*
* Drop and purge level 2 cache
*/
spa_l2cache_drop(spa);
- for (i = 0; i < spa->spa_spares.sav_count; i++)
+ for (int i = 0; i < spa->spa_spares.sav_count; i++)
vdev_free(spa->spa_spares.sav_vdevs[i]);
if (spa->spa_spares.sav_vdevs) {
kmem_free(spa->spa_spares.sav_vdevs,
@@ -1536,7 +1605,7 @@ spa_unload(spa_t *spa)
}
spa->spa_spares.sav_count = 0;
- for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
vdev_free(spa->spa_l2cache.sav_vdevs[i]);
}
@@ -3723,6 +3792,13 @@ spa_ld_load_vdev_metadata(spa_t *spa)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
}
+ error = spa_ld_log_spacemaps(spa);
+ if (error != 0) {
+ spa_load_failed(spa, "spa_ld_log_spacemaps failed [error=%d]",
+ error);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
+ }
+
/*
* Propagate the leaf DTLs we just loaded all the way up the vdev tree.
*/
@@ -5864,7 +5940,7 @@ spa_reset(char *pool)
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
- uint64_t txg, id;
+ uint64_t txg;
int error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd, *tvd;
@@ -5939,19 +6015,9 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
}
for (int c = 0; c < vd->vdev_children; c++) {
-
- /*
- * Set the vdev id to the first hole, if one exists.
- */
- for (id = 0; id < rvd->vdev_children; id++) {
- if (rvd->vdev_child[id]->vdev_ishole) {
- vdev_free(rvd->vdev_child[id]);
- break;
- }
- }
tvd = vd->vdev_child[c];
vdev_remove_child(vd, tvd);
- tvd->vdev_id = id;
+ tvd->vdev_id = rvd->vdev_children;
vdev_add_child(rvd, tvd);
vdev_config_dirty(tvd);
}
@@ -7597,6 +7663,18 @@ spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
if (spa_sync_pass(spa) != 1)
return;
+ /*
+ * Note:
+ * If the log space map feature is active, we stop deferring
+ * frees to the next TXG, so running this function is
+ * effectively a no-op: spa_deferred_bpobj should not
+ * have any entries.
+ *
+ * That said, we run this function anyway (instead of returning
+ * immediately) for the edge-case scenario where we just
+ * activated the log space map feature in this TXG but we have
+ * deferred frees from the previous TXG.
+ */
zio_t *zio = zio_root(spa, NULL, NULL, 0);
VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
spa_free_sync_cb, zio, tx), ==, 0);
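The edge case in the note above can be made concrete; an illustrative timeline (the txg numbers are invented for the example):

/*
 * txg 100: feature inactive; frees are deferred into
 *          spa_deferred_bpobj as usual.
 * txg 101: SPA_FEATURE_LOG_SPACEMAP becomes active.
 * txg 101: spa_sync_deferred_frees() must still drain the
 *          txg-100 entries, so it cannot return early just
 *          because the feature is now active.
 */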
@@ -8187,7 +8265,14 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
spa_errlog_sync(spa, txg);
dsl_pool_sync(dp, txg);
- if (pass < zfs_sync_pass_deferred_free) {
+ if (pass < zfs_sync_pass_deferred_free ||
+ spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
+ /*
+ * If the log space map feature is active, we don't
+ * care about deferred frees and the deferred bpobj,
+ * as the log space map effectively gives the same
+ * benefit (i.e. we append only to one object).
+ */
spa_sync_frees(spa, free_bpl, tx);
} else {
/*
@@ -8204,6 +8289,8 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
svr_sync(spa, tx);
spa_sync_upgrades(spa, tx);
+ spa_flush_metaslabs(spa, tx);
+
vdev_t *vd = NULL;
while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
!= NULL)
@@ -8453,6 +8540,7 @@ spa_sync(spa_t *spa, uint64_t txg)
while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
!= NULL)
vdev_sync_done(vd, txg);
+ spa_sync_close_syncing_log_sm(spa);
spa_update_dspace(spa);
@@ -8639,6 +8727,21 @@ spa_has_active_shared_spare(spa_t *spa)
return (B_FALSE);
}
+uint64_t
+spa_total_metaslabs(spa_t *spa)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ uint64_t m = 0;
+ for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ if (!vdev_is_concrete(vd))
+ continue;
+ m += vd->vdev_ms_count;
+ }
+ return (m);
+}
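spa_total_metaslabs() is presumably consumed by the log spacemap code to scale the limit on unflushed log blocks with pool size; a hedged usage sketch (the tunable name and the percentage semantics are assumptions):

/* Assumed tunable: allowed log blocks as a percent of metaslab count. */
extern unsigned long zfs_unflushed_log_block_pct;

/*
 * Hypothetical caller: derive the block limit recorded in
 * spa_unflushed_stats.sus_blocklimit from the metaslab count.
 */
static uint64_t
spa_log_sm_blocklimit_sketch(spa_t *spa)
{
	return (spa_total_metaslabs(spa) *
	    zfs_unflushed_log_block_pct / 100);
}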
+
sysevent_t *
spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
{