aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: George Wilson <[email protected]> 2017-04-07 13:50:18 -0700
committer: Brian Behlendorf <[email protected]> 2017-04-09 16:12:35 -0700
commit: 3b7f360c9606079c518829c5bd82fe8f8a89e3c0 (patch)
tree: e5dad1e8833519127ebee8597ea8f1ec746fbb2b
parent: 4c3c6b6c73e31c789a2c0ce2a70912c077f15421 (diff)
OpenZFS 8023 - Panic destroying a metaslab deferred range tree
Authored by: George Wilson <[email protected]> Approved by: Dan McDonald <[email protected]> Reviewed by: Brad Lewis <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Dan Kimmel <[email protected]> Reviewed by: Saso Kiselkov <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: George Melikov <[email protected]> Ported-by: Giuseppe Di Natale <[email protected]> We don't want to dirty any data when we're in the final txgs of the pool export logic. This change introduces checks to make sure that no data is dirtied after a certain point. It also addresses the culprit of this specific bug – the space map cannot be upgraded when we're in the final stages of pool export. If we encounter a space map that wants to be upgraded in this phase, then we simply ignore the request as it will get retried the next time we set the fragmentation metric on that metaslab. OpenZFS-issue: https://www.illumos.org/issues/8023 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/2ef00f5 Closes #5991
-rw-r--r-- include/sys/spa.h 1
-rw-r--r-- module/zfs/dbuf.c 1
-rw-r--r-- module/zfs/metaslab.c 23
-rw-r--r-- module/zfs/spa_misc.c 10
-rw-r--r-- module/zfs/space_map.c 10
5 files changed, 35 insertions, 10 deletions
diff --git a/include/sys/spa.h b/include/sys/spa.h
index b6e124faa..e2f27ed60 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -802,6 +802,7 @@ extern uint64_t spa_load_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_syncing_txg(spa_t *spa);
+extern uint64_t spa_final_dirty_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
extern spa_load_state_t spa_load_state(spa_t *spa);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index ce99fd54c..096d18358 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1639,6 +1639,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* this assertion only if we're not already dirty.
*/
os = dn->dn_objset;
+ VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(os->os_spa));
#ifdef DEBUG
if (dn->dn_objset->os_dsl_dataset != NULL)
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG);
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 3fd7f9049..5e413c065 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -1582,15 +1582,22 @@ metaslab_set_fragmentation(metaslab_t *msp)
* so that we upgrade next time we encounter it.
*/
if (msp->ms_sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) {
+ uint64_t txg = spa_syncing_txg(spa);
vdev_t *vd = msp->ms_group->mg_vd;
- if (spa_writeable(vd->vdev_spa)) {
- uint64_t txg = spa_syncing_txg(spa);
-
+ /*
+ * If we've reached the final dirty txg, then we must
+ * be shutting down the pool. We don't want to dirty
+ * any data past this point so skip setting the condense
+ * flag. We can retry this action the next time the pool
+ * is imported.
+ */
+ if (spa_writeable(spa) && txg < spa_final_dirty_txg(spa)) {
msp->ms_condense_wanted = B_TRUE;
vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
spa_dbgmsg(spa, "txg %llu, requesting force condense: "
- "msp %p, vd %p", txg, msp, vd);
+ "ms_id %llu, vdev_id %llu", txg, msp->ms_id,
+ vd->vdev_id);
}
msp->ms_fragmentation = ZFS_FRAG_INVALID;
return;
@@ -2217,13 +2224,17 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
/*
* Normally, we don't want to process a metaslab if there
* are no allocations or frees to perform. However, if the metaslab
- * is being forced to condense we need to let it through.
+ * is being forced to condense and it's loaded, we need to let it
+ * through.
*/
if (range_tree_space(alloctree) == 0 &&
range_tree_space(msp->ms_freeingtree) == 0 &&
- !msp->ms_condense_wanted)
+ !(msp->ms_loaded && msp->ms_condense_wanted))
return;
+
+ VERIFY(txg <= spa_final_dirty_txg(spa));
+
/*
* The only state that can actually be changing concurrently with
* metaslab_sync() is the metaslab's ms_tree. No other thread can
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index ea16e69f0..39a1d7d6a 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1596,6 +1596,16 @@ spa_syncing_txg(spa_t *spa)
return (spa->spa_syncing_txg);
}
+/*
+ * Return the last txg in which data may still be dirtied, computed as
+ * spa_final_txg - TXG_DEFER_SIZE; later txgs only clear deferred frees.
+ */
+uint64_t
+spa_final_dirty_txg(spa_t *spa)
+{
+ return (spa->spa_final_txg - TXG_DEFER_SIZE);
+}
+
pool_state_t
spa_state(spa_t *spa)
{
diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c
index 961f23b0e..a8f88b671 100644
--- a/module/zfs/space_map.c
+++ b/module/zfs/space_map.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -411,6 +411,7 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
ASSERT(dmu_tx_is_syncing(tx));
+ VERIFY3U(dmu_tx_get_txg(tx), <=, spa_final_dirty_txg(spa));
dmu_object_info_from_db(sm->sm_dbuf, &doi);
@@ -425,9 +426,10 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
doi.doi_data_block_size != space_map_blksz) {
- zfs_dbgmsg("txg %llu, spa %s, reallocating: "
- "old bonus %llu, old blocksz %u", dmu_tx_get_txg(tx),
- spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
+ zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating "
+ "object[%llu]: old bonus %u, old blocksz %u",
+ dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
+ doi.doi_bonus_size, doi.doi_data_block_size);
space_map_free(sm, tx);
dmu_buf_rele(sm->sm_dbuf, sm);