summary refs log tree commit diff stats
path: root/module
diff options
context:
space:
mode:
author Matthew Ahrens <[email protected]> 2014-09-13 15:40:05 +0200
committer Brian Behlendorf <[email protected]> 2014-10-23 15:30:32 -0700
commit 9635861742c6ac334877f944e17948aa11c2e57a (patch)
tree 2fe4744c568f26163707363d6c6c3cb22eb627ae /module
parent b02fe35d3743c92e26f4158938e94093a2309a92 (diff)
Illumos 5164-5165 - space map fixes
5164 space_map_max_blksz causes panic, does not work
5165 zdb fails assertion when run on pool with recently-enabled space map_histogram feature

Reviewed by: Christopher Siden <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Saso Kiselkov <[email protected]>
Approved by: Dan McDonald <[email protected]>

References:
https://www.illumos.org/issues/5164
https://www.illumos.org/issues/5165
https://github.com/illumos/illumos-gate/commit/b1be289

Porting Notes:
The metaslab_fragmentation() hunk was dropped from this patch because it was already resolved by commit 8b0a084.
The comment modified in metaslab.c was updated to use the correct variable name, space_map_blksz. The upstream commit incorrectly used space_map_blksize.

Ported by: Turbo Fredriksson <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2697
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/metaslab.c 14
-rw-r--r-- module/zfs/space_map.c 128
2 files changed, 44 insertions, 98 deletions
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index d41a570c5..76410ec41 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -67,7 +67,7 @@ int zfs_condense_pct = 200;
/*
* Condensing a metaslab is not guaranteed to actually reduce the amount of
* space used on disk. In particular, a space map uses data in increments of
- * MAX(1 << ashift, SPACE_MAP_INITIAL_BLOCKSIZE), so a metaslab might use the
+ * MAX(1 << ashift, space_map_blksz), so a metaslab might use the
* same number of blocks after condensing. Since the goal of condensing is to
* reduce the number of IOPs required to read the space map, we only want to
* condense when we can be sure we will reduce the number of blocks used by the
@@ -1864,6 +1864,15 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
mutex_enter(&msp->ms_lock);
+ /*
+ * Note: metaslab_condense() clears the space_map's histogram.
+ * Therefore we must verify and remove this histogram before
+ * condensing.
+ */
+ metaslab_group_histogram_verify(mg);
+ metaslab_class_histogram_verify(mg->mg_class);
+ metaslab_group_histogram_remove(mg, msp);
+
if (msp->ms_loaded && spa_sync_pass(spa) == 1 &&
metaslab_should_condense(msp)) {
metaslab_condense(msp, txg, tx);
@@ -1872,9 +1881,6 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
space_map_write(msp->ms_sm, *freetree, SM_FREE, tx);
}
- metaslab_group_histogram_verify(mg);
- metaslab_class_histogram_verify(mg->mg_class);
- metaslab_group_histogram_remove(mg, msp);
if (msp->ms_loaded) {
/*
* When the space map is loaded, we have an accurate
diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c
index a5bb9a617..0ceccf669 100644
--- a/module/zfs/space_map.c
+++ b/module/zfs/space_map.c
@@ -38,15 +38,12 @@
#include <sys/zfeature.h>
/*
- * This value controls how the space map's block size is allowed to grow.
- * If the value is set to the same size as SPACE_MAP_INITIAL_BLOCKSIZE then
- * the space map block size will remain fixed. Setting this value to something
- * greater than SPACE_MAP_INITIAL_BLOCKSIZE will allow the space map to
- * increase its block size as needed. To maintain backwards compatibilty the
- * space map's block size must be a power of 2 and SPACE_MAP_INITIAL_BLOCKSIZE
- * or larger.
+ * The data for a given space map can be kept on blocks of any size.
+ * Larger blocks entail fewer i/o operations, but they also cause the
+ * DMU to keep more data in-core, and also to waste more i/o bandwidth
+ * when only a few blocks have changed since the last transaction group.
*/
-int space_map_max_blksz = (1 << 12);
+int space_map_blksz = (1 << 12);
/*
* Load the space map disk into the specified range tree. Segments of maptype
@@ -236,58 +233,6 @@ space_map_entries(space_map_t *sm, range_tree_t *rt)
return (entries);
}
-void
-space_map_set_blocksize(space_map_t *sm, uint64_t size, dmu_tx_t *tx)
-{
- uint32_t blksz;
- u_longlong_t blocks;
-
- ASSERT3U(sm->sm_blksz, !=, 0);
- ASSERT3U(space_map_object(sm), !=, 0);
- ASSERT(sm->sm_dbuf != NULL);
- VERIFY(ISP2(space_map_max_blksz));
-
- if (sm->sm_blksz >= space_map_max_blksz)
- return;
-
- /*
- * The object contains more than one block so we can't adjust
- * its size.
- */
- if (sm->sm_phys->smp_objsize > sm->sm_blksz)
- return;
-
- if (size > sm->sm_blksz) {
- uint64_t newsz;
-
- /*
- * Older software versions treat space map blocks as fixed
- * entities. The DMU is capable of handling different block
- * sizes making it possible for us to increase the
- * block size and maintain backwards compatibility. The
- * caveat is that the new block sizes must be a
- * power of 2 so that old software can append to the file,
- * adding more blocks. The block size can grow until it
- * reaches space_map_max_blksz.
- */
- newsz = ISP2(size) ? size : 1ULL << highbit64(size);
- if (newsz > space_map_max_blksz)
- newsz = space_map_max_blksz;
-
- VERIFY0(dmu_object_set_blocksize(sm->sm_os,
- space_map_object(sm), newsz, 0, tx));
- dmu_object_size_from_db(sm->sm_dbuf, &blksz, &blocks);
-
- zfs_dbgmsg("txg %llu, spa %s, increasing blksz from %d to %d",
- dmu_tx_get_txg(tx), spa_name(dmu_objset_spa(sm->sm_os)),
- sm->sm_blksz, blksz);
-
- VERIFY3U(newsz, ==, blksz);
- VERIFY3U(sm->sm_blksz, <, blksz);
- sm->sm_blksz = blksz;
- }
-}
-
/*
* Note: space_map_write() will drop sm_lock across dmu_write() calls.
*/
@@ -301,7 +246,7 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
range_seg_t *rs;
uint64_t size, total, rt_space, nodes;
uint64_t *entry, *entry_map, *entry_map_end;
- uint64_t newsz, expected_entries, actual_entries = 1;
+ uint64_t expected_entries, actual_entries = 1;
ASSERT(MUTEX_HELD(rt->rt_lock));
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
@@ -327,13 +272,6 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
expected_entries = space_map_entries(sm, rt);
- /*
- * Calculate the new size for the space map on-disk and see if
- * we can grow the block size to accommodate the new size.
- */
- newsz = sm->sm_phys->smp_objsize + expected_entries * sizeof (uint64_t);
- space_map_set_blocksize(sm, newsz, tx);
-
entry_map = zio_buf_alloc(sm->sm_blksz);
entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
entry = entry_map;
@@ -465,46 +403,48 @@ space_map_close(space_map_t *sm)
kmem_free(sm, sizeof (*sm));
}
-static void
-space_map_reallocate(space_map_t *sm, dmu_tx_t *tx)
-{
- ASSERT(dmu_tx_is_syncing(tx));
-
- space_map_free(sm, tx);
- dmu_buf_rele(sm->sm_dbuf, sm);
-
- sm->sm_object = space_map_alloc(sm->sm_os, tx);
- VERIFY0(space_map_open_impl(sm));
-}
-
void
space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
{
objset_t *os = sm->sm_os;
spa_t *spa = dmu_objset_spa(os);
dmu_object_info_t doi;
- int bonuslen;
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
ASSERT(dmu_tx_is_syncing(tx));
- VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
dmu_object_info_from_db(sm->sm_dbuf, &doi);
- if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
- bonuslen = sizeof (space_map_phys_t);
- ASSERT3U(bonuslen, <=, dmu_bonus_max());
- } else {
- bonuslen = SPACE_MAP_SIZE_V0;
- }
-
- if (bonuslen != doi.doi_bonus_size ||
- doi.doi_data_block_size != SPACE_MAP_INITIAL_BLOCKSIZE) {
+ /*
+ * If the space map has the wrong bonus size (because
+ * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
+ * the wrong block size (because space_map_blksz has changed),
+ * free and re-allocate its object with the updated sizes.
+ *
+ * Otherwise, just truncate the current object.
+ */
+ if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
+ doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
+ doi.doi_data_block_size != space_map_blksz) {
zfs_dbgmsg("txg %llu, spa %s, reallocating: "
"old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
- space_map_reallocate(sm, tx);
- VERIFY3U(sm->sm_blksz, ==, SPACE_MAP_INITIAL_BLOCKSIZE);
+
+ space_map_free(sm, tx);
+ dmu_buf_rele(sm->sm_dbuf, sm);
+
+ sm->sm_object = space_map_alloc(sm->sm_os, tx);
+ VERIFY0(space_map_open_impl(sm));
+ } else {
+ VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
+
+ /*
+ * If the spacemap is reallocated, its histogram
+ * will be reset. Do the same in the common case so that
+ * bugs related to the uncommon case do not go unnoticed.
+ */
+ bzero(sm->sm_phys->smp_histogram,
+ sizeof (sm->sm_phys->smp_histogram));
}
dmu_buf_will_dirty(sm->sm_dbuf, tx);
@@ -543,7 +483,7 @@ space_map_alloc(objset_t *os, dmu_tx_t *tx)
}
object = dmu_object_alloc(os,
- DMU_OT_SPACE_MAP, SPACE_MAP_INITIAL_BLOCKSIZE,
+ DMU_OT_SPACE_MAP, space_map_blksz,
DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
return (object);