summaryrefslogtreecommitdiffstats
path: root/module/zfs/spa.c
diff options
context:
space:
mode:
authorGeorge Wilson <[email protected]>2013-10-01 13:25:53 -0800
committerBrian Behlendorf <[email protected]>2014-07-22 09:39:16 -0700
commit93cf20764a1be64a603020f54b45200e37b3877e (patch)
treeb0db8d60368de34cdbd4eccc9ee98d1110beb15e /module/zfs/spa.c
parent1be627f5c28a355bcd49e4e097114c13fae7731b (diff)
Illumos #4101, #4102, #4103, #4105, #4106
4101 metaslab_debug should allow for fine-grained control 4102 space_maps should store more information about themselves 4103 space map object blocksize should be increased 4105 removing a mirrored log device results in a leaked object 4106 asynchronously load metaslab Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: Adam Leventhal <[email protected]> Reviewed by: Sebastien Roy <[email protected]> Approved by: Garrett D'Amore <[email protected]> Prior to this patch, space_maps were preferred solely based on the amount of free space left in each. Unfortunately, this heuristic didn't contain any information about the make-up of that free space, which meant we could keep preferring and loading a highly fragmented space map that wouldn't actually have enough contiguous space to satisfy the allocation; then unloading that space_map and repeating the process. This change modifies the space_maps to store additional information about the contiguous space in the space_map, so that we can use this information to make a better decision about which space_map to load. This requires reallocating all space_map objects to increase their bonus buffer sizes enough to fit the new metadata. The above feature can be enabled via a new feature flag introduced by this change: com.delphix:spacemap_histogram In addition to the above, this patch allows the space_map block size to be increased. Currently the block size is set to be 4K in size, which has certain implications including the following: * 4K sector devices will not see any compression benefit * large space_maps require more metadata on-disk * large space_maps require more time to load (typically random reads) Now the space_map block size can adjust as needed up to the maximum size set via the space_map_max_blksz variable. A bug was fixed which resulted in potentially leaking an object when removing a mirrored log device. 
The previous logic for vdev_remove() did not correctly deal with removing top-level vdevs that are interior vdevs (i.e. mirrors). The problem would occur when removing a mirrored log device, and result in the DTL space map object being leaked, because top-level vdevs don't have DTL space map objects associated with them. References: https://www.illumos.org/issues/4101 https://www.illumos.org/issues/4102 https://www.illumos.org/issues/4103 https://www.illumos.org/issues/4105 https://www.illumos.org/issues/4106 https://github.com/illumos/illumos-gate/commit/0713e23 Porting notes: A handful of kmem_alloc() calls were converted to kmem_zalloc(). Also, the KM_PUSHPAGE and TQ_PUSHPAGE flags were used as necessary. Ported-by: Tim Chase <[email protected]> Signed-off-by: Prakash Surya <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #2488
Diffstat (limited to 'module/zfs/spa.c')
-rw-r--r--module/zfs/spa.c45
1 files changed, 24 insertions, 21 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 0d0499c63..397e9e627 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1259,6 +1259,15 @@ spa_unload(spa_t *spa)
bpobj_close(&spa->spa_deferred_bpobj);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+
+ /*
+ * Close all vdevs.
+ */
+ if (spa->spa_root_vdev)
+ vdev_free(spa->spa_root_vdev);
+ ASSERT(spa->spa_root_vdev == NULL);
+
/*
* Close the dsl pool.
*/
@@ -1270,20 +1279,12 @@ spa_unload(spa_t *spa)
ddt_unload(spa);
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
/*
* Drop and purge level 2 cache
*/
spa_l2cache_drop(spa);
- /*
- * Close all vdevs.
- */
- if (spa->spa_root_vdev)
- vdev_free(spa->spa_root_vdev);
- ASSERT(spa->spa_root_vdev == NULL);
-
for (i = 0; i < spa->spa_spares.sav_count; i++)
vdev_free(spa->spa_spares.sav_vdevs[i]);
if (spa->spa_spares.sav_vdevs) {
@@ -4568,7 +4569,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
vdev_dirty(tvd, VDD_DTL, newvd, txg);
/*
- * Restart the resilver
+ * Schedule the resilver to restart in the future. We do this to
+ * ensure that dmu_sync-ed blocks have been stitched into the
+ * respective datasets.
*/
dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
@@ -5193,7 +5196,7 @@ spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
ASSERT0(vd->vdev_stat.vs_alloc);
txg = spa_vdev_config_enter(spa);
vd->vdev_removing = B_TRUE;
- vdev_dirty(vd, 0, NULL, txg);
+ vdev_dirty_leaves(vd, VDD_DTL, txg);
vdev_config_dirty(vd);
spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
@@ -5965,7 +5968,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
ASSERT(zpool_prop_feature(nvpair_name(elem)));
fname = strchr(nvpair_name(elem), '@') + 1;
- VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
+ VERIFY0(zfeature_lookup_name(fname, &feature));
spa_feature_enable(spa, feature, tx);
spa_history_log_internal(spa, "set", tx,
@@ -5973,7 +5976,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
break;
case ZPOOL_PROP_VERSION:
- VERIFY(nvpair_value_uint64(elem, &intval) == 0);
+ intval = fnvpair_value_uint64(elem);
/*
* The version is synced seperatly before other
* properties and should be correct by now.
@@ -5997,7 +6000,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
*/
break;
case ZPOOL_PROP_COMMENT:
- VERIFY(nvpair_value_string(elem, &strval) == 0);
+ strval = fnvpair_value_string(elem);
if (spa->spa_comment != NULL)
spa_strfree(spa->spa_comment);
spa->spa_comment = spa_strdup(strval);
@@ -6029,23 +6032,23 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
if (nvpair_type(elem) == DATA_TYPE_STRING) {
ASSERT(proptype == PROP_TYPE_STRING);
- VERIFY(nvpair_value_string(elem, &strval) == 0);
- VERIFY(zap_update(mos,
+ strval = fnvpair_value_string(elem);
+ VERIFY0(zap_update(mos,
spa->spa_pool_props_object, propname,
- 1, strlen(strval) + 1, strval, tx) == 0);
+ 1, strlen(strval) + 1, strval, tx));
spa_history_log_internal(spa, "set", tx,
"%s=%s", nvpair_name(elem), strval);
} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
- VERIFY(nvpair_value_uint64(elem, &intval) == 0);
+ intval = fnvpair_value_uint64(elem);
if (proptype == PROP_TYPE_INDEX) {
const char *unused;
- VERIFY(zpool_prop_index_to_string(
- prop, intval, &unused) == 0);
+ VERIFY0(zpool_prop_index_to_string(
+ prop, intval, &unused));
}
- VERIFY(zap_update(mos,
+ VERIFY0(zap_update(mos,
spa->spa_pool_props_object, propname,
- 8, 1, &intval, tx) == 0);
+ 8, 1, &intval, tx));
spa_history_log_internal(spa, "set", tx,
"%s=%lld", nvpair_name(elem), intval);
} else {