diff options
author | George Wilson <[email protected]> | 2013-10-01 13:25:53 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-07-22 09:39:16 -0700 |
commit | 93cf20764a1be64a603020f54b45200e37b3877e (patch) | |
tree | b0db8d60368de34cdbd4eccc9ee98d1110beb15e /module/zfs/spa.c | |
parent | 1be627f5c28a355bcd49e4e097114c13fae7731b (diff) |
Illumos #4101, #4102, #4103, #4105, #4106
4101 metaslab_debug should allow for fine-grained control
4102 space_maps should store more information about themselves
4103 space map object blocksize should be increased
4105 removing a mirrored log device results in a leaked object
4106 asynchronously load metaslab
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Sebastien Roy <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
Prior to this patch, space_maps were preferred solely based on the
amount of free space left in each. Unfortunately, this heuristic didn't
contain any information about the make-up of that free space, which
meant we could keep preferring and loading a highly fragmented space map
that wouldn't actually have enough contiguous space to satisfy the
allocation; then unloading that space_map and repeating the process.
This change modifies the space_map's to store additional information
about the contiguous space in the space_map, so that we can use this
information to make a better decision about which space_map to load.
This requires reallocating all space_map objects to increase their
bonus buffer size sizes enough to fit the new metadata.
The above feature can be enabled via a new feature flag introduced by
this change: com.delphix:spacemap_histogram
In addition to the above, this patch allows the space_map block size to
be increase. Currently the block size is set to be 4K in size, which has
certain implications including the following:
* 4K sector devices will not see any compression benefit
* large space_maps require more metadata on-disk
* large space_maps require more time to load (typically random reads)
Now the space_map block size can adjust as needed up to the maximum size
set via the space_map_max_blksz variable.
A bug was fixed which resulted in potentially leaking an object when
removing a mirrored log device. The previous logic for vdev_remove() did
not deal with removing top-level vdevs that are interior vdevs (i.e.
mirror) correctly. The problem would occur when removing a mirrored log
device, and result in the DTL space map object being leaked; because
top-level vdevs don't have DTL space map objects associated with them.
References:
https://www.illumos.org/issues/4101
https://www.illumos.org/issues/4102
https://www.illumos.org/issues/4103
https://www.illumos.org/issues/4105
https://www.illumos.org/issues/4106
https://github.com/illumos/illumos-gate/commit/0713e23
Porting notes:
A handful of kmem_alloc() calls were converted to kmem_zalloc(). Also,
the KM_PUSHPAGE and TQ_PUSHPAGE flags were used as necessary.
Ported-by: Tim Chase <[email protected]>
Signed-off-by: Prakash Surya <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2488
Diffstat (limited to 'module/zfs/spa.c')
-rw-r--r-- | module/zfs/spa.c | 45 |
1 files changed, 24 insertions, 21 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 0d0499c63..397e9e627 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1259,6 +1259,15 @@ spa_unload(spa_t *spa) bpobj_close(&spa->spa_deferred_bpobj); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + + /* + * Close all vdevs. + */ + if (spa->spa_root_vdev) + vdev_free(spa->spa_root_vdev); + ASSERT(spa->spa_root_vdev == NULL); + /* * Close the dsl pool. */ @@ -1270,20 +1279,12 @@ spa_unload(spa_t *spa) ddt_unload(spa); - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Drop and purge level 2 cache */ spa_l2cache_drop(spa); - /* - * Close all vdevs. - */ - if (spa->spa_root_vdev) - vdev_free(spa->spa_root_vdev); - ASSERT(spa->spa_root_vdev == NULL); - for (i = 0; i < spa->spa_spares.sav_count; i++) vdev_free(spa->spa_spares.sav_vdevs[i]); if (spa->spa_spares.sav_vdevs) { @@ -4568,7 +4569,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) vdev_dirty(tvd, VDD_DTL, newvd, txg); /* - * Restart the resilver + * Schedule the resilver to restart in the future. We do this to + * ensure that dmu_sync-ed blocks have been stitched into the + * respective datasets. */ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); @@ -5193,7 +5196,7 @@ spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) ASSERT0(vd->vdev_stat.vs_alloc); txg = spa_vdev_config_enter(spa); vd->vdev_removing = B_TRUE; - vdev_dirty(vd, 0, NULL, txg); + vdev_dirty_leaves(vd, VDD_DTL, txg); vdev_config_dirty(vd); spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); @@ -5965,7 +5968,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx) ASSERT(zpool_prop_feature(nvpair_name(elem))); fname = strchr(nvpair_name(elem), '@') + 1; - VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); + VERIFY0(zfeature_lookup_name(fname, &feature)); spa_feature_enable(spa, feature, tx); spa_history_log_internal(spa, "set", tx, @@ -5973,7 +5976,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx) break; case ZPOOL_PROP_VERSION: - VERIFY(nvpair_value_uint64(elem, &intval) == 0); + intval = fnvpair_value_uint64(elem); /* * The version is synced seperatly before other * properties and should be correct by now. @@ -5997,7 +6000,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx) */ break; case ZPOOL_PROP_COMMENT: - VERIFY(nvpair_value_string(elem, &strval) == 0); + strval = fnvpair_value_string(elem); if (spa->spa_comment != NULL) spa_strfree(spa->spa_comment); spa->spa_comment = spa_strdup(strval); @@ -6029,23 +6032,23 @@ spa_sync_props(void *arg, dmu_tx_t *tx) if (nvpair_type(elem) == DATA_TYPE_STRING) { ASSERT(proptype == PROP_TYPE_STRING); - VERIFY(nvpair_value_string(elem, &strval) == 0); - VERIFY(zap_update(mos, + strval = fnvpair_value_string(elem); + VERIFY0(zap_update(mos, spa->spa_pool_props_object, propname, - 1, strlen(strval) + 1, strval, tx) == 0); + 1, strlen(strval) + 1, strval, tx)); spa_history_log_internal(spa, "set", tx, "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { - VERIFY(nvpair_value_uint64(elem, &intval) == 0); + intval = fnvpair_value_uint64(elem); if (proptype == PROP_TYPE_INDEX) { const char *unused; - VERIFY(zpool_prop_index_to_string( - prop, intval, &unused) == 0); + VERIFY0(zpool_prop_index_to_string( + prop, intval, &unused)); } - VERIFY(zap_update(mos, + VERIFY0(zap_update(mos, spa->spa_pool_props_object, propname, - 8, 1, &intval, tx) == 0); + 8, 1, &intval, tx)); spa_history_log_internal(spa, "set", tx, "%s=%lld", nvpair_name(elem), intval); } else { |