summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Paul Dagnelie <[email protected]> 2019-06-06 19:10:43 -0700
committer Brian Behlendorf <[email protected]> 2019-06-06 19:10:43 -0700
commit 893a6d62c1895f3e3eeb660b048236571995a564 (patch)
tree 051154a79d6a6cc07ba4e93ed60a1b6a7f5b0763
parent 876d76be3455ba6aa8d1567203847d8c012d05c9 (diff)
Allow metaslab to be unloaded even when not freed from
On large systems, the memory used by loaded metaslabs can become a concern. While range trees are a fairly efficient data structure, on heavily fragmented pools they can still consume a significant amount of memory. This problem is amplified when we fail to unload metaslabs that we aren't using. Currently, we only unload a metaslab during metaslab_sync_done; in order for that function to be called on a given metaslab in a given txg, we have to have dirtied that metaslab in that txg. If the dirtying was the result of an allocation, we wouldn't be unloading it (since it wouldn't be 8 txgs since it was selected), so in effect we only unload a metaslab during txgs where it's being freed from.

We move the unload logic from sync_done to a new function, and call that function on all metaslabs in a given vdev during vdev_sync_done().

Reviewed-by: Richard Elling <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
Closes #8837
-rw-r--r-- include/sys/metaslab.h | 1
-rw-r--r-- module/zfs/metaslab.c | 47
-rw-r--r-- module/zfs/vdev.c | 14
3 files changed, 40 insertions, 22 deletions
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index 2790d06c7..330902529 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -50,6 +50,7 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
void metaslab_fini(metaslab_t *);
int metaslab_load(metaslab_t *);
+void metaslab_potentially_unload(metaslab_t *, uint64_t);
void metaslab_unload(metaslab_t *);
uint64_t metaslab_allocated_space(metaslab_t *);
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index d1d5a243f..41cbaad5f 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
@@ -2949,6 +2949,30 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
dmu_tx_commit(tx);
}
+void
+metaslab_potentially_unload(metaslab_t *msp, uint64_t txg)
+{
+ /*
+ * If the metaslab is loaded and we've not tried to load or allocate
+ * from it in 'metaslab_unload_delay' txgs, then unload it.
+ */
+ if (msp->ms_loaded &&
+ msp->ms_disabled == 0 &&
+ msp->ms_selected_txg + metaslab_unload_delay < txg) {
+ for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+ VERIFY0(range_tree_space(
+ msp->ms_allocating[(txg + t) & TXG_MASK]));
+ }
+ if (msp->ms_allocator != -1) {
+ metaslab_passivate(msp, msp->ms_weight &
+ ~METASLAB_ACTIVE_MASK);
+ }
+
+ if (!metaslab_debug_unload)
+ metaslab_unload(msp);
+ }
+}
+
/*
* Called after a transaction group has completely synced to mark
* all of the metaslab's free space as usable.
@@ -3086,27 +3110,6 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
*/
metaslab_recalculate_weight_and_sort(msp);
- /*
- * If the metaslab is loaded and we've not tried to load or allocate
- * from it in 'metaslab_unload_delay' txgs, then unload it.
- */
- if (msp->ms_loaded &&
- msp->ms_disabled == 0 &&
- msp->ms_selected_txg + metaslab_unload_delay < txg) {
-
- for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
- VERIFY0(range_tree_space(
- msp->ms_allocating[(txg + t) & TXG_MASK]));
- }
- if (msp->ms_allocator != -1) {
- metaslab_passivate(msp, msp->ms_weight &
- ~METASLAB_ACTIVE_MASK);
- }
-
- if (!metaslab_debug_unload)
- metaslab_unload(msp);
- }
-
ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
ASSERT0(range_tree_space(msp->ms_freeing));
ASSERT0(range_tree_space(msp->ms_freed));
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 1c4812cd8..81ef87e25 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -3234,6 +3234,20 @@ vdev_sync_done(vdev_t *vd, uint64_t txg)
!= NULL)
metaslab_sync_done(msp, txg);
+ /*
+ * Because this function is only called on dirty vdevs, it's possible
+ * we won't consider all metaslabs for unloading on every
+ * txg. However, unless the system is largely idle it is likely that
+ * we will dirty all vdevs within a few txgs.
+ */
+ for (int i = 0; i < vd->vdev_ms_count; i++) {
+ msp = vd->vdev_ms[i];
+ mutex_enter(&msp->ms_lock);
+ if (msp->ms_sm != NULL)
+ metaslab_potentially_unload(msp, txg);
+ mutex_exit(&msp->ms_lock);
+ }
+
if (reassess)
metaslab_sync_reassess(vd->vdev_mg);
}