diff options
author | Paul Dagnelie <[email protected]> | 2019-08-29 10:20:36 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-08-29 10:20:36 -0700 |
commit | eef0f4d84ec8e33b25792485f1f915efeb95af77 (patch) | |
tree | 3c0e4119bd252c7128efc5247694d6bcd61d8e8f | |
parent | e6cebbf86e769eba7c0e7b8834985682d1b38e7e (diff) |
Keep more metaslabs loaded
With the other metaslab changes loaded onto a system, we can
significantly reduce the memory usage of each loaded metaslab and
unload them on demand if there is memory pressure. However, none
of those changes actually result in us keeping more metaslabs loaded.
If we don't keep more metaslabs loaded, we will still have to wait
for demand-loading to finish when no loaded metaslab can satisfy our
allocation, which can cause ZIL performance issues. In addition,
performance is traditionally measured by IOs per unit time, while
unloading is currently done on a txg-count basis. Txgs can take a
widely varying range of times, from tenths of a second to several
seconds. This can result in confusing, hard-to-predict behavior.
This change simply adds a time-based component to metaslab unloading.
A metaslab will remain loaded for ten minutes and 32 txgs (by default)
after it was last used, unless it is evicted due to memory pressure.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
External-issue: DLPX-65016
External-issue: DLPX-65047
Closes #9197
-rw-r--r-- | include/sys/metaslab_impl.h | 1 | ||||
-rw-r--r-- | man/man5/zfs-module-parameters.5 | 30 | ||||
-rw-r--r-- | module/zfs/metaslab.c | 69 |
3 files changed, 69 insertions, 31 deletions
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 07f07c02d..3ce39183e 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -489,6 +489,7 @@ struct metaslab { */ hrtime_t ms_load_time; /* time last loaded */ hrtime_t ms_unload_time; /* time last unloaded */ + hrtime_t ms_selected_time; /* time last allocated from */ uint64_t ms_alloc_txg; /* last successful alloc (debug only) */ uint64_t ms_max_size; /* maximum allocatable size */ diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 8a1048bee..f9ae2e781 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -398,7 +398,7 @@ the least recently used metaslab to prevent the system from clogging all of its memory with range trees. This tunable sets the percentage of total system memory that is the threshold. .sp -Default value: \fB75 percent\fR +Default value: \fB25 percent\fR .RE .sp @@ -472,6 +472,34 @@ Use \fB1\fR for yes (default) and \fB0\fR for no. .sp .ne 2 .na +\fBmetaslab_unload_delay\fR (int) +.ad +.RS 12n +After a metaslab is used, we keep it loaded for this many txgs, to attempt to +reduce unnecessary reloading. Note that both this many txgs and +\fBmetaslab_unload_delay_ms\fR milliseconds must pass before unloading will +occur. +.sp +Default value: \fB32\fR. +.RE + +.sp +.ne 2 +.na +\fBmetaslab_unload_delay_ms\fR (int) +.ad +.RS 12n +After a metaslab is used, we keep it loaded for this many milliseconds, to +attempt to reduce unnecessary reloading. Note that both this many +milliseconds and \fBmetaslab_unload_delay\fR txgs must pass before unloading +will occur. +.sp +Default value: \fB600000\fR (ten minutes). 
+.RE + +.sp +.ne 2 +.na \fBsend_holes_without_birth_time\fR (int) .ad .RS 12n diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 2f92fffa4..00af4a21b 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -198,16 +198,20 @@ int metaslab_df_use_largest_segment = B_FALSE; int metaslab_load_pct = 50; /* - * Determines how many txgs a metaslab may remain loaded without having any - * allocations from it. As long as a metaslab continues to be used we will - * keep it loaded. + * These tunables control how long a metaslab will remain loaded after the + * last allocation from it. A metaslab can't be unloaded until at least + * metaslab_unload_delay TXG's and metaslab_unload_delay_ms milliseconds + * have elapsed. However, zfs_metaslab_mem_limit may cause it to be + * unloaded sooner. These settings are intended to be generous -- to keep + * metaslabs loaded for a long time, reducing the rate of metaslab loading. */ -int metaslab_unload_delay = TXG_SIZE * 2; +int metaslab_unload_delay = 32; +int metaslab_unload_delay_ms = 10 * 60 * 1000; /* ten minutes */ /* * Max number of metaslabs per group to preload. */ -int metaslab_preload_limit = SPA_DVAS_PER_BP; +int metaslab_preload_limit = 10; /* * Enable/disable preloading of metaslab. @@ -273,17 +277,17 @@ uint64_t metaslab_trace_max_entries = 5000; int max_disabled_ms = 3; /* - * Time (in seconds) to respect ms_max_size when the metaslab is not loaded. - * To avoid 64-bit overflow, don't set above UINT32_MAX. - */ -unsigned long zfs_metaslab_max_size_cache_sec = 3600; /* 1 hour */ - -/* * Maximum percentage of memory to use on storing loaded metaslabs. If loading * a metaslab would take it over this percentage, the oldest selected metaslab * is automatically unloaded. */ -int zfs_metaslab_mem_limit = 75; +int zfs_metaslab_mem_limit = 25; + +/* + * Time (in seconds) to respect ms_max_size when the metaslab is not loaded. + * To avoid 64-bit overflow, don't set above UINT32_MAX. 
+ */ +unsigned long zfs_metaslab_max_size_cache_sec = 3600; /* 1 hour */ static uint64_t metaslab_weight(metaslab_t *); static void metaslab_set_fragmentation(metaslab_t *); @@ -539,15 +543,6 @@ metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) multilist_sublist_unlock(mls); while (msp != NULL) { mutex_enter(&msp->ms_lock); - /* - * Once we've hit a metaslab selected too recently to - * evict, we're done evicting for now. - */ - if (msp->ms_selected_txg + metaslab_unload_delay >= - txg) { - mutex_exit(&msp->ms_lock); - break; - } /* * If the metaslab has been removed from the list @@ -563,7 +558,20 @@ metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) mls = multilist_sublist_lock(ml, i); metaslab_t *next_msp = multilist_sublist_next(mls, msp); multilist_sublist_unlock(mls); - metaslab_evict(msp, txg); + if (txg > + msp->ms_selected_txg + metaslab_unload_delay && + gethrtime() > msp->ms_selected_time + + (uint64_t)MSEC2NSEC(metaslab_unload_delay_ms)) { + metaslab_evict(msp, txg); + } else { + /* + * Once we've hit a metaslab selected too + * recently to evict, we're done evicting for + * now. + */ + mutex_exit(&msp->ms_lock); + break; + } mutex_exit(&msp->ms_lock); msp = next_msp; } @@ -2248,6 +2256,7 @@ metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg) if (multilist_link_active(&msp->ms_class_txg_node)) multilist_sublist_remove(mls, msp); msp->ms_selected_txg = txg; + msp->ms_selected_time = gethrtime(); multilist_sublist_insert_tail(mls, msp); multilist_sublist_unlock(mls); } @@ -2573,7 +2582,6 @@ metaslab_space_weight(metaslab_t *msp) uint64_t weight, space; ASSERT(MUTEX_HELD(&msp->ms_lock)); - ASSERT(!vd->vdev_removing); /* * The baseline weight is the metaslab's free space. @@ -2832,13 +2840,6 @@ metaslab_weight(metaslab_t *msp) ASSERT(MUTEX_HELD(&msp->ms_lock)); - /* - * If this vdev is in the process of being removed, there is nothing - * for us to do here. 
- */ - if (vd->vdev_removing) - return (0); - metaslab_set_fragmentation(msp); /* @@ -5869,6 +5870,14 @@ module_param(metaslab_preload_enabled, int, 0644); MODULE_PARM_DESC(metaslab_preload_enabled, "preload potential metaslabs during reassessment"); +module_param(metaslab_unload_delay, int, 0644); +MODULE_PARM_DESC(metaslab_unload_delay, + "delay in txgs after metaslab was last used before unloading"); + +module_param(metaslab_unload_delay_ms, int, 0644); +MODULE_PARM_DESC(metaslab_unload_delay_ms, + "delay in milliseconds after metaslab was last used before unloading"); + module_param(zfs_mg_noalloc_threshold, int, 0644); MODULE_PARM_DESC(zfs_mg_noalloc_threshold, "percentage of free space for metaslab group to allow allocation"); |