aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
author: Alexander Motin <[email protected]> 2022-04-26 13:44:21 -0400
committer: GitHub <[email protected]> 2022-04-26 10:44:21 -0700
commit: 600a02b8844edb16b88b9bb179d1fbd7a169037d (patch)
tree: 3b089446cb0c3612d5abed8f4c2166ce36bf56c8 /include/sys
parent: 0409d3327371cef8a8c5886cb7530ded6f5f1091 (diff)
Improve log spacemap load time
The previous flushing algorithm limited only the total number of log blocks, to the minimum of 256K and 4x the number of metaslabs in the pool. As a result, a system with 1500 disks of 1000 metaslabs each, touching several new metaslabs each TXG, could grow the spacemap log to a huge size without much benefit. We've observed one such system taking about 45 minutes to import its pool.

This patch improves the situation in five ways:
- By limiting the maximum period between flushes of each metaslab to 1000 TXGs, which effectively limits the maximum number of per-TXG spacemap logs to load to the same number.
- By making flushing smoother via accounting for the number of metaslabs that were touched after the last flush and actually need another flush, not just an ms_unflushed_txg bump.
- By applying zfs_unflushed_log_block_pct to the number of metaslabs that were touched after the last flush, not to all metaslabs in the pool.
- By aggressively prefetching per-TXG spacemap logs up to 16 TXGs in advance, making the log spacemap load process for a wide HDD pool CPU-bound and accelerating it many times over.
- By reducing zfs_unflushed_log_block_max from 256K to 128K, reducing the inherently single-threaded log processing time from ~10 to ~5 minutes.

As a further optimization we could skip bumping ms_unflushed_txg for metaslabs not touched since the last flush, but that would be an incompatible change requiring a new pool feature.

Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored-By: iXsystems, Inc.
Closes #12789
Diffstat (limited to 'include/sys')
-rw-r--r-- include/sys/dmu.h 2
-rw-r--r-- include/sys/metaslab.h 3
-rw-r--r-- include/sys/metaslab_impl.h 1
-rw-r--r-- include/sys/spa_log_spacemap.h 9
4 files changed, 13 insertions, 2 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 1ddff0d4e..03513f9f2 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -1067,6 +1067,8 @@ int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];
+extern int dmu_prefetch_max;
+
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index 129a68be4..b777a3cae 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -49,11 +49,14 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
metaslab_t **);
void metaslab_fini(metaslab_t *);
+void metaslab_set_unflushed_dirty(metaslab_t *, boolean_t);
void metaslab_set_unflushed_txg(metaslab_t *, uint64_t, dmu_tx_t *);
void metaslab_set_estimated_condensed_size(metaslab_t *, uint64_t, dmu_tx_t *);
+boolean_t metaslab_unflushed_dirty(metaslab_t *);
uint64_t metaslab_unflushed_txg(metaslab_t *);
uint64_t metaslab_estimated_condensed_size(metaslab_t *);
int metaslab_sort_by_flushed(const void *, const void *);
+void metaslab_unflushed_bump(metaslab_t *, dmu_tx_t *, boolean_t);
uint64_t metaslab_unflushed_changes_memused(metaslab_t *);
int metaslab_load(metaslab_t *);
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 3dbee4c17..820c61a25 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -553,6 +553,7 @@ struct metaslab {
* log space maps.
*/
uint64_t ms_unflushed_txg;
+ boolean_t ms_unflushed_dirty;
/* updated every time we are done syncing the metaslab's space map */
uint64_t ms_synced_length;
diff --git a/include/sys/spa_log_spacemap.h b/include/sys/spa_log_spacemap.h
index b2ed77fac..72229df6c 100644
--- a/include/sys/spa_log_spacemap.h
+++ b/include/sys/spa_log_spacemap.h
@@ -30,7 +30,10 @@
typedef struct log_summary_entry {
uint64_t lse_start; /* start TXG */
+ uint64_t lse_end; /* last TXG */
+ uint64_t lse_txgcount; /* # of TXGs */
uint64_t lse_mscount; /* # of metaslabs needed to be flushed */
+ uint64_t lse_msdcount; /* # of dirty metaslabs needed to be flushed */
uint64_t lse_blkcount; /* blocks held by this entry */
list_node_t lse_node;
} log_summary_entry_t;
@@ -50,6 +53,7 @@ typedef struct spa_log_sm {
uint64_t sls_nblocks; /* number of blocks in this log */
uint64_t sls_mscount; /* # of metaslabs flushed in the log's txg */
avl_node_t sls_node; /* node in spa_sm_logs_by_txg */
+ space_map_t *sls_sm; /* space map pointer, if open */
} spa_log_sm_t;
int spa_ld_log_spacemaps(spa_t *);
@@ -68,8 +72,9 @@ uint64_t spa_log_sm_memused(spa_t *);
void spa_log_sm_decrement_mscount(spa_t *, uint64_t);
void spa_log_sm_increment_current_mscount(spa_t *);
-void spa_log_summary_add_flushed_metaslab(spa_t *);
-void spa_log_summary_decrement_mscount(spa_t *, uint64_t);
+void spa_log_summary_add_flushed_metaslab(spa_t *, boolean_t);
+void spa_log_summary_dirty_flushed_metaslab(spa_t *, uint64_t);
+void spa_log_summary_decrement_mscount(spa_t *, uint64_t, boolean_t);
void spa_log_summary_decrement_blkcount(spa_t *, uint64_t);
boolean_t spa_flush_all_logs_requested(spa_t *);