author     Serapheim Dimitropoulos <[email protected]>  2019-02-12 10:38:11 -0800
committer  Brian Behlendorf <[email protected]>  2019-02-12 10:38:11 -0800
commit     425d3237ee88abc53d8522a7139c926d278b4b7f (patch)
tree       b5ac7302fdc38129013399b1dcb63eb8cf1fddb6 /include/sys
parent     d8d418ff0cc90776182534bce10b01e9487b63e4 (diff)
Get rid of space_map_update() for ms_synced_length
Initially, metaslabs and space maps used to be the same thing in ZFS. Later, we started differentiating them by referring to the space map as the on-disk state of the metaslab, making the metaslab a higher-level concept whose metadata deals with space accounting. Today we have split that code even further, with the space map being its own on-disk data structure used in areas of ZFS besides metaslabs (e.g. the vdev-wide space maps used by the zpool checkpoint and vdev removal features).

This patch refactors the space map code to further separate it from the metaslab code. It does so by getting rid of the idea that a space map can have different in-core and on-disk lengths (sm_length vs. smp_length), a distinction that only the metaslab code needs but that every other space map consumer has had to deal with. Instead, this patch moves the old in-core length of the metaslab's space map into the metaslab structure itself (see the ms_synced_length field), while the space map code now cares only about the space map's actual on-disk length.

As a result, space map consumers no longer have to maintain two different lengths for the same structure (e.g. space_map_update() goes away), while metaslab-specific behavior stays within the metaslab code. Specifically, the ms_synced_length field tracks how much data metaslab_load() can read from the metaslab's space map while running concurrently with metaslab_sync(), which may be appending to that same space map.

As a side note, the patch also adds a few comments around the metaslab code documenting some assumptions and expected behavior.

Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Pavel Zakharov <[email protected]>
Signed-off-by: Serapheim Dimitropoulos <[email protected]>
Closes #8328
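To make the new contract concrete, here is a minimal reader-side sketch built only from the declarations in the headers below. load_synced_view() is a hypothetical helper, not part of this patch, and it assumes SM_FREE is the desired maptype and that the usual ZFS headers are in scope:

	/*
	 * Hypothetical reader (not from the patch): capture the length
	 * recorded at the end of the last sync and read only up to that
	 * offset, so a concurrent metaslab_sync() appending past it
	 * cannot disturb the load.
	 */
	static int
	load_synced_view(metaslab_t *ms, range_tree_t *rt)
	{
		return (space_map_load_length(ms->ms_sm, rt, SM_FREE,
		    ms->ms_synced_length));
	}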
Diffstat (limited to 'include/sys')
-rw-r--r--  include/sys/metaslab.h        2
-rw-r--r--  include/sys/metaslab_impl.h  36
-rw-r--r--  include/sys/space_map.h      30
3 files changed, 55 insertions(+), 13 deletions(-)
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index f47bc19cf..fd0d23502 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -52,6 +52,8 @@ void metaslab_fini(metaslab_t *);
int metaslab_load(metaslab_t *);
void metaslab_unload(metaslab_t *);
+uint64_t metaslab_allocated_space(metaslab_t *);
+
void metaslab_sync(metaslab_t *, uint64_t);
void metaslab_sync_done(metaslab_t *, uint64_t);
void metaslab_sync_reassess(metaslab_group_t *);
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 137a84769..02ce02226 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -340,8 +340,34 @@ struct metaslab_group {
* being written.
*/
struct metaslab {
+ /*
+ * This is the main lock of the metaslab and its purpose is to
+ * coordinate our allocations and frees [e.g. metaslab_block_alloc(),
+ * metaslab_free_concrete(), etc.] with our various syncing
+ * procedures [e.g. metaslab_sync(), metaslab_sync_done(), etc.].
+ *
+ * The lock is also used during some miscellaneous operations like
+ * using the metaslab's histogram for the metaslab group's histogram
+ * aggregation, or marking the metaslab for initialization.
+ */
kmutex_t ms_lock;
+
+ /*
+ * Acquired together with the ms_lock whenever we expect to
+ * write to metaslab data on-disk (i.e. flushing entries to
+ * the metaslab's space map). It helps coordinate readers of
+ * the metaslab's space map [see spa_vdev_remove_thread()]
+ * with writers [see metaslab_sync()].
+ *
+ * Note that metaslab_load(), even though a reader, uses
+ * a completely different mechanism to deal with the reading
+ * of the metaslab's space map based on ms_synced_length. That
+ * said, the function still uses the ms_sync_lock after it
+ * has read the ms_sm [see relevant comment in metaslab_load()
+ * as to why].
+ */
kmutex_t ms_sync_lock;
+
kcondvar_t ms_load_cv;
space_map_t *ms_sm;
uint64_t ms_id;
@@ -351,6 +377,7 @@ struct metaslab {
range_tree_t *ms_allocating[TXG_SIZE];
range_tree_t *ms_allocatable;
+ uint64_t ms_allocated_this_txg;
/*
* The following range trees are accessed only from syncing context.
@@ -375,6 +402,12 @@ struct metaslab {
boolean_t ms_loaded;
boolean_t ms_loading;
+ /*
+ * Tracks the exact amount of allocated space of this metaslab
+ * (and specifically the metaslab's space map) up to the most
+ * recently completed sync pass [see usage in metaslab_sync()].
+ */
+ uint64_t ms_allocated_space;
int64_t ms_deferspace; /* sum of ms_defermap[] space */
uint64_t ms_weight; /* weight vs. others in group */
uint64_t ms_activation_weight; /* activation weight */
@@ -411,6 +444,9 @@ struct metaslab {
avl_node_t ms_group_node; /* node in metaslab group tree */
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
+ /* updated every time we are done syncing the metaslab's space map */
+ uint64_t ms_synced_length;
+
boolean_t ms_new;
};
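The intended interplay of these fields, roughly: once a sync of the metaslab's space map completes, the writer records the new on-disk length so future loads know how much is safe to read. This is a sketch of that step under the assumptions stated in the ms_synced_length comment above, not the patch's actual metaslab_sync()/metaslab_sync_done() body:

	/*
	 * End of sync (sketch): everything up to the current on-disk
	 * length of ms_sm is now stable, so publish it for readers.
	 */
	mutex_enter(&msp->ms_lock);
	msp->ms_synced_length = space_map_length(msp->ms_sm);
	mutex_exit(&msp->ms_lock);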
diff --git a/include/sys/space_map.h b/include/sys/space_map.h
index 64c97bb4d..52536cccc 100644
--- a/include/sys/space_map.h
+++ b/include/sys/space_map.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
#ifndef _SYS_SPACE_MAP_H
@@ -55,10 +55,17 @@ extern "C" {
* for backward compatibility.
*/
typedef struct space_map_phys {
- uint64_t smp_object; /* on-disk space map object */
- uint64_t smp_objsize; /* size of the object */
- int64_t smp_alloc; /* space allocated from the map */
- uint64_t smp_pad[5]; /* reserved */
+ /* object number: not needed but kept for backwards compatibility */
+ uint64_t smp_object;
+
+ /* length of the object in bytes */
+ uint64_t smp_length;
+
+ /* space allocated from the map */
+ int64_t smp_alloc;
+
+ /* reserved */
+ uint64_t smp_pad[5];
/*
* The smp_histogram maintains a histogram of free regions. Each
@@ -81,8 +88,6 @@ typedef struct space_map {
uint64_t sm_start; /* start of map */
uint64_t sm_size; /* size of map */
uint8_t sm_shift; /* unit shift */
- uint64_t sm_length; /* synced length */
- int64_t sm_alloc; /* synced space allocated */
objset_t *sm_os; /* objset for this map */
uint64_t sm_object; /* object id for this map */
uint32_t sm_blksz; /* block size for space map */
@@ -189,7 +194,10 @@ boolean_t sm_entry_is_double_word(uint64_t e);
typedef int (*sm_cb_t)(space_map_entry_t *sme, void *arg);
int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype);
-int space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg);
+int space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
+ uint64_t length);
+int space_map_iterate(space_map_t *sm, uint64_t length,
+ sm_cb_t callback, void *arg);
int space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
dmu_tx_t *tx);
@@ -197,10 +205,8 @@ void space_map_histogram_clear(space_map_t *sm);
void space_map_histogram_add(space_map_t *sm, range_tree_t *rt,
dmu_tx_t *tx);
-void space_map_update(space_map_t *sm);
-
uint64_t space_map_object(space_map_t *sm);
-uint64_t space_map_allocated(space_map_t *sm);
+int64_t space_map_allocated(space_map_t *sm);
uint64_t space_map_length(space_map_t *sm);
void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
@@ -216,8 +222,6 @@ int space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
uint64_t start, uint64_t size, uint8_t shift);
void space_map_close(space_map_t *sm);
-int64_t space_map_alloc_delta(space_map_t *sm);
-
#ifdef __cplusplus
}
#endif
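For space map consumers, the practical change is that the length to read is now passed explicitly instead of living in the in-core space map, so there is no second "synced" state to roll forward with space_map_update(). A hedged sketch of a caller of the new space_map_iterate() follows; count_entries_cb() and count_sm_entries() are hypothetical helpers, not from this patch:

	/* Hypothetical sm_cb_t callback: count every entry visited. */
	static int
	count_entries_cb(space_map_entry_t *sme, void *arg)
	{
		uint64_t *count = arg;
		(void) sme;
		(*count)++;
		return (0);
	}

	static int
	count_sm_entries(space_map_t *sm, uint64_t *count)
	{
		*count = 0;
		/* The length is explicit; no space_map_update() step. */
		return (space_map_iterate(sm, space_map_length(sm),
		    count_entries_cb, count));
	}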