author		Serapheim Dimitropoulos <[email protected]>	2019-02-12 10:38:11 -0800
committer	Brian Behlendorf <[email protected]>	2019-02-12 10:38:11 -0800
commit		425d3237ee88abc53d8522a7139c926d278b4b7f (patch)
tree		b5ac7302fdc38129013399b1dcb63eb8cf1fddb6 /include/sys
parent		d8d418ff0cc90776182534bce10b01e9487b63e4 (diff)
Get rid of space_map_update() for ms_synced_length
Initially, metaslabs and space maps were the same thing
in ZFS. Later, we started differentiating them by referring
to the space map as the on-disk state of the metaslab, making
the metaslab a higher-level concept: the metadata that deals
with space accounting. Today we have split that code even
further, with the space map being its own on-disk data
structure used in areas of ZFS besides metaslabs (e.g. the
vdev-wide space maps used by the zpool checkpoint and vdev
removal features).
This patch refactors the space map code to further separate
it from the metaslab code. It does so by getting rid of the
idea that a space map can have different in-core and on-disk
lengths (sm_length vs smp_length), a distinction that only
the metaslab code needed but that every other space map
consumer had to deal with. Instead, this patch moves the old
in-core length of the metaslab's space map into the metaslab
structure itself (see the ms_synced_length field), while the
space map code now only tracks the space map's actual
on-disk length.
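In reduced form, the new division of state looks roughly like the
sketch below (simplified stand-in types for illustration, not the real
definitions from include/sys/space_map.h and include/sys/metaslab_impl.h):

#include <stdint.h>

/* Stand-in for space_map_phys_t: only the on-disk length remains here. */
typedef struct space_map_phys_sketch {
	uint64_t smp_object;	/* kept only for backwards compatibility */
	uint64_t smp_length;	/* on-disk length of the space map object */
	int64_t  smp_alloc;	/* space allocated from the map */
} space_map_phys_sketch_t;

/* Stand-in for metaslab_t: the old in-core length now lives here. */
typedef struct metaslab_sketch {
	struct space_map *ms_sm;	/* on-disk state of this metaslab */
	uint64_t ms_synced_length;	/* replaces the space map's old
					   in-core sm_length */
} metaslab_sketch_t;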
As a result, space map consumers no longer have to deal
with syncing two different lengths for the same structure
(e.g. space_map_update() goes away), while metaslab-specific
behavior stays within the metaslab code. Specifically, the
ms_synced_length field tracks how much data metaslab_load()
may safely read from the metaslab's space map while running
concurrently with a metaslab_sync() that may be appending to
that same space map.
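The underlying pattern is a published-prefix protocol: the writer
appends first and only afterwards publishes the new length under a
lock, while readers never look past the last published length. Below
is a minimal user-space analogy of that pattern (hypothetical names;
pthreads standing in for kernel mutexes; it assumes a single appending
writer, as in ZFS's syncing context):

#include <pthread.h>
#include <stdint.h>
#include <string.h>

#define LOG_CAPACITY 4096

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static uint8_t log_buf[LOG_CAPACITY];	/* stands in for the space map */
static uint64_t synced_length;		/* stands in for ms_synced_length */

/* Writer (metaslab_sync() analogue): append first, publish length after. */
static void
append_and_publish(const uint8_t *data, uint64_t len)
{
	memcpy(&log_buf[synced_length], data, len);	/* append entries */
	pthread_mutex_lock(&lock);
	synced_length += len;		/* publish the new synced length */
	pthread_mutex_unlock(&lock);
}

/* Reader (metaslab_load() analogue): consume only the published prefix. */
static uint64_t
read_synced_prefix(uint8_t *out)
{
	pthread_mutex_lock(&lock);
	uint64_t len = synced_length;	/* snapshot the published length */
	pthread_mutex_unlock(&lock);
	/*
	 * Bytes past 'len' may be mid-append, but we never read them;
	 * they become visible once the writer publishes a new length.
	 */
	memcpy(out, log_buf, len);
	return (len);
}

int
main(void)
{
	uint8_t snapshot[LOG_CAPACITY];
	append_and_publish((const uint8_t *)"entry", 5);
	return (read_synced_prefix(snapshot) == 5 ? 0 : 1);
}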
As a side note, the patch also adds a few comments around
the metaslab code documenting some assumptions and expected
behavior.
Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Pavel Zakharov <[email protected]>
Signed-off-by: Serapheim Dimitropoulos <[email protected]>
Closes #8328
Diffstat (limited to 'include/sys')
-rw-r--r--	include/sys/metaslab.h		 2
-rw-r--r--	include/sys/metaslab_impl.h	36
-rw-r--r--	include/sys/space_map.h		30
3 files changed, 55 insertions, 13 deletions
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index f47bc19cf..fd0d23502 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -52,6 +52,8 @@ void metaslab_fini(metaslab_t *);
 int metaslab_load(metaslab_t *);
 void metaslab_unload(metaslab_t *);
 
+uint64_t metaslab_allocated_space(metaslab_t *);
+
 void metaslab_sync(metaslab_t *, uint64_t);
 void metaslab_sync_done(metaslab_t *, uint64_t);
 void metaslab_sync_reassess(metaslab_group_t *);
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 137a84769..02ce02226 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -340,8 +340,34 @@ struct metaslab_group {
  * being written.
  */
 struct metaslab {
+	/*
+	 * This is the main lock of the metaslab and its purpose is to
+	 * coordinate our allocations and frees [e.g metaslab_block_alloc(),
+	 * metaslab_free_concrete(), ..etc] with our various syncing
+	 * procedures [e.g. metaslab_sync(), metaslab_sync_done(), ..etc].
+	 *
+	 * The lock is also used during some miscellaneous operations like
+	 * using the metaslab's histogram for the metaslab group's histogram
+	 * aggregation, or marking the metaslab for initialization.
+	 */
 	kmutex_t	ms_lock;
+
+	/*
+	 * Acquired together with the ms_lock whenever we expect to
+	 * write to metaslab data on-disk (i.e flushing entries to
+	 * the metaslab's space map). It helps coordinate readers of
+	 * the metaslab's space map [see spa_vdev_remove_thread()]
+	 * with writers [see metaslab_sync()].
+	 *
+	 * Note that metaslab_load(), even though a reader, uses
+	 * a completely different mechanism to deal with the reading
+	 * of the metaslab's space map based on ms_synced_length. That
+	 * said, the function still uses the ms_sync_lock after it
+	 * has read the ms_sm [see relevant comment in metaslab_load()
+	 * as to why].
+	 */
 	kmutex_t	ms_sync_lock;
+
 	kcondvar_t	ms_load_cv;
 	space_map_t	*ms_sm;
 	uint64_t	ms_id;
@@ -351,6 +377,7 @@ struct metaslab {
 	range_tree_t	*ms_allocating[TXG_SIZE];
 	range_tree_t	*ms_allocatable;
+	uint64_t	ms_allocated_this_txg;
 
 	/*
 	 * The following range trees are accessed only from syncing context.
@@ -375,6 +402,12 @@ struct metaslab {
 	boolean_t	ms_loaded;
 	boolean_t	ms_loading;
 
+	/*
+	 * Tracks the exact amount of allocated space of this metaslab
+	 * (and specifically the metaslab's space map) up to the most
+	 * recently completed sync pass [see usage in metaslab_sync()].
+	 */
+	uint64_t	ms_allocated_space;
 	int64_t		ms_deferspace;	/* sum of ms_defermap[] space */
 	uint64_t	ms_weight;	/* weight vs. others in group */
 	uint64_t	ms_activation_weight;	/* activation weight */
@@ -411,6 +444,9 @@ struct metaslab {
 	avl_node_t	ms_group_node;	/* node in metaslab group tree */
 	txg_node_t	ms_txg_node;	/* per-txg dirty metaslab links */
 
+	/* updated every time we are done syncing the metaslab's space map */
+	uint64_t	ms_synced_length;
+
 	boolean_t	ms_new;
 };
diff --git a/include/sys/space_map.h b/include/sys/space_map.h
index 64c97bb4d..52536cccc 100644
--- a/include/sys/space_map.h
+++ b/include/sys/space_map.h
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_SPACE_MAP_H
@@ -55,10 +55,17 @@ extern "C" {
  * for backward compatibility.
  */
 typedef struct space_map_phys {
-	uint64_t	smp_object;	/* on-disk space map object */
-	uint64_t	smp_objsize;	/* size of the object */
-	int64_t		smp_alloc;	/* space allocated from the map */
-	uint64_t	smp_pad[5];	/* reserved */
+	/* object number: not needed but kept for backwards compatibility */
+	uint64_t	smp_object;
+
+	/* length of the object in bytes */
+	uint64_t	smp_length;
+
+	/* space allocated from the map */
+	int64_t		smp_alloc;
+
+	/* reserved */
+	uint64_t	smp_pad[5];
 
 	/*
 	 * The smp_histogram maintains a histogram of free regions. Each
@@ -81,8 +88,6 @@ typedef struct space_map {
 	uint64_t	sm_start;	/* start of map */
 	uint64_t	sm_size;	/* size of map */
 	uint8_t		sm_shift;	/* unit shift */
-	uint64_t	sm_length;	/* synced length */
-	int64_t		sm_alloc;	/* synced space allocated */
 	objset_t	*sm_os;		/* objset for this map */
 	uint64_t	sm_object;	/* object id for this map */
 	uint32_t	sm_blksz;	/* block size for space map */
@@ -189,7 +194,10 @@ boolean_t sm_entry_is_double_word(uint64_t e);
 typedef int (*sm_cb_t)(space_map_entry_t *sme, void *arg);
 
 int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype);
-int space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg);
+int space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
+    uint64_t length);
+int space_map_iterate(space_map_t *sm, uint64_t length,
+    sm_cb_t callback, void *arg);
 int space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
     dmu_tx_t *tx);
 
@@ -197,10 +205,8 @@ void space_map_histogram_clear(space_map_t *sm);
 void space_map_histogram_add(space_map_t *sm, range_tree_t *rt,
     dmu_tx_t *tx);
 
-void space_map_update(space_map_t *sm);
-
 uint64_t space_map_object(space_map_t *sm);
-uint64_t space_map_allocated(space_map_t *sm);
+int64_t space_map_allocated(space_map_t *sm);
 uint64_t space_map_length(space_map_t *sm);
 
 void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
@@ -216,8 +222,6 @@ int space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
     uint64_t start, uint64_t size, uint8_t shift);
 void space_map_close(space_map_t *sm);
 
-int64_t space_map_alloc_delta(space_map_t *sm);
-
 #ifdef __cplusplus
 }
 #endif
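As an illustration of the new interface, a consumer can now bound its
iteration to the synced prefix explicitly. The counting callback and
helper below are hypothetical; only the space_map_iterate() signature
and the ms_sm/ms_synced_length fields come from the headers above:

/* Hypothetical callback: count entries in the synced prefix of the map. */
static int
count_entries_cb(space_map_entry_t *sme, void *arg)
{
	(void) sme;
	(*(uint64_t *)arg)++;
	return (0);	/* returning non-zero would stop the iteration */
}

/*
 * Iterate only over what the metaslab knows has been synced, even if
 * metaslab_sync() is concurrently appending past ms_synced_length.
 */
static int
count_synced_entries(metaslab_t *msp, uint64_t *entries)
{
	*entries = 0;
	return (space_map_iterate(msp->ms_sm, msp->ms_synced_length,
	    count_entries_cb, entries));
}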