diff options
author | Don Brady <[email protected]> | 2017-01-12 12:52:56 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2017-01-12 11:52:56 -0800 |
commit | 4e21fd060a567a9c8a1bd0e640985412181c1e33 (patch) | |
tree | 622424c5f98ac43cd93d8af221775a77df161856 /include/sys/metaslab_impl.h | |
parent | 5727b00e06a4208a7040489d582f13bc3c183384 (diff) |
OpenZFS 7303 - dynamic metaslab selection
This change introduces a new weighting algorithm to improve
metaslab selection. The new weighting algorithm relies on the
SPACEMAP_HISTOGRAM feature. As a result, the metaslab weight
now encodes the type of weighting algorithm used (size-based
vs segment-based).
Porting Notes: The metaslab allocation tracing code is conditionally
removed on linux (dependent on mdb debugger).
Authored by: George Wilson <[email protected]>
Reviewed by: Alex Reece <[email protected]>
Reviewed by: Chris Siden <[email protected]>
Reviewed by: Dan Kimmel <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Paul Dagnelie <[email protected]>
Reviewed by: Pavel Zakharov [email protected]
Reviewed by: Prakash Surya <[email protected]>
Reviewed by: Don Brady <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Ported-by: Don Brady <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7303
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/d5190931bd
Closes #5404
Diffstat (limited to 'include/sys/metaslab_impl.h')
-rw-r--r-- | include/sys/metaslab_impl.h | 108 |
1 files changed, 105 insertions, 3 deletions
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 080a1db9a..7b68b51f6 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H @@ -42,6 +42,94 @@ extern "C" { #endif /* + * Metaslab allocation tracing record. + */ +typedef struct metaslab_alloc_trace { + list_node_t mat_list_node; + metaslab_group_t *mat_mg; + metaslab_t *mat_msp; + uint64_t mat_size; + uint64_t mat_weight; + uint32_t mat_dva_id; + uint64_t mat_offset; +} metaslab_alloc_trace_t; + +/* + * Used by the metaslab allocation tracing facility to indicate + * error conditions. These errors are stored to the offset member + * of the metaslab_alloc_trace_t record and displayed by mdb. + */ +typedef enum trace_alloc_type { + TRACE_ALLOC_FAILURE = -1ULL, + TRACE_TOO_SMALL = -2ULL, + TRACE_FORCE_GANG = -3ULL, + TRACE_NOT_ALLOCATABLE = -4ULL, + TRACE_GROUP_FAILURE = -5ULL, + TRACE_ENOSPC = -6ULL, + TRACE_CONDENSING = -7ULL, + TRACE_VDEV_ERROR = -8ULL +} trace_alloc_type_t; + +#define METASLAB_WEIGHT_PRIMARY (1ULL << 63) +#define METASLAB_WEIGHT_SECONDARY (1ULL << 62) +#define METASLAB_WEIGHT_TYPE (1ULL << 61) +#define METASLAB_ACTIVE_MASK \ + (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY) + +/* + * The metaslab weight is used to encode the amount of free space in a + * metaslab, such that the "best" metaslab appears first when sorting the + * metaslabs by weight. The weight (and therefore the "best" metaslab) can + * be determined in two different ways: by computing a weighted sum of all + * the free space in the metaslab (a space based weight) or by counting only + * the free segments of the largest size (a segment based weight). We prefer + * the segment based weight because it reflects how the free space is + * comprised, but we cannot always use it -- legacy pools do not have the + * space map histogram information necessary to determine the largest + * contiguous regions. Pools that have the space map histogram determine + * the segment weight by looking at each bucket in the histogram and + * determining the free space whose size in bytes is in the range: + * [2^i, 2^(i+1)) + * We then encode the largest index, i, that contains regions into the + * segment-weighted value. + * + * Space-based weight: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |PS1| weighted-free space | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * PS - indicates primary and secondary activation + * space - the fragmentation-weighted space + * + * Segment-based weight: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |PS0| idx| count of segments in region | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * PS - indicates primary and secondary activation + * idx - index for the highest bucket in the histogram + * count - number of segments in the specified bucket + */ +#define WEIGHT_GET_ACTIVE(weight) BF64_GET((weight), 62, 2) +#define WEIGHT_SET_ACTIVE(weight, x) BF64_SET((weight), 62, 2, x) + +#define WEIGHT_IS_SPACEBASED(weight) \ + ((weight) == 0 || BF64_GET((weight), 61, 1)) +#define WEIGHT_SET_SPACEBASED(weight) BF64_SET((weight), 61, 1, 1) + +/* + * These macros are only applicable to segment-based weighting. + */ +#define WEIGHT_GET_INDEX(weight) BF64_GET((weight), 55, 6) +#define WEIGHT_SET_INDEX(weight, x) BF64_SET((weight), 55, 6, x) +#define WEIGHT_GET_COUNT(weight) BF64_GET((weight), 0, 55) +#define WEIGHT_SET_COUNT(weight, x) BF64_SET((weight), 0, 55, x) + +/* * A metaslab class encompasses a category of allocatable top-level vdevs. * Each top-level vdev is associated with a metaslab group which defines * the allocatable region for that vdev. Examples of these categories include @@ -220,7 +308,6 @@ struct metaslab { kmutex_t ms_lock; kcondvar_t ms_load_cv; space_map_t *ms_sm; - metaslab_ops_t *ms_ops; uint64_t ms_id; uint64_t ms_start; uint64_t ms_size; @@ -233,12 +320,27 @@ struct metaslab { boolean_t ms_condensing; /* condensing? */ boolean_t ms_condense_wanted; + + /* + * We must hold both ms_lock and ms_group->mg_lock in order to + * modify ms_loaded. + */ boolean_t ms_loaded; boolean_t ms_loading; int64_t ms_deferspace; /* sum of ms_defermap[] space */ uint64_t ms_weight; /* weight vs. others in group */ - uint64_t ms_access_txg; + uint64_t ms_activation_weight; /* activation weight */ + + /* + * Track of whenever a metaslab is selected for loading or allocation. + * We use this value to determine how long the metaslab should + * stay cached. + */ + uint64_t ms_selected_txg; + + uint64_t ms_alloc_txg; /* last successful alloc (debug only) */ + uint64_t ms_max_size; /* maximum allocatable size */ /* * The metaslab block allocators can optionally use a size-ordered |