-rw-r--r--   include/sys/metaslab_impl.h   15
-rw-r--r--   module/zfs/metaslab.c         95
-rw-r--r--   module/zfs/spa.c               9
3 files changed, 62 insertions(+), 57 deletions(-)
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index d140f741d..4a7475256 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -204,6 +204,16 @@ struct metaslab_class {
};
/*
+ * Per-allocator data structure.
+ */
+typedef struct metaslab_group_allocator {
+ uint64_t mga_cur_max_alloc_queue_depth;
+ zfs_refcount_t mga_alloc_queue_depth;
+ metaslab_t *mga_primary;
+ metaslab_t *mga_secondary;
+} metaslab_group_allocator_t;
+
+/*
* Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs)
* of a top-level vdev. They are linked together to form a circular linked
* list and can belong to only one metaslab class. Metaslab groups may become
@@ -214,8 +224,6 @@ struct metaslab_class {
*/
struct metaslab_group {
kmutex_t mg_lock;
- metaslab_t **mg_primaries;
- metaslab_t **mg_secondaries;
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
boolean_t mg_allocatable; /* can we allocate? */
@@ -263,9 +271,8 @@ struct metaslab_group {
* groups are unable to handle their share of allocations.
*/
uint64_t mg_max_alloc_queue_depth;
- uint64_t *mg_cur_max_alloc_queue_depth;
- zfs_refcount_t *mg_alloc_queue_depth;
int mg_allocators;
+ metaslab_group_allocator_t *mg_allocator; /* array */
/*
* A metaslab group that can no longer allocate the minimum block
* size will set mg_no_free_space. Once a metaslab group is out
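The header change above replaces four parallel per-allocator arrays (mg_primaries, mg_secondaries, mg_cur_max_alloc_queue_depth, mg_alloc_queue_depth) with a single array of metaslab_group_allocator_t, so all state for allocator i lives together. A minimal user-space sketch of the before/after layouts, with zfs_refcount_t reduced to a plain uint64_t purely for illustration:

/*
 * Before: per-allocator state scattered across four separate
 * allocations, one array per field.
 */
#include <stdint.h>

typedef struct metaslab metaslab_t;	/* opaque for this sketch */

struct metaslab_group_old {
	metaslab_t **mg_primaries;
	metaslab_t **mg_secondaries;
	uint64_t *mg_cur_max_alloc_queue_depth;
	uint64_t *mg_alloc_queue_depth;	/* zfs_refcount_t in the kernel */
	int mg_allocators;
};

/* After: everything for allocator i sits together in one struct. */
typedef struct metaslab_group_allocator {
	uint64_t mga_cur_max_alloc_queue_depth;
	uint64_t mga_alloc_queue_depth;	/* zfs_refcount_t in the kernel */
	metaslab_t *mga_primary;
	metaslab_t *mga_secondary;
} metaslab_group_allocator_t;

struct metaslab_group_new {
	int mg_allocators;
	metaslab_group_allocator_t *mg_allocator;	/* array of mg_allocators */
};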
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 2fc017b5b..1fc44399f 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -814,10 +814,6 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&mg->mg_ms_disabled_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&mg->mg_ms_disabled_cv, NULL, CV_DEFAULT, NULL);
- mg->mg_primaries = kmem_zalloc(allocators * sizeof (metaslab_t *),
- KM_SLEEP);
- mg->mg_secondaries = kmem_zalloc(allocators * sizeof (metaslab_t *),
- KM_SLEEP);
avl_create(&mg->mg_metaslab_tree, metaslab_compare,
sizeof (metaslab_t), offsetof(metaslab_t, ms_group_node));
mg->mg_vd = vd;
@@ -827,13 +823,11 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
mg->mg_no_free_space = B_TRUE;
mg->mg_allocators = allocators;
- mg->mg_alloc_queue_depth = kmem_zalloc(allocators *
- sizeof (zfs_refcount_t), KM_SLEEP);
- mg->mg_cur_max_alloc_queue_depth = kmem_zalloc(allocators *
- sizeof (uint64_t), KM_SLEEP);
+ mg->mg_allocator = kmem_zalloc(allocators *
+ sizeof (metaslab_group_allocator_t), KM_SLEEP);
for (int i = 0; i < allocators; i++) {
- zfs_refcount_create_tracked(&mg->mg_alloc_queue_depth[i]);
- mg->mg_cur_max_alloc_queue_depth[i] = 0;
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[i];
+ zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
}
mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
@@ -856,21 +850,16 @@ metaslab_group_destroy(metaslab_group_t *mg)
taskq_destroy(mg->mg_taskq);
avl_destroy(&mg->mg_metaslab_tree);
- kmem_free(mg->mg_primaries, mg->mg_allocators * sizeof (metaslab_t *));
- kmem_free(mg->mg_secondaries, mg->mg_allocators *
- sizeof (metaslab_t *));
mutex_destroy(&mg->mg_lock);
mutex_destroy(&mg->mg_ms_disabled_lock);
cv_destroy(&mg->mg_ms_disabled_cv);
for (int i = 0; i < mg->mg_allocators; i++) {
- zfs_refcount_destroy(&mg->mg_alloc_queue_depth[i]);
- mg->mg_cur_max_alloc_queue_depth[i] = 0;
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[i];
+ zfs_refcount_destroy(&mga->mga_alloc_queue_depth);
}
- kmem_free(mg->mg_alloc_queue_depth, mg->mg_allocators *
- sizeof (zfs_refcount_t));
- kmem_free(mg->mg_cur_max_alloc_queue_depth, mg->mg_allocators *
- sizeof (uint64_t));
+ kmem_free(mg->mg_allocator, mg->mg_allocators *
+ sizeof (metaslab_group_allocator_t));
kmem_free(mg, sizeof (metaslab_group_t));
}
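The create/destroy hunks above collapse four kmem_zalloc/kmem_free pairs into one allocation sized by the allocator count, with a per-element loop for the tracked refcounts. A user-space sketch of the same pattern, reusing metaslab_group_allocator_t from the first sketch; calloc stands in for kmem_zalloc(..., KM_SLEEP), the refcount calls are stubbed as comments, and the helper names are hypothetical:

#include <stdlib.h>

static metaslab_group_allocator_t *
mg_allocator_array_create(int allocators)
{
	/* calloc models kmem_zalloc(..., KM_SLEEP), which cannot fail. */
	metaslab_group_allocator_t *array =
	    calloc(allocators, sizeof (metaslab_group_allocator_t));
	for (int i = 0; i < allocators; i++) {
		/* kernel: zfs_refcount_create_tracked(&array[i].mga_alloc_queue_depth); */
		array[i].mga_alloc_queue_depth = 0;
	}
	return (array);
}

static void
mg_allocator_array_destroy(metaslab_group_allocator_t *array, int allocators)
{
	for (int i = 0; i < allocators; i++) {
		/* kernel: zfs_refcount_destroy(&array[i].mga_alloc_queue_depth); */
	}
	free(array);
}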
@@ -951,14 +940,15 @@ metaslab_group_passivate(metaslab_group_t *mg)
spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
metaslab_group_alloc_update(mg);
for (int i = 0; i < mg->mg_allocators; i++) {
- metaslab_t *msp = mg->mg_primaries[i];
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[i];
+ metaslab_t *msp = mga->mga_primary;
if (msp != NULL) {
mutex_enter(&msp->ms_lock);
metaslab_passivate(msp,
metaslab_weight_from_range_tree(msp));
mutex_exit(&msp->ms_lock);
}
- msp = mg->mg_secondaries[i];
+ msp = mga->mga_secondary;
if (msp != NULL) {
mutex_enter(&msp->ms_lock);
metaslab_passivate(msp,
@@ -1218,9 +1208,9 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
* regardless of the mg_allocatable or throttle settings.
*/
if (mg->mg_allocatable) {
- metaslab_group_t *mgp;
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
int64_t qdepth;
- uint64_t qmax = mg->mg_cur_max_alloc_queue_depth[allocator];
+ uint64_t qmax = mga->mga_cur_max_alloc_queue_depth;
if (!mc->mc_alloc_throttle_enabled)
return (B_TRUE);
@@ -1239,8 +1229,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
*/
qmax = qmax * (4 + d) / 4;
- qdepth = zfs_refcount_count(
- &mg->mg_alloc_queue_depth[allocator]);
+ qdepth = zfs_refcount_count(&mga->mga_alloc_queue_depth);
/*
* If this metaslab group is below its qmax or it's
@@ -1258,11 +1247,14 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
* racy since we can't hold the locks for all metaslab
* groups at the same time when we make this check.
*/
- for (mgp = mg->mg_next; mgp != rotor; mgp = mgp->mg_next) {
- qmax = mgp->mg_cur_max_alloc_queue_depth[allocator];
+ for (metaslab_group_t *mgp = mg->mg_next;
+ mgp != rotor; mgp = mgp->mg_next) {
+ metaslab_group_allocator_t *mgap =
+ &mgp->mg_allocator[allocator];
+ qmax = mgap->mga_cur_max_alloc_queue_depth;
qmax = qmax * (4 + d) / 4;
- qdepth = zfs_refcount_count(
- &mgp->mg_alloc_queue_depth[allocator]);
+ qdepth =
+ zfs_refcount_count(&mgap->mga_alloc_queue_depth);
/*
* If there is another metaslab group that
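The throttle check above compares the allocator's queue depth against a cap that is relaxed by 25% for each additional DVA copy d: d=0 keeps qmax, d=1 allows 1.25x, d=2 allows 1.5x, so later copies of a block can still be placed when earlier groups are busy. A standalone sketch of just that admission test (the scan of sibling groups is omitted, and the helper name is hypothetical):

#include <stdbool.h>
#include <stdint.h>

static bool
allocator_below_qmax(uint64_t qmax, int64_t qdepth, int d)
{
	/* Relax the cap by 25% per additional DVA copy. */
	qmax = qmax * (4 + d) / 4;
	return (qdepth < (int64_t)qmax);
}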
@@ -3205,6 +3197,7 @@ static int
metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
int allocator, uint64_t activation_weight)
{
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
ASSERT(MUTEX_HELD(&msp->ms_lock));
/*
@@ -3219,16 +3212,16 @@ metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
return (0);
}
- metaslab_t **arr = (activation_weight == METASLAB_WEIGHT_PRIMARY ?
- mg->mg_primaries : mg->mg_secondaries);
+ metaslab_t **mspp = (activation_weight == METASLAB_WEIGHT_PRIMARY ?
+ &mga->mga_primary : &mga->mga_secondary);
mutex_enter(&mg->mg_lock);
- if (arr[allocator] != NULL) {
+ if (*mspp != NULL) {
mutex_exit(&mg->mg_lock);
return (EEXIST);
}
- arr[allocator] = msp;
+ *mspp = msp;
ASSERT3S(msp->ms_allocator, ==, -1);
msp->ms_allocator = allocator;
msp->ms_primary = (activation_weight == METASLAB_WEIGHT_PRIMARY);
@@ -3237,7 +3230,6 @@ metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
msp->ms_activation_weight = msp->ms_weight;
metaslab_group_sort_impl(mg, msp,
msp->ms_weight | activation_weight);
-
mutex_exit(&mg->mg_lock);
return (0);
@@ -3337,14 +3329,15 @@ metaslab_passivate_allocator(metaslab_group_t *mg, metaslab_t *msp,
ASSERT3S(0, <=, msp->ms_allocator);
ASSERT3U(msp->ms_allocator, <, mg->mg_allocators);
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[msp->ms_allocator];
if (msp->ms_primary) {
- ASSERT3P(mg->mg_primaries[msp->ms_allocator], ==, msp);
+ ASSERT3P(mga->mga_primary, ==, msp);
ASSERT(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
- mg->mg_primaries[msp->ms_allocator] = NULL;
+ mga->mga_primary = NULL;
} else {
- ASSERT3P(mg->mg_secondaries[msp->ms_allocator], ==, msp);
+ ASSERT3P(mga->mga_secondary, ==, msp);
ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
- mg->mg_secondaries[msp->ms_allocator] = NULL;
+ mga->mga_secondary = NULL;
}
msp->ms_allocator = -1;
metaslab_group_sort_impl(mg, msp, weight);
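Activation picks a pointer to either the primary or the secondary slot of the allocator's struct and claims it only if it is still empty, returning EEXIST otherwise; passivation clears the matching slot. A user-space sketch of that pointer-to-slot pattern, reusing the types from the first sketch, with all locking elided and hypothetical helper names:

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

static int
slot_activate(metaslab_group_allocator_t *mga, metaslab_t *msp, bool primary)
{
	metaslab_t **mspp = primary ?
	    &mga->mga_primary : &mga->mga_secondary;
	if (*mspp != NULL)
		return (EEXIST);	/* slot already held by another metaslab */
	*mspp = msp;
	return (0);
}

static void
slot_passivate(metaslab_group_allocator_t *mga, bool primary)
{
	if (primary)
		mga->mga_primary = NULL;
	else
		mga->mga_secondary = NULL;
}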
@@ -4493,22 +4486,24 @@ metaslab_group_alloc_increment(spa_t *spa, uint64_t vdev, void *tag, int flags,
if (!mg->mg_class->mc_alloc_throttle_enabled)
return;
- (void) zfs_refcount_add(&mg->mg_alloc_queue_depth[allocator], tag);
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+ (void) zfs_refcount_add(&mga->mga_alloc_queue_depth, tag);
}
static void
metaslab_group_increment_qdepth(metaslab_group_t *mg, int allocator)
{
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
uint64_t max = mg->mg_max_alloc_queue_depth;
- uint64_t cur = mg->mg_cur_max_alloc_queue_depth[allocator];
+ uint64_t cur = mga->mga_cur_max_alloc_queue_depth;
while (cur < max) {
- if (atomic_cas_64(&mg->mg_cur_max_alloc_queue_depth[allocator],
+ if (atomic_cas_64(&mga->mga_cur_max_alloc_queue_depth,
cur, cur + 1) == cur) {
atomic_inc_64(
&mg->mg_class->mc_alloc_max_slots[allocator]);
return;
}
- cur = mg->mg_cur_max_alloc_queue_depth[allocator];
+ cur = mga->mga_cur_max_alloc_queue_depth;
}
}
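metaslab_group_increment_qdepth() ramps the per-allocator cap toward mg_max_alloc_queue_depth one step at a time with a compare-and-swap loop, retrying when another thread wins the race. A user-space model using C11 atomics in place of the kernel's atomic_cas_64(); the kernel version additionally bumps the class-wide mc_alloc_max_slots on success, which is omitted here:

#include <stdatomic.h>
#include <stdint.h>

static void
increment_qdepth(_Atomic uint64_t *cur_max, uint64_t max)
{
	uint64_t cur = atomic_load(cur_max);
	while (cur < max) {
		/*
		 * On success the cap was raised by one and we are done.
		 * On failure, cur is reloaded with the current value
		 * and we retry (or stop once cur has reached max).
		 */
		if (atomic_compare_exchange_weak(cur_max, &cur, cur + 1))
			return;
	}
}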
@@ -4524,7 +4519,8 @@ metaslab_group_alloc_decrement(spa_t *spa, uint64_t vdev, void *tag, int flags,
if (!mg->mg_class->mc_alloc_throttle_enabled)
return;
- (void) zfs_refcount_remove(&mg->mg_alloc_queue_depth[allocator], tag);
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+ (void) zfs_refcount_remove(&mga->mga_alloc_queue_depth, tag);
if (io_complete)
metaslab_group_increment_qdepth(mg, allocator);
}
@@ -4540,8 +4536,8 @@ metaslab_group_alloc_verify(spa_t *spa, const blkptr_t *bp, void *tag,
for (int d = 0; d < ndvas; d++) {
uint64_t vdev = DVA_GET_VDEV(&dva[d]);
metaslab_group_t *mg = vdev_lookup_top(spa, vdev)->vdev_mg;
- VERIFY(zfs_refcount_not_held(
- &mg->mg_alloc_queue_depth[allocator], tag));
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
+ VERIFY(zfs_refcount_not_held(&mga->mga_alloc_queue_depth, tag));
}
#endif
}
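The accounting above adds one reference per queued allocation, tagged by the issuing zio, removes it on completion, and verifies in debug builds that no tag leaks. A user-space sketch that models the tracked zfs_refcount_t as a bare atomic counter, so the per-tag bookkeeping is only indicated in comments; the function names are hypothetical:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

typedef _Atomic int64_t qdepth_count_t;

static void
alloc_increment(qdepth_count_t *qd, const void *tag)
{
	(void) tag;	/* kernel: zfs_refcount_add(qd, tag) records the tag */
	atomic_fetch_add(qd, 1);
}

static void
alloc_decrement(qdepth_count_t *qd, const void *tag)
{
	(void) tag;	/* kernel: zfs_refcount_remove(qd, tag) verifies the tag */
	atomic_fetch_sub(qd, 1);
}

static void
alloc_verify_drained(qdepth_count_t *qd)
{
	assert(atomic_load(qd) == 0);	/* every add was matched by a remove */
}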
@@ -4716,6 +4712,7 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
*/
if (mg->mg_ms_ready < mg->mg_allocators * 3)
allocator = 0;
+ metaslab_group_allocator_t *mga = &mg->mg_allocator[allocator];
ASSERT3U(mg->mg_vd->vdev_ms_count, >=, 2);
@@ -4737,8 +4734,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
mutex_enter(&mg->mg_lock);
if (activation_weight == METASLAB_WEIGHT_PRIMARY &&
- mg->mg_primaries[allocator] != NULL) {
- msp = mg->mg_primaries[allocator];
+ mga->mga_primary != NULL) {
+ msp = mga->mga_primary;
/*
* Even though we don't hold the ms_lock for the
@@ -4753,8 +4750,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
was_active = B_TRUE;
ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
} else if (activation_weight == METASLAB_WEIGHT_SECONDARY &&
- mg->mg_secondaries[allocator] != NULL) {
- msp = mg->mg_secondaries[allocator];
+ mga->mga_secondary != NULL) {
+ msp = mga->mga_secondary;
/*
* See comment above about the similar assertions
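One detail worth noting in the hunk above: when the group has fewer than three ready metaslabs per allocator, every caller falls back to allocator 0 rather than spreading primaries and secondaries too thin. A trivial sketch of that heuristic, with a hypothetical helper name:

/* Fall back to allocator 0 when primaries would be spread too thin. */
static int
choose_allocator(int allocator, int ms_ready, int allocators)
{
	if (ms_ready < allocators * 3)
		return (0);
	return (allocator);
}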
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index aface90af..bd1e091ca 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -8720,13 +8720,14 @@ spa_sync_adjust_vdev_max_queue_depth(spa_t *spa)
* allocations look at mg_max_alloc_queue_depth, and async
* allocations all happen from spa_sync().
*/
- for (int i = 0; i < spa->spa_alloc_count; i++)
+ for (int i = 0; i < mg->mg_allocators; i++) {
ASSERT0(zfs_refcount_count(
- &(mg->mg_alloc_queue_depth[i])));
+ &(mg->mg_allocator[i].mga_alloc_queue_depth)));
+ }
mg->mg_max_alloc_queue_depth = max_queue_depth;
- for (int i = 0; i < spa->spa_alloc_count; i++) {
- mg->mg_cur_max_alloc_queue_depth[i] =
+ for (int i = 0; i < mg->mg_allocators; i++) {
+ mg->mg_allocator[i].mga_cur_max_alloc_queue_depth =
zfs_vdev_def_queue_depth;
}
slots_per_allocator += zfs_vdev_def_queue_depth;
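The spa.c change also corrects the loop bounds: the queues belong to the metaslab group, so they must be walked with mg->mg_allocators rather than the pool-wide spa->spa_alloc_count. A sketch of the per-sync reset, reusing the struct from the first sketch; the helper name is hypothetical and the drained-queue assertion is indicated as a comment:

#include <stdint.h>

static void
reset_alloc_queue_depths(metaslab_group_allocator_t *array, int allocators,
    uint64_t def_queue_depth)
{
	for (int i = 0; i < allocators; i++) {
		/* kernel: ASSERT0(zfs_refcount_count(&array[i].mga_alloc_queue_depth)); */
		array[i].mga_cur_max_alloc_queue_depth = def_queue_depth;
	}
}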