Optimize allocation throttling

Remove mc_lock use from metaslab_class_throttle_*(). The math there is based on refcounts and so is atomic, so the only race possible there is between zfs_refcount_count() and zfs_refcount_add(). But in most cases metaslab_class_throttle_reserve() is called with the allocator lock held, which covers the race. In cases where the lock is not held, GANG_ALLOCATION() or METASLAB_MUST_RESERVE are set, and so we do not use zfs_refcount_count(). And even if we assume some other non-existing scenario, the worst that may happen from this race is that a few more I/Os get to allocation earlier, which is not a problem. Move locks and data of different allocators into different cache lines to avoid false sharing. Group spa_alloc_* arrays together into a single array of aligned struct spa_alloc, spa_allocs. Align struct metaslab_class_allocator. Reviewed-by: Paul Dagnelie <[email protected]> Reviewed-by: Ryan Moeller <[email protected]> Reviewed-by: Don Brady <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Closes #12314
author: Alexander Motin <[email protected]> 2021-07-21 08:40:36 -0400
committer: Tony Hutter <[email protected]> 2021-09-14 12:40:15 -0700
commit: 32c0b6468cbcfbd6c2c4bc08f88f34e016b4f184 (patch)
tree: d1c1adb19a5db5faee4aa2f32959900632a8e8b0 /include
parent: 7c61e1ef9d9f6c5fa6a3665a88838a19120cf07b (diff)
2 files changed, 10 insertions, 7 deletions
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 9924c3ba0..adf4c03a2 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -157,7 +157,7 @@ typedef struct metaslab_class_allocator {
 	 */
 	uint64_t		mca_alloc_max_slots;
 	zfs_refcount_t		mca_alloc_slots;
-} metaslab_class_allocator_t;
+} ____cacheline_aligned metaslab_class_allocator_t;
 
 /*
  * A metaslab class encompasses a category of allocatable top-level vdevs.
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index bc88cfa15..cb2c49e58 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -57,6 +57,11 @@
 extern "C" {
 #endif
 
+typedef struct spa_alloc {
+	kmutex_t	spaa_lock;	/* lock for this allocator */
+	avl_tree_t	spaa_tree;	/* tree for this allocator */
+} ____cacheline_aligned spa_alloc_t;
+
 typedef struct spa_error_entry {
 	zbookmark_phys_t	se_bookmark;
 	char			*se_name;
@@ -250,13 +255,11 @@ struct spa {
 	list_t		spa_config_dirty_list;	/* vdevs with dirty config */
 	list_t		spa_state_dirty_list;	/* vdevs with dirty state */
 	/*
-	 * spa_alloc_locks and spa_alloc_trees are arrays, whose lengths are
-	 * stored in spa_alloc_count. There is one tree and one lock for each
-	 * allocator, to help improve allocation performance in write-heavy
-	 * workloads.
+	 * spa_allocs is an array, whose length is stored in spa_alloc_count.
+	 * There is one tree and one lock for each allocator, to help improve
+	 * allocation performance in write-heavy workloads.
 	 */
-	kmutex_t	*spa_alloc_locks;
-	avl_tree_t	*spa_alloc_trees;
+	spa_alloc_t	*spa_allocs;
 	int		spa_alloc_count;
 
 	spa_aux_vdev_t	spa_spares;		/* hot spares */
author	Alexander Motin <[email protected]>	2021-07-21 08:40:36 -0400
committer	Tony Hutter <[email protected]>	2021-09-14 12:40:15 -0700
commit	32c0b6468cbcfbd6c2c4bc08f88f34e016b4f184 (patch)
tree	d1c1adb19a5db5faee4aa2f32959900632a8e8b0 /include
parent	7c61e1ef9d9f6c5fa6a3665a88838a19120cf07b (diff)