Improve meta data performance

Profiling the system during meta data intensive workloads, such as creating/removing millions of files, revealed that the system was cpu bound. A large fraction of that cpu time was being spent waiting on the virtual address space spin lock. It turns out this was caused by certain heavily used kmem_caches being backed by virtual memory. By default a kmem_cache will dynamically determine the type of memory used based on the object size. For large objects virtual memory is usually preferable, and for small objects physical memory is a better choice. See the spl_slab_alloc() function for a longer discussion on this. However, there is a certain amount of gray area when defining a 'large' object. For the following caches it turns out they were just over the line: * dnode_cache * zio_cache * zio_link_cache * zio_buf_512_cache * zio_data_buf_512_cache Now, because we know there will be a lot of churn in these caches, and because we know the slabs will still be reasonably sized, we can safely request with the KMC_KMEM flag that the caches be backed with physical memory addresses. This entirely avoids the need to serialize on the virtual address space lock. As a bonus this also reduces our vmalloc usage, which will be good for 32-bit kernels which have a very small virtual address space. It will also probably be good for interactive performance since unrelated processes could also block on this same global lock. Finally, we may see less cpu time being burned in the arc_reclaim and txg_sync_threads. Signed-off-by: Brian Behlendorf <[email protected]> Issue #258
author: Brian Behlendorf <[email protected]> 2011-11-01 16:56:48 -0700
committer: Brian Behlendorf <[email protected]> 2011-11-03 10:19:21 -0700
commit: ae6ba3dbe618bb7dbc46f2a3fb54c58243835d6b (patch)
tree: 685ac79b11ac9b17218f826c60515625eb913226
parent: 6a95d0b74c2951f0dc82361ea279f64a7349f060 (diff)
3 files changed, 22 insertions, 11 deletions
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 1027e7215..a32848941 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -329,6 +329,8 @@ extern void kstat_delete(kstat_t *);
 #define	KM_NOSLEEP		UMEM_DEFAULT
 #define	KM_NODEBUG		0x0
 #define	KMC_NODEBUG		UMC_NODEBUG
+#define	KMC_KMEM		0x0
+#define	KMC_VMEM		0x0
 #define	kmem_alloc(_s, _f)	umem_alloc(_s, _f)
 #define	kmem_zalloc(_s, _f)	umem_zalloc(_s, _f)
 #define	kmem_free(_b, _s)	umem_free(_b, _s)
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 9889c3c36..5438f60d0 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -171,9 +171,8 @@ void
 dnode_init(void)
 {
 	ASSERT(dnode_cache == NULL);
-	dnode_cache = kmem_cache_create("dnode_t",
-	    sizeof (dnode_t),
-	    0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
+	dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t),
+	    0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_KMEM);
 	kmem_cache_set_move(dnode_cache, dnode_move);
 }
 
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 0022c64cc..6b03be6f3 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -108,9 +108,9 @@ zio_init(void)
 	data_alloc_arena = zio_alloc_arena;
 #endif
 	zio_cache = kmem_cache_create("zio_cache",
-	    sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+	    sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
 	zio_link_cache = kmem_cache_create("zio_link_cache",
-	    sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+	    sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
 
 	/*
 	 * For small buffers, we want a cache for each multiple of
@@ -136,17 +136,27 @@ zio_init(void)
 
 		if (align != 0) {
 			char name[36];
+			int flags = zio_bulk_flags;
+
+			/*
+			 * The smallest buffers (512b) are heavily used and
+			 * experience a lot of churn.  The slabs allocated
+			 * for them are also relatively small (32K).  Thus
+			 * in order to avoid expensive calls to vmalloc() we
+			 * make an exception to the usual slab allocation
+			 * policy and force these buffers to be kmem backed.
+			 */
+			if (size == (1 << SPA_MINBLOCKSHIFT))
+				flags |= KMC_KMEM;
+
 			(void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
 			zio_buf_cache[c] = kmem_cache_create(name, size,
-			    align, NULL, NULL, NULL, NULL, NULL,
-			    (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) |
-			    zio_bulk_flags);
+			    align, NULL, NULL, NULL, NULL, NULL, flags);
 
 			(void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size);
 			zio_data_buf_cache[c] = kmem_cache_create(name, size,
-			    align, NULL, NULL, NULL, NULL, data_alloc_arena,
-			    (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) |
-			    zio_bulk_flags);
+			    align, NULL, NULL, NULL, NULL,
+			    data_alloc_arena, flags);
 		}
 	}
author	Brian Behlendorf <[email protected]>	2011-11-01 16:56:48 -0700
committer	Brian Behlendorf <[email protected]>	2011-11-03 10:19:21 -0700
commit	ae6ba3dbe618bb7dbc46f2a3fb54c58243835d6b (patch)
tree	685ac79b11ac9b17218f826c60515625eb913226
parent	6a95d0b74c2951f0dc82361ea279f64a7349f060 (diff)