Improve spl slab cache alloc

The policy is to first try to allocate with KM_NOSLEEP, which leads to a memory allocation with GFP_ATOMIC; if that fails, it dispatches a taskq to expand the slab space. This way it should get better NUMA memory locality and reduce the overhead of context switches. Signed-off-by: Jinshan Xiong <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #551
author: Jinshan Xiong <[email protected]> 2016-05-19 10:59:40 -0700
committer: Brian Behlendorf <[email protected]> 2016-06-01 10:26:42 -0700
commit: 16fc1ec3ba0438b1d657b421923b3969031f2678 (patch)
tree: 737b9b2c9196b9a6631848e83725469ca40ac9b5 /module/spl
parent: ea5f1a200b974c8fdd51993c282d8ae0dc2aa871 (diff)
1 files changed, 35 insertions, 8 deletions
diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c
index e3edca5a0..99967b14f 100644
--- a/module/spl/spl-kmem-cache.c
+++ b/module/spl/spl-kmem-cache.c
@@ -1149,15 +1149,13 @@ spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
  * It is responsible for allocating a new slab, linking it in to the list
  * of partial slabs, and then waking any waiters.
  */
-static void
-spl_cache_grow_work(void *data)
+static int
+__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
 {
-	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
-	spl_kmem_cache_t *skc = ska->ska_cache;
 	spl_kmem_slab_t *sks;
 
 	fstrans_cookie_t cookie = spl_fstrans_mark();
-	sks = spl_slab_alloc(skc, ska->ska_flags);
+	sks = spl_slab_alloc(skc, flags);
 	spl_fstrans_unmark(cookie);
 
 	spin_lock(&skc->skc_lock);
@@ -1165,15 +1163,29 @@ spl_cache_grow_work(void *data)
 		skc->skc_slab_total++;
 		skc->skc_obj_total += sks->sks_objs;
 		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+
+		smp_mb__before_atomic();
+		clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+		smp_mb__after_atomic();
+		wake_up_all(&skc->skc_waitq);
 	}
+	spin_unlock(&skc->skc_lock);
+
+	return (sks == NULL ? -ENOMEM : 0);
+}
+
+static void
+spl_cache_grow_work(void *data)
+{
+	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
+	spl_kmem_cache_t *skc = ska->ska_cache;
+
+	(void)__spl_cache_grow(skc, ska->ska_flags);
 
 	atomic_dec(&skc->skc_ref);
 	smp_mb__before_atomic();
 	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
-	clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
 	smp_mb__after_atomic();
-	wake_up_all(&skc->skc_waitq);
-	spin_unlock(&skc->skc_lock);
 
 	kfree(ska);
 }
@@ -1214,6 +1226,21 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
 	}
 
 	/*
+	 * To reduce the overhead of context switch and improve NUMA locality,
+	 * it tries to allocate a new slab in the current process context with
+	 * KM_NOSLEEP flag. If it fails, it will launch a new taskq to do the
+	 * allocation.
+	 *
+	 * However, this can't be applied to KMC_VMEM due to a bug that
+	 * __vmalloc() doesn't honor gfp flags in page table allocation.
+	 */
+	if (!(skc->skc_flags & KMC_VMEM)) {
+		rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
+		if (rc == 0)
+			return (0);
+	}
+
+	/*
 	 * This is handled by dispatching a work request to the global work
 	 * queue.  This allows us to asynchronously allocate a new slab while
 	 * retaining the ability to safely fall back to a smaller synchronous
author	Jinshan Xiong <[email protected]>	2016-05-19 10:59:40 -0700
committer	Brian Behlendorf <[email protected]>	2016-06-01 10:26:42 -0700
commit	16fc1ec3ba0438b1d657b421923b3969031f2678 (patch)
tree	737b9b2c9196b9a6631848e83725469ca40ac9b5 /module/spl
parent	ea5f1a200b974c8fdd51993c282d8ae0dc2aa871 (diff)