Diffstat (limited to 'module')
-rw-r--r--   module/os/linux/spl/spl-kmem-cache.c   20
-rw-r--r--   module/os/linux/spl/spl-kmem.c         86
2 files changed, 95 insertions(+), 11 deletions(-)
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index 24bf084b9..7e423100d 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -203,7 +203,23 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
ASSERT(ISP2(size));
ptr = (void *)__get_free_pages(lflags, get_order(size));
} else {
- ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
+ /*
+ * GFP_KERNEL allocations can safely use kvmalloc which may
+ * improve performance by avoiding a) high latency caused by
+ * vmalloc's on-access allocation, b) performance loss due to
+ * MMU memory address mapping and c) vmalloc locking overhead.
+ * This has the side-effect that the slab statistics will
+ * incorrectly report this as a vmem allocation, but that is
+ * purely cosmetic.
+ *
+ * For non-GFP_KERNEL allocations we stick to __vmalloc.
+ */
+ if ((lflags & GFP_KERNEL) == GFP_KERNEL) {
+ ptr = spl_kvmalloc(size, lflags);
+ } else {
+ ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
+ PAGE_KERNEL);
+ }
}
/* Resulting allocated memory will be page aligned */
@@ -231,7 +247,7 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
ASSERT(ISP2(size));
free_pages((unsigned long)ptr, get_order(size));
} else {
- vfree(ptr);
+ spl_kmem_free_impl(ptr, size);
}
}
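Note on the flag test used in kv_alloc() above: "(lflags & GFP_KERNEL) == GFP_KERNEL" is a subset check, not a simple bit test, because GFP_KERNEL is a composite mask and only a full match indicates a context where the kvmalloc path is safe. A minimal sketch of the distinction, assuming the usual Linux definition GFP_KERNEL == (__GFP_RECLAIM | __GFP_IO | __GFP_FS); the helper name spl_gfp_fs_allowed is hypothetical and not part of this patch:

#include <linux/types.h>
#include <linux/gfp.h>

/*
 * True only when reclaim, IO and FS are all permitted, i.e. a full
 * GFP_KERNEL context.  A plain (lflags & GFP_KERNEL) test would also
 * match GFP_NOFS and GFP_NOIO, where falling back to kvmalloc/__vmalloc
 * is unsafe because __vmalloc internally performs GFP_KERNEL
 * allocations regardless of the caller's flags.
 */
static inline bool
spl_gfp_fs_allowed(gfp_t lflags)
{
	return ((lflags & GFP_KERNEL) == GFP_KERNEL);
}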
diff --git a/module/os/linux/spl/spl-kmem.c b/module/os/linux/spl/spl-kmem.c
index 8a32929c8..d2799b5bd 100644
--- a/module/os/linux/spl/spl-kmem.c
+++ b/module/os/linux/spl/spl-kmem.c
@@ -133,6 +133,73 @@ kmem_strfree(char *str)
}
EXPORT_SYMBOL(kmem_strfree);
+/* Kernel compatibility for <4.13 */
+#ifndef __GFP_RETRY_MAYFAIL
+#define __GFP_RETRY_MAYFAIL __GFP_REPEAT
+#endif
+
+void *
+spl_kvmalloc(size_t size, gfp_t lflags)
+{
+#ifdef HAVE_KVMALLOC
+ /*
+ * GFP_KERNEL allocations can safely use kvmalloc which may
+ * improve performance by avoiding a) high latency caused by
+ * vmalloc's on-access allocation, b) performance loss due to
+ * MMU memory address mapping and c) vmalloc locking overhead.
+ * This has the side-effect that the slab statistics will
+ * incorrectly report this as a vmem allocation, but that is
+ * purely cosmetic.
+ */
+ if ((lflags & GFP_KERNEL) == GFP_KERNEL)
+ return (kvmalloc(size, lflags));
+#endif
+
+ gfp_t kmalloc_lflags = lflags;
+
+ if (size > PAGE_SIZE) {
+ /*
+ * We need to set __GFP_NOWARN here since spl_kvmalloc is not
+ * only called by spl_kmem_alloc_impl but can be called
+ * directly with custom lflags, too. In that case
+ * kmem_flags_convert does not get called, which would
+ * implicitly set __GFP_NOWARN.
+ */
+ kmalloc_lflags |= __GFP_NOWARN;
+
+ /*
+ * N.B. __GFP_RETRY_MAYFAIL is supported only for large
+ * e (>32kB) allocations.
+ *
+ * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY
+ * for !costly requests because there is no other way to tell
+ * the allocator that we want to fail rather than retry
+ * endlessly.
+ */
+ if (!(kmalloc_lflags & __GFP_RETRY_MAYFAIL) ||
+ (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ kmalloc_lflags |= __GFP_NORETRY;
+ }
+ }
+
+ /*
+ * We first try kmalloc - even for big sizes - and fall back to
+ * __vmalloc if that fails.
+ *
+ * For non-GFP_KERNEL allocations we always stick to kmalloc_node,
+ * and fail when kmalloc is not successful (returns NULL).
+ * We cannot fall back to __vmalloc in this case because __vmalloc
+ * internally uses GFP_KERNEL allocations.
+ */
+ void *ptr = kmalloc_node(size, kmalloc_lflags, NUMA_NO_NODE);
+ if (ptr || size <= PAGE_SIZE ||
+ (lflags & GFP_KERNEL) != GFP_KERNEL) {
+ return (ptr);
+ }
+
+ return (__vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL));
+}
+
/*
* General purpose unified implementation of kmem_alloc(). It is an
* amalgamation of Linux and Illumos allocator design. It should never be
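A hypothetical usage sketch (not part of the patch) contrasting the two contexts handled by the spl_kvmalloc() added in the hunk above; the function name spl_kvmalloc_usage_sketch and the 128 KiB size are illustrative only, and spl_kvmalloc()/spl_kmem_free_impl() are assumed to be visible through the SPL kmem headers:

static int
spl_kvmalloc_usage_sketch(void)
{
	/*
	 * Full GFP_KERNEL context: if the physically contiguous kmalloc
	 * attempt fails, spl_kvmalloc() falls back to __vmalloc().
	 */
	void *a = spl_kvmalloc(128 * 1024, GFP_KERNEL);

	/*
	 * GFP_NOFS context: no vmalloc fallback is attempted because
	 * __vmalloc internally uses GFP_KERNEL allocations, so NULL
	 * simply means the contiguous kmalloc failed.
	 */
	void *b = spl_kvmalloc(128 * 1024, GFP_NOFS);

	/* spl_kmem_free_impl() handles kmalloc'd and vmalloc'd memory. */
	if (a != NULL)
		spl_kmem_free_impl(a, 128 * 1024);
	if (b != NULL)
		spl_kmem_free_impl(b, 128 * 1024);

	return (0);
}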
@@ -144,7 +211,6 @@ inline void *
spl_kmem_alloc_impl(size_t size, int flags, int node)
{
gfp_t lflags = kmem_flags_convert(flags);
- int use_vmem = 0;
void *ptr;
/*
@@ -178,7 +244,7 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
* impact performance so frequently manipulating the virtual
* address space is strongly discouraged.
*/
- if ((size > spl_kmem_alloc_max) || use_vmem) {
+ if (size > spl_kmem_alloc_max) {
if (flags & KM_VMEM) {
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
PAGE_KERNEL);
@@ -186,20 +252,22 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
return (NULL);
}
} else {
- ptr = kmalloc_node(size, lflags, node);
+ if (flags & KM_VMEM) {
+ ptr = spl_kvmalloc(size, lflags);
+ } else {
+ ptr = kmalloc_node(size, lflags, node);
+ }
}
if (likely(ptr) || (flags & KM_NOSLEEP))
return (ptr);
/*
- * For vmem_alloc() and vmem_zalloc() callers retry immediately
- * using __vmalloc() which is unlikely to fail.
+ * Try hard to satisfy the allocation. However, when progress
+ * cannot be made, the allocation is allowed to fail.
*/
- if ((flags & KM_VMEM) && (use_vmem == 0)) {
- use_vmem = 1;
- continue;
- }
+ if ((lflags & GFP_KERNEL) == GFP_KERNEL)
+ lflags |= __GFP_RETRY_MAYFAIL;
/*
* Use cond_resched() instead of congestion_wait() to avoid