-rw-r--r--  include/sys/kmem.h      234
-rw-r--r--  module/spl/spl-kmem.c   248
2 files changed, 289 insertions, 193 deletions
diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index 17b3a2276..e90c6b8ce 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -87,10 +87,10 @@ kzalloc_nofail(size_t size, gfp_t flags)
return ptr;
}
-#ifdef HAVE_KMALLOC_NODE
static inline void *
kmalloc_node_nofail(size_t size, gfp_t flags, int node)
{
+#ifdef HAVE_KMALLOC_NODE
void *ptr;
do {
@@ -98,16 +98,63 @@ kmalloc_node_nofail(size_t size, gfp_t flags, int node)
} while (ptr == NULL && (flags & __GFP_WAIT));
return ptr;
-}
+#else
+ return kmalloc_nofail(size, flags);
#endif /* HAVE_KMALLOC_NODE */
+}
+
+static inline void *
+vmalloc_nofail(size_t size, gfp_t flags)
+{
+ void *ptr;
+
+ /*
+ * Retry failed __vmalloc() allocations once every second. The
+ * rationale for the delay is that the likely failure modes are:
+ *
+ * 1) The system has completely exhausted memory, in which case
+ * delaying 1 second for the memory reclaim to run is reasonable
+ * to avoid thrashing the system.
+ * 2) The system has memory but has exhausted the small virtual
+ * address space available on 32-bit systems. Retrying the
+ * allocation immediately will only result in spinning on the
+ * virtual address space lock. It is better to delay a second and
+ * hope that another process will free some of the address space.
+ * But the bottom line is there is not much we can actually do
+ * since we can never safely return a failure and honor the
+ * Solaris semantics.
+ */
+ while (1) {
+ ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+ if (unlikely((ptr == NULL) && (flags & __GFP_WAIT))) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+ } else {
+ break;
+ }
+ }
+
+ return ptr;
+}
+
+static inline void *
+vzalloc_nofail(size_t size, gfp_t flags)
+{
+ void *ptr;
+
+ ptr = vmalloc_nofail(size, flags);
+ if (ptr)
+ memset(ptr, 0, (size));
+
+ return ptr;
+}
#ifdef DEBUG_KMEM
-# ifdef HAVE_ATOMIC64_T
-extern atomic64_t kmem_alloc_used;
-extern unsigned long long kmem_alloc_max;
-extern atomic64_t vmem_alloc_used;
-extern unsigned long long vmem_alloc_max;
+/*
+ * Memory accounting functions to be used only when DEBUG_KMEM is set.
+ */
+# ifdef HAVE_ATOMIC64_T
# define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
# define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
@@ -118,13 +165,13 @@ extern unsigned long long vmem_alloc_max;
# define vmem_alloc_used_read() atomic64_read(&vmem_alloc_used)
# define vmem_alloc_used_set(size) atomic64_set(&vmem_alloc_used, size)
-# else
-
-extern atomic_t kmem_alloc_used;
+extern atomic64_t kmem_alloc_used;
extern unsigned long long kmem_alloc_max;
-extern atomic_t vmem_alloc_used;
+extern atomic64_t vmem_alloc_used;
extern unsigned long long vmem_alloc_max;
+# else /* HAVE_ATOMIC64_T */
+
# define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used)
# define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used)
# define kmem_alloc_used_read() atomic_read(&kmem_alloc_used)
@@ -134,90 +181,107 @@ extern unsigned long long vmem_alloc_max;
# define vmem_alloc_used_read() atomic_read(&vmem_alloc_used)
# define vmem_alloc_used_set(size) atomic_set(&vmem_alloc_used, size)
-# endif /* _LP64 */
-
-# define kmem_alloc(size, flags) __kmem_alloc((size), (flags), 0, 0)
-# define kmem_zalloc(size, flags) __kmem_alloc((size), ((flags) | \
- __GFP_ZERO), 0, 0)
-
-/* The node alloc functions are only used by the SPL code itself */
-# ifdef HAVE_KMALLOC_NODE
-# define kmem_alloc_node(size, flags, node) __kmem_alloc((size), (flags), 1, \
- node)
-# else
-# define kmem_alloc_node(size, flags, node) __kmem_alloc((size), (flags), 0, 0)
-# endif
+extern atomic_t kmem_alloc_used;
+extern unsigned long long kmem_alloc_max;
+extern atomic_t vmem_alloc_used;
+extern unsigned long long vmem_alloc_max;
-# define vmem_zalloc(size, flags) vmem_alloc((size), ((flags) | \
- __GFP_ZERO))
+# endif /* HAVE_ATOMIC64_T */
# ifdef DEBUG_KMEM_TRACKING
-
-extern void *kmem_alloc_track(size_t size, int flags, const char *func,
- int line, int node_alloc, int node);
-extern void kmem_free_track(void *ptr, size_t size);
-extern void *vmem_alloc_track(size_t size, int flags, const char *func,
- int line);
-extern void vmem_free_track(void *ptr, size_t size);
-
-# define __kmem_alloc(size, flags, na, node) kmem_alloc_track((size), \
- (flags), __FUNCTION__, \
- __LINE__, (na), (node))
-# define kmem_free(ptr, size) kmem_free_track((ptr), (size))
-# define vmem_alloc(size, flags) vmem_alloc_track((size), \
- (flags),__FUNCTION__, \
- __LINE__)
-# define vmem_free(ptr, size) vmem_free_track((ptr), (size))
+/*
+ * DEBUG_KMEM && DEBUG_KMEM_TRACKING
+ *
+ * The maximum level of memory debugging. All memory will be accounted
+ * for and each allocation will be explicitly tracked. Any allocation
+ * which is leaked will be reported on module unload and the exact location
+ * where that memory was allocated will be reported. This level of memory
+ * tracking will have a significant impact on performance and should only
+ * be enabled for debugging. This feature may be enabled by passing
+ * --enable-debug-kmem-tracking to configure.
+ */
+# define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \
+ __FUNCTION__, __LINE__, 0, 0)
+# define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\
+ __FUNCTION__, __LINE__, 0, 0)
+# define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \
+ __FUNCTION__, __LINE__, 1, nd)
+# define kmem_free(ptr, sz) kmem_free_track((ptr), (sz))
+
+# define vmem_alloc(sz, fl) vmem_alloc_track((sz), (fl), \
+ __FUNCTION__, __LINE__)
+# define vmem_zalloc(sz, fl) vmem_alloc_track((sz), (fl)|__GFP_ZERO,\
+ __FUNCTION__, __LINE__)
+# define vmem_free(ptr, sz) vmem_free_track((ptr), (sz))
+
+extern void *kmem_alloc_track(size_t, int, const char *, int, int, int);
+extern void kmem_free_track(void *, size_t);
+extern void *vmem_alloc_track(size_t, int, const char *, int);
+extern void vmem_free_track(void *, size_t);
# else /* DEBUG_KMEM_TRACKING */
-
-extern void *kmem_alloc_debug(size_t size, int flags, const char *func,
- int line, int node_alloc, int node);
-extern void kmem_free_debug(void *ptr, size_t size);
-extern void *vmem_alloc_debug(size_t size, int flags, const char *func,
- int line);
-extern void vmem_free_debug(void *ptr, size_t size);
-
-# define __kmem_alloc(size, flags, na, node) kmem_alloc_debug((size), \
- (flags), __FUNCTION__, \
- __LINE__, (na), (node))
-# define kmem_free(ptr, size) kmem_free_debug((ptr), (size))
-# define vmem_alloc(size, flags) vmem_alloc_debug((size), \
- (flags), __FUNCTION__, \
- __LINE__)
-# define vmem_free(ptr, size) vmem_free_debug((ptr), (size))
+/*
+ * DEBUG_KMEM && !DEBUG_KMEM_TRACKING
+ *
+ * The default build will set DEBUG_KMEM. This provides basic memory
+ * accounting with little to no impact on performance. When the module
+ * is unloaded, if any memory was leaked, the total number of leaked bytes
+ * will be reported on the console. To disable this basic accounting
+ * pass the --disable-debug-kmem option to configure.
+ */
+# define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \
+ __FUNCTION__, __LINE__, 0, 0)
+# define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
+ __FUNCTION__, __LINE__, 0, 0)
+# define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \
+ __FUNCTION__, __LINE__, 1, nd)
+# define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz))
+
+# define vmem_alloc(sz, fl) vmem_alloc_debug((sz), (fl), \
+ __FUNCTION__, __LINE__)
+# define vmem_zalloc(sz, fl) vmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
+ __FUNCTION__, __LINE__)
+# define vmem_free(ptr, sz) vmem_free_debug((ptr), (sz))
+
+extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int);
+extern void kmem_free_debug(void *, size_t);
+extern void *vmem_alloc_debug(size_t, int, const char *, int);
+extern void vmem_free_debug(void *, size_t);
# endif /* DEBUG_KMEM_TRACKING */
-
#else /* DEBUG_KMEM */
+/*
+ * !DEBUG_KMEM && !DEBUG_KMEM_TRACKING
+ *
+ * All debugging is disabled. There will be no overhead, not even
+ * for minimal memory accounting. To enable basic accounting pass the
+ * --enable-debug-kmem option to configure.
+ */
+# define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl))
+# define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl))
+# define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd))
+# define kmem_free(ptr, sz) ((void)(sz), kfree(ptr))
-# define kmem_alloc(size, flags) kmalloc_nofail((size), (flags))
-# define kmem_zalloc(size, flags) kzalloc_nofail((size), (flags))
-# define kmem_free(ptr, size) ((void)(size), kfree(ptr))
-
-# ifdef HAVE_KMALLOC_NODE
-# define kmem_alloc_node(size, flags, node) \
- kmalloc_node_nofail((size), (flags), (node))
-# else
-# define kmem_alloc_node(size, flags, node) \
- kmalloc_nofail((size), (flags))
-# endif
-
-# define vmem_alloc(size, flags) __vmalloc((size), ((flags) | \
- __GFP_HIGHMEM), PAGE_KERNEL)
-# define vmem_zalloc(size, flags) \
-({ \
- void *_ptr_ = __vmalloc((size),((flags)|__GFP_HIGHMEM),PAGE_KERNEL); \
- if (_ptr_) \
- memset(_ptr_, 0, (size)); \
- _ptr_; \
-})
-# define vmem_free(ptr, size) ((void)(size), vfree(ptr))
+# define vmem_alloc(sz, fl) vmalloc_nofail((sz), (fl))
+# define vmem_zalloc(sz, fl) vzalloc_nofail((sz), (fl))
+# define vmem_free(ptr, sz) ((void)(sz), vfree(ptr))
#endif /* DEBUG_KMEM */
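
A usage sketch (not part of this patch): regardless of which DEBUG_KMEM configuration is built, callers see the same Solaris-style interface and a KM_SLEEP allocation retries rather than returning NULL. The example_alloc_pattern() function and its sizes below are hypothetical.

#include <sys/kmem.h>

static int
example_alloc_pattern(size_t small_sz, size_t large_sz)
{
	void *buf;
	void *big;

	/* Small, physically backed allocation; may block until it succeeds. */
	buf = kmem_zalloc(small_sz, KM_SLEEP);

	/* Large allocation backed by virtual memory to avoid fragmentation. */
	big = vmem_alloc(large_sz, KM_SLEEP);

	/* ... use the buffers ... */

	/* Solaris semantics: the caller passes the size back when freeing. */
	vmem_free(big, large_sz);
	kmem_free(buf, small_sz);

	return (0);
}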
+extern int kmem_debugging(void);
+extern char *kmem_vasprintf(const char *fmt, va_list ap);
+extern char *kmem_asprintf(const char *fmt, ...);
+extern char *strdup(const char *str);
+extern void strfree(char *str);
+
+
/*
- * Slab allocation interfaces
+ * Slab allocation interfaces. The SPL slab differs from the standard
+ * Linux SLAB or SLUB primarily in that each cache may be backed by slabs
+ * allocated from the physical or virtual memory address space. The virtual
+ * slabs allow for good behavior when allocating large objects of identical
+ * size. This slab implementation also supports both constructors and
+ * destructors which the Linux slab does not.
*/
enum {
KMC_BIT_NOTOUCH = 0, /* Don't update ages */
@@ -246,12 +310,6 @@ enum {
#define KMC_REAP_CHUNK INT_MAX
#define KMC_DEFAULT_SEEKS 1
-extern int kmem_debugging(void);
-extern char *kmem_vasprintf(const char *fmt, va_list ap);
-extern char *kmem_asprintf(const char *fmt, ...);
-#define strfree(str) kfree(str)
-#define strdup(str) kstrdup(str, GFP_KERNEL)
-
extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem;
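
The slab comment above mentions constructors, destructors, and virtually backed slabs; below is a hedged sketch (not from this patch) of how such a cache might be created and used, assuming the Solaris-compatible kmem_cache_create() argument order and the KMC_VMEM backing flag provided by the SPL. The my_obj_* names are hypothetical.

#include <sys/kmem.h>
#include <sys/mutex.h>

typedef struct my_obj {
	kmutex_t	mo_lock;
	int		mo_state;
} my_obj_t;

/* Constructor: expensive initialization amortized across object reuse. */
static int
my_obj_ctor(void *buf, void *priv, int kmflags)
{
	my_obj_t *obj = buf;

	mutex_init(&obj->mo_lock, NULL, MUTEX_DEFAULT, NULL);
	obj->mo_state = 0;

	return (0);
}

/* Destructor: cleanup the native Linux slab can no longer perform. */
static void
my_obj_dtor(void *buf, void *priv)
{
	my_obj_t *obj = buf;

	mutex_destroy(&obj->mo_lock);
}

static kmem_cache_t *my_obj_cache;

static void
my_obj_example(void)
{
	my_obj_t *obj;

	/* KMC_VMEM: back the slabs with virtual memory. */
	my_obj_cache = kmem_cache_create("my_obj_cache", sizeof (my_obj_t),
	    0, my_obj_ctor, my_obj_dtor, NULL, NULL, NULL, KMC_VMEM);

	obj = kmem_cache_alloc(my_obj_cache, KM_SLEEP);
	/* ... use obj ... */
	kmem_cache_free(my_obj_cache, obj);

	kmem_cache_destroy(my_obj_cache);
}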
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index e575b1ee9..ec1ccb4ce 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -271,6 +271,34 @@ kmem_asprintf(const char *fmt, ...)
}
EXPORT_SYMBOL(kmem_asprintf);
+static char *
+__strdup(const char *str, int flags)
+{
+ char *ptr;
+ int n;
+
+ n = strlen(str);
+ ptr = kmalloc_nofail(n + 1, flags);
+ if (ptr)
+ memcpy(ptr, str, n + 1);
+
+ return ptr;
+}
+
+char *
+strdup(const char *str)
+{
+ return __strdup(str, KM_SLEEP);
+}
+EXPORT_SYMBOL(strdup);
+
+void
+strfree(char *str)
+{
+ kmem_free(str, strlen(str) + 1);
+}
+EXPORT_SYMBOL(strfree);
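
A brief usage sketch (hypothetical, not from this patch): strings obtained from strdup() and kmem_asprintf() are released with strfree(), which recovers the allocation size from strlen() so callers need not track it.

static void
example_string_helpers(const char *name)
{
	char *copy, *msg;

	copy = strdup(name);				/* KM_SLEEP copy */
	msg = kmem_asprintf("device '%s' ready", name);

	/* ... */

	/* strfree() frees strlen(str) + 1 bytes, matching the allocations. */
	strfree(msg);
	strfree(copy);
}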
+
/*
* Memory allocation interfaces and debugging for basic kmem_*
* and vmem_* style memory allocation. When DEBUG_KMEM is enabled
@@ -285,12 +313,12 @@ atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
unsigned long long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used = ATOMIC64_INIT(0);
unsigned long long vmem_alloc_max = 0;
-# else
+# else /* HAVE_ATOMIC64_T */
atomic_t kmem_alloc_used = ATOMIC_INIT(0);
unsigned long long kmem_alloc_max = 0;
atomic_t vmem_alloc_used = ATOMIC_INIT(0);
unsigned long long vmem_alloc_max = 0;
-# endif /* _LP64 */
+# endif /* HAVE_ATOMIC64_T */
EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
@@ -340,77 +368,9 @@ EXPORT_SYMBOL(kmem_list);
EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);
-# endif
-#endif
-
-/*
- * Slab allocation interfaces
- *
- * While the Linux slab implementation was inspired by the Solaris
- * implemenation I cannot use it to emulate the Solaris APIs. I
- * require two features which are not provided by the Linux slab.
- *
- * 1) Constructors AND destructors. Recent versions of the Linux
- * kernel have removed support for destructors. This is a deal
- * breaker for the SPL which contains particularly expensive
- * initializers for mutex's, condition variables, etc. We also
- * require a minimal level of cleanup for these data types unlike
- * many Linux data type which do need to be explicitly destroyed.
- *
- * 2) Virtual address space backed slab. Callers of the Solaris slab
- * expect it to work well for both small are very large allocations.
- * Because of memory fragmentation the Linux slab which is backed
- * by kmalloc'ed memory performs very badly when confronted with
- * large numbers of large allocations. Basing the slab on the
- * virtual address space removes the need for contigeous pages
- * and greatly improve performance for large allocations.
- *
- * For these reasons, the SPL has its own slab implementation with
- * the needed features. It is not as highly optimized as either the
- * Solaris or Linux slabs, but it should get me most of what is
- * needed until it can be optimized or obsoleted by another approach.
- *
- * One serious concern I do have about this method is the relatively
- * small virtual address space on 32bit arches. This will seriously
- * constrain the size of the slab caches and their performance.
- *
- * XXX: Improve the partial slab list by carefully maintaining a
- * strict ordering of fullest to emptiest slabs based on
- * the slab reference count. This gaurentees the when freeing
- * slabs back to the system we need only linearly traverse the
- * last N slabs in the list to discover all the freeable slabs.
- *
- * XXX: NUMA awareness for optionally allocating memory close to a
- * particular core. This can be adventageous if you know the slab
- * object will be short lived and primarily accessed from one core.
- *
- * XXX: Slab coloring may also yield performance improvements and would
- * be desirable to implement.
- */
-
-struct list_head spl_kmem_cache_list; /* List of caches */
-struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
-
-static int spl_cache_flush(spl_kmem_cache_t *skc,
- spl_kmem_magazine_t *skm, int flush);
-
-#ifdef HAVE_SET_SHRINKER
-static struct shrinker *spl_kmem_cache_shrinker;
-#else
-static int spl_kmem_cache_generic_shrinker(int nr_to_scan,
- unsigned int gfp_mask);
-static struct shrinker spl_kmem_cache_shrinker = {
- .shrink = spl_kmem_cache_generic_shrinker,
- .seeks = KMC_DEFAULT_SEEKS,
-};
-#endif
-
-#ifdef DEBUG_KMEM
-# ifdef DEBUG_KMEM_TRACKING
static kmem_debug_t *
-kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits,
- void *addr)
+kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, void *addr)
{
struct hlist_head *head;
struct hlist_node *node;
@@ -444,17 +404,20 @@ kmem_alloc_track(size_t size, int flags, const char *func, int line,
unsigned long irq_flags;
SENTRY;
+ /* Function may be called with KM_NOSLEEP so failure is possible */
dptr = (kmem_debug_t *) kmalloc_nofail(sizeof(kmem_debug_t),
flags & ~__GFP_ZERO);
- if (dptr == NULL) {
+ if (unlikely(dptr == NULL)) {
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "debug "
"kmem_alloc(%ld, 0x%x) at %s:%d failed (%lld/%llu)\n",
sizeof(kmem_debug_t), flags, func, line,
kmem_alloc_used_read(), kmem_alloc_max);
} else {
- /* Marked unlikely because we should never be doing this,
- * we tolerate to up 2 pages but a single page is best. */
+ /*
+ * Marked unlikely because we should never be doing this,
+ * we tolerate up to 2 pages but a single page is best.
+ */
if (unlikely((size > PAGE_SIZE*2) && !(flags & KM_NODEBUG))) {
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "large "
"kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n",
@@ -463,14 +426,17 @@ kmem_alloc_track(size_t size, int flags, const char *func, int line,
spl_debug_dumpstack(NULL);
}
- /* We use kstrdup() below because the string pointed to by
+ /*
+ * We use __strdup() below because the string pointed to by
* __FUNCTION__ might not be available by the time we want
- * to print it since the module might have been unloaded. */
- dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO);
+ * to print it since the module might have been unloaded.
+ * This can only fail in the KM_NOSLEEP case.
+ */
+ dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO);
if (unlikely(dptr->kd_func == NULL)) {
kfree(dptr);
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING,
- "debug kstrdup() at %s:%d failed (%lld/%llu)\n",
+ "debug __strdup() at %s:%d failed (%lld/%llu)\n",
func, line, kmem_alloc_used_read(), kmem_alloc_max);
goto out;
}
@@ -533,7 +499,8 @@ kmem_free_track(void *ptr, size_t size)
dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);
- ASSERT(dptr); /* Must exist in hash due to kmem_alloc() */
+ /* Must exist in hash due to kmem_alloc() */
+ ASSERT(dptr);
/* Size must match */
ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), "
@@ -567,28 +534,37 @@ vmem_alloc_track(size_t size, int flags, const char *func, int line)
ASSERT(flags & KM_SLEEP);
+ /* Function may be called with KM_NOSLEEP so failure is possible */
dptr = (kmem_debug_t *) kmalloc_nofail(sizeof(kmem_debug_t),
flags & ~__GFP_ZERO);
- if (dptr == NULL) {
+ if (unlikely(dptr == NULL)) {
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "debug "
"vmem_alloc(%ld, 0x%x) at %s:%d failed (%lld/%llu)\n",
sizeof(kmem_debug_t), flags, func, line,
vmem_alloc_used_read(), vmem_alloc_max);
} else {
- /* We use kstrdup() below because the string pointed to by
+ /*
+ * We use __strdup() below because the string pointed to by
* __FUNCTION__ might not be available by the time we want
- * to print it, since the module might have been unloaded. */
- dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO);
+ * to print it, since the module might have been unloaded.
+ * This can never fail because we have already asserted
+ * that flags is KM_SLEEP.
+ */
+ dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO);
if (unlikely(dptr->kd_func == NULL)) {
kfree(dptr);
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING,
- "debug kstrdup() at %s:%d failed (%lld/%llu)\n",
+ "debug __strdup() at %s:%d failed (%lld/%llu)\n",
func, line, vmem_alloc_used_read(), vmem_alloc_max);
goto out;
}
- ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO,
- PAGE_KERNEL);
+ /* Use the correct allocator */
+ if (flags & __GFP_ZERO) {
+ ptr = vzalloc_nofail(size, flags & ~__GFP_ZERO);
+ } else {
+ ptr = vmalloc_nofail(size, flags);
+ }
if (unlikely(ptr == NULL)) {
kfree(dptr->kd_func);
@@ -600,9 +576,6 @@ vmem_alloc_track(size_t size, int flags, const char *func, int line)
goto out;
}
- if (flags & __GFP_ZERO)
- memset(ptr, 0, size);
-
vmem_alloc_used_add(size);
if (unlikely(vmem_alloc_used_read() > vmem_alloc_max))
vmem_alloc_max = vmem_alloc_used_read();
@@ -640,7 +613,9 @@ vmem_free_track(void *ptr, size_t size)
(unsigned long long) size);
dptr = kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr);
- ASSERT(dptr); /* Must exist in hash due to vmem_alloc() */
+
+ /* Must exist in hash due to vmem_alloc() */
+ ASSERT(dptr);
/* Size must match */
ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), "
@@ -673,11 +648,13 @@ kmem_alloc_debug(size_t size, int flags, const char *func, int line,
void *ptr;
SENTRY;
- /* Marked unlikely because we should never be doing this,
- * we tolerate to up 2 pages but a single page is best. */
+ /*
+ * Marked unlikely because we should never be doing this,
+ * we tolerate up to 2 pages but a single page is best.
+ */
if (unlikely((size > PAGE_SIZE * 2) && !(flags & KM_NODEBUG))) {
SDEBUG(SD_CONSOLE | SD_WARNING,
- "Large kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n",
+ "large kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n",
(unsigned long long) size, flags, func, line,
kmem_alloc_used_read(), kmem_alloc_max);
spl_debug_dumpstack(NULL);
@@ -693,7 +670,7 @@ kmem_alloc_debug(size_t size, int flags, const char *func, int line,
ptr = kmalloc_nofail(size, flags);
}
- if (ptr == NULL) {
+ if (unlikely(ptr == NULL)) {
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING,
"kmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n",
(unsigned long long) size, flags, func, line,
@@ -706,8 +683,9 @@ kmem_alloc_debug(size_t size, int flags, const char *func, int line,
SDEBUG_LIMIT(SD_INFO,
"kmem_alloc(%llu, 0x%x) at %s:%d = %p (%lld/%llu)\n",
(unsigned long long) size, flags, func, line, ptr,
- kmem_alloc_used_read(), kmem_alloc_max);
+ kmem_alloc_used_read(), kmem_alloc_max);
}
+
SRETURN(ptr);
}
EXPORT_SYMBOL(kmem_alloc_debug);
@@ -724,8 +702,6 @@ kmem_free_debug(void *ptr, size_t size)
SDEBUG_LIMIT(SD_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr,
(unsigned long long) size, kmem_alloc_used_read(),
kmem_alloc_max);
-
- memset(ptr, 0x5a, size);
kfree(ptr);
SEXIT;
@@ -740,17 +716,19 @@ vmem_alloc_debug(size_t size, int flags, const char *func, int line)
ASSERT(flags & KM_SLEEP);
- ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO,
- PAGE_KERNEL);
- if (ptr == NULL) {
+ /* Use the correct allocator */
+ if (flags & __GFP_ZERO) {
+ ptr = vzalloc_nofail(size, flags & (~__GFP_ZERO));
+ } else {
+ ptr = vmalloc_nofail(size, flags);
+ }
+
+ if (unlikely(ptr == NULL)) {
SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING,
"vmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n",
(unsigned long long) size, flags, func, line,
vmem_alloc_used_read(), vmem_alloc_max);
} else {
- if (flags & __GFP_ZERO)
- memset(ptr, 0, size);
-
vmem_alloc_used_add(size);
if (unlikely(vmem_alloc_used_read() > vmem_alloc_max))
vmem_alloc_max = vmem_alloc_used_read();
@@ -776,8 +754,6 @@ vmem_free_debug(void *ptr, size_t size)
SDEBUG_LIMIT(SD_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr,
(unsigned long long) size, vmem_alloc_used_read(),
vmem_alloc_max);
-
- memset(ptr, 0x5a, size);
vfree(ptr);
SEXIT;
@@ -787,6 +763,68 @@ EXPORT_SYMBOL(vmem_free_debug);
# endif /* DEBUG_KMEM_TRACKING */
#endif /* DEBUG_KMEM */
+/*
+ * Slab allocation interfaces
+ *
+ * While the Linux slab implementation was inspired by the Solaris
+ * implementation I cannot use it to emulate the Solaris APIs. I
+ * require two features which are not provided by the Linux slab.
+ *
+ * 1) Constructors AND destructors. Recent versions of the Linux
+ * kernel have removed support for destructors. This is a deal
+ * breaker for the SPL which contains particularly expensive
+ * initializers for mutexes, condition variables, etc. We also
+ * require a minimal level of cleanup for these data types unlike
+ * many Linux data types which do need to be explicitly destroyed.
+ *
+ * 2) Virtual address space backed slab. Callers of the Solaris slab
+ * expect it to work well for both small and very large allocations.
+ * Because of memory fragmentation the Linux slab which is backed
+ * by kmalloc'ed memory performs very badly when confronted with
+ * large numbers of large allocations. Basing the slab on the
+ * virtual address space removes the need for contiguous pages
+ * and greatly improves performance for large allocations.
+ *
+ * For these reasons, the SPL has its own slab implementation with
+ * the needed features. It is not as highly optimized as either the
+ * Solaris or Linux slabs, but it should get me most of what is
+ * needed until it can be optimized or obsoleted by another approach.
+ *
+ * One serious concern I do have about this method is the relatively
+ * small virtual address space on 32bit arches. This will seriously
+ * constrain the size of the slab caches and their performance.
+ *
+ * XXX: Improve the partial slab list by carefully maintaining a
+ * strict ordering of fullest to emptiest slabs based on
+ * the slab reference count. This guarantees that when freeing
+ * slabs back to the system we need only linearly traverse the
+ * last N slabs in the list to discover all the freeable slabs.
+ *
+ * XXX: NUMA awareness for optionally allocating memory close to a
+ * particular core. This can be advantageous if you know the slab
+ * object will be short lived and primarily accessed from one core.
+ *
+ * XXX: Slab coloring may also yield performance improvements and would
+ * be desirable to implement.
+ */
+
+struct list_head spl_kmem_cache_list; /* List of caches */
+struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
+
+static int spl_cache_flush(spl_kmem_cache_t *skc,
+ spl_kmem_magazine_t *skm, int flush);
+
+#ifdef HAVE_SET_SHRINKER
+static struct shrinker *spl_kmem_cache_shrinker;
+#else
+static int spl_kmem_cache_generic_shrinker(int nr_to_scan,
+ unsigned int gfp_mask);
+static struct shrinker spl_kmem_cache_shrinker = {
+ .shrink = spl_kmem_cache_generic_shrinker,
+ .seeks = KMC_DEFAULT_SEEKS,
+};
+#endif
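
For context, a hedged sketch (not part of this patch) of how such a compat shrinker is typically registered and torn down; set_shrinker()/remove_shrinker() are the legacy kernel calls selected by HAVE_SET_SHRINKER, register_shrinker()/unregister_shrinker() are the current ones, and the example_* wrappers are hypothetical.

static void
example_shrinker_setup(void)
{
#ifdef HAVE_SET_SHRINKER
	/* Legacy API: the kernel allocates and returns the shrinker handle. */
	spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
	    spl_kmem_cache_generic_shrinker);
#else
	/* Current API: the caller owns the statically defined struct. */
	register_shrinker(&spl_kmem_cache_shrinker);
#endif
}

static void
example_shrinker_teardown(void)
{
#ifdef HAVE_SET_SHRINKER
	remove_shrinker(spl_kmem_cache_shrinker);
#else
	unregister_shrinker(&spl_kmem_cache_shrinker);
#endif
}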
+
static void *
kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
{