-rw-r--r--  config/spl-build.m4        38
-rw-r--r--  include/sys/kmem.h         27
-rw-r--r--  module/spl/spl-kmem.c     174
-rw-r--r--  module/spl/spl-proc.c       6
-rw-r--r--  module/splat/splat-kmem.c  33
5 files changed, 241 insertions(+), 37 deletions(-)
diff --git a/config/spl-build.m4 b/config/spl-build.m4
index 08b84efe9..eef52334f 100644
--- a/config/spl-build.m4
+++ b/config/spl-build.m4
@@ -93,6 +93,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
SPL_AC_SCHED_RT_HEADER
SPL_AC_2ARGS_VFS_GETATTR
SPL_AC_USLEEP_RANGE
+ SPL_AC_KMEM_CACHE_ALLOCFLAGS
])
AC_DEFUN([SPL_AC_MODULE_SYMVERS], [
@@ -2532,3 +2533,40 @@ AC_DEFUN([SPL_AC_USLEEP_RANGE], [
AC_MSG_RESULT(no)
])
])
+
+dnl #
+dnl # 2.6.35 API change,
+dnl # The cachep->gfpflags member was renamed cachep->allocflags. These are
+dnl # private allocation flags which are applied when allocating a new slab
+dnl # in kmem_getpages(). Unfortunately there is no public API for setting
+dnl # non-default flags.
+dnl #
+AC_DEFUN([SPL_AC_KMEM_CACHE_ALLOCFLAGS], [
+ AC_MSG_CHECKING([whether struct kmem_cache has allocflags])
+ SPL_LINUX_TRY_COMPILE([
+ #include <linux/slab.h>
+ ],[
+ struct kmem_cache cachep __attribute__ ((unused));
+ cachep.allocflags = GFP_KERNEL;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_KMEM_CACHE_ALLOCFLAGS, 1,
+ [struct kmem_cache has allocflags])
+ ],[
+ AC_MSG_RESULT(no)
+
+ AC_MSG_CHECKING([whether struct kmem_cache has gfpflags])
+ SPL_LINUX_TRY_COMPILE([
+ #include <linux/slab.h>
+ ],[
+ struct kmem_cache cachep __attribute__ ((unused));
+ cachep.gfpflags = GFP_KERNEL;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_KMEM_CACHE_GFPFLAGS, 1,
+ [struct kmem_cache has gfpflags])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ ])
+])
diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index 516114fd7..18533fe39 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -340,8 +340,9 @@ enum {
KMC_BIT_QCACHE = 4, /* XXX: Unsupported */
KMC_BIT_KMEM = 5, /* Use kmem cache */
KMC_BIT_VMEM = 6, /* Use vmem cache */
- KMC_BIT_OFFSLAB = 7, /* Objects not on slab */
- KMC_BIT_NOEMERGENCY = 8, /* Disable emergency objects */
+ KMC_BIT_SLAB = 7, /* Use Linux slab cache */
+ KMC_BIT_OFFSLAB = 8, /* Objects not on slab */
+ KMC_BIT_NOEMERGENCY = 9, /* Disable emergency objects */
KMC_BIT_DEADLOCKED = 14, /* Deadlock detected */
KMC_BIT_GROWING = 15, /* Growing in progress */
KMC_BIT_REAPING = 16, /* Reaping in progress */
@@ -367,6 +368,7 @@ typedef enum kmem_cbrc {
#define KMC_QCACHE (1 << KMC_BIT_QCACHE)
#define KMC_KMEM (1 << KMC_BIT_KMEM)
#define KMC_VMEM (1 << KMC_BIT_VMEM)
+#define KMC_SLAB (1 << KMC_BIT_SLAB)
#define KMC_OFFSLAB (1 << KMC_BIT_OFFSLAB)
#define KMC_NOEMERGENCY (1 << KMC_BIT_NOEMERGENCY)
#define KMC_DEADLOCKED (1 << KMC_BIT_DEADLOCKED)
@@ -456,6 +458,7 @@ typedef struct spl_kmem_cache {
spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
void *skc_private; /* Private data */
void *skc_vmp; /* Unused */
+ struct kmem_cache *skc_linux_cache; /* Linux slab cache if used */
unsigned long skc_flags; /* Flags */
uint32_t skc_obj_size; /* Object size */
uint32_t skc_obj_align; /* Object alignment */
@@ -513,4 +516,24 @@ void spl_kmem_fini(void);
#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \
((ptr) < (void *)VMALLOC_END))
+/*
+ * Allow custom slab allocation flags to be set for KMC_SLAB based caches.
+ * One use for this function is to ensure the __GFP_COMP flag is part of
+ * the default allocation mask so that higher order allocations are
+ * properly refcounted. This flag was added to the default ->allocflags
+ * as of Linux 3.11.
+ */
+static inline void
+kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags)
+{
+ if (skc->skc_linux_cache == NULL)
+ return;
+
+#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
+ skc->skc_linux_cache->allocflags |= flags;
+#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
+ skc->skc_linux_cache->gfpflags |= flags;
+#endif
+}
+
#endif /* _SPL_KMEM_H */
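
As a caller-side sketch of the new KMC_SLAB flag and the kmem_cache_set_allocflags() helper above: a consumer could force a small-object cache onto the Linux slab and OR extra private allocation flags into its mask. The object type, constructor, and exact spl_kmem_cache_create() argument order here are illustrative assumptions, and the explicit __GFP_COMP call is redundant with this patch (the create path already applies it for KMC_SLAB caches); it only demonstrates the helper.

/* Hypothetical consumer of KMC_SLAB; all "my_" names are illustrative. */
#include <sys/kmem.h>

typedef struct my_obj {
	int	mo_busy;
	char	mo_data[64];
} my_obj_t;

/* Constructor signature assumed to match skc_ctor(obj, priv, flags). */
static int
my_obj_ctor(void *obj, void *priv, int flags)
{
	((my_obj_t *)obj)->mo_busy = 0;
	return (0);
}

static spl_kmem_cache_t *
my_cache_init(void)
{
	spl_kmem_cache_t *skc;

	/* Force a Linux slab backed cache instead of kmem/vmem slabs. */
	skc = spl_kmem_cache_create("my_obj_cache", sizeof (my_obj_t), 0,
	    my_obj_ctor, NULL, NULL, NULL, NULL, KMC_SLAB);
	if (skc == NULL)
		return (NULL);

	/* Apply a private allocation flag; a no-op for kmem/vmem caches. */
	kmem_cache_set_allocflags(skc, __GFP_COMP);

	return (skc);
}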
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index b673c2915..fc04604d5 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -34,6 +34,16 @@
#define SS_DEBUG_SUBSYS SS_KMEM
/*
 * Within the scope of spl-kmem.c the kmem_cache_* definitions
 * are removed to allow access to the real Linux slab allocator.
+ */
+#undef kmem_cache_destroy
+#undef kmem_cache_create
+#undef kmem_cache_alloc
+#undef kmem_cache_free
+
+
+/*
* Cache expiration was implemented because it was part of the default Solaris
* kmem_cache behavior. The idea is that per-cpu objects which haven't been
* accessed in several seconds should be returned to the cache. On the other
@@ -60,6 +70,16 @@ unsigned int spl_kmem_cache_max_size = 32;
module_param(spl_kmem_cache_max_size, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
+unsigned int spl_kmem_cache_slab_limit = 0;
+module_param(spl_kmem_cache_slab_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
+ "Objects less than N bytes use the Linux slab");
+
+unsigned int spl_kmem_cache_kmem_limit = (PAGE_SIZE / 4);
+module_param(spl_kmem_cache_kmem_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
+	"Objects less than N bytes use kmalloc");
+
/*
* The minimum amount of memory measured in pages to be free at all
* times on the system. This is similar to Linux's zone->pages_min
@@ -1348,7 +1368,10 @@ spl_cache_age(void *data)
return;
atomic_inc(&skc->skc_ref);
- spl_on_each_cpu(spl_magazine_age, skc, 1);
+
+ if (!(skc->skc_flags & KMC_NOMAGAZINE))
+ spl_on_each_cpu(spl_magazine_age, skc, 1);
+
spl_slab_reclaim(skc, skc->skc_reap, 0);
while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
@@ -1493,6 +1516,9 @@ spl_magazine_create(spl_kmem_cache_t *skc)
int i;
SENTRY;
+ if (skc->skc_flags & KMC_NOMAGAZINE)
+ SRETURN(0);
+
skc->skc_mag_size = spl_magazine_size(skc);
skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
@@ -1519,6 +1545,11 @@ spl_magazine_destroy(spl_kmem_cache_t *skc)
int i;
SENTRY;
+ if (skc->skc_flags & KMC_NOMAGAZINE) {
+ SEXIT;
+ return;
+ }
+
for_each_online_cpu(i) {
skm = skc->skc_mag[i];
spl_cache_flush(skc, skm, skm->skm_avail);
@@ -1541,11 +1572,12 @@ spl_magazine_destroy(spl_kmem_cache_t *skc)
* flags
* KMC_NOTOUCH Disable cache object aging (unsupported)
* KMC_NODEBUG Disable debugging (unsupported)
- * KMC_NOMAGAZINE Disable magazine (unsupported)
* KMC_NOHASH Disable hashing (unsupported)
* KMC_QCACHE Disable qcache (unsupported)
+ * KMC_NOMAGAZINE Magazines enabled for kmem/vmem, disabled for Linux slab
* KMC_KMEM Force kmem backed cache
* KMC_VMEM Force vmem backed cache
+ * KMC_SLAB Force Linux slab backed cache
* KMC_OFFSLAB Locate objects off the slab
*/
spl_kmem_cache_t *
@@ -1591,6 +1623,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_reclaim = reclaim;
skc->skc_private = priv;
skc->skc_vmp = vmp;
+ skc->skc_linux_cache = NULL;
skc->skc_flags = flags;
skc->skc_obj_size = size;
skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
@@ -1617,28 +1650,69 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_emergency = 0;
skc->skc_obj_emergency_max = 0;
+ /*
+ * Verify the requested alignment restriction is sane.
+ */
if (align) {
VERIFY(ISP2(align));
- VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN); /* Min alignment */
- VERIFY3U(align, <=, PAGE_SIZE); /* Max alignment */
+ VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
+ VERIFY3U(align, <=, PAGE_SIZE);
skc->skc_obj_align = align;
}
- /* If none passed select a cache type based on object size */
- if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
- if (spl_obj_size(skc) < (PAGE_SIZE / 8))
+ /*
+ * When no specific type of slab is requested (kmem, vmem, or
+ * linuxslab) then select a cache type based on the object size
+ * and default tunables.
+ */
+ if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) {
+
+ /*
+ * Objects smaller than spl_kmem_cache_slab_limit can
+ * use the Linux slab for better space-efficiency. By
+ * default this functionality is disabled until its
+ * performance characteristics are fully understood.
+ */
+ if (spl_kmem_cache_slab_limit &&
+ size <= (size_t)spl_kmem_cache_slab_limit)
+ skc->skc_flags |= KMC_SLAB;
+
+ /*
+ * Small objects, less than spl_kmem_cache_kmem_limit per
+ * object, should use kmem because their slabs are small.
+ */
+ else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit)
skc->skc_flags |= KMC_KMEM;
+
+ /*
+ * All other objects are considered large and are placed
+ * on vmem backed slabs.
+ */
else
skc->skc_flags |= KMC_VMEM;
}
- rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size);
- if (rc)
- SGOTO(out, rc);
+ /*
+ * Given the type of slab, allocate the required resources.
+ */
+ if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+ rc = spl_slab_size(skc,
+ &skc->skc_slab_objs, &skc->skc_slab_size);
+ if (rc)
+ SGOTO(out, rc);
+
+ rc = spl_magazine_create(skc);
+ if (rc)
+ SGOTO(out, rc);
+ } else {
+ skc->skc_linux_cache = kmem_cache_create(
+ skc->skc_name, size, align, 0, NULL);
+ if (skc->skc_linux_cache == NULL)
+ SGOTO(out, rc = ENOMEM);
- rc = spl_magazine_create(skc);
- if (rc)
- SGOTO(out, rc);
+ kmem_cache_set_allocflags(skc, __GFP_COMP);
+ skc->skc_flags |= KMC_NOMAGAZINE;
+ }
if (spl_kmem_cache_expire & KMC_EXPIRE_AGE)
skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
@@ -1680,6 +1754,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
SENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB));
down_write(&spl_kmem_cache_sem);
list_del_init(&skc->skc_list);
@@ -1699,8 +1774,14 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
* cache reaping action which races with this destroy. */
wait_event(wq, atomic_read(&skc->skc_ref) == 0);
- spl_magazine_destroy(skc);
- spl_slab_reclaim(skc, 0, 1);
+ if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+ spl_magazine_destroy(skc);
+ spl_slab_reclaim(skc, 0, 1);
+ } else {
+ ASSERT(skc->skc_flags & KMC_SLAB);
+ kmem_cache_destroy(skc->skc_linux_cache);
+ }
+
spin_lock(&skc->skc_lock);
/* Validate there are no objects in use and free all the
@@ -1806,7 +1887,9 @@ spl_cache_reclaim_wait(void *word)
}
/*
- * No available objects on any slabs, create a new slab.
+ * No available objects on any slabs, create a new slab. Note that this
+ * functionality is disabled for KMC_SLAB caches which are backed by the
+ * Linux slab.
*/
static int
spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
@@ -1815,6 +1898,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
SENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT((skc->skc_flags & KMC_SLAB) == 0);
might_sleep();
*obj = NULL;
@@ -2016,7 +2100,28 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
ASSERT(flags & KM_SLEEP);
+
atomic_inc(&skc->skc_ref);
+
+ /*
+ * Allocate directly from a Linux slab. All optimizations are left
+ * to the underlying cache; we only need to guarantee that KM_SLEEP
+ * callers will never fail.
+ */
+ if (skc->skc_flags & KMC_SLAB) {
+ struct kmem_cache *slc = skc->skc_linux_cache;
+
+ do {
+ obj = kmem_cache_alloc(slc, flags | __GFP_COMP);
+ if (obj && skc->skc_ctor)
+ skc->skc_ctor(obj, skc->skc_private, flags);
+
+ } while ((obj == NULL) && !(flags & KM_NOSLEEP));
+
+ atomic_dec(&skc->skc_ref);
+ SRETURN(obj);
+ }
+
local_irq_disable();
restart:
@@ -2069,6 +2174,17 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
atomic_inc(&skc->skc_ref);
/*
+ * Free the object back to the underlying Linux slab.
+ */
+ if (skc->skc_flags & KMC_SLAB) {
+ if (skc->skc_dtor)
+ skc->skc_dtor(obj, skc->skc_private);
+
+ kmem_cache_free(skc->skc_linux_cache, obj);
+ goto out;
+ }
+
+ /*
* Only virtual slabs may have emergency objects and these objects
* are guaranteed to have physical addresses. They must be removed
 * from the tree of emergency objects and then freed.
@@ -2166,13 +2282,27 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- /* Prevent concurrent cache reaping when contended */
- if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
- SEXIT;
- return;
+ atomic_inc(&skc->skc_ref);
+
+ /*
+ * Execute the registered reclaim callback if it exists. The
+ * per-cpu caches will be drained when KMC_EXPIRE_MEM is set.
+ */
+ if (skc->skc_flags & KMC_SLAB) {
+ if (skc->skc_reclaim)
+ skc->skc_reclaim(skc->skc_private);
+
+ if (spl_kmem_cache_expire & KMC_EXPIRE_MEM)
+ kmem_cache_shrink(skc->skc_linux_cache);
+
+ SGOTO(out, 0);
}
- atomic_inc(&skc->skc_ref);
+ /*
+ * Prevent concurrent cache reaping when contended.
+ */
+ if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
+ SGOTO(out, 0);
/*
* When a reclaim function is available it may be invoked repeatedly
@@ -2222,7 +2352,7 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
clear_bit(KMC_BIT_REAPING, &skc->skc_flags);
smp_mb__after_clear_bit();
wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);
-
+out:
atomic_dec(&skc->skc_ref);
SEXIT;
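
A minimal usage sketch of the KMC_SLAB allocation path added above, assuming a cache created with KMC_SLAB; the function name is illustrative. With KM_SLEEP the alloc loop keeps retrying kmem_cache_alloc() until an object is returned, so sleeping callers never have to handle NULL (and the alloc path asserts KM_SLEEP).

/* Illustrative caller; skc is assumed to be a KMC_SLAB backed cache. */
static void
my_cache_use(spl_kmem_cache_t *skc)
{
	void *obj;

	/* KM_SLEEP allocations retry until they succeed; no NULL check. */
	obj = spl_kmem_cache_alloc(skc, KM_SLEEP);

	/* ... use obj; the registered constructor has already run ... */

	/* Runs the registered destructor, then returns obj to the slab. */
	spl_kmem_cache_free(skc, obj);
}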
diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c
index 2bbc8d790..b4be84fef 100644
--- a/module/spl/spl-proc.c
+++ b/module/spl/spl-proc.c
@@ -646,6 +646,12 @@ slab_seq_show(struct seq_file *f, void *p)
ASSERT(skc->skc_magic == SKC_MAGIC);
+ /*
+ * Backed by the Linux slab; see /proc/slabinfo.
+ */
+ if (skc->skc_flags & KMC_SLAB)
+ return (0);
+
spin_lock(&skc->skc_lock);
seq_printf(f, "%-36s ", skc->skc_name);
seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c
index 25a52b43d..4d060c138 100644
--- a/module/splat/splat-kmem.c
+++ b/module/splat/splat-kmem.c
@@ -394,18 +394,25 @@ splat_kmem_cache_test_debug(struct file *file, char *name,
{
int j;
- splat_vprint(file, name,
- "%s cache objects %d, slabs %u/%u objs %u/%u mags ",
- kcp->kcp_cache->skc_name, kcp->kcp_count,
+ splat_vprint(file, name, "%s cache objects %d",
+ kcp->kcp_cache->skc_name, kcp->kcp_count);
+
+ if (kcp->kcp_cache->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+ splat_vprint(file, name, ", slabs %u/%u objs %u/%u",
(unsigned)kcp->kcp_cache->skc_slab_alloc,
(unsigned)kcp->kcp_cache->skc_slab_total,
(unsigned)kcp->kcp_cache->skc_obj_alloc,
(unsigned)kcp->kcp_cache->skc_obj_total);
- for_each_online_cpu(j)
- splat_print(file, "%u/%u ",
- kcp->kcp_cache->skc_mag[j]->skm_avail,
- kcp->kcp_cache->skc_mag[j]->skm_size);
+ if (!(kcp->kcp_cache->skc_flags & KMC_NOMAGAZINE)) {
+ splat_vprint(file, name, "%s", "mags");
+
+ for_each_online_cpu(j)
+ splat_print(file, "%u/%u ",
+ kcp->kcp_cache->skc_mag[j]->skm_avail,
+ kcp->kcp_cache->skc_mag[j]->skm_size);
+ }
+ }
splat_print(file, "%s\n", "");
}
@@ -900,14 +907,14 @@ splat_kmem_test8(struct file *file, void *arg)
kmem_cache_reap_now(kcp->kcp_cache);
splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST8_NAME, kcp);
- if (kcp->kcp_cache->skc_obj_total == 0)
+ if (kcp->kcp_count == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ / 10);
}
- if (kcp->kcp_cache->skc_obj_total == 0) {
+ if (kcp->kcp_count == 0) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
@@ -915,7 +922,7 @@ splat_kmem_test8(struct file *file, void *arg)
} else {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
- (unsigned)kcp->kcp_cache->skc_obj_total,
+ (unsigned)kcp->kcp_count,
SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}
@@ -995,14 +1002,14 @@ splat_kmem_test9(struct file *file, void *arg)
for (i = 0; i < 60; i++) {
splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST9_NAME, kcp);
- if (kcp->kcp_cache->skc_obj_total == 0)
+ if (kcp->kcp_count == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
}
- if (kcp->kcp_cache->skc_obj_total == 0) {
+ if (kcp->kcp_count == 0) {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
@@ -1010,7 +1017,7 @@ splat_kmem_test9(struct file *file, void *arg)
} else {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
- (unsigned)kcp->kcp_cache->skc_obj_total, count,
+ (unsigned)kcp->kcp_count, count,
SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}
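
The reclaim checks above now poll the test's own outstanding-object counter instead of skc_obj_total, which is not maintained for KMC_SLAB backed caches. A sketch of that polling pattern, with the helper name and the kmem_cache_priv_t type name assumed from the kcp_cache/kcp_count fields used in test8 and test9:

/* Illustrative helper mirroring the test8-style reclaim polling loop. */
static int
splat_kmem_wait_reclaim(struct file *file, char *name,
    kmem_cache_priv_t *kcp, int seconds)
{
	int i;

	for (i = 0; i < seconds; i++) {
		kmem_cache_reap_now(kcp->kcp_cache);
		splat_kmem_cache_test_debug(file, name, kcp);

		/* The test's own counter is valid for every cache type. */
		if (kcp->kcp_count == 0)
			return (0);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
	}

	return (-ENOMEM);
}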