author    Brian Behlendorf <[email protected]>  2009-01-30 20:54:49 -0800
committer Brian Behlendorf <[email protected]>  2009-01-30 20:54:49 -0800
commit    ea3e6ca9e595ebfba82b964ee2eaf1ddd7076f0f (patch)
tree      7480b87145297f3882ffe18234280512e136cdb4
parent    34e71c9e97f4d0d2b3ede850d016a7de558b0f3c (diff)
kmem_cache hardening and performance improvements
- Added a slab work queue task which gradually ages and frees slabs from
  the cache which have not been used recently.
- Optimized the slab packing algorithm to ensure each slab contains the
  maximum number of objects without creating too large a slab.
- Fix deadlock: we can never call kv_free() under the skc_lock. We now
  unlink the objects and slabs from the cache itself and attach them to a
  private work list. The contents of the list are then subsequently freed
  outside the spin lock (see the sketch below).
- Move magazine create/destroy operations on to the local cpu.
- Further performance optimizations by minimizing the usage of the large
  per-cache skc_lock. This includes the addition of the KMC_BIT_REAPING
  bit which is used to prevent concurrent reaping, and to defer new slab
  creation while reaping is occurring.
- Add the KMC_BIT_DESTROY bit which is set when the cache is being
  destroyed; this is used to catch any task accessing the cache while it
  is being destroyed.
- Add comments to all the functions and additional comments to try and
  make everything as clear as possible.
- Major cleanup and additions to the SPLAT kmem tests to more rigorously
  stress the cache implementation and look for any problems. This
  includes correctness and performance tests.
- Updated portable work queue interfaces.
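The deadlock fix above follows a common Linux pattern: detach entries from the shared structure while the spinlock is held, collect them on a stack-local list, and only free them after the lock has been dropped. A minimal sketch of that pattern, using hypothetical item/cache types rather than the real spl_kmem_slab_t machinery:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {				/* hypothetical cached item */
	struct list_head i_list;
};

struct cache {				/* hypothetical cache */
	spinlock_t	 c_lock;
	struct list_head c_items;
};

static void cache_reclaim(struct cache *c)
{
	struct item *i, *n;
	LIST_HEAD(private);		/* stack-local list, needs no locking */

	/* Unlink candidates while holding the lock... */
	spin_lock(&c->c_lock);
	list_for_each_entry_safe(i, n, &c->c_items, i_list)
		list_move(&i->i_list, &private);
	spin_unlock(&c->c_lock);

	/* ...and free them only after the lock is dropped, so the
	 * allocator (kfree/vfree) is never called under c_lock. */
	list_for_each_entry_safe(i, n, &private, i_list) {
		list_del(&i->i_list);
		kfree(i);
	}
}

spl_slab_reclaim() below uses exactly this shape, with separate private lists for slabs and off-slab objects.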
-rw-r--r--  include/sys/kmem.h              73
-rw-r--r--  include/sys/sysmacros.h         12
-rw-r--r--  include/sys/vmsystm.h            3
-rw-r--r--  module/spl/spl-kmem.c          465
-rw-r--r--  module/splat/splat-internal.h    1
-rw-r--r--  module/splat/splat-kmem.c      967
6 files changed, 1025 insertions, 496 deletions
diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index ef5876312..4f939e0fc 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -45,6 +45,7 @@ extern "C" {
#include <asm/atomic_compat.h>
#include <sys/types.h>
#include <sys/debug.h>
+#include <sys/workqueue.h>
/*
* Memory allocation interfaces
@@ -161,17 +162,32 @@ kmem_alloc_tryhard(size_t size, size_t *alloc_size, int kmflags)
/*
* Slab allocation interfaces
*/
-#define KMC_NOTOUCH 0x00000001
-#define KMC_NODEBUG 0x00000002 /* Default behavior */
-#define KMC_NOMAGAZINE 0x00000004 /* XXX: No disable support available */
-#define KMC_NOHASH 0x00000008 /* XXX: No hash available */
-#define KMC_QCACHE 0x00000010 /* XXX: Unsupported */
-#define KMC_KMEM 0x00000100 /* Use kmem cache */
-#define KMC_VMEM 0x00000200 /* Use vmem cache */
-#define KMC_OFFSLAB 0x00000400 /* Objects not on slab */
-
-#define KMC_REAP_CHUNK 256
-#define KMC_DEFAULT_SEEKS DEFAULT_SEEKS
+enum {
+ KMC_BIT_NOTOUCH = 0, /* Don't update ages */
+ KMC_BIT_NODEBUG = 1, /* Default behavior */
+ KMC_BIT_NOMAGAZINE = 2, /* XXX: Unsupported */
+ KMC_BIT_NOHASH = 3, /* XXX: Unsupported */
+ KMC_BIT_QCACHE = 4, /* XXX: Unsupported */
+ KMC_BIT_KMEM = 5, /* Use kmem cache */
+ KMC_BIT_VMEM = 6, /* Use vmem cache */
+ KMC_BIT_OFFSLAB = 7, /* Objects not on slab */
+ KMC_BIT_REAPING = 16, /* Reaping in progress */
+ KMC_BIT_DESTROY = 17, /* Destroy in progress */
+};
+
+#define KMC_NOTOUCH (1 << KMC_BIT_NOTOUCH)
+#define KMC_NODEBUG (1 << KMC_BIT_NODEBUG)
+#define KMC_NOMAGAZINE (1 << KMC_BIT_NOMAGAZINE)
+#define KMC_NOHASH (1 << KMC_BIT_NOHASH)
+#define KMC_QCACHE (1 << KMC_BIT_QCACHE)
+#define KMC_KMEM (1 << KMC_BIT_KMEM)
+#define KMC_VMEM (1 << KMC_BIT_VMEM)
+#define KMC_OFFSLAB (1 << KMC_BIT_OFFSLAB)
+#define KMC_REAPING (1 << KMC_BIT_REAPING)
+#define KMC_DESTROY (1 << KMC_BIT_DESTROY)
+
+#define KMC_REAP_CHUNK INT_MAX
+#define KMC_DEFAULT_SEEKS 1
#ifdef DEBUG_KMEM_UNIMPLEMENTED
static __inline__ void kmem_init(void) {
@@ -223,9 +239,10 @@ extern struct rw_semaphore spl_kmem_cache_sem;
#define SKS_MAGIC 0x22222222
#define SKC_MAGIC 0x2c2c2c2c
-#define SPL_KMEM_CACHE_DELAY 5
-#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32
-#define SPL_KMEM_CACHE_ALIGN 8
+#define SPL_KMEM_CACHE_DELAY 5 /* Minimum slab release age */
+#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 /* Target objects per slab */
+#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 8 /* Minimum objects per slab */
+#define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */
typedef int (*spl_kmem_ctor_t)(void *, void *, int);
typedef void (*spl_kmem_dtor_t)(void *, void *);
@@ -258,24 +275,28 @@ typedef struct spl_kmem_slab {
} spl_kmem_slab_t;
typedef struct spl_kmem_cache {
- uint32_t skc_magic; /* Sanity magic */
- uint32_t skc_name_size; /* Name length */
- char *skc_name; /* Name string */
+ uint32_t skc_magic; /* Sanity magic */
+ uint32_t skc_name_size; /* Name length */
+ char *skc_name; /* Name string */
spl_kmem_magazine_t *skc_mag[NR_CPUS]; /* Per-CPU warm cache */
uint32_t skc_mag_size; /* Magazine size */
uint32_t skc_mag_refill; /* Magazine refill count */
- spl_kmem_ctor_t skc_ctor; /* Constructor */
- spl_kmem_dtor_t skc_dtor; /* Destructor */
- spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
- void *skc_private; /* Private data */
- void *skc_vmp; /* Unused */
+ spl_kmem_ctor_t skc_ctor; /* Constructor */
+ spl_kmem_dtor_t skc_dtor; /* Destructor */
+ spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
+ void *skc_private; /* Private data */
+ void *skc_vmp; /* Unused */
uint32_t skc_flags; /* Flags */
uint32_t skc_obj_size; /* Object size */
uint32_t skc_obj_align; /* Object alignment */
uint32_t skc_slab_objs; /* Objects per slab */
- uint32_t skc_slab_size; /* Slab size */
- uint32_t skc_delay; /* slab reclaim interval */
- struct list_head skc_list; /* List of caches linkage */
+ uint32_t skc_slab_size; /* Slab size */
+ uint32_t skc_delay; /* Slab reclaim interval */
+ atomic_t skc_ref; /* Ref count callers */
+ struct delayed_work skc_work; /* Slab reclaim work */
+ struct work_struct work;
+ struct timer_list timer;
+ struct list_head skc_list; /* List of caches linkage */
struct list_head skc_complete_list;/* Completely alloc'ed */
struct list_head skc_partial_list; /* Partially alloc'ed */
spinlock_t skc_lock; /* Cache lock */
@@ -283,7 +304,7 @@ typedef struct spl_kmem_cache {
uint64_t skc_slab_create;/* Slab creates */
uint64_t skc_slab_destroy;/* Slab destroys */
uint64_t skc_slab_total; /* Slab total current */
- uint64_t skc_slab_alloc; /* Slab alloc current */
+ uint64_t skc_slab_alloc; /* Slab alloc current */
uint64_t skc_slab_max; /* Slab max historic */
uint64_t skc_obj_total; /* Obj total current */
uint64_t skc_obj_alloc; /* Obj alloc current */
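Switching from plain flag masks to bit indices above is what allows skc_flags to be driven by the kernel's atomic bitops; the new KMC_BIT_REAPING and KMC_BIT_DESTROY bits are claimed that way in spl-kmem.c below. A minimal sketch of the intended usage (the guard function here is illustrative, not part of the patch):

#include <linux/bitops.h>

/* Illustrative only: atomically claim the reaping state, mirroring
 * the test_and_set_bit()/clear_bit() pair in spl_kmem_cache_reap_now(). */
static int try_start_reap(unsigned long *flags)
{
	if (test_and_set_bit(KMC_BIT_REAPING, flags))
		return 0;		/* another reaper already holds the bit */

	/* ... release empty slabs here ... */

	clear_bit(KMC_BIT_REAPING, flags);
	return 1;
}

The mask forms (KMC_REAPING, KMC_DESTROY, ...) are kept so ordinary skc_flags tests such as (skc->skc_flags & KMC_OFFSLAB) continue to work.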
diff --git a/include/sys/sysmacros.h b/include/sys/sysmacros.h
index 94ff3f84e..b82812385 100644
--- a/include/sys/sysmacros.h
+++ b/include/sys/sysmacros.h
@@ -203,18 +203,6 @@ extern int ddi_strtoul(const char *str, char **nptr,
#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
#endif
-#ifdef HAVE_3ARGS_INIT_WORK
-
-#define spl_init_work(wq,cb,d) INIT_WORK((wq), (void *)(cb), (void *)(d))
-#define spl_get_work_data(type,field,data) (data)
-
-#else
-
-#define spl_init_work(wq,cb,d) INIT_WORK((wq), (void *)(cb));
-#define spl_get_work_data(type,field,data) container_of(data,type,field)
-
-#endif
-
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/vmsystm.h b/include/sys/vmsystm.h
index e92c17bdd..1cb716f13 100644
--- a/include/sys/vmsystm.h
+++ b/include/sys/vmsystm.h
@@ -35,8 +35,7 @@
extern vmem_t *zio_alloc_arena; /* arena for zio caches */
#define physmem num_physpages
-#define freemem nr_free_pages() // Expensive on linux,
- // cheap on solaris
+#define freemem nr_free_pages()
#define minfree 0
#define needfree 0 /* # of needed pages */
#define ptob(pages) (pages * PAGE_SIZE)
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index a68f8efe9..83eefe293 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -132,10 +132,6 @@ EXPORT_SYMBOL(kmem_set_warning);
* small virtual address space on 32bit arches. This will seriously
* constrain the size of the slab caches and their performance.
*
- * XXX: Implement work requests to keep an eye on each cache and
- * shrink them via spl_slab_reclaim() when they are wasting lots
- * of space. Currently this process is driven by the reapers.
- *
* XXX: Improve the partial slab list by carefully maintaining a
* strict ordering of fullest to emptiest slabs based on
* the slab reference count. This gaurentees the when freeing
@@ -571,7 +567,8 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
}
}
-/* It's important that we pack the spl_kmem_obj_t structure and the
+/*
+ * It's important that we pack the spl_kmem_obj_t structure and the
* actual objects in to one large address space to minimize the number
* of calls to the allocator. It is far better to do a few large
* allocations and then subdivide it ourselves. Now which allocator
@@ -662,14 +659,17 @@ out:
RETURN(sks);
}
-/* Removes slab from complete or partial list, so it must
- * be called with the 'skc->skc_lock' held.
+/*
+ * Remove a slab from complete or partial list, it must be called with
+ * the 'skc->skc_lock' held but the actual free must be performed
+ * outside the lock to prevent deadlocking on vmem addresses.
*/
static void
-spl_slab_free(spl_kmem_slab_t *sks) {
+spl_slab_free(spl_kmem_slab_t *sks,
+ struct list_head *sks_list, struct list_head *sko_list)
+{
spl_kmem_cache_t *skc;
spl_kmem_obj_t *sko, *n;
- int size;
ENTRY;
ASSERT(sks->sks_magic == SKS_MAGIC);
@@ -682,114 +682,190 @@ spl_slab_free(spl_kmem_slab_t *sks) {
skc->skc_obj_total -= sks->sks_objs;
skc->skc_slab_total--;
list_del(&sks->sks_list);
- size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) +
- P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align);
/* Run destructors slab is being released */
list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
ASSERT(sko->sko_magic == SKO_MAGIC);
+ list_del(&sko->sko_list);
if (skc->skc_dtor)
skc->skc_dtor(sko->sko_addr, skc->skc_private);
if (skc->skc_flags & KMC_OFFSLAB)
- kv_free(skc, sko->sko_addr, size);
+ list_add(&sko->sko_list, sko_list);
}
- kv_free(skc, sks, skc->skc_slab_size);
+ list_add(&sks->sks_list, sks_list);
EXIT;
}
-static int
-__spl_slab_reclaim(spl_kmem_cache_t *skc)
+/*
+ * Traverses all the partial slabs attached to a cache and frees those
+ * which are currently empty and have not been touched for
+ * skc_delay seconds. This is to avoid thrashing.
+ */
+static void
+spl_slab_reclaim(spl_kmem_cache_t *skc, int flag)
{
spl_kmem_slab_t *sks, *m;
- int rc = 0;
+ spl_kmem_obj_t *sko, *n;
+ LIST_HEAD(sks_list);
+ LIST_HEAD(sko_list);
+ int size;
ENTRY;
- ASSERT(spin_is_locked(&skc->skc_lock));
/*
- * Free empty slabs which have not been touched in skc_delay
- * seconds. This delay time is important to avoid thrashing.
- * Empty slabs will be at the end of the skc_partial_list.
+ * Move empty slabs and objects which have not been touched in
+ * skc_delay seconds on to private lists to be freed outside
+ * the spin lock. This delay time is important to avoid
+ * thrashing; however, when flag is set the delay will not be
+ * used. Empty slabs will be at the end of the skc_partial_list.
*/
+ spin_lock(&skc->skc_lock);
list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list,
sks_list) {
if (sks->sks_ref > 0)
break;
- if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) {
- spl_slab_free(sks);
- rc++;
- }
+ if (flag || time_after(jiffies,sks->sks_age+skc->skc_delay*HZ))
+ spl_slab_free(sks, &sks_list, &sko_list);
}
+ spin_unlock(&skc->skc_lock);
- /* Returns number of slabs reclaimed */
- RETURN(rc);
+ /*
+ * We only have a list of spl_kmem_obj_t's if they are located off
+ * the slab, otherwise they get freed with the spl_kmem_slab_t.
+ */
+ if (!list_empty(&sko_list)) {
+ ASSERT(skc->skc_flags & KMC_OFFSLAB);
+
+ size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) +
+ P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align);
+
+ list_for_each_entry_safe(sko, n, &sko_list, sko_list)
+ kv_free(skc, sko->sko_addr, size);
+ }
+
+ list_for_each_entry_safe(sks, m, &sks_list, sks_list)
+ kv_free(skc, sks, skc->skc_slab_size);
+
+ EXIT;
}
-static int
-spl_slab_reclaim(spl_kmem_cache_t *skc)
+/*
+ * Called regularly on all caches to age objects out of the magazines
+ * which have not been accessed in skc->skc_delay seconds. This prevents
+ * idle magazines from holding memory which might be better used by
+ * other caches or parts of the system. The delay is present to
+ * prevent thrashing the magazine.
+ */
+static void
+spl_magazine_age(void *data)
{
- int rc;
- ENTRY;
+ spl_kmem_cache_t *skc = data;
+ spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
- spin_lock(&skc->skc_lock);
- rc = __spl_slab_reclaim(skc);
- spin_unlock(&skc->skc_lock);
+ if (skm->skm_avail > 0 &&
+ time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
+ (void)spl_cache_flush(skc, skm, skm->skm_refill);
+}
- RETURN(rc);
+/*
+ * Called regularly to keep a downward pressure on the size of idle
+ * magazines and to release free slabs from the cache. This function
+ * never calls the registered reclaim function; that only occurs
+ * under memory pressure or with a direct call to spl_kmem_reap().
+ */
+static void
+spl_cache_age(void *data)
+{
+ spl_kmem_cache_t *skc =
+ spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
+
+ ASSERT(skc->skc_magic == SKC_MAGIC);
+ on_each_cpu(spl_magazine_age, skc, 0, 1);
+ spl_slab_reclaim(skc, 0);
+
+ if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
+ schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ);
}
-/* Size slabs properly to ensure they are not too large */
+/*
+ * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
+ * When on-slab we want to target SPL_KMEM_CACHE_OBJ_PER_SLAB. However,
+ * for very small objects we may end up with more than this so as not
+ * to waste space in the minimal allocation of a single page. Also for
+ * very large objects we may use as few as SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN,
+ * lower than this and we will fail.
+ */
static int
spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
{
- int max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE;
- int align = skc->skc_obj_align;
-
- *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
+ int sks_size, obj_size, max_size, align;
if (skc->skc_flags & KMC_OFFSLAB) {
+ *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
*size = sizeof(spl_kmem_slab_t);
} else {
-resize:
- *size = P2ROUNDUP(sizeof(spl_kmem_slab_t), align) +
- *objs * (P2ROUNDUP(skc->skc_obj_size, align) +
- P2ROUNDUP(sizeof(spl_kmem_obj_t), align));
+ align = skc->skc_obj_align;
+ sks_size = P2ROUNDUP(sizeof(spl_kmem_slab_t), align);
+ obj_size = P2ROUNDUP(skc->skc_obj_size, align) +
+ P2ROUNDUP(sizeof(spl_kmem_obj_t), align);
+
+ if (skc->skc_flags & KMC_KMEM)
+ max_size = ((uint64_t)1 << (MAX_ORDER-1)) * PAGE_SIZE;
+ else
+ max_size = (32 * 1024 * 1024);
- if (*size > max)
- GOTO(resize, *objs = *objs - 1);
+ for (*size = PAGE_SIZE; *size <= max_size; *size += PAGE_SIZE) {
+ *objs = (*size - sks_size) / obj_size;
+ if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB)
+ RETURN(0);
+ }
- ASSERT(*objs > 0);
+ /*
+ * Unable to satisfy the target objects per slab, fall back to
+ * allocating a maximally sized slab and use it if it holds at
+ * least the minimum object count. If not, fail.
+ */
+ *size = max_size;
+ *objs = (*size - sks_size) / obj_size;
+ if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN)
+ RETURN(0);
}
- ASSERTF(*size <= max, "%d < %d\n", *size, max);
- RETURN(0);
+ RETURN(-ENOSPC);
}
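To get a feel for what the sizing loop above chooses, here is a userspace mirror of the calculation; the sizeof values assumed for spl_kmem_slab_t and spl_kmem_obj_t are illustrative guesses, not taken from the headers.

#include <stdio.h>

#define PAGE_SIZE	4096
#define P2ROUNDUP(x, a)	((((x) - 1) | ((a) - 1)) + 1)

int main(void)
{
	int align    = 8;
	int obj      = 512;				/* skc_obj_size */
	int sks_size = P2ROUNDUP(64, align);		/* assumed sizeof(spl_kmem_slab_t) */
	int obj_size = P2ROUNDUP(obj, align) +
		       P2ROUNDUP(32, align);		/* assumed sizeof(spl_kmem_obj_t) */
	int size, objs;

	/* Grow the slab one page at a time until it holds the target
	 * SPL_KMEM_CACHE_OBJ_PER_SLAB (32) objects. */
	for (size = PAGE_SIZE; ; size += PAGE_SIZE) {
		objs = (size - sks_size) / obj_size;
		if (objs >= 32)
			break;
	}

	printf("slab size %d bytes holds %d objects\n", size, objs);
	return 0;
}

With these assumptions a 512-byte object lands on a 20 KiB (five page) slab holding 37 objects, comfortably above the 32-object target; only when even the maximally sized slab cannot reach SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN does spl_slab_size() return -ENOSPC.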
+/*
+ * Make a guess at reasonable per-cpu magazine size based on the size of
+ * each object and the cost of caching N of them in each magazine. Long
+ * term this should really adapt based on an observed usage heuristic.
+ */
static int
spl_magazine_size(spl_kmem_cache_t *skc)
{
int size, align = skc->skc_obj_align;
ENTRY;
- /* Guesses for reasonable magazine sizes, they
- * should really adapt based on observed usage. */
+ /* Per-magazine sizes below assume a 4KiB page size */
if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 256))
- size = 4;
+ size = 4; /* Minimum 4MiB per-magazine */
else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 32))
- size = 16;
+ size = 16; /* Minimum 2MiB per-magazine */
else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE))
- size = 64;
+ size = 64; /* Minimum 256KiB per-magazine */
else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE / 4))
- size = 128;
+ size = 128; /* Minimum 128KiB per-magazine */
else
- size = 512;
+ size = 256;
RETURN(size);
}
+/*
+ * Allocate a per-cpu magazine to associate with a specific core.
+ */
static spl_kmem_magazine_t *
spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
{
@@ -798,19 +874,21 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
sizeof(void *) * skc->skc_mag_size;
ENTRY;
- skm = kmem_alloc_node(size, GFP_KERNEL, node);
+ skm = kmem_alloc_node(size, GFP_KERNEL | __GFP_NOFAIL, node);
if (skm) {
skm->skm_magic = SKM_MAGIC;
skm->skm_avail = 0;
skm->skm_size = skc->skc_mag_size;
skm->skm_refill = skc->skc_mag_refill;
- if (!(skc->skc_flags & KMC_NOTOUCH))
- skm->skm_age = jiffies;
+ skm->skm_age = jiffies;
}
RETURN(skm);
}
+/*
+ * Free a per-cpu magazine associated with a specific core.
+ */
static void
spl_magazine_free(spl_kmem_magazine_t *skm)
{
@@ -825,44 +903,72 @@ spl_magazine_free(spl_kmem_magazine_t *skm)
EXIT;
}
+static void
+__spl_magazine_create(void *data)
+{
+ spl_kmem_cache_t *skc = data;
+ int id = smp_processor_id();
+
+ skc->skc_mag[id] = spl_magazine_alloc(skc, cpu_to_node(id));
+ ASSERT(skc->skc_mag[id]);
+}
+
+/*
+ * Create all per-cpu magazines of reasonable sizes.
+ */
static int
spl_magazine_create(spl_kmem_cache_t *skc)
{
- int i;
ENTRY;
skc->skc_mag_size = spl_magazine_size(skc);
- skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
+ skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
+ on_each_cpu(__spl_magazine_create, skc, 0, 1);
- for_each_online_cpu(i) {
- skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i));
- if (!skc->skc_mag[i]) {
- for (i--; i >= 0; i--)
- spl_magazine_free(skc->skc_mag[i]);
+ RETURN(0);
+}
- RETURN(-ENOMEM);
- }
- }
+static void
+__spl_magazine_destroy(void *data)
+{
+ spl_kmem_cache_t *skc = data;
+ spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
- RETURN(0);
+ (void)spl_cache_flush(skc, skm, skm->skm_avail);
+ spl_magazine_free(skm);
}
+/*
+ * Destroy all per-cpu magazines.
+ */
static void
spl_magazine_destroy(spl_kmem_cache_t *skc)
{
- spl_kmem_magazine_t *skm;
- int i;
ENTRY;
-
- for_each_online_cpu(i) {
- skm = skc->skc_mag[i];
- (void)spl_cache_flush(skc, skm, skm->skm_avail);
- spl_magazine_free(skm);
- }
-
+ on_each_cpu(__spl_magazine_destroy, skc, 0, 1);
EXIT;
}
+/*
+ * Create an object cache based on the following arguments:
+ * name cache name
+ * size cache object size
+ * align cache object alignment
+ * ctor cache object constructor
+ * dtor cache object destructor
+ * reclaim cache object reclaim
+ * priv cache private data for ctor/dtor/reclaim
+ * vmp unused must be NULL
+ * flags
+ * KMC_NOTOUCH Disable cache object aging (unsupported)
+ * KMC_NODEBUG Disable debugging (unsupported)
+ * KMC_NOMAGAZINE Disable magazine (unsupported)
+ * KMC_NOHASH Disable hashing (unsupported)
+ * KMC_QCACHE Disable qcache (unsupported)
+ * KMC_KMEM Force kmem backed cache
+ * KMC_VMEM Force vmem backed cache
+ * KMC_OFFSLAB Locate objects off the slab
+ */
spl_kmem_cache_t *
spl_kmem_cache_create(char *name, size_t size, size_t align,
spl_kmem_ctor_t ctor,
@@ -908,6 +1014,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_size = size;
skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
skc->skc_delay = SPL_KMEM_CACHE_DELAY;
+ atomic_set(&skc->skc_ref, 0);
INIT_LIST_HEAD(&skc->skc_list);
INIT_LIST_HEAD(&skc->skc_complete_list);
@@ -947,6 +1054,9 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
if (rc)
GOTO(out, rc);
+ spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc);
+ schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ);
+
down_write(&spl_kmem_cache_sem);
list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
up_write(&spl_kmem_cache_sem);
@@ -959,10 +1069,13 @@ out:
}
EXPORT_SYMBOL(spl_kmem_cache_create);
+/*
+ * Destroy a cache and all objects associated with the cache.
+ */
void
spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
{
- spl_kmem_slab_t *sks, *m;
+ DECLARE_WAIT_QUEUE_HEAD(wq);
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -971,20 +1084,27 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
list_del_init(&skc->skc_list);
up_write(&spl_kmem_cache_sem);
+ /* Cancel any and wait for any pending delayed work */
+ ASSERT(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+ cancel_delayed_work(&skc->skc_work);
+ flush_scheduled_work();
+
+ /* Wait until all current callers complete, this is mainly
+ * to catch the case where a low memory situation triggers a
+ * cache reaping action which races with this destroy. */
+ wait_event(wq, atomic_read(&skc->skc_ref) == 0);
+
spl_magazine_destroy(skc);
+ spl_slab_reclaim(skc, 1);
spin_lock(&skc->skc_lock);
/* Validate there are no objects in use and free all the
* spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
+ ASSERT3U(skc->skc_slab_alloc, ==, 0);
+ ASSERT3U(skc->skc_obj_alloc, ==, 0);
+ ASSERT3U(skc->skc_slab_total, ==, 0);
+ ASSERT3U(skc->skc_obj_total, ==, 0);
ASSERT(list_empty(&skc->skc_complete_list));
- ASSERT(skc->skc_slab_alloc == 0);
- ASSERT(skc->skc_obj_alloc == 0);
-
- list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
- spl_slab_free(sks);
-
- ASSERT(skc->skc_slab_total == 0);
- ASSERT(skc->skc_obj_total == 0);
kmem_free(skc->skc_name, skc->skc_name_size);
spin_unlock(&skc->skc_lock);
@@ -995,6 +1115,10 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
}
EXPORT_SYMBOL(spl_kmem_cache_destroy);
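The ordering above (set the DESTROY bit, cancel the delayed work, then wait for skc_ref to drain) pairs with a small guard executed at every entry point into the cache. A condensed, illustrative version of that guard; the helper names are not in the patch, the calls inside them are:

/* Sketch of the guard inlined into spl_kmem_cache_alloc/free/reap_now:
 * refuse entry once the cache is marked for destruction, otherwise
 * hold a reference for the duration of the call. */
static inline void cache_enter(spl_kmem_cache_t *skc)
{
	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
	atomic_inc(&skc->skc_ref);		/* visible to destroy */
}

static inline void cache_exit(spl_kmem_cache_t *skc)
{
	atomic_dec(&skc->skc_ref);		/* destroy may now proceed */
}

Because destroy only proceeds once skc_ref reaches zero, a reap triggered by low memory can no longer race with cache teardown.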
+/*
+ * Allocate an object from a slab attached to the cache. This is used to
+ * repopulate the per-cpu magazine caches in batches when they run low.
+ */
static void *
spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
{
@@ -1030,10 +1154,11 @@ spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
return sko->sko_addr;
}
-/* No available objects create a new slab. Since this is an
- * expensive operation we do it without holding the spinlock
- * and only briefly aquire it when we link in the fully
- * allocated and constructed slab.
+/*
+ * No available objects on any slabs, create a new slab. Since this
+ * is an expensive operation we do it without holding the spinlock and
+ * only briefly acquire it when we link in the fully allocated and
+ * constructed slab.
*/
static spl_kmem_slab_t *
spl_cache_grow(spl_kmem_cache_t *skc, int flags)
@@ -1042,34 +1167,42 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags)
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ local_irq_enable();
+ might_sleep();
- if (flags & __GFP_WAIT) {
- flags |= __GFP_NOFAIL;
- local_irq_enable();
- might_sleep();
- }
-
- sks = spl_slab_alloc(skc, flags);
- if (sks == NULL) {
- if (flags & __GFP_WAIT)
- local_irq_disable();
-
- RETURN(NULL);
+ /*
+ * Before allocating a new slab check if the cache is being reaped.
+ * If it is, there is a good chance we can wait until it finishes
+ * and then use one of the newly freed but not aged-out slabs.
+ */
+ if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
+ schedule();
+ GOTO(out, sks = NULL);
}
- if (flags & __GFP_WAIT)
- local_irq_disable();
+ /* Allocate a new slab for the cache */
+ sks = spl_slab_alloc(skc, flags | __GFP_NORETRY | __GFP_NOWARN);
+ if (sks == NULL)
+ GOTO(out, sks = NULL);
- /* Link the new empty slab in to the end of skc_partial_list */
+ /* Link the new empty slab in to the end of skc_partial_list. */
spin_lock(&skc->skc_lock);
skc->skc_slab_total++;
skc->skc_obj_total += sks->sks_objs;
list_add_tail(&sks->sks_list, &skc->skc_partial_list);
spin_unlock(&skc->skc_lock);
+out:
+ local_irq_disable();
RETURN(sks);
}
+/*
+ * Refill a per-cpu magazine with objects from the slabs for this
+ * cache. Ideally the magazine can be repopulated using existing
+ * objects which have been released; however, if we are unable to
+ * locate enough free objects new slabs of objects will be created.
+ */
static int
spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
{
@@ -1080,13 +1213,11 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(skm->skm_magic == SKM_MAGIC);
- /* XXX: Check for refill bouncing by age perhaps */
refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail);
-
spin_lock(&skc->skc_lock);
while (refill > 0) {
- /* No slabs available we must grow the cache */
+ /* No slabs available we may need to grow the cache */
if (list_empty(&skc->skc_partial_list)) {
spin_unlock(&skc->skc_lock);
@@ -1135,6 +1266,9 @@ out:
RETURN(rc);
}
+/*
+ * Release an object back to the slab from which it came.
+ */
static void
spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
{
@@ -1176,6 +1310,13 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
EXIT;
}
+/*
+ * Release a batch of objects from a per-cpu magazine back to their
+ * respective slabs. This occurs when we exceed the magazine size,
+ * are under memory pressure, when the cache is idle, or during
+ * cache cleanup. The flush argument contains the number of entries
+ * to remove from the magazine.
+ */
static int
spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
{
@@ -1185,12 +1326,17 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(skm->skm_magic == SKM_MAGIC);
+ /*
+ * XXX: Currently we simply return objects from the magazine to
+ * the slabs in fifo order. The ideal thing to do from a memory
+ * fragmentation standpoint is to cheaply determine the set of
+ * objects in the magazine which will result in the largest
+ * number of free slabs if released from the magazine.
+ */
spin_lock(&skc->skc_lock);
-
for (i = 0; i < count; i++)
spl_cache_shrink(skc, skm->skm_objs[i]);
-// __spl_slab_reclaim(skc);
skm->skm_avail -= count;
memmove(skm->skm_objs, &(skm->skm_objs[count]),
sizeof(void *) * skm->skm_avail);
@@ -1200,6 +1346,10 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
RETURN(count);
}
+/*
+ * Allocate an object from the per-cpu magazine, or if the magazine
+ * is empty directly allocate from a slab and repopulate the magazine.
+ */
void *
spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
{
@@ -1209,7 +1359,9 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(flags & KM_SLEEP); /* XXX: KM_NOSLEEP not yet supported */
+ ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+ ASSERT(flags & KM_SLEEP);
+ atomic_inc(&skc->skc_ref);
local_irq_save(irq_flags);
restart:
@@ -1225,8 +1377,7 @@ restart:
if (likely(skm->skm_avail)) {
/* Object available in CPU cache, use it */
obj = skm->skm_objs[--skm->skm_avail];
- if (!(skc->skc_flags & KMC_NOTOUCH))
- skm->skm_age = jiffies;
+ skm->skm_age = jiffies;
} else {
/* Per-CPU cache empty, directly allocate from
* the slab and refill the per-CPU cache. */
@@ -1240,11 +1391,18 @@ restart:
/* Pre-emptively migrate object to CPU L1 cache */
prefetchw(obj);
+ atomic_dec(&skc->skc_ref);
RETURN(obj);
}
EXPORT_SYMBOL(spl_kmem_cache_alloc);
+/*
+ * Free an object back to the local per-cpu magazine; there is no
+ * guarantee that this is the same magazine the object was originally
+ * allocated from. We may need to flush entries from the magazine
+ * back to the slabs to make space.
+ */
void
spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
{
@@ -1253,6 +1411,8 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+ atomic_inc(&skc->skc_ref);
local_irq_save(flags);
/* Safe to update per-cpu structure without lock, but
@@ -1270,62 +1430,87 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
skm->skm_objs[skm->skm_avail++] = obj;
local_irq_restore(flags);
+ atomic_dec(&skc->skc_ref);
EXIT;
}
EXPORT_SYMBOL(spl_kmem_cache_free);
+/*
+ * The generic shrinker function for all caches. Under linux a shrinker
+ * may not be tightly coupled with a slab cache. In fact linux always
+ * systematically tries calling all registered shrinker callbacks which
+ * report that they contain unused objects. Because of this we only
+ * register one shrinker function in the shim layer for all slab caches.
+ * We always attempt to shrink all caches when this generic shrinker
+ * is called. The shrinker should return the number of free objects
+ * in the cache when called with nr_to_scan == 0 but not attempt to
+ * free any objects. When nr_to_scan > 0 it is a request that nr_to_scan
+ * objects should be freed; because Solaris semantics are to free
+ * all available objects, we may free more objects than requested.
+ */
static int
spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
spl_kmem_cache_t *skc;
+ int unused = 0;
- /* Under linux a shrinker is not tightly coupled with a slab
- * cache. In fact linux always systematically trys calling all
- * registered shrinker callbacks until its target reclamation level
- * is reached. Because of this we only register one shrinker
- * function in the shim layer for all slab caches. And we always
- * attempt to shrink all caches when this generic shrinker is called.
- */
down_read(&spl_kmem_cache_sem);
-
- list_for_each_entry(skc, &spl_kmem_cache_list, skc_list)
- spl_kmem_cache_reap_now(skc);
-
+ list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
+ if (nr_to_scan)
+ spl_kmem_cache_reap_now(skc);
+
+ /*
+ * Presume everything alloc'ed is reclaimable; this ensures
+ * we are called again with nr_to_scan > 0 so we can try and
+ * reclaim. The exact number is not important either, so
+ * we forgo taking this already highly contended lock.
+ */
+ unused += skc->skc_obj_alloc;
+ }
up_read(&spl_kmem_cache_sem);
- /* XXX: Under linux we should return the remaining number of
- * entries in the cache. We should do this as well.
- */
- return 1;
+ return (unused * sysctl_vfs_cache_pressure) / 100;
}
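For scale, the return value above ties reclaim pressure on the SPL caches to the standard vm.vfs_cache_pressure sysctl (default 100). With, say, 10,000 objects currently allocated across all registered caches:

	vfs_cache_pressure = 100:  (10000 * 100) / 100 = 10000 entries reported
	vfs_cache_pressure = 200:  (10000 * 200) / 100 = 20000 entries reported

so raising the sysctl makes the VM believe the SPL holds proportionally more freeable entries and issue correspondingly more nr_to_scan > 0 calls.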
+/*
+ * Call the registered reclaim function for a cache. Depending on how
+ * many and which objects are released it may simply repopulate the
+ * local magazine which will then need to age-out. Objects which cannot
+ * fit in the magazine will be released back to their slabs, which will
+ * also need to age out before being released. This is all just best
+ * effort and we do not want to thrash creating and destroying slabs.
+ */
void
spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
{
- spl_kmem_magazine_t *skm;
- int i;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- if (skc->skc_reclaim)
- skc->skc_reclaim(skc->skc_private);
+ /* Prevent concurrent cache reaping when contended */
+ if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
+ EXIT;
+ return;
+ }
- /* Ensure per-CPU caches which are idle gradually flush */
- for_each_online_cpu(i) {
- skm = skc->skc_mag[i];
+ atomic_inc(&skc->skc_ref);
- if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
- (void)spl_cache_flush(skc, skm, skm->skm_refill);
- }
+ if (skc->skc_reclaim)
+ skc->skc_reclaim(skc->skc_private);
- spl_slab_reclaim(skc);
+ spl_slab_reclaim(skc, 0);
+ clear_bit(KMC_BIT_REAPING, &skc->skc_flags);
+ atomic_dec(&skc->skc_ref);
EXIT;
}
EXPORT_SYMBOL(spl_kmem_cache_reap_now);
+/*
+ * Reap all free slabs from all registered caches.
+ */
void
spl_kmem_reap(void)
{
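Taken together, the exported interface above is used in the usual Solaris kmem style. A minimal usage sketch (error handling elided; the object type, constructor, and names are illustrative only, the function signatures come from the header above):

typedef struct my_obj {
	int mo_state;
} my_obj_t;

static int
my_obj_ctor(void *buf, void *priv, int flags)
{
	((my_obj_t *)buf)->mo_state = 0;	/* runs once per new object */
	return 0;
}

static void
example(void)
{
	spl_kmem_cache_t *skc;
	my_obj_t *obj;

	skc = spl_kmem_cache_create("my_obj_cache", sizeof(my_obj_t), 0,
				    my_obj_ctor, NULL, NULL, NULL, NULL,
				    KMC_KMEM);

	obj = spl_kmem_cache_alloc(skc, KM_SLEEP);
	/* ... use obj ... */
	spl_kmem_cache_free(skc, obj);

	/* Flush idle magazines and slabs, then tear the cache down. */
	spl_kmem_cache_reap_now(skc);
	spl_kmem_cache_destroy(skc);
}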
diff --git a/module/splat/splat-internal.h b/module/splat/splat-internal.h
index 87c47b173..0fa177c02 100644
--- a/module/splat/splat-internal.h
+++ b/module/splat/splat-internal.h
@@ -40,6 +40,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/list.h>
+#include <linux/swap.h>
#include <asm/ioctls.h>
#include <asm/uaccess.h>
diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c
index 9b96fce90..c592e983c 100644
--- a/module/splat/splat-kmem.c
+++ b/module/splat/splat-kmem.c
@@ -4,9 +4,9 @@
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
- * Brian Behlendorf <[email protected]>,
- * Herb Wartens <[email protected]>,
- * Jim Garlick <[email protected]>
+ * Brian Behlendorf <[email protected]>,
+ * Herb Wartens <[email protected]>,
+ * Jim Garlick <[email protected]>
* UCRL-CODE-235197
*
* This is free software; you can redistribute it and/or modify it
@@ -47,30 +47,37 @@
#define SPLAT_KMEM_TEST4_DESC "Memory allocation test (vmem_zalloc)"
#define SPLAT_KMEM_TEST5_ID 0x0105
-#define SPLAT_KMEM_TEST5_NAME "kmem_small"
+#define SPLAT_KMEM_TEST5_NAME "slab_small"
#define SPLAT_KMEM_TEST5_DESC "Slab ctor/dtor test (small)"
#define SPLAT_KMEM_TEST6_ID 0x0106
-#define SPLAT_KMEM_TEST6_NAME "kmem_large"
+#define SPLAT_KMEM_TEST6_NAME "slab_large"
#define SPLAT_KMEM_TEST6_DESC "Slab ctor/dtor test (large)"
#define SPLAT_KMEM_TEST7_ID 0x0107
-#define SPLAT_KMEM_TEST7_NAME "kmem_reap"
-#define SPLAT_KMEM_TEST7_DESC "Slab reaping test"
+#define SPLAT_KMEM_TEST7_NAME "slab_align"
+#define SPLAT_KMEM_TEST7_DESC "Slab alignment test"
#define SPLAT_KMEM_TEST8_ID 0x0108
-#define SPLAT_KMEM_TEST8_NAME "kmem_lock"
-#define SPLAT_KMEM_TEST8_DESC "Slab locking test"
+#define SPLAT_KMEM_TEST8_NAME "slab_reap"
+#define SPLAT_KMEM_TEST8_DESC "Slab reaping test"
#define SPLAT_KMEM_TEST9_ID 0x0109
-#define SPLAT_KMEM_TEST9_NAME "kmem_align"
-#define SPLAT_KMEM_TEST9_DESC "Slab alignment test"
+#define SPLAT_KMEM_TEST9_NAME "slab_age"
+#define SPLAT_KMEM_TEST9_DESC "Slab aging test"
+
+#define SPLAT_KMEM_TEST10_ID 0x010a
+#define SPLAT_KMEM_TEST10_NAME "slab_lock"
+#define SPLAT_KMEM_TEST10_DESC "Slab locking test"
+
+#define SPLAT_KMEM_TEST11_ID 0x010b
+#define SPLAT_KMEM_TEST11_NAME "slab_overcommit"
+#define SPLAT_KMEM_TEST11_DESC "Slab memory overcommit test"
#define SPLAT_KMEM_ALLOC_COUNT 10
#define SPLAT_VMEM_ALLOC_COUNT 10
-/* XXX - This test may fail under tight memory conditions */
static int
splat_kmem_test1(struct file *file, void *arg)
{
@@ -96,8 +103,8 @@ splat_kmem_test1(struct file *file, void *arg)
kmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST1_NAME,
- "%d byte allocations, %d/%d successful\n",
- size, count, SPLAT_KMEM_ALLOC_COUNT);
+ "%d byte allocations, %d/%d successful\n",
+ size, count, SPLAT_KMEM_ALLOC_COUNT);
if (count != SPLAT_KMEM_ALLOC_COUNT)
rc = -ENOMEM;
@@ -134,8 +141,8 @@ splat_kmem_test2(struct file *file, void *arg)
for (j = 0; j < size; j++) {
if (((char *)ptr[i])[j] != '\0') {
splat_vprint(file, SPLAT_KMEM_TEST2_NAME,
- "%d-byte allocation was "
- "not zeroed\n", size);
+ "%d-byte allocation was "
+ "not zeroed\n", size);
rc = -EFAULT;
}
}
@@ -146,8 +153,8 @@ splat_kmem_test2(struct file *file, void *arg)
kmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST2_NAME,
- "%d byte allocations, %d/%d successful\n",
- size, count, SPLAT_KMEM_ALLOC_COUNT);
+ "%d byte allocations, %d/%d successful\n",
+ size, count, SPLAT_KMEM_ALLOC_COUNT);
if (count != SPLAT_KMEM_ALLOC_COUNT)
rc = -ENOMEM;
@@ -180,8 +187,8 @@ splat_kmem_test3(struct file *file, void *arg)
vmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST3_NAME,
- "%d byte allocations, %d/%d successful\n",
- size, count, SPLAT_VMEM_ALLOC_COUNT);
+ "%d byte allocations, %d/%d successful\n",
+ size, count, SPLAT_VMEM_ALLOC_COUNT);
if (count != SPLAT_VMEM_ALLOC_COUNT)
rc = -ENOMEM;
@@ -212,8 +219,8 @@ splat_kmem_test4(struct file *file, void *arg)
for (j = 0; j < size; j++) {
if (((char *)ptr[i])[j] != '\0') {
splat_vprint(file, SPLAT_KMEM_TEST4_NAME,
- "%d-byte allocation was "
- "not zeroed\n", size);
+ "%d-byte allocation was "
+ "not zeroed\n", size);
rc = -EFAULT;
}
}
@@ -224,8 +231,8 @@ splat_kmem_test4(struct file *file, void *arg)
vmem_free(ptr[i], size);
splat_vprint(file, SPLAT_KMEM_TEST4_NAME,
- "%d byte allocations, %d/%d successful\n",
- size, count, SPLAT_VMEM_ALLOC_COUNT);
+ "%d byte allocations, %d/%d successful\n",
+ size, count, SPLAT_VMEM_ALLOC_COUNT);
if (count != SPLAT_VMEM_ALLOC_COUNT)
rc = -ENOMEM;
@@ -237,8 +244,11 @@ splat_kmem_test4(struct file *file, void *arg)
#define SPLAT_KMEM_TEST_MAGIC 0x004488CCUL
#define SPLAT_KMEM_CACHE_NAME "kmem_test"
-#define SPLAT_KMEM_OBJ_COUNT 128
-#define SPLAT_KMEM_OBJ_RECLAIM 16
+#define SPLAT_KMEM_OBJ_COUNT 1024
+#define SPLAT_KMEM_OBJ_RECLAIM 20 /* percent */
+#define SPLAT_KMEM_THREADS 32
+
+#define KCP_FLAG_READY 0x01
typedef struct kmem_cache_data {
unsigned long kcd_magic;
@@ -246,21 +256,95 @@ typedef struct kmem_cache_data {
char kcd_buf[0];
} kmem_cache_data_t;
+typedef struct kmem_cache_thread {
+ kmem_cache_t *kct_cache;
+ spinlock_t kct_lock;
+ int kct_id;
+ int kct_kcd_count;
+ kmem_cache_data_t *kct_kcd[0];
+} kmem_cache_thread_t;
+
typedef struct kmem_cache_priv {
unsigned long kcp_magic;
struct file *kcp_file;
kmem_cache_t *kcp_cache;
- kmem_cache_data_t *kcp_kcd[SPLAT_KMEM_OBJ_COUNT];
spinlock_t kcp_lock;
- wait_queue_head_t kcp_waitq;
+ wait_queue_head_t kcp_ctl_waitq;
+ wait_queue_head_t kcp_thr_waitq;
+ int kcp_flags;
+ int kcp_kct_count;
+ kmem_cache_thread_t *kcp_kct[SPLAT_KMEM_THREADS];
int kcp_size;
int kcp_align;
int kcp_count;
- int kcp_threads;
int kcp_alloc;
int kcp_rc;
+ int kcp_kcd_count;
+ kmem_cache_data_t *kcp_kcd[0];
} kmem_cache_priv_t;
+static kmem_cache_priv_t *
+splat_kmem_cache_test_kcp_alloc(struct file *file, char *name,
+ int size, int align, int alloc, int count)
+{
+ kmem_cache_priv_t *kcp;
+
+ kcp = vmem_zalloc(sizeof(kmem_cache_priv_t) +
+ count * sizeof(kmem_cache_data_t *), KM_SLEEP);
+ if (!kcp)
+ return NULL;
+
+ kcp->kcp_magic = SPLAT_KMEM_TEST_MAGIC;
+ kcp->kcp_file = file;
+ kcp->kcp_cache = NULL;
+ spin_lock_init(&kcp->kcp_lock);
+ init_waitqueue_head(&kcp->kcp_ctl_waitq);
+ init_waitqueue_head(&kcp->kcp_thr_waitq);
+ kcp->kcp_flags = 0;
+ kcp->kcp_kct_count = -1;
+ kcp->kcp_size = size;
+ kcp->kcp_align = align;
+ kcp->kcp_count = 0;
+ kcp->kcp_alloc = alloc;
+ kcp->kcp_rc = 0;
+ kcp->kcp_kcd_count = count;
+
+ return kcp;
+}
+
+static void
+splat_kmem_cache_test_kcp_free(kmem_cache_priv_t *kcp)
+{
+ vmem_free(kcp, sizeof(kmem_cache_priv_t) +
+ kcp->kcp_kcd_count * sizeof(kmem_cache_data_t *));
+}
+
+static kmem_cache_thread_t *
+splat_kmem_cache_test_kct_alloc(int id, int count)
+{
+ kmem_cache_thread_t *kct;
+
+ ASSERTF(id < SPLAT_KMEM_THREADS, "id=%d\n", id);
+ kct = vmem_zalloc(sizeof(kmem_cache_thread_t) +
+ count * sizeof(kmem_cache_data_t *), KM_SLEEP);
+ if (!kct)
+ return NULL;
+
+ spin_lock_init(&kct->kct_lock);
+ kct->kct_cache = NULL;
+ kct->kct_id = id;
+ kct->kct_kcd_count = count;
+
+ return kct;
+}
+
+static void
+splat_kmem_cache_test_kct_free(kmem_cache_thread_t *kct)
+{
+ vmem_free(kct, sizeof(kmem_cache_thread_t) +
+ kct->kct_kcd_count * sizeof(kmem_cache_data_t *));
+}
+
static int
splat_kmem_cache_test_constructor(void *ptr, void *priv, int flags)
{
@@ -293,83 +377,340 @@ splat_kmem_cache_test_destructor(void *ptr, void *priv)
return;
}
+/*
+ * Generic reclaim function which assumes that all objects may
+ * be reclaimed at any time. We free a small percentage of the
+ * objects linked off the kcp or kct[] every time we are called.
+ */
+static void
+splat_kmem_cache_test_reclaim(void *priv)
+{
+ kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv;
+ kmem_cache_thread_t *kct;
+ int i, j, count;
+
+ ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC);
+ count = kcp->kcp_kcd_count * SPLAT_KMEM_OBJ_RECLAIM / 100;
+
+ /* Objects directly attached to the kcp */
+ spin_lock(&kcp->kcp_lock);
+ for (i = 0; i < kcp->kcp_kcd_count; i++) {
+ if (kcp->kcp_kcd[i]) {
+ kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]);
+ kcp->kcp_kcd[i] = NULL;
+
+ if ((--count) == 0)
+ break;
+ }
+ }
+ spin_unlock(&kcp->kcp_lock);
+
+ /* No threads containing objects to consider */
+ if (kcp->kcp_kct_count == -1)
+ return;
+
+ /* Objects attached to a kct thread */
+ for (i = 0; i < kcp->kcp_kct_count; i++) {
+ spin_lock(&kcp->kcp_lock);
+ kct = kcp->kcp_kct[i];
+ spin_unlock(&kcp->kcp_lock);
+ if (!kct)
+ continue;
+
+ spin_lock(&kct->kct_lock);
+ count = kct->kct_kcd_count * SPLAT_KMEM_OBJ_RECLAIM / 100;
+
+ for (j = 0; j < kct->kct_kcd_count; j++) {
+ if (kct->kct_kcd[j]) {
+ kmem_cache_free(kcp->kcp_cache,kct->kct_kcd[j]);
+ kct->kct_kcd[j] = NULL;
+
+ if ((--count) == 0)
+ break;
+ }
+ }
+ spin_unlock(&kct->kct_lock);
+ }
+
+ return;
+}
+
+static int
+splat_kmem_cache_test_threads(kmem_cache_priv_t *kcp, int threads)
+{
+ int rc;
+
+ spin_lock(&kcp->kcp_lock);
+ rc = (kcp->kcp_kct_count == threads);
+ spin_unlock(&kcp->kcp_lock);
+
+ return rc;
+}
+
+static int
+splat_kmem_cache_test_flags(kmem_cache_priv_t *kcp, int flags)
+{
+ int rc;
+
+ spin_lock(&kcp->kcp_lock);
+ rc = (kcp->kcp_flags & flags);
+ spin_unlock(&kcp->kcp_lock);
+
+ return rc;
+}
+
+static void
+splat_kmem_cache_test_thread(void *arg)
+{
+ kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg;
+ kmem_cache_thread_t *kct;
+ int rc = 0, id, i;
+ void *obj;
+
+ ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC);
+
+ /* Assign thread ids */
+ spin_lock(&kcp->kcp_lock);
+ if (kcp->kcp_kct_count == -1)
+ kcp->kcp_kct_count = 0;
+
+ id = kcp->kcp_kct_count;
+ kcp->kcp_kct_count++;
+ spin_unlock(&kcp->kcp_lock);
+
+ kct = splat_kmem_cache_test_kct_alloc(id, kcp->kcp_alloc);
+ if (!kct) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ spin_lock(&kcp->kcp_lock);
+ kcp->kcp_kct[id] = kct;
+ spin_unlock(&kcp->kcp_lock);
+
+ /* Wait for all threads to have started and report they are ready */
+ if (kcp->kcp_kct_count == SPLAT_KMEM_THREADS)
+ wake_up(&kcp->kcp_ctl_waitq);
+
+ wait_event(kcp->kcp_thr_waitq,
+ splat_kmem_cache_test_flags(kcp, KCP_FLAG_READY));
+
+ /*
+ * Updates to kct->kct_kcd[] are performed under a spin_lock so
+ * they may safely run concurrent with the reclaim function. If
+ * we are not in a low memory situation we have one lock per-
+ * thread so they are not expected to be contended.
+ */
+ for (i = 0; i < kct->kct_kcd_count; i++) {
+ obj = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
+ spin_lock(&kct->kct_lock);
+ kct->kct_kcd[i] = obj;
+ spin_unlock(&kct->kct_lock);
+ }
+
+ for (i = 0; i < kct->kct_kcd_count; i++) {
+ spin_lock(&kct->kct_lock);
+ if (kct->kct_kcd[i]) {
+ kmem_cache_free(kcp->kcp_cache, kct->kct_kcd[i]);
+ kct->kct_kcd[i] = NULL;
+ }
+ spin_unlock(&kct->kct_lock);
+ }
+out:
+ spin_lock(&kcp->kcp_lock);
+ if (kct) {
+ splat_kmem_cache_test_kct_free(kct);
+ kcp->kcp_kct[id] = kct = NULL;
+ }
+
+ if (!kcp->kcp_rc)
+ kcp->kcp_rc = rc;
+
+ if ((--kcp->kcp_kct_count) == 0)
+ wake_up(&kcp->kcp_ctl_waitq);
+
+ spin_unlock(&kcp->kcp_lock);
+
+ thread_exit();
+}
+
static int
splat_kmem_cache_test(struct file *file, void *arg, char *name,
- int size, int align, int flags)
+ int size, int align, int flags)
{
- kmem_cache_t *cache = NULL;
- kmem_cache_data_t *kcd = NULL;
- kmem_cache_priv_t kcp;
+ kmem_cache_priv_t *kcp;
+ kmem_cache_data_t *kcd;
int rc = 0, max;
- kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC;
- kcp.kcp_file = file;
- kcp.kcp_size = size;
- kcp.kcp_align = align;
- kcp.kcp_count = 0;
- kcp.kcp_rc = 0;
-
- cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME,
- kcp.kcp_size, kcp.kcp_align,
- splat_kmem_cache_test_constructor,
- splat_kmem_cache_test_destructor,
- NULL, &kcp, NULL, flags);
- if (!cache) {
+ kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, align, 0, 1);
+ if (!kcp) {
+ splat_vprint(file, name, "Unable to create '%s'\n", "kcp");
+ return -ENOMEM;
+ }
+
+ kcp->kcp_cache =
+ kmem_cache_create(SPLAT_KMEM_CACHE_NAME,
+ kcp->kcp_size, kcp->kcp_align,
+ splat_kmem_cache_test_constructor,
+ splat_kmem_cache_test_destructor,
+ NULL, kcp, NULL, flags);
+ if (!kcp->kcp_cache) {
splat_vprint(file, name,
- "Unable to create '%s'\n",
+ "Unable to create '%s'\n",
SPLAT_KMEM_CACHE_NAME);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out_free;
}
- kcd = kmem_cache_alloc(cache, KM_SLEEP);
+ kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
if (!kcd) {
splat_vprint(file, name,
- "Unable to allocate from '%s'\n",
- SPLAT_KMEM_CACHE_NAME);
+ "Unable to allocate from '%s'\n",
+ SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
+ spin_lock(&kcp->kcp_lock);
+ kcp->kcp_kcd[0] = kcd;
+ spin_unlock(&kcp->kcp_lock);
- if (!kcd->kcd_flag) {
+ if (!kcp->kcp_kcd[0]->kcd_flag) {
splat_vprint(file, name,
- "Failed to run contructor for '%s'\n",
- SPLAT_KMEM_CACHE_NAME);
+ "Failed to run contructor for '%s'\n",
+ SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
- if (kcd->kcd_magic != kcp.kcp_magic) {
+ if (kcp->kcp_kcd[0]->kcd_magic != kcp->kcp_magic) {
splat_vprint(file, name,
- "Failed to pass private data to constructor "
- "for '%s'\n", SPLAT_KMEM_CACHE_NAME);
+ "Failed to pass private data to constructor "
+ "for '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
goto out_free;
}
- max = kcp.kcp_count;
- kmem_cache_free(cache, kcd);
+ max = kcp->kcp_count;
+ spin_lock(&kcp->kcp_lock);
+ kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[0]);
+ kcp->kcp_kcd[0] = NULL;
+ spin_unlock(&kcp->kcp_lock);
/* Destroy the entire cache which will force destructors to
* run and we can verify one was called for every object */
- kmem_cache_destroy(cache);
- if (kcp.kcp_count) {
+ kmem_cache_destroy(kcp->kcp_cache);
+ if (kcp->kcp_count) {
splat_vprint(file, name,
- "Failed to run destructor on all slab objects "
- "for '%s'\n", SPLAT_KMEM_CACHE_NAME);
+ "Failed to run destructor on all slab objects "
+ "for '%s'\n", SPLAT_KMEM_CACHE_NAME);
rc = -EINVAL;
}
splat_vprint(file, name,
- "Successfully ran ctors/dtors for %d elements in '%s'\n",
- max, SPLAT_KMEM_CACHE_NAME);
+ "Successfully ran ctors/dtors for %d elements in '%s'\n",
+ max, SPLAT_KMEM_CACHE_NAME);
return rc;
out_free:
- if (kcd)
- kmem_cache_free(cache, kcd);
+ if (kcp->kcp_kcd[0]) {
+ spin_lock(&kcp->kcp_lock);
+ kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[0]);
+ kcp->kcp_kcd[0] = NULL;
+ spin_unlock(&kcp->kcp_lock);
+ }
+
+ if (kcp->kcp_cache)
+ kmem_cache_destroy(kcp->kcp_cache);
+
+ splat_kmem_cache_test_kcp_free(kcp);
+
+ return rc;
+}
+
+static int
+splat_kmem_cache_thread_test(struct file *file, void *arg, char *name,
+ int size, int alloc)
+{
+ kmem_cache_priv_t *kcp;
+ kthread_t *thr;
+ struct timespec start, stop, delta;
+ char cache_name[32];
+ int i, rc = 0;
+
+ kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, 0, alloc, 0);
+ if (!kcp) {
+ splat_vprint(file, name, "Unable to create '%s'\n", "kcp");
+ return -ENOMEM;
+ }
+
+ (void)snprintf(cache_name, 32, "%s-%d-%d",
+ SPLAT_KMEM_CACHE_NAME, size, alloc);
+ kcp->kcp_cache =
+ kmem_cache_create(cache_name, kcp->kcp_size, 0,
+ splat_kmem_cache_test_constructor,
+ splat_kmem_cache_test_destructor,
+ splat_kmem_cache_test_reclaim,
+ kcp, NULL, KMC_VMEM);
+ if (!kcp->kcp_cache) {
+ splat_vprint(file, name, "Unable to create '%s'\n", cache_name);
+ rc = -ENOMEM;
+ goto out_kcp;
+ }
+
+ start = current_kernel_time();
+
+ for (i = 0; i < SPLAT_KMEM_THREADS; i++) {
+ thr = thread_create(NULL, 0,
+ splat_kmem_cache_test_thread,
+ kcp, 0, &p0, TS_RUN, minclsyspri);
+ if (thr == NULL) {
+ rc = -ESRCH;
+ goto out_cache;
+ }
+ }
+
+ /* Sleep until all threads have started, then set the ready
+ * flag and wake them all up for maximum concurrency. */
+ wait_event(kcp->kcp_ctl_waitq,
+ splat_kmem_cache_test_threads(kcp, SPLAT_KMEM_THREADS));
+
+ spin_lock(&kcp->kcp_lock);
+ kcp->kcp_flags |= KCP_FLAG_READY;
+ spin_unlock(&kcp->kcp_lock);
+ wake_up_all(&kcp->kcp_thr_waitq);
+
+ /* Sleep until all threads have finished */
+ wait_event(kcp->kcp_ctl_waitq, splat_kmem_cache_test_threads(kcp, 0));
+
+ stop = current_kernel_time();
+ delta = timespec_sub(stop, start);
- kmem_cache_destroy(cache);
+ splat_vprint(file, name,
+ "%-22s %2ld.%09ld\t"
+ "%lu/%lu/%lu\t%lu/%lu/%lu\n",
+ kcp->kcp_cache->skc_name,
+ delta.tv_sec, delta.tv_nsec,
+ (unsigned long)kcp->kcp_cache->skc_slab_total,
+ (unsigned long)kcp->kcp_cache->skc_slab_max,
+ (unsigned long)(kcp->kcp_alloc *
+ SPLAT_KMEM_THREADS /
+ SPL_KMEM_CACHE_OBJ_PER_SLAB),
+ (unsigned long)kcp->kcp_cache->skc_obj_total,
+ (unsigned long)kcp->kcp_cache->skc_obj_max,
+ (unsigned long)(kcp->kcp_alloc *
+ SPLAT_KMEM_THREADS));
+
+ if (delta.tv_sec >= 5)
+ rc = -ETIME;
+
+ if (!rc && kcp->kcp_rc)
+ rc = kcp->kcp_rc;
+
+out_cache:
+ kmem_cache_destroy(kcp->kcp_cache);
+out_kcp:
+ splat_kmem_cache_test_kcp_free(kcp);
return rc;
}
@@ -409,291 +750,279 @@ splat_kmem_test6(struct file *file, void *arg)
return splat_kmem_cache_test(file, arg, name, 128*1028, 0, KMC_VMEM);
}
-static void
-splat_kmem_cache_test_reclaim(void *priv)
+/* Validate object alignment behavior for caches */
+static int
+splat_kmem_test7(struct file *file, void *arg)
{
- kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv;
- int i, count;
-
- count = min(SPLAT_KMEM_OBJ_RECLAIM, kcp->kcp_count);
- splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST7_NAME,
- "Reaping %d objects from '%s'\n", count,
- SPLAT_KMEM_CACHE_NAME);
-
- for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) {
- if (kcp->kcp_kcd[i]) {
- kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]);
- kcp->kcp_kcd[i] = NULL;
+ char *name = SPLAT_KMEM_TEST7_NAME;
+ int i, rc;
- if (--count == 0)
- break;
- }
+ for (i = 8; i <= PAGE_SIZE; i *= 2) {
+ rc = splat_kmem_cache_test(file, arg, name, 157, i, 0);
+ if (rc)
+ return rc;
}
- return;
+ return rc;
}
static int
-splat_kmem_test7(struct file *file, void *arg)
+splat_kmem_test8(struct file *file, void *arg)
{
- kmem_cache_t *cache;
- kmem_cache_priv_t kcp;
- int i, rc = 0;
-
- kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC;
- kcp.kcp_file = file;
- kcp.kcp_size = 256;
- kcp.kcp_count = 0;
- kcp.kcp_rc = 0;
-
- cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp.kcp_size, 0,
- splat_kmem_cache_test_constructor,
- splat_kmem_cache_test_destructor,
- splat_kmem_cache_test_reclaim,
- &kcp, NULL, 0);
- if (!cache) {
- splat_vprint(file, SPLAT_KMEM_TEST7_NAME,
- "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME);
+ kmem_cache_priv_t *kcp;
+ kmem_cache_data_t *kcd;
+ int i, j, rc = 0;
+
+ kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST8_NAME,
+ 256, 0, 0, SPLAT_KMEM_OBJ_COUNT);
+ if (!kcp) {
+ splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
+ "Unable to create '%s'\n", "kcp");
return -ENOMEM;
}
- kcp.kcp_cache = cache;
+ kcp->kcp_cache =
+ kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0,
+ splat_kmem_cache_test_constructor,
+ splat_kmem_cache_test_destructor,
+ splat_kmem_cache_test_reclaim,
+ kcp, NULL, 0);
+ if (!kcp->kcp_cache) {
+ splat_kmem_cache_test_kcp_free(kcp);
+ splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
+ "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME);
+ return -ENOMEM;
+ }
for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) {
- /* All allocations need not succeed */
- kcp.kcp_kcd[i] = kmem_cache_alloc(cache, KM_SLEEP);
- if (!kcp.kcp_kcd[i]) {
- splat_vprint(file, SPLAT_KMEM_TEST7_NAME,
- "Unable to allocate from '%s'\n",
- SPLAT_KMEM_CACHE_NAME);
+ kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
+ spin_lock(&kcp->kcp_lock);
+ kcp->kcp_kcd[i] = kcd;
+ spin_unlock(&kcp->kcp_lock);
+ if (!kcd) {
+ splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
+ "Unable to allocate from '%s'\n",
+ SPLAT_KMEM_CACHE_NAME);
}
}
- ASSERT(kcp.kcp_count > 0);
-
/* Request the slab cache free any objects it can. For a few reasons
* this may not immediately result in more free memory even if objects
* are freed. First off, due to fragmentation we may not be able to
* reclaim any slabs. Secondly, even if we do we fully clear some
* slabs we will not want to immedately reclaim all of them because
* we may contend with cache allocs and thrash. What we want to see
- * is slab size decrease more gradually as it becomes clear they
+ * is the slab size decrease more gradually as it becomes clear they
* will not be needed. This should be acheivable in less than minute
* if it takes longer than this something has gone wrong.
*/
for (i = 0; i < 60; i++) {
- kmem_cache_reap_now(cache);
- splat_vprint(file, SPLAT_KMEM_TEST7_NAME,
- "%s cache objects %d, slabs %u/%u objs %u/%u\n",
- SPLAT_KMEM_CACHE_NAME, kcp.kcp_count,
- (unsigned)cache->skc_slab_alloc,
- (unsigned)cache->skc_slab_total,
- (unsigned)cache->skc_obj_alloc,
- (unsigned)cache->skc_obj_total);
-
- if (cache->skc_obj_total == 0)
+ kmem_cache_reap_now(kcp->kcp_cache);
+ splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
+ "%s cache objects %d, slabs %u/%u objs %u/%u mags ",
+ SPLAT_KMEM_CACHE_NAME, kcp->kcp_count,
+ (unsigned)kcp->kcp_cache->skc_slab_alloc,
+ (unsigned)kcp->kcp_cache->skc_slab_total,
+ (unsigned)kcp->kcp_cache->skc_obj_alloc,
+ (unsigned)kcp->kcp_cache->skc_obj_total);
+
+ for_each_online_cpu(j)
+ splat_print(file, "%u/%u ",
+ kcp->kcp_cache->skc_mag[j]->skm_avail,
+ kcp->kcp_cache->skc_mag[j]->skm_size);
+
+ splat_print(file, "%s\n", "");
+
+ if (kcp->kcp_cache->skc_obj_total == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
}
- if (cache->skc_obj_total == 0) {
- splat_vprint(file, SPLAT_KMEM_TEST7_NAME,
+ if (kcp->kcp_cache->skc_obj_total == 0) {
+ splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
- SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
+ SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
} else {
- splat_vprint(file, SPLAT_KMEM_TEST7_NAME,
+ splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
- (unsigned)cache->skc_obj_total, SPLAT_KMEM_OBJ_COUNT,
- SPLAT_KMEM_CACHE_NAME);
+ (unsigned)kcp->kcp_cache->skc_obj_total,
+ SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}
/* Cleanup our mess (for failure case of time expiring) */
+ spin_lock(&kcp->kcp_lock);
for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++)
- if (kcp.kcp_kcd[i])
- kmem_cache_free(cache, kcp.kcp_kcd[i]);
+ if (kcp->kcp_kcd[i])
+ kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]);
+ spin_unlock(&kcp->kcp_lock);
- kmem_cache_destroy(cache);
+ kmem_cache_destroy(kcp->kcp_cache);
+ splat_kmem_cache_test_kcp_free(kcp);
return rc;
}
-static void
-splat_kmem_test8_thread(void *arg)
+static int
+splat_kmem_test9(struct file *file, void *arg)
{
- kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg;
- int count = kcp->kcp_alloc, rc = 0, i;
- void **objs;
-
- ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC);
+ kmem_cache_priv_t *kcp;
+ kmem_cache_data_t *kcd;
+ int i, j, rc = 0, count = SPLAT_KMEM_OBJ_COUNT * 128;
+
+ kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST9_NAME,
+ 256, 0, 0, count);
+ if (!kcp) {
+ splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
+ "Unable to create '%s'\n", "kcp");
+ return -ENOMEM;
+ }
- objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP);
- if (!objs) {
- splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME,
- "Unable to alloc objp array for cache '%s'\n",
- kcp->kcp_cache->skc_name);
- rc = -ENOMEM;
- goto out;
+ kcp->kcp_cache =
+ kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0,
+ splat_kmem_cache_test_constructor,
+ splat_kmem_cache_test_destructor,
+ NULL, kcp, NULL, 0);
+ if (!kcp->kcp_cache) {
+ splat_kmem_cache_test_kcp_free(kcp);
+ splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
+ "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME);
+ return -ENOMEM;
}
for (i = 0; i < count; i++) {
- objs[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
- if (!objs[i]) {
- splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME,
- "Unable to allocate from cache '%s'\n",
- kcp->kcp_cache->skc_name);
- rc = -ENOMEM;
- break;
+ kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
+ spin_lock(&kcp->kcp_lock);
+ kcp->kcp_kcd[i] = kcd;
+ spin_unlock(&kcp->kcp_lock);
+ if (!kcd) {
+ splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
+ "Unable to allocate from '%s'\n",
+ SPLAT_KMEM_CACHE_NAME);
}
}
- for (i = 0; i < count; i++)
- if (objs[i])
- kmem_cache_free(kcp->kcp_cache, objs[i]);
-
- vmem_free(objs, count * sizeof(void *));
-out:
spin_lock(&kcp->kcp_lock);
- if (!kcp->kcp_rc)
- kcp->kcp_rc = rc;
-
- if (--kcp->kcp_threads == 0)
- wake_up(&kcp->kcp_waitq);
-
+ for (i = 0; i < count; i++)
+ if (kcp->kcp_kcd[i])
+ kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]);
spin_unlock(&kcp->kcp_lock);
- thread_exit();
-}
+ /* We have allocated a large number of objects thus creating a
+ * large number of slabs and then freed them all. However since
+ * there should be little memory pressure at the moment those
+ * slabs have not been freed. What we want to see is the slab
+ * size decrease gradually as it becomes clear they will not
+ * be needed. This should be achievable in less than a minute;
+ * if it takes longer than this something has gone wrong.
+ */
+ for (i = 0; i < 60; i++) {
+ splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
+ "%s cache objects %d, slabs %u/%u objs %u/%u mags ",
+ SPLAT_KMEM_CACHE_NAME, kcp->kcp_count,
+ (unsigned)kcp->kcp_cache->skc_slab_alloc,
+ (unsigned)kcp->kcp_cache->skc_slab_total,
+ (unsigned)kcp->kcp_cache->skc_obj_alloc,
+ (unsigned)kcp->kcp_cache->skc_obj_total);
+
+ for_each_online_cpu(j)
+ splat_print(file, "%u/%u ",
+ kcp->kcp_cache->skc_mag[j]->skm_avail,
+ kcp->kcp_cache->skc_mag[j]->skm_size);
+
+ splat_print(file, "%s\n", "");
+
+ if (kcp->kcp_cache->skc_obj_total == 0)
+ break;
-static int
-splat_kmem_test8_count(kmem_cache_priv_t *kcp, int threads)
-{
- int ret;
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+ }
- spin_lock(&kcp->kcp_lock);
- ret = (kcp->kcp_threads == threads);
- spin_unlock(&kcp->kcp_lock);
+ if (kcp->kcp_cache->skc_obj_total == 0) {
+ splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
+ "Successfully created %d objects "
+ "in cache %s and reclaimed them\n",
+ count, SPLAT_KMEM_CACHE_NAME);
+ } else {
+ splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
+ "Failed to reclaim %u/%d objects from cache %s\n",
+ (unsigned)kcp->kcp_cache->skc_obj_total, count,
+ SPLAT_KMEM_CACHE_NAME);
+ rc = -ENOMEM;
+ }
+
+ kmem_cache_destroy(kcp->kcp_cache);
+ splat_kmem_cache_test_kcp_free(kcp);
- return ret;
+ return rc;
}
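The gradual decrease in slab usage that the comment in test 9 describes implies some time-based release of idle slabs behind the scenes. The cache's own aging code is not part of this hunk, so the following is only a generic sketch of the usual Linux idiom for that kind of periodic maintenance, a self-rearming delayed work item; the names my_cache_ager and MY_AGE_INTERVAL are made up for illustration and are not the SPL's actual symbols.

#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define MY_AGE_INTERVAL	(15 * HZ)	/* hypothetical aging period */

static struct delayed_work my_cache_ager;

static void
my_cache_ager_func(struct work_struct *work)
{
	/* Release slabs which have sat completely idle since the previous
	 * pass, then re-arm so unused memory keeps draining back to the
	 * system over time. */
	schedule_delayed_work(&my_cache_ager, MY_AGE_INTERVAL);
}

static void
my_cache_ager_start(void)
{
	INIT_DELAYED_WORK(&my_cache_ager, my_cache_ager_func);
	schedule_delayed_work(&my_cache_ager, MY_AGE_INTERVAL);
}

Test 9 simply observes the effect of whatever mechanism is in place: it polls skc_obj_total once per second for up to 60 seconds and fails if the cache never drains.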
-/* This test will always pass and is simply here so I can easily
- * eyeball the slab cache locking overhead to ensure it is reasonable.
+/*
+ * This test creates N threads with a shared kmem cache. They then all
+ * concurrently allocate and free from the cache to stress the locking and
+ * concurrent cache performance. If any one test takes longer than 5
+ * seconds to complete, it is treated as a failure and may indicate a
+ * performance regression. On my test system no single test takes more
+ * than 1 second to complete, so a 5x slowdown is likely a problem.
*/
static int
-splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
+splat_kmem_test10(struct file *file, void *arg)
{
- kmem_cache_priv_t kcp;
- kthread_t *thr;
- struct timespec start, stop, delta;
- char cache_name[32];
- int i, j, rc = 0, threads = 32;
-
- kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC;
- kcp.kcp_file = file;
-
- splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "name",
- "time (sec)\tslabs \tobjs \thash\n");
- splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "",
- " \ttot/max/calc\ttot/max/calc\n");
-
- for (i = 1; i <= count; i *= 2) {
- kcp.kcp_size = size;
- kcp.kcp_count = 0;
- kcp.kcp_threads = 0;
- kcp.kcp_alloc = i;
- kcp.kcp_rc = 0;
- spin_lock_init(&kcp.kcp_lock);
- init_waitqueue_head(&kcp.kcp_waitq);
-
- (void)snprintf(cache_name, 32, "%s-%d-%d",
- SPLAT_KMEM_CACHE_NAME, size, i);
- kcp.kcp_cache = kmem_cache_create(cache_name, kcp.kcp_size, 0,
- splat_kmem_cache_test_constructor,
- splat_kmem_cache_test_destructor,
- NULL, &kcp, NULL, 0);
- if (!kcp.kcp_cache) {
- splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
- "Unable to create '%s' cache\n",
- SPLAT_KMEM_CACHE_NAME);
- rc = -ENOMEM;
- break;
- }
-
- start = current_kernel_time();
-
- for (j = 0; j < threads; j++) {
- thr = thread_create(NULL, 0, splat_kmem_test8_thread,
- &kcp, 0, &p0, TS_RUN, minclsyspri);
- if (thr == NULL) {
- rc = -ESRCH;
- break;
- }
- spin_lock(&kcp.kcp_lock);
- kcp.kcp_threads++;
- spin_unlock(&kcp.kcp_lock);
- }
+ uint64_t size, alloc, free_mem, rc = 0;
- /* Sleep until the thread sets kcp.kcp_threads == 0 */
- wait_event(kcp.kcp_waitq, splat_kmem_test8_count(&kcp, 0));
- stop = current_kernel_time();
- delta = timespec_sub(stop, start);
+ free_mem = nr_free_pages() * PAGE_SIZE;
+ for (size = 16; size <= 1024*1024; size *= 2) {
- splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t"
- "%lu/%lu/%lu\t%lu/%lu/%lu\n",
- kcp.kcp_cache->skc_name,
- delta.tv_sec, delta.tv_nsec,
- (unsigned long)kcp.kcp_cache->skc_slab_total,
- (unsigned long)kcp.kcp_cache->skc_slab_max,
- (unsigned long)(kcp.kcp_alloc * threads /
- SPL_KMEM_CACHE_OBJ_PER_SLAB),
- (unsigned long)kcp.kcp_cache->skc_obj_total,
- (unsigned long)kcp.kcp_cache->skc_obj_max,
- (unsigned long)(kcp.kcp_alloc * threads));
+ splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name",
+ "time (sec)\tslabs \tobjs \thash\n");
+ splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "",
+ " \ttot/max/calc\ttot/max/calc\n");
- kmem_cache_destroy(kcp.kcp_cache);
+ for (alloc = 1; alloc <= 1024; alloc *= 2) {
- if (!rc && kcp.kcp_rc)
- rc = kcp.kcp_rc;
+ /* Skip tests which exceed free memory */
+ if (size * alloc * SPLAT_KMEM_THREADS > free_mem / 2)
+ continue;
- if (rc)
- break;
+ rc = splat_kmem_cache_thread_test(file, arg,
+ SPLAT_KMEM_TEST10_NAME, size, alloc);
+ if (rc)
+ break;
+ }
}
return rc;
}
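The per-thread work driven by splat_kmem_cache_thread_test() is not part of this hunk. For readers reviewing only this portion of the patch, a minimal sketch of the kind of allocate/free loop each thread runs is shown below; it is illustrative only, omitting the timing and shared-state bookkeeping the real threads do, and uses only the SPL interfaces already exercised in this file.

static void
example_cache_worker(kmem_cache_t *cache, int count)
{
	void **objs;
	int i;

	/* Scratch array to hold the objects between the alloc and free
	 * phases; sized for 'count' outstanding allocations. */
	objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP);
	if (objs == NULL)
		return;

	for (i = 0; i < count; i++)
		objs[i] = kmem_cache_alloc(cache, KM_SLEEP);

	for (i = 0; i < count; i++)
		if (objs[i])
			kmem_cache_free(cache, objs[i]);

	vmem_free(objs, count * sizeof(void *));
}

With many such threads sharing a single cache, most of the contention lands on the cache's internal locking and per-cpu magazines, which is exactly the overhead the 5-second threshold is meant to catch.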
+/*
+ * This test creates N threads with a shared kmem cache which overcommits
+ * memory by 4x. This makes it impossible for the slab to satisfy the
+ * thread requirements without having its reclaim hook run, which will
+ * free objects back for use. This behavior is triggered by the Linux VM
+ * detecting a low memory condition on the node and invoking the shrinkers.
+ * This should allow all the threads to complete while avoiding deadlock
+ * and, for the most part, out-of-memory events. This is very tough on the
+ * system, so it is possible the test app may get oom'ed.
+ */
static int
-splat_kmem_test8(struct file *file, void *arg)
+splat_kmem_test11(struct file *file, void *arg)
{
- int i, rc = 0;
+ uint64_t size, alloc, rc;
- /* Run through slab cache with objects size from
- * 16-1Mb in 4x multiples with 1024 objects each */
- for (i = 16; i <= 1024*1024; i *= 4) {
- rc = splat_kmem_test8_sc(file, arg, i, 256);
- if (rc)
- break;
- }
-
- return rc;
-}
+ size = 1024*1024;
+ alloc = ((4 * num_physpages * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS;
-/* Validate object alignment cache behavior for caches */
-static int
-splat_kmem_test9(struct file *file, void *arg)
-{
- char *name = SPLAT_KMEM_TEST9_NAME;
- int i, rc;
+	splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "name",
+ "time (sec)\tslabs \tobjs \thash\n");
+	splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "",
+ " \ttot/max/calc\ttot/max/calc\n");
- for (i = 8; i <= PAGE_SIZE; i *= 2) {
- rc = splat_kmem_cache_test(file, arg, name, 157, i, 0);
- if (rc)
- return rc;
- }
+ rc = splat_kmem_cache_thread_test(file, arg,
+ SPLAT_KMEM_TEST11_NAME, size, alloc);
return rc;
}
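To make the 4x overcommit sizing above concrete, here is the arithmetic worked through with assumed values: 4 KiB pages, 1 GiB of physical memory, and a hypothetical thread count of 32 (the real SPLAT_KMEM_THREADS value is defined elsewhere in this file and is not shown in this hunk).

/*
 * alloc = ((4 * num_physpages * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS
 *
 *   num_physpages            = 262144         (1 GiB / 4 KiB)
 *   4 * num_physpages * 4096 = 4 GiB          (total allocation target)
 *   4 GiB / 1 MiB            = 4096 objects   (size = 1024*1024)
 *   4096 / 32 threads        = 128 allocations per thread
 *
 * Collectively the threads try to hold 4 GiB of 1 MiB objects on a
 * 1 GiB machine, which cannot succeed unless the cache's reclaim hook
 * keeps returning objects as the VM applies memory pressure.
 */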
@@ -701,60 +1030,66 @@ splat_kmem_test9(struct file *file, void *arg)
splat_subsystem_t *
splat_kmem_init(void)
{
- splat_subsystem_t *sub;
+ splat_subsystem_t *sub;
- sub = kmalloc(sizeof(*sub), GFP_KERNEL);
- if (sub == NULL)
- return NULL;
+ sub = kmalloc(sizeof(*sub), GFP_KERNEL);
+ if (sub == NULL)
+ return NULL;
- memset(sub, 0, sizeof(*sub));
- strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE);
+ memset(sub, 0, sizeof(*sub));
+ strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE);
strncpy(sub->desc.desc, SPLAT_KMEM_DESC, SPLAT_DESC_SIZE);
- INIT_LIST_HEAD(&sub->subsystem_list);
+ INIT_LIST_HEAD(&sub->subsystem_list);
INIT_LIST_HEAD(&sub->test_list);
- spin_lock_init(&sub->test_lock);
- sub->desc.id = SPLAT_SUBSYSTEM_KMEM;
-
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC,
- SPLAT_KMEM_TEST1_ID, splat_kmem_test1);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC,
- SPLAT_KMEM_TEST2_ID, splat_kmem_test2);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC,
- SPLAT_KMEM_TEST3_ID, splat_kmem_test3);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC,
- SPLAT_KMEM_TEST4_ID, splat_kmem_test4);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC,
- SPLAT_KMEM_TEST5_ID, splat_kmem_test5);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC,
- SPLAT_KMEM_TEST6_ID, splat_kmem_test6);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC,
- SPLAT_KMEM_TEST7_ID, splat_kmem_test7);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC,
- SPLAT_KMEM_TEST8_ID, splat_kmem_test8);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC,
- SPLAT_KMEM_TEST9_ID, splat_kmem_test9);
-
- return sub;
+ spin_lock_init(&sub->test_lock);
+ sub->desc.id = SPLAT_SUBSYSTEM_KMEM;
+
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC,
+ SPLAT_KMEM_TEST1_ID, splat_kmem_test1);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC,
+ SPLAT_KMEM_TEST2_ID, splat_kmem_test2);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC,
+ SPLAT_KMEM_TEST3_ID, splat_kmem_test3);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC,
+ SPLAT_KMEM_TEST4_ID, splat_kmem_test4);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC,
+ SPLAT_KMEM_TEST5_ID, splat_kmem_test5);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC,
+ SPLAT_KMEM_TEST6_ID, splat_kmem_test6);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC,
+ SPLAT_KMEM_TEST7_ID, splat_kmem_test7);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC,
+ SPLAT_KMEM_TEST8_ID, splat_kmem_test8);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC,
+ SPLAT_KMEM_TEST9_ID, splat_kmem_test9);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST10_NAME, SPLAT_KMEM_TEST10_DESC,
+ SPLAT_KMEM_TEST10_ID, splat_kmem_test10);
+ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC,
+ SPLAT_KMEM_TEST11_ID, splat_kmem_test11);
+
+ return sub;
}
void
splat_kmem_fini(splat_subsystem_t *sub)
{
- ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID);
-
- kfree(sub);
+ ASSERT(sub);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST11_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST10_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID);
+ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID);
+
+ kfree(sub);
}
int
splat_kmem_id(void) {
- return SPLAT_SUBSYSTEM_KMEM;
+ return SPLAT_SUBSYSTEM_KMEM;
}