Diffstat (limited to 'modules/spl/spl-kmem.c')
-rw-r--r--  modules/spl/spl-kmem.c  234
1 files changed, 164 insertions, 70 deletions
diff --git a/modules/spl/spl-kmem.c b/modules/spl/spl-kmem.c
index 0ee04a287..be20c5b44 100644
--- a/modules/spl/spl-kmem.c
+++ b/modules/spl/spl-kmem.c
@@ -167,17 +167,9 @@ static struct shrinker spl_kmem_cache_shrinker = {
};
#endif
-static spl_kmem_slab_t *
-spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
- spl_kmem_slab_t *sks;
- spl_kmem_obj_t *sko, *n;
- int i;
- ENTRY;
-
- sks = kmem_cache_alloc(spl_slab_cache, flags);
- if (sks == NULL)
- RETURN(sks);
-
+static void
+spl_slab_init(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
+{
sks->sks_magic = SKS_MAGIC;
sks->sks_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
sks->sks_age = jiffies;
@@ -185,91 +177,201 @@ spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
INIT_LIST_HEAD(&sks->sks_list);
INIT_LIST_HEAD(&sks->sks_free_list);
sks->sks_ref = 0;
+}
+
+static int
+spl_slab_alloc_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks, int flags)
+{
+ spl_kmem_obj_t *sko, *n;
+ int i, rc = 0;
+ /* This is based on the linux slab cache for now simply because
+ * it means I get slab coloring, hardware cache alignment, etc
+ * for free. There's no reason we can't do this ourselves. And
+ * we probably should in the future. For now I'll just
+ * leverage the existing linux slab here. */
for (i = 0; i < sks->sks_objs; i++) {
sko = kmem_cache_alloc(spl_obj_cache, flags);
if (sko == NULL) {
-out_alloc:
- /* Unable to fully construct slab, objects,
- * and object data buffers unwind everything.
- */
- list_for_each_entry_safe(sko, n, &sks->sks_free_list,
- sko_list) {
- ASSERT(sko->sko_magic == SKO_MAGIC);
- vmem_free(sko->sko_addr, skc->skc_obj_size);
- list_del(&sko->sko_list);
- kmem_cache_free(spl_obj_cache, sko);
- }
-
- kmem_cache_free(spl_slab_cache, sks);
- GOTO(out, sks = NULL);
+ rc = -ENOMEM;
+ break;
}
- /* Objects less than a page can use kmem_alloc() and avoid
- * the locking overhead in __get_vm_area_node() when locking
- * for a free address. For objects over a page we use
- * vmem_alloc() because it is usually worth paying this
- * overhead to avoid the need to find contigeous pages.
- * This should give us the best of both worlds. */
- if (skc->skc_obj_size <= PAGE_SIZE)
- sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags);
- else
- sko->sko_addr = vmem_alloc(skc->skc_obj_size, flags);
-
+ sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags);
if (sko->sko_addr == NULL) {
kmem_cache_free(spl_obj_cache, sko);
- GOTO(out_alloc, sks = NULL);
+ rc = -ENOMEM;
+ break;
}
sko->sko_magic = SKO_MAGIC;
- sko->sko_flags = 0;
sko->sko_slab = sks;
INIT_LIST_HEAD(&sko->sko_list);
INIT_HLIST_NODE(&sko->sko_hlist);
list_add(&sko->sko_list, &sks->sks_free_list);
}
+
+ /* Unable to fully construct slab, unwind everything */
+ if (rc) {
+ list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
+ ASSERT(sko->sko_magic == SKO_MAGIC);
+ kmem_free(sko->sko_addr, skc->skc_obj_size);
+ list_del(&sko->sko_list);
+ kmem_cache_free(spl_obj_cache, sko);
+ }
+ }
+
+ RETURN(rc);
+}
+
+static spl_kmem_slab_t *
+spl_slab_alloc_vmem(spl_kmem_cache_t *skc, int flags)
+{
+ spl_kmem_slab_t *sks;
+ spl_kmem_obj_t *sko, *sko_base;
+ void *slab, *obj, *obj_base;
+ int i, size;
+
+ /* For large vmem_alloc'ed buffers it's important that we pack the
+ * spl_kmem_obj_t structure and the actual objects in to one large
+ * virtual address zone to minimize the number of calls to
+ * vmalloc(). Mapping the virtual address is done under a single
+ * global lock which walks a list of all virtual zones. So doing
+ * lots of allocations simply results in lock contention and a
+ * longer list of mapped addresses. It is far better to do a
+ * few large allocations and then subdivide it ourselves. The
+ * large vmem_alloc'ed space is divided as follows:
+ *
+ * 1 slab struct: sizeof(spl_kmem_slab_t)
+ * N obj structs: sizeof(spl_kmem_obj_t) * skc->skc_objs
+ * N objects: skc->skc_obj_size * skc->skc_objs
+ *
+ * XXX: It would probably be a good idea to more carefully
+ * align the starts of these objects in memory.
+ */
+ size = sizeof(spl_kmem_slab_t) + SPL_KMEM_CACHE_OBJ_PER_SLAB *
+ (skc->skc_obj_size + sizeof(spl_kmem_obj_t));
+
+ slab = vmem_alloc(size, flags);
+ if (slab == NULL)
+ RETURN(NULL);
+
+ sks = (spl_kmem_slab_t *)slab;
+ spl_slab_init(skc, sks);
+
+ sko_base = (spl_kmem_obj_t *)(slab + sizeof(spl_kmem_slab_t));
+ obj_base = (void *)sko_base + sizeof(spl_kmem_obj_t) * sks->sks_objs;
+
+ for (i = 0; i < sks->sks_objs; i++) {
+ sko = &sko_base[i];
+ obj = obj_base + skc->skc_obj_size * i;
+ sko->sko_addr = obj;
+ sko->sko_magic = SKO_MAGIC;
+ sko->sko_slab = sks;
+ INIT_LIST_HEAD(&sko->sko_list);
+ INIT_HLIST_NODE(&sko->sko_hlist);
+ list_add_tail(&sko->sko_list, &sks->sks_free_list);
+ }
+
+ RETURN(sks);
+}
+
+static spl_kmem_slab_t *
+spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
+ spl_kmem_slab_t *sks;
+ spl_kmem_obj_t *sko;
+ int rc;
+ ENTRY;
+
+ /* Objects less than a page can use kmem_alloc() and avoid
+ * the locking overhead in __get_vm_area_node() when locking
+ * for a free address. For objects over a page we use
+ * vmem_alloc() because it is usually worth paying this
+ * overhead to avoid the need to find contiguous pages.
+ * This should give us the best of both worlds. */
+ if (skc->skc_obj_size <= PAGE_SIZE) {
+ sks = kmem_cache_alloc(spl_slab_cache, flags);
+ if (sks == NULL)
+ GOTO(out, sks = NULL);
+
+ spl_slab_init(skc, sks);
+
+ rc = spl_slab_alloc_kmem(skc, sks, flags);
+ if (rc) {
+ kmem_cache_free(spl_slab_cache, sks);
+ GOTO(out, sks = NULL);
+ }
+ } else {
+ sks = spl_slab_alloc_vmem(skc, flags);
+ if (sks == NULL)
+ GOTO(out, sks = NULL);
+ }
+
+ ASSERT(sks);
+ list_for_each_entry(sko, &sks->sks_free_list, sko_list)
+ if (skc->skc_ctor)
+ skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
out:
RETURN(sks);
}
+static void
+spl_slab_free_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
+{
+ spl_kmem_obj_t *sko, *n;
+
+ ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(sks->sks_magic == SKS_MAGIC);
+
+ list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
+ ASSERT(sko->sko_magic == SKO_MAGIC);
+ kmem_free(sko->sko_addr, skc->skc_obj_size);
+ list_del(&sko->sko_list);
+ kmem_cache_free(spl_obj_cache, sko);
+ }
+
+ kmem_cache_free(spl_slab_cache, sks);
+}
+
+static void
+spl_slab_free_vmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
+{
+ ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(sks->sks_magic == SKS_MAGIC);
+
+ vmem_free(sks, SPL_KMEM_CACHE_OBJ_PER_SLAB *
+ (skc->skc_obj_size + sizeof(spl_kmem_obj_t)));
+}
+
/* Removes slab from complete or partial list, so it must
* be called with the 'skc->skc_lock' held.
- * */
+ */
static void
spl_slab_free(spl_kmem_slab_t *sks) {
spl_kmem_cache_t *skc;
spl_kmem_obj_t *sko, *n;
- int i = 0;
ENTRY;
ASSERT(sks->sks_magic == SKS_MAGIC);
ASSERT(sks->sks_ref == 0);
- skc = sks->sks_cache;
- skc->skc_obj_total -= sks->sks_objs;
- skc->skc_slab_total--;
+ skc = sks->sks_cache;
+ ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(spin_is_locked(&skc->skc_lock));
- list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
- ASSERT(sko->sko_magic == SKO_MAGIC);
+ skc->skc_obj_total -= sks->sks_objs;
+ skc->skc_slab_total--;
+ list_del(&sks->sks_list);
- /* Run destructors for being freed */
+ /* Run destructors as the slab is being released */
+ list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list)
if (skc->skc_dtor)
skc->skc_dtor(sko->sko_addr, skc->skc_private);
- if (skc->skc_obj_size <= PAGE_SIZE)
- kmem_free(sko->sko_addr, skc->skc_obj_size);
- else
- vmem_free(sko->sko_addr, skc->skc_obj_size);
-
- list_del(&sko->sko_list);
- kmem_cache_free(spl_obj_cache, sko);
- i++;
- }
-
- ASSERT(sks->sks_objs == i);
- list_del(&sks->sks_list);
- kmem_cache_free(spl_slab_cache, sks);
+ if (skc->skc_obj_size <= PAGE_SIZE)
+ spl_slab_free_kmem(skc, sks);
+ else
+ spl_slab_free_vmem(skc, sks);
EXIT;
}
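
The layout that spl_slab_alloc_vmem() builds above, a single vmem_alloc() carved into a slab header, an array of object descriptors, and the object data, can be shown with a small stand-alone sketch. This is not SPL code: malloc() stands in for vmem_alloc(), and toy_slab_t, toy_obj_t, OBJS, and OBJ_SIZE are invented names used only to illustrate the offset arithmetic.

/* Hypothetical user-space sketch of the single-allocation layout:
 * [ slab header | N object descriptors | N data objects ]
 * (the XXX note above points out that the real code should align
 * these regions more carefully). */
#include <stdio.h>
#include <stdlib.h>

#define OBJS     4
#define OBJ_SIZE 64

typedef struct toy_obj  { void *addr; } toy_obj_t;
typedef struct toy_slab { long objs; }  toy_slab_t;

int main(void)
{
	size_t size = sizeof(toy_slab_t) +
	    OBJS * (sizeof(toy_obj_t) + OBJ_SIZE);
	char *base = malloc(size);	/* plays the role of vmem_alloc() */
	if (base == NULL)
		return 1;

	toy_slab_t *sks = (toy_slab_t *)base;
	toy_obj_t *sko_base = (toy_obj_t *)(base + sizeof(toy_slab_t));
	char *obj_base = (char *)&sko_base[OBJS];

	sks->objs = OBJS;
	for (long i = 0; i < sks->objs; i++) {
		sko_base[i].addr = obj_base + i * OBJ_SIZE;
		printf("obj %ld at offset %td\n", i,
		    (char *)sko_base[i].addr - base);
	}

	free(base);			/* one free releases everything */
	return 0;
}

One large allocation means a single pass through the vmalloc zone list, which is exactly the lock-contention argument made in the comment above, and it is also why spl_slab_free_vmem() can release the whole slab with a single vmem_free().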
@@ -629,14 +731,13 @@ static spl_kmem_slab_t *
spl_cache_grow(spl_kmem_cache_t *skc, int flags)
{
spl_kmem_slab_t *sks;
- spl_kmem_obj_t *sko;
cycles_t start;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
if (flags & __GFP_WAIT) {
-// flags |= __GFP_NOFAIL; /* XXX: Solaris assumes this */
+ flags |= __GFP_NOFAIL;
might_sleep();
local_irq_enable();
}
@@ -649,14 +750,6 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags)
RETURN(NULL);
}
- /* Run all the constructors now that the slab is fully allocated */
- list_for_each_entry(sko, &sks->sks_free_list, sko_list) {
- ASSERT(sko->sko_magic == SKO_MAGIC);
-
- if (skc->skc_ctor)
- skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
- }
-
if (flags & __GFP_WAIT)
local_irq_disable();
@@ -697,7 +790,7 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
if (list_empty(&skc->skc_partial_list)) {
spin_unlock(&skc->skc_lock);
- if (unlikely((get_cycles() - start) > skc->skc_lock_refill))
+ if (unlikely((get_cycles()-start)>skc->skc_lock_refill))
skc->skc_lock_refill = get_cycles() - start;
sks = spl_cache_grow(skc, flags);
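
The refill path above keeps a running worst case of the cycles spent before falling through to spl_cache_grow(): when the elapsed count exceeds skc->skc_lock_refill, it becomes the new maximum. A minimal stand-alone version of the same max-tracking idiom, with clock() standing in for get_cycles() and all names invented:

#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for skc->skc_lock_refill: worst latency observed. */
static clock_t worst_refill;

static void record_latency(clock_t start)
{
	clock_t elapsed = clock() - start;

	if (elapsed > worst_refill)	/* same "track the maximum" test as above */
		worst_refill = elapsed;
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		clock_t start = clock();
		/* ... contended work would go here ... */
		record_latency(start);
	}

	printf("worst refill latency: %ld clocks\n", (long)worst_refill);
	return 0;
}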
@@ -861,6 +954,7 @@ restart:
}
local_irq_restore(irq_flags);
+ ASSERT(obj);
/* Pre-emptively migrate object to CPU L1 cache */
prefetchw(obj);
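
The prefetchw() call in the last hunk is only a performance hint: it asks the CPU to start pulling the returned object's cache line in for writing before the caller touches it. A rough user-space analogue, using GCC's __builtin_prefetch() rather than the kernel macro (illustrative only, correctness never depends on the hint):

#include <stdlib.h>

int main(void)
{
	int *obj = malloc(sizeof(*obj));
	if (obj == NULL)
		return 1;

	__builtin_prefetch(obj, 1);	/* rw=1: prefetch for an upcoming write */
	*obj = 42;			/* the write the hint anticipates */

	free(obj);
	return 0;
}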