Diffstat (limited to 'module/spl/spl-kmem.c')
-rw-r--r--  module/spl/spl-kmem.c  91
1 file changed, 50 insertions, 41 deletions
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index f78f820aa..3900c9cf0 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -825,6 +825,7 @@ EXPORT_SYMBOL(vmem_free_debug);
struct list_head spl_kmem_cache_list; /* List of caches */
struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
+taskq_t *spl_kmem_cache_taskq; /* Task queue for ageing / reclaim */
static int spl_cache_flush(spl_kmem_cache_t *skc,
spl_kmem_magazine_t *skm, int flush);
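
For reference, this is the shape of the taskq interface the rest of the patch leans on. The prototypes below follow the SPL taskq header of this era and are quoted from memory, so treat the exact spellings as an assumption rather than a verbatim excerpt:

typedef void (task_func_t)(void *);

taskq_t   *taskq_create(const char *name, int nthreads, pri_t pri,
                        int minalloc, int maxalloc, uint_t flags);
taskqid_t  taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
                        uint_t flags, clock_t expire_time);
int        taskq_cancel_id(taskq_t *tq, taskqid_t id);
void       taskq_destroy(taskq_t *tq);

taskq_dispatch_delay() queues func(arg) to run once the system clock (ddi_get_lbolt()) reaches expire_time, and taskq_cancel_id() cancels a pending dispatch using the id that the dispatch returned.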
@@ -1243,50 +1244,59 @@ spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
SRETURN(0);
}
-/*
- * Called regularly on all caches to age objects out of the magazines
- * which have not been access in skc->skc_delay seconds. This prevents
- * idle magazines from holding memory which might be better used by
- * other caches or parts of the system. The delay is present to
- * prevent thrashing the magazine.
- */
static void
spl_magazine_age(void *data)
{
- spl_kmem_magazine_t *skm =
- spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work);
- spl_kmem_cache_t *skc = skm->skm_cache;
+ spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
+ spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
ASSERT(skm->skm_magic == SKM_MAGIC);
- ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(skc->skc_mag[skm->skm_cpu] == skm);
+ ASSERT(skm->skm_cpu == smp_processor_id());
- if (skm->skm_avail > 0 &&
- time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
- (void)spl_cache_flush(skc, skm, skm->skm_refill);
-
- if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
- schedule_delayed_work_on(skm->skm_cpu, &skm->skm_work,
- skc->skc_delay / 3 * HZ);
+ if (skm->skm_avail > 0)
+ if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
+ (void) spl_cache_flush(skc, skm, skm->skm_refill);
}
/*
- * Called regularly to keep a downward pressure on the size of idle
- * magazines and to release free slabs from the cache. This function
- * never calls the registered reclaim function, that only occurs
- * under memory pressure or with a direct call to spl_kmem_reap().
+ * Called regularly to keep a downward pressure on the cache.
+ *
+ * Objects older than skc->skc_delay seconds in the per-cpu magazines will
+ * be returned to the caches. This is done to prevent idle magazines from
+ * holding memory which could be better used elsewhere. The delay is
+ * present to prevent thrashing the magazine.
+ *
+ * The newly released objects may result in empty partial slabs. Those
+ * slabs should be released to the system. Otherwise moving the objects
+ * out of the magazines is just wasted work.
*/
static void
spl_cache_age(void *data)
{
- spl_kmem_cache_t *skc =
- spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
+ spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
+ taskqid_t id = 0;
ASSERT(skc->skc_magic == SKC_MAGIC);
+
+ atomic_inc(&skc->skc_ref);
+ spl_on_each_cpu(spl_magazine_age, skc, 1);
spl_slab_reclaim(skc, skc->skc_reap, 0);
- if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
- schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
+ while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
+ id = taskq_dispatch_delay(
+ spl_kmem_cache_taskq, spl_cache_age, skc, TQ_SLEEP,
+ ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
+
+ /* A destroy issued after the dispatch must cancel it immediately */
+ if (test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && id)
+ taskq_cancel_id(spl_kmem_cache_taskq, id);
+ }
+
+ spin_lock(&skc->skc_lock);
+ skc->skc_taskqid = id;
+ spin_unlock(&skc->skc_lock);
+
+ atomic_dec(&skc->skc_ref);
}
/*
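
The re-dispatch loop in spl_cache_age() above deals with a small race: a destroy may set KMC_BIT_DESTROY after the next run has already been queued, so the freshly dispatched task has to be cancelled again. A minimal standalone sketch of that worker-side pattern follows; the taskq calls are the SPL API used by the patch, while my_state_t, my_taskq and MY_BIT_DESTROY are invented for illustration:

/* Illustrative state, not part of spl-kmem.c. */
static taskq_t *my_taskq;

#define MY_BIT_DESTROY	0

typedef struct my_state {
	unsigned long	s_flags;	/* MY_BIT_DESTROY lives here */
	spinlock_t	s_lock;		/* protects s_taskqid */
	taskqid_t	s_taskqid;	/* id of the most recent dispatch */
	int		s_delay;	/* period in seconds */
} my_state_t;

static void
my_periodic_task(void *data)
{
	my_state_t *s = (my_state_t *)data;
	taskqid_t id = 0;

	/* ... the periodic work itself goes here ... */

	/* Re-dispatch ourselves unless a destroy is under way; retry
	 * while the dispatch itself fails and returns an id of 0. */
	while (!test_bit(MY_BIT_DESTROY, &s->s_flags) && !id) {
		id = taskq_dispatch_delay(my_taskq, my_periodic_task, s,
		    TQ_SLEEP, ddi_get_lbolt() + s->s_delay * HZ);

		/* A destroy that raced with the dispatch cancels it. */
		if (test_bit(MY_BIT_DESTROY, &s->s_flags) && id)
			taskq_cancel_id(my_taskq, id);
	}

	/* Publish the id so the destroy path can cancel the last dispatch. */
	spin_lock(&s->s_lock);
	s->s_taskqid = id;
	spin_unlock(&s->s_lock);
}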
@@ -1380,7 +1390,6 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
skm->skm_size = skc->skc_mag_size;
skm->skm_refill = skc->skc_mag_refill;
skm->skm_cache = skc;
- spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm);
skm->skm_age = jiffies;
skm->skm_cpu = cpu;
}
@@ -1427,11 +1436,6 @@ spl_magazine_create(spl_kmem_cache_t *skc)
}
}
- /* Only after everything is allocated schedule magazine work */
- for_each_online_cpu(i)
- schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work,
- skc->skc_delay / 3 * HZ);
-
SRETURN(0);
}
@@ -1566,8 +1570,9 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
if (rc)
SGOTO(out, rc);
- spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc);
- schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
+ skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
+ spl_cache_age, skc, TQ_SLEEP,
+ ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
down_write(&spl_kmem_cache_sem);
list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
@@ -1600,7 +1605,7 @@ void
spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
{
DECLARE_WAIT_QUEUE_HEAD(wq);
- int i;
+ taskqid_t id;
SENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1609,13 +1614,14 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
list_del_init(&skc->skc_list);
up_write(&spl_kmem_cache_sem);
- /* Cancel any and wait for any pending delayed work */
+ /* Cancel and wait for any pending delayed tasks */
VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- cancel_delayed_work_sync(&skc->skc_work);
- for_each_online_cpu(i)
- cancel_delayed_work_sync(&skc->skc_mag[i]->skm_work);
- flush_scheduled_work();
+ spin_lock(&skc->skc_lock);
+ id = skc->skc_taskqid;
+ spin_unlock(&skc->skc_lock);
+
+ taskq_cancel_id(spl_kmem_cache_taskq, id);
/* Wait until all current callers complete, this is mainly
* to catch the case where a low memory situation triggers a
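
On the destroy side the only subtlety is that skc_taskqid is rewritten by the ageing task every time it re-dispatches itself, which is why the hunk above snapshots it under skc_lock before cancelling. Continuing the illustrative my_state_t sketch (again, only the taskq calls are the real API):

static void
my_state_destroy(my_state_t *s)
{
	taskqid_t id;

	/* Forbid further re-dispatch by the worker ... */
	set_bit(MY_BIT_DESTROY, &s->s_flags);

	/* ... snapshot whatever dispatch is still outstanding ... */
	spin_lock(&s->s_lock);
	id = s->s_taskqid;
	spin_unlock(&s->s_lock);

	/* ... and cancel it; cancelling an id that already ran is harmless. */
	taskq_cancel_id(my_taskq, id);
}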
@@ -2394,6 +2400,8 @@ spl_kmem_init(void)
init_rwsem(&spl_kmem_cache_sem);
INIT_LIST_HEAD(&spl_kmem_cache_list);
+ spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
+ 1, maxclsyspri, 1, 32, TASKQ_PREPOPULATE);
spl_register_shrinker(&spl_kmem_cache_shrinker);
@@ -2432,6 +2440,7 @@ spl_kmem_fini(void)
SENTRY;
spl_unregister_shrinker(&spl_kmem_cache_shrinker);
+ taskq_destroy(spl_kmem_cache_taskq);
SEXIT;
}
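
Taken together, the patch replaces the per-cache and per-magazine delayed work items with one module-wide taskq: created in spl_kmem_init(), fed one delayed ageing task per cache at create time, cancelled per cache at destroy time, and torn down in spl_kmem_fini(). The condensed sketch below strings those four steps into a single demo function for readability; the identifiers mirror the hunks above but the function itself is not part of spl-kmem.c:

/* Condensed, illustrative ordering of the new taskq plumbing. */
static void
kmem_cache_taskq_demo(spl_kmem_cache_t *skc)
{
	/* spl_kmem_init(): one shared, single-threaded, pre-populated queue. */
	spl_kmem_cache_taskq = taskq_create("spl_kmem_cache", 1, maxclsyspri,
	    1, 32, TASKQ_PREPOPULATE);

	/* spl_kmem_cache_create(): first ageing pass, skc_delay / 3 from now. */
	skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
	    spl_cache_age, skc, TQ_SLEEP,
	    ddi_get_lbolt() + skc->skc_delay / 3 * HZ);

	/* spl_kmem_cache_destroy(): flag the cache, then cancel its dispatch. */
	VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
	taskq_cancel_id(spl_kmem_cache_taskq, skc->skc_taskqid);

	/* spl_kmem_fini(): drain and free the shared queue. */
	taskq_destroy(spl_kmem_cache_taskq);
}

Dropping the per-cpu delayed work items also removes the need for flush_scheduled_work() and the per-cpu cancel_delayed_work_sync() calls during destroy, since the single taskq id is all that has to be cancelled.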