author:    Brian Behlendorf <[email protected]>  2012-12-10 10:53:46 -0800
committer: Brian Behlendorf <[email protected]>  2012-12-12 09:56:54 -0800
commit:    a10287e00d13c4c4dbbff14f42b00b03da363fcb
tree:      946bdaf4fc2f002953374abe7e19403463a9c9e6 /module
parent:    296a8e596dac344cf3af5e7f2dff5be12c979d80
kmem-cache: Use taskqs for ageing
Shift the cache and magazine ageing functionality over to the new
delayed taskq interfaces. This allows us to abandon the kernel's
delayed work queue interface and all the compatibility code it
requires.
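
For reference, the delayed taskq interface being adopted works roughly
as follows. This is a minimal sketch, not code from the commit: the
queue name, timing, and example_func() are illustrative placeholders,
while the taskq_*() calls and ddi_get_lbolt() are the real interfaces
used in the diff below.

static void
example_func(void *arg)
{
        /* Periodic work would go here. */
}

static void
example_usage(void)
{
        taskq_t *tq;
        taskqid_t id;

        /* Single-threaded queue, pre-populated with task entries. */
        tq = taskq_create("example", 1, maxclsyspri, 1, 32,
            TASKQ_PREPOPULATE);

        /* Dispatch example_func(NULL) roughly 5 seconds from now. */
        id = taskq_dispatch_delay(tq, example_func, NULL, TQ_SLEEP,
            ddi_get_lbolt() + 5 * HZ);

        /* A still-pending delayed task may be cancelled by its id. */
        taskq_cancel_id(tq, id);

        taskq_destroy(tq);
}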
However, the delayed taskq interface does not allow us to schedule
a task for a specific cpu, so the ageing code was slightly reworked.
The magazine ageing delay has been directly linked to the cache
ageing function. The spl_cache_age() function invokes on_each_cpu()
in order to run spl_magazine_age() on each cpu. It then blocks
waiting for them to complete and promptly reclaims any free slabs.
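
In outline, the reworked ageing pass has the shape sketched below.
This is condensed from the diff that follows; the reference counting
and the re-dispatch loop are omitted here.

static void
spl_cache_age(void *data)
{
        spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;

        /* Run spl_magazine_age() on every cpu; the final argument
         * requests that we block until all cpus have finished. */
        spl_on_each_cpu(spl_magazine_age, skc, 1);

        /* Slabs emptied by the ageing pass are reclaimed at once
         * rather than waiting for memory pressure. */
        spl_slab_reclaim(skc, skc->skc_reap, 0);
}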
While restructuring the code wasn't the primary goal, I think the
new code is far more understandable and maintainable. It should
also help minimize magazine thrashing because free slabs are
immediately released after the magazine is aged.
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module')
 module/spl/spl-kmem.c | 91
 1 file changed, 50 insertions(+), 41 deletions(-)
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index f78f820aa..3900c9cf0 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -825,6 +825,7 @@ EXPORT_SYMBOL(vmem_free_debug);
 
 struct list_head spl_kmem_cache_list;   /* List of caches */
 struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
+taskq_t *spl_kmem_cache_taskq;          /* Task queue for ageing / reclaim */
 
 static int spl_cache_flush(spl_kmem_cache_t *skc,
                            spl_kmem_magazine_t *skm, int flush);
 
@@ -1243,50 +1244,59 @@ spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
         SRETURN(0);
 }
 
-/*
- * Called regularly on all caches to age objects out of the magazines
- * which have not been access in skc->skc_delay seconds.  This prevents
- * idle magazines from holding memory which might be better used by
- * other caches or parts of the system.  The delay is present to
- * prevent thrashing the magazine.
- */
 static void
 spl_magazine_age(void *data)
 {
-        spl_kmem_magazine_t *skm =
-            spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work);
-        spl_kmem_cache_t *skc = skm->skm_cache;
+        spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
+        spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
 
         ASSERT(skm->skm_magic == SKM_MAGIC);
-        ASSERT(skc->skc_magic == SKC_MAGIC);
-        ASSERT(skc->skc_mag[skm->skm_cpu] == skm);
+        ASSERT(skm->skm_cpu == smp_processor_id());
 
-        if (skm->skm_avail > 0 &&
-            time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
-                (void)spl_cache_flush(skc, skm, skm->skm_refill);
-
-        if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
-                schedule_delayed_work_on(skm->skm_cpu, &skm->skm_work,
-                    skc->skc_delay / 3 * HZ);
+        if (skm->skm_avail > 0)
+                if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
+                        (void) spl_cache_flush(skc, skm, skm->skm_refill);
 }
 
 /*
- * Called regularly to keep a downward pressure on the size of idle
- * magazines and to release free slabs from the cache.  This function
- * never calls the registered reclaim function, that only occurs
- * under memory pressure or with a direct call to spl_kmem_reap().
+ * Called regularly to keep a downward pressure on the cache.
+ *
+ * Objects older than skc->skc_delay seconds in the per-cpu magazines will
+ * be returned to the caches.  This is done to prevent idle magazines from
+ * holding memory which could be better used elsewhere.  The delay is
+ * present to prevent thrashing the magazine.
+ *
+ * The newly released objects may result in empty partial slabs.  Those
+ * slabs should be released to the system.  Otherwise moving the objects
+ * out of the magazines is just wasted work.
  */
 static void
 spl_cache_age(void *data)
 {
-        spl_kmem_cache_t *skc =
-            spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
+        spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
+        taskqid_t id = 0;
 
         ASSERT(skc->skc_magic == SKC_MAGIC);
+
+        atomic_inc(&skc->skc_ref);
+        spl_on_each_cpu(spl_magazine_age, skc, 1);
         spl_slab_reclaim(skc, skc->skc_reap, 0);
 
-        if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
-                schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
+        while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
+                id = taskq_dispatch_delay(
+                    spl_kmem_cache_taskq, spl_cache_age, skc, TQ_SLEEP,
+                    ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
+
+                /* Destroy issued after dispatch immediately cancel it */
+                if (test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && id)
+                        taskq_cancel_id(spl_kmem_cache_taskq, id);
+        }
+
+        spin_lock(&skc->skc_lock);
+        skc->skc_taskqid = id;
+        spin_unlock(&skc->skc_lock);
+
+        atomic_dec(&skc->skc_ref);
 }
 
 /*
@@ -1380,7 +1390,6 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
                 skm->skm_size = skc->skc_mag_size;
                 skm->skm_refill = skc->skc_mag_refill;
                 skm->skm_cache = skc;
-                spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm);
                 skm->skm_age = jiffies;
                 skm->skm_cpu = cpu;
         }
@@ -1427,11 +1436,6 @@ spl_magazine_create(spl_kmem_cache_t *skc)
                 }
         }
 
-        /* Only after everything is allocated schedule magazine work */
-        for_each_online_cpu(i)
-                schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work,
-                    skc->skc_delay / 3 * HZ);
-
         SRETURN(0);
 }
 
@@ -1566,8 +1570,9 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         if (rc)
                 SGOTO(out, rc);
 
-        spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc);
-        schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
+        skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
+            spl_cache_age, skc, TQ_SLEEP,
+            ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
 
         down_write(&spl_kmem_cache_sem);
         list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
@@ -1600,7 +1605,7 @@ void
 spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 {
         DECLARE_WAIT_QUEUE_HEAD(wq);
-        int i;
+        taskqid_t id;
         SENTRY;
 
         ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1609,13 +1614,14 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         list_del_init(&skc->skc_list);
         up_write(&spl_kmem_cache_sem);
 
-        /* Cancel any and wait for any pending delayed work */
+        /* Cancel any and wait for any pending delayed tasks */
         VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-        cancel_delayed_work_sync(&skc->skc_work);
-        for_each_online_cpu(i)
-                cancel_delayed_work_sync(&skc->skc_mag[i]->skm_work);
-
-        flush_scheduled_work();
+        spin_lock(&skc->skc_lock);
+        id = skc->skc_taskqid;
+        spin_unlock(&skc->skc_lock);
+
+        taskq_cancel_id(spl_kmem_cache_taskq, id);
 
         /* Wait until all current callers complete, this is mainly
          * to catch the case where a low memory situation triggers a
@@ -2394,6 +2400,8 @@ spl_kmem_init(void)
 
         init_rwsem(&spl_kmem_cache_sem);
         INIT_LIST_HEAD(&spl_kmem_cache_list);
+        spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
+            1, maxclsyspri, 1, 32, TASKQ_PREPOPULATE);
 
         spl_register_shrinker(&spl_kmem_cache_shrinker);
 
@@ -2432,6 +2440,7 @@ spl_kmem_fini(void)
         SENTRY;
 
         spl_unregister_shrinker(&spl_kmem_cache_shrinker);
+        taskq_destroy(spl_kmem_cache_taskq);
 
         SEXIT;
 }