aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2009-02-17 15:52:18 -0800
committerBrian Behlendorf <[email protected]>2009-02-17 15:52:18 -0800
commit9b1b8e4c2459a02fc230cdda65b13908f263fd36 (patch)
treea07c353f6d12800d37ad8da669e0975649cc8c5a
parent1a944a7d0b1d9e62c7ac34d9041300007a656a17 (diff)
kmem slab magazine ageing deadlock
- The previous magazine ageing scheme relied on the on_each_cpu() function to call spl_magazine_age() on each cpu. It turns out this could deadlock with do_flush_tlb_all(), which also relies on the IPI-based on_each_cpu(). To avoid this problem, a per-magazine delayed work item is created and independently scheduled on the correct cpu, removing the need for on_each_cpu(). - Additionally, two unused fields were removed from the type spl_kmem_cache_t; they were holdovers from a previous cleanup. - struct work_struct work - struct timer_list timer
-rw-r--r--include/sys/kmem.h6
-rw-r--r--module/spl/spl-kmem.c28
2 files changed, 27 insertions, 7 deletions
diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index dc66a9153..cad652c91 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -250,10 +250,12 @@ typedef void (*spl_kmem_dtor_t)(void *, void *);
typedef void (*spl_kmem_reclaim_t)(void *);
typedef struct spl_kmem_magazine {
- uint32_t skm_magic; /* Sanity magic */
+ uint32_t skm_magic; /* Sanity magic */
uint32_t skm_avail; /* Available objects */
uint32_t skm_size; /* Magazine size */
uint32_t skm_refill; /* Batch refill size */
+ struct spl_kmem_cache *skm_cache; /* Owned by cache */
+ struct delayed_work skm_work; /* Magazine reclaim work */
unsigned long skm_age; /* Last cache access */
void *skm_objs[0]; /* Object pointers */
} spl_kmem_magazine_t;
@@ -296,8 +298,6 @@ typedef struct spl_kmem_cache {
uint32_t skc_reap; /* Slab reclaim count */
atomic_t skc_ref; /* Ref count callers */
struct delayed_work skc_work; /* Slab reclaim work */
- struct work_struct work;
- struct timer_list timer;
struct list_head skc_list; /* List of caches linkage */
struct list_head skc_complete_list;/* Completely alloc'ed */
struct list_head skc_partial_list; /* Partially alloc'ed */
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index ba7e19b4e..18613e799 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -932,12 +932,22 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
static void
spl_magazine_age(void *data)
{
- spl_kmem_cache_t *skc = data;
- spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
+ spl_kmem_magazine_t *skm =
+ spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work);
+ spl_kmem_cache_t *skc = skm->skm_cache;
+ int i = smp_processor_id();
+
+ ASSERT(skm->skm_magic == SKM_MAGIC);
+ ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(skc->skc_mag[i] == skm);
if (skm->skm_avail > 0 &&
time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
(void)spl_cache_flush(skc, skm, skm->skm_refill);
+
+ if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
+ schedule_delayed_work_on(i, &skm->skm_work,
+ skc->skc_delay / 3 * HZ);
}
/*
@@ -949,12 +959,11 @@ spl_magazine_age(void *data)
static void
spl_cache_age(void *data)
{
- spl_kmem_cache_t *skc =
+ spl_kmem_cache_t *skc =
spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
ASSERT(skc->skc_magic == SKC_MAGIC);
spl_slab_reclaim(skc, skc->skc_reap, 0);
- spl_on_each_cpu(spl_magazine_age, skc, 0);
if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
@@ -1050,6 +1059,8 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
skm->skm_avail = 0;
skm->skm_size = skc->skc_mag_size;
skm->skm_refill = skc->skc_mag_refill;
+ skm->skm_cache = skc;
+ spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm);
skm->skm_age = jiffies;
}
@@ -1095,6 +1106,11 @@ spl_magazine_create(spl_kmem_cache_t *skc)
}
}
+ /* Only after everything is allocated schedule magazine work */
+ for_each_online_cpu(i)
+ schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work,
+ skc->skc_delay / 3 * HZ);
+
RETURN(0);
}
@@ -1245,6 +1261,7 @@ void
spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
{
DECLARE_WAIT_QUEUE_HEAD(wq);
+ int i;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1256,6 +1273,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
/* Cancel any and wait for any pending delayed work */
ASSERT(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
cancel_delayed_work(&skc->skc_work);
+ for_each_online_cpu(i)
+ cancel_delayed_work(&skc->skc_mag[i]->skm_work);
+
flush_scheduled_work();
/* Wait until all current callers complete, this is mainly