-rw-r--r--	include/os/linux/spl/sys/kmem_cache.h	  4
-rw-r--r--	module/os/linux/spl/spl-kmem-cache.c	140
-rw-r--r--	module/zfs/arc.c	 42
3 files changed, 20 insertions, 166 deletions
diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h
index ce18eb474..30d2dd5ed 100644
--- a/include/os/linux/spl/sys/kmem_cache.h
+++ b/include/os/linux/spl/sys/kmem_cache.h
@@ -124,7 +124,6 @@ extern struct rw_semaphore spl_kmem_cache_sem;
typedef int (*spl_kmem_ctor_t)(void *, void *, int);
typedef void (*spl_kmem_dtor_t)(void *, void *);
-typedef void (*spl_kmem_reclaim_t)(void *);
typedef struct spl_kmem_magazine {
uint32_t skm_magic; /* Sanity magic */
@@ -174,7 +173,6 @@ typedef struct spl_kmem_cache {
uint32_t skc_mag_refill; /* Magazine refill count */
spl_kmem_ctor_t skc_ctor; /* Constructor */
spl_kmem_dtor_t skc_dtor; /* Destructor */
- spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
void *skc_private; /* Private data */
void *skc_vmp; /* Unused */
struct kmem_cache *skc_linux_cache; /* Linux slab cache if used */
@@ -210,7 +208,7 @@ typedef struct spl_kmem_cache {
extern spl_kmem_cache_t *spl_kmem_cache_create(char *name, size_t size,
size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor,
- spl_kmem_reclaim_t reclaim, void *priv, void *vmp, int flags);
+ void *reclaim, void *priv, void *vmp, int flags);
extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
kmem_cbrc_t (*)(void *, void *, size_t, void *));
extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
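Note for callers: after this change the reclaim slot of spl_kmem_cache_create() is a dead placeholder and the function asserts that it is NULL. A minimal sketch of a conforming caller against the new prototype, assuming a hypothetical object type and cache name (the arc.c hunks below are updated in the same way):

#include <sys/kmem_cache.h>

typedef struct my_obj {
	uint64_t	mo_id;
} my_obj_t;

static spl_kmem_cache_t *my_cache;

static void
my_cache_init(void)
{
	/*
	 * args: name, size, align, ctor, dtor, reclaim, priv, vmp, flags.
	 * reclaim must now be NULL (asserted), as must vmp; flags 0
	 * selects the default cache behavior.
	 */
	my_cache = spl_kmem_cache_create("my_obj_cache", sizeof (my_obj_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
}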
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index 034727410..76b89b254 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -25,7 +25,6 @@
#include <linux/percpu_compat.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
-#include <sys/shrinker.h>
#include <sys/taskq.h>
#include <sys/timer.h>
#include <sys/vmem.h>
@@ -883,7 +882,7 @@ spl_magazine_destroy(spl_kmem_cache_t *skc)
*/
spl_kmem_cache_t *
spl_kmem_cache_create(char *name, size_t size, size_t align,
- spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
+ spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, void *reclaim,
void *priv, void *vmp, int flags)
{
gfp_t lflags = kmem_flags_convert(KM_SLEEP);
@@ -897,6 +896,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
ASSERT0(flags & KMC_NOHASH);
ASSERT0(flags & KMC_QCACHE);
ASSERT(vmp == NULL);
+ ASSERT(reclaim == NULL);
might_sleep();
@@ -915,7 +915,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_ctor = ctor;
skc->skc_dtor = dtor;
- skc->skc_reclaim = reclaim;
skc->skc_private = priv;
skc->skc_vmp = vmp;
skc->skc_linux_cache = NULL;
@@ -1606,78 +1605,11 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
EXPORT_SYMBOL(spl_kmem_cache_free);
/*
- * The generic shrinker function for all caches. Under Linux a shrinker
- * may not be tightly coupled with a slab cache. In fact Linux always
- * systematically tries calling all registered shrinker callbacks which
- * report that they contain unused objects. Because of this we only
- * register one shrinker function in the shim layer for all slab caches.
- * We always attempt to shrink all caches when this generic shrinker
- * is called.
- *
- * The _count() function returns the number of free-able objects.
- * The _scan() function returns the number of objects that were freed.
- */
-static unsigned long
-spl_kmem_cache_shrinker_count(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- spl_kmem_cache_t *skc = NULL;
- int alloc = 0;
-
- down_read(&spl_kmem_cache_sem);
- list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
- alloc += skc->skc_obj_alloc;
- }
- up_read(&spl_kmem_cache_sem);
-
- return (MAX(alloc, 0));
-}
-
-static unsigned long
-spl_kmem_cache_shrinker_scan(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- spl_kmem_cache_t *skc = NULL;
- int alloc = 0;
-
- /*
- * No shrinking in a transaction context. Can cause deadlocks.
- */
- if (spl_fstrans_check())
- return (SHRINK_STOP);
-
- down_read(&spl_kmem_cache_sem);
- list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
- uint64_t oldalloc = skc->skc_obj_alloc;
- spl_kmem_cache_reap_now(skc);
- if (oldalloc > skc->skc_obj_alloc)
- alloc += oldalloc - skc->skc_obj_alloc;
- }
- up_read(&spl_kmem_cache_sem);
-
- /*
- * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
- * This functionality only exists to work around a rare issue where
- * shrink_slabs() is repeatedly invoked by many cores causing the
- * system to thrash.
- */
- if (spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE)
- return (SHRINK_STOP);
-
- return (MAX(alloc, 0));
-}
-
-SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker,
- spl_kmem_cache_shrinker_count, spl_kmem_cache_shrinker_scan,
- KMC_DEFAULT_SEEKS);
-
-/*
- * Call the registered reclaim function for a cache. Depending on how
- * many and which objects are released it may simply repopulate the
- * local magazine which will then need to age-out. Objects which cannot
- * fit in the magazine we will be released back to their slabs which will
- * also need to age out before being release. This is all just best
- * effort and we do not want to thrash creating and destroying slabs.
+ * Depending on how many and which objects are released it may simply
+ * repopulate the local magazine which will then need to age-out. Objects
+ * which cannot fit in the magazine will be released back to their slabs
+ * which will also need to age out before being released. This is all just
+ * best effort and we do not want to thrash creating and destroying slabs.
*/
void
spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
@@ -1685,16 +1617,10 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- atomic_inc(&skc->skc_ref);
+ if (skc->skc_flags & KMC_SLAB)
+ return;
- /*
- * Execute the registered reclaim callback if it exists.
- */
- if (skc->skc_flags & KMC_SLAB) {
- if (skc->skc_reclaim)
- skc->skc_reclaim(skc->skc_private);
- goto out;
- }
+ atomic_inc(&skc->skc_ref);
/*
* Prevent concurrent cache reaping when contended.
@@ -1702,39 +1628,6 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
goto out;
- /*
- * When a reclaim function is available it may be invoked repeatedly
- * until at least a single slab can be freed. This ensures that we
- * do free memory back to the system. This helps minimize the chance
- * of an OOM event when the bulk of memory is used by the slab.
- *
- * When free slabs are already available the reclaim callback will be
- * skipped. Additionally, if no forward progress is detected despite
- * a reclaim function the cache will be skipped to avoid deadlock.
- *
- * Longer term this would be the correct place to add the code which
- * repacks the slabs in order minimize fragmentation.
- */
- if (skc->skc_reclaim) {
- uint64_t objects = UINT64_MAX;
- int do_reclaim;
-
- do {
- spin_lock(&skc->skc_lock);
- do_reclaim =
- (skc->skc_slab_total > 0) &&
- ((skc->skc_slab_total-skc->skc_slab_alloc) == 0) &&
- (skc->skc_obj_alloc < objects);
-
- objects = skc->skc_obj_alloc;
- spin_unlock(&skc->skc_lock);
-
- if (do_reclaim)
- skc->skc_reclaim(skc->skc_private);
-
- } while (do_reclaim);
- }
-
/* Reclaim from the magazine and free all now empty slabs. */
if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) {
spl_kmem_magazine_t *skm;
@@ -1773,12 +1666,13 @@ EXPORT_SYMBOL(spl_kmem_cache_reap_active);
void
spl_kmem_reap(void)
{
- struct shrink_control sc;
-
- sc.nr_to_scan = KMC_REAP_CHUNK;
- sc.gfp_mask = GFP_KERNEL;
+ spl_kmem_cache_t *skc = NULL;
- (void) spl_kmem_cache_shrinker_scan(NULL, &sc);
+ down_read(&spl_kmem_cache_sem);
+ list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
+ spl_kmem_cache_reap_now(skc);
+ }
+ up_read(&spl_kmem_cache_sem);
}
EXPORT_SYMBOL(spl_kmem_reap);
@@ -1791,7 +1685,6 @@ spl_kmem_cache_init(void)
spl_kmem_cache_kmem_threads, maxclsyspri,
spl_kmem_cache_kmem_threads * 8, INT_MAX,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
- spl_register_shrinker(&spl_kmem_cache_shrinker);
return (0);
}
@@ -1799,6 +1692,5 @@ spl_kmem_cache_init(void)
void
spl_kmem_cache_fini(void)
{
- spl_unregister_shrinker(&spl_kmem_cache_shrinker);
taskq_destroy(spl_kmem_cache_taskq);
}
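With the generic shrinker unregistered, the SPL no longer frees slab memory on its own in response to kernel memory pressure; reaping is entirely caller driven. spl_kmem_reap() now simply walks spl_kmem_cache_list and calls spl_kmem_cache_reap_now() on every cache, and spl_kmem_cache_reap_now() returns immediately for KMC_SLAB-backed caches. A sketch of how a consumer might drive this, assuming a hypothetical pressure hook:

#include <sys/kmem_cache.h>

/*
 * Hypothetical low-memory hook for an SPL consumer.  Reap one cache of
 * particular interest first, then ask every registered SPL cache to
 * release its empty magazines and slabs.  Caches backed directly by
 * the Linux slab (KMC_SLAB) are skipped inside
 * spl_kmem_cache_reap_now().
 */
static void
my_memory_pressure_hook(spl_kmem_cache_t *hot_cache)
{
	if (hot_cache != NULL)
		spl_kmem_cache_reap_now(hot_cache);

	spl_kmem_reap();
}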
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index ea22686cc..5b9df43d2 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -381,11 +381,6 @@ static int arc_min_prescient_prefetch_ms;
int arc_lotsfree_percent = 10;
/*
- * hdr_recl() uses this to determine if the arc is up and running.
- */
-static boolean_t arc_initialized;
-
-/*
* The arc has filled available memory and has now warmed up.
*/
boolean_t arc_warm;
@@ -1198,22 +1193,6 @@ buf_dest(void *vbuf, void *unused)
arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
}
-/*
- * Reclaim callback -- invoked when memory is low.
- */
-/* ARGSUSED */
-static void
-hdr_recl(void *unused)
-{
- dprintf("hdr_recl called\n");
- /*
- * umem calls the reclaim func when we destroy the buf cache,
- * which is after we do arc_fini().
- */
- if (arc_initialized)
- zthr_wakeup(arc_reap_zthr);
-}
-
static void
buf_init(void)
{
@@ -1249,12 +1228,12 @@ retry:
}
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
- 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+ 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
- hdr_recl, NULL, NULL, 0);
+ NULL, NULL, NULL, 0);
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
- HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
+ HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
NULL, NULL, 0);
buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
@@ -4688,9 +4667,6 @@ arc_kmem_reap_soon(void)
static boolean_t
arc_adjust_cb_check(void *arg, zthr_t *zthr)
{
- if (!arc_initialized)
- return (B_FALSE);
-
/*
* This is necessary so that any changes which may have been made to
* many of the zfs_arc_* module parameters will be propagated to
@@ -4778,9 +4754,6 @@ arc_adjust_cb(void *arg, zthr_t *zthr)
static boolean_t
arc_reap_cb_check(void *arg, zthr_t *zthr)
{
- if (!arc_initialized)
- return (B_FALSE);
-
int64_t free_memory = arc_available_memory();
/*
@@ -7348,12 +7321,6 @@ arc_init(void)
arc_state_init();
- /*
- * The arc must be "uninitialized", so that hdr_recl() (which is
- * registered by buf_init()) will not access arc_reap_zthr before
- * it is created.
- */
- ASSERT(!arc_initialized);
buf_init();
list_create(&arc_prune_list, sizeof (arc_prune_t),
@@ -7377,7 +7344,6 @@ arc_init(void)
arc_reap_zthr = zthr_create_timer(arc_reap_cb_check,
arc_reap_cb, NULL, SEC2NSEC(1));
- arc_initialized = B_TRUE;
arc_warm = B_FALSE;
/*
@@ -7412,8 +7378,6 @@ arc_fini(void)
/* Use B_TRUE to ensure *all* buffers are evicted */
arc_flush(NULL, B_TRUE);
- arc_initialized = B_FALSE;
-
if (arc_ksp != NULL) {
kstat_delete(arc_ksp);
arc_ksp = NULL;
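On the ARC side, removing hdr_recl() and arc_initialized leaves the reap zthr as the only reclaim driver: it is created with a one-second timer (SEC2NSEC(1)) and its check callback decides for itself whether memory is short, rather than waiting to be woken by a slab reclaim callback. A simplified, paraphrased sketch of that polling decision; the real logic lives in arc_reap_cb_check() in arc.c:

/*
 * Paraphrased sketch of the check half of the reap zthr, illustrating
 * the polling model that replaces the old hdr_recl() wakeup.  This is
 * not the exact upstream body of arc_reap_cb_check().
 */
/* ARGSUSED */
static boolean_t
reap_check_sketch(void *arg, zthr_t *zthr)
{
	int64_t free_memory = arc_available_memory();

	/* A deficit (negative value) means the reap callback should run. */
	return (free_memory < 0 ? B_TRUE : B_FALSE);
}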