aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2011-03-29 18:08:59 -0700
committerBrian Behlendorf <[email protected]>2011-04-07 09:52:10 -0700
commit7cb67b45f33fd7a61af24c675c7347eb5264b38c (patch)
tree519875ed0fdffbd9946299ca5b3eda533cda0623
parent1834f2d8b715d25bafbb0e4a099994f45c3211ae (diff)
Add direct+indirect ARC reclaim
Under OpenSolaris all memory reclaim is done asynchronously. Under Linux memory reclaim is done asynchronously _and_ synchronously. When a process allocates memory with GFP_KERNEL it explicitly allows the kernel to do reclaim on its behalf to satisfy the allocation. If that GFP_KERNEL allocation fails the kernel may take more drastic measures to reclaim the memory such as killing user space processes. This was observed to happen with ZFS because the ARC could consume a large fraction of the system memory but no synchronous reclaim could be performed on it. The result was GFP_KERNEL allocations could fail resulting in OOM events, and only moments later the arc_reclaim thread would free unused memory from the ARC. This change leaves the arc_thread in place to manage the fundamental ARC behavior. But it adds a synchronous (direct) reclaim path for the ARC which can be called when memory is badly needed. It also adds an asynchronous (indirect) reclaim path which is called much more frequently to prune the ARC slab caches.
-rw-r--r--module/zfs/arc.c59
1 files changed, 59 insertions, 0 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 63b659b9a..9ca8ad17d 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -282,6 +282,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_size;
kstat_named_t arcstat_l2_hdr_size;
kstat_named_t arcstat_memory_throttle_count;
+ kstat_named_t arcstat_memory_direct_count;
+ kstat_named_t arcstat_memory_indirect_count;
kstat_named_t arcstat_no_grow;
kstat_named_t arcstat_tempreserve;
kstat_named_t arcstat_loaned_bytes;
@@ -344,6 +346,8 @@ static arc_stats_t arc_stats = {
{ "l2_size", KSTAT_DATA_UINT64 },
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
+ { "memory_direct_count", KSTAT_DATA_UINT64 },
+ { "memory_indirect_count", KSTAT_DATA_UINT64 },
{ "arc_no_grow", KSTAT_DATA_UINT64 },
{ "arc_tempreserve", KSTAT_DATA_UINT64 },
{ "arc_loaned_bytes", KSTAT_DATA_UINT64 },
@@ -2171,6 +2175,51 @@ arc_reclaim_thread(void)
thread_exit();
}
+#ifdef _KERNEL
+/*
+ * Estimate the number of reclaimable ARC pages: the pages held above
+ * arc_c_min, scaled down by arc_shrink_shift.
+ *
+ * arc_size may be smaller than arc_c_min, in which case a plain
+ * subtraction would go negative (or wrap to a huge value for unsigned
+ * operands) and a bogus page count would be handed back to the kernel.
+ * The difference is therefore taken in signed 64-bit arithmetic and
+ * clamped so that zero is reported when nothing is above the minimum.
+ */
+static int
+arc_shrinker_reclaimable(void)
+{
+	int64_t excess = (int64_t)arc_size - (int64_t)arc_c_min;
+
+	if (excess <= 0)
+		return (0);
+
+	return (btop(excess) >> arc_shrink_shift);
+}
+
+/*
+ * Under Linux the arc shrinker may be called for synchronous (direct)
+ * reclaim, or asynchronous (indirect) reclaim.  When called by kswapd
+ * for indirect reclaim we take a conservative approach and just reap
+ * free slabs from the ARC caches.  If this proves to be insufficient
+ * direct reclaim will be triggered.  In direct reclaim a more aggressive
+ * strategy is used, data is evicted from the ARC and free slabs reaped.
+ *
+ * Returns -1 when the caller does not permit filesystem reclaim
+ * (__GFP_FS is clear in gfp_mask) or when arc_reclaim_thr_lock could
+ * not be taken.  Otherwise returns the reclaimable page estimate; when
+ * nr_to_scan is zero nothing is reclaimed and only the estimate is
+ * returned.
+ */
+SPL_SHRINKER_CALLBACK_PROTO(arc_shrinker_func, cb, nr_to_scan, gfp_mask)
+{
+	arc_reclaim_strategy_t strategy;
+	int arc_reclaim;
+
+	/* Not allowed to perform filesystem reclaim */
+	if (!(gfp_mask & __GFP_FS))
+		return (-1);
+
+	/* Return number of reclaimable pages based on arc_shrink_shift */
+	if (nr_to_scan == 0)
+		return (arc_shrinker_reclaimable());
+
+	/* Reclaim in progress */
+	if (mutex_tryenter(&arc_reclaim_thr_lock) == 0)
+		return (-1);
+
+	/* kswapd means indirect reclaim; any other caller is direct. */
+	if (current_is_kswapd()) {
+		strategy = ARC_RECLAIM_CONS;
+		ARCSTAT_INCR(arcstat_memory_indirect_count, 1);
+	} else {
+		strategy = ARC_RECLAIM_AGGR;
+		ARCSTAT_INCR(arcstat_memory_direct_count, 1);
+	}
+
+	arc_kmem_reap_now(strategy);
+
+	/* Re-evaluate after reaping, while still holding the lock. */
+	arc_reclaim = arc_shrinker_reclaimable();
+	mutex_exit(&arc_reclaim_thr_lock);
+
+	return (arc_reclaim);
+}
+
+SPL_SHRINKER_DECLARE(arc_shrinker, arc_shrinker_func, DEFAULT_SEEKS);
+#endif /* _KERNEL */
+
/*
* Adapt arc info given the number of bytes we are trying to add and
* the state that we are comming from. This function is only called
@@ -3485,6 +3534,12 @@ arc_init(void)
* need to limit the cache to 1/8 of VM size.
*/
arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
+ /*
+ * Register a shrinker to support synchronous (direct) memory
+ * reclaim from the arc. This is done to prevent kswapd from
+ * swapping out pages when it is preferable to shrink the arc.
+ */
+ spl_register_shrinker(&arc_shrinker);
#endif
/* set min cache to 1/32 of all memory, or 64MB, whichever is more */
@@ -3602,6 +3657,10 @@ void
arc_fini(void)
{
mutex_enter(&arc_reclaim_thr_lock);
+#ifdef _KERNEL
+ spl_unregister_shrinker(&arc_shrinker);
+#endif /* _KERNEL */
+
arc_thread_exit = 1;
while (arc_thread_exit != 0)
cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock);