From 11f552fa9074a94dac3463300e369013ec2f1544 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 27 Jul 2015 13:17:32 -0700 Subject: Update arc_available_memory() to check freemem While Linux doesn't provide detailed information about the state of the VM, it does provide us with the total number of free pages. This information should be incorporated into the arc_available_memory() calculation rather than solely relying on a signal from direct reclaim. Conceptually this brings arc_available_memory() back in sync with illumos. It is also desirable that the target amount of free memory be tunable on a system. While the default values are expected to work well for most workloads, there may be cases where custom values are needed. The zfs_arc_sys_free module option was added for this purpose. zfs_arc_sys_free - The target number of bytes the ARC should leave as free memory on the system. This value can be checked in /proc/spl/kstat/zfs/arcstats, and setting this module option will override the default value. Signed-off-by: Brian Behlendorf Closes #3637 --- man/man5/zfs-module-parameters.5 | 13 ++++++++ module/zfs/arc.c | 64 +++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index ccf386135..c0365ea5c 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -566,6 +566,19 @@ log2(fraction of arc to reclaim) Default value: \fB5\fR. .RE +.sp +.ne 2 +.na +\fBzfs_arc_sys_free\fR (ulong) +.ad +.RS 12n +The target number of bytes the ARC should leave as free memory on the system. +Defaults to the larger of 1/64 of physical memory or 512K. Setting this +option to a non-zero value will override the default. +.sp +Default value: \fB0\fR. 
+.RE + .sp .ne 2 .na diff --git a/module/zfs/arc.c b/module/zfs/arc.c index c2fdf1630..97bfb2c25 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -240,6 +240,7 @@ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ /* * These tunables are Linux specific */ +unsigned long zfs_arc_sys_free = 0; int zfs_arc_memory_throttle_disable = 1; int zfs_arc_min_prefetch_lifespan = 0; int zfs_arc_p_aggressive_disable = 1; @@ -473,6 +474,8 @@ typedef struct arc_stats { kstat_named_t arcstat_meta_limit; kstat_named_t arcstat_meta_max; kstat_named_t arcstat_meta_min; + kstat_named_t arcstat_need_free; + kstat_named_t arcstat_sys_free; } arc_stats_t; static arc_stats_t arc_stats = { @@ -564,7 +567,9 @@ static arc_stats_t arc_stats = { { "arc_meta_used", KSTAT_DATA_UINT64 }, { "arc_meta_limit", KSTAT_DATA_UINT64 }, { "arc_meta_max", KSTAT_DATA_UINT64 }, - { "arc_meta_min", KSTAT_DATA_UINT64 } + { "arc_meta_min", KSTAT_DATA_UINT64 }, + { "arc_need_free", KSTAT_DATA_UINT64 }, + { "arc_sys_free", KSTAT_DATA_UINT64 } }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) @@ -633,6 +638,8 @@ static arc_state_t *arc_l2c_only; #define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ #define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */ #define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ +#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */ +#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */ #define L2ARC_IS_VALID_COMPRESS(_c_) \ ((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY) @@ -3222,12 +3229,6 @@ int64_t last_free_memory; free_memory_reason_t last_free_reason; #ifdef _KERNEL -#ifdef __linux__ -/* - * expiration time for arc_no_grow set by direct memory reclaim. - */ -static clock_t arc_grow_time = 0; -#else /* * Additional reserve of pages for pp_reserve. */ @@ -3237,7 +3238,6 @@ int64_t arc_pages_pp_reserve = 64; * Additional reserve of pages for swapfs. 
*/ int64_t arc_swapfs_reserve = 64; -#endif #endif /* _KERNEL */ /* @@ -3250,26 +3250,14 @@ arc_available_memory(void) { int64_t lowest = INT64_MAX; free_memory_reason_t r = FMR_UNKNOWN; - #ifdef _KERNEL -#ifdef __linux__ - /* - * Under Linux we are not allowed to directly interrogate the global - * memory state. Instead rely on observing that direct reclaim has - * recently occurred therefore the system must be low on memory. The - * exact values returned are not critical but should be small. - */ - if (ddi_time_after_eq(ddi_get_lbolt(), arc_grow_time)) - lowest = PAGE_SIZE; - else - lowest = -PAGE_SIZE; -#else int64_t n; +#ifdef __linux__ + pgcnt_t needfree = btop(arc_need_free); + pgcnt_t lotsfree = btop(arc_sys_free); + pgcnt_t desfree = 0; +#endif - /* - * Platforms like illumos have greater visibility in to the memory - * subsystem and can return a more detailed analysis of memory. - */ if (needfree > 0) { n = PAGESIZE * (-needfree); if (n < lowest) { @@ -3291,6 +3279,7 @@ arc_available_memory(void) r = FMR_LOTSFREE; } +#ifndef __linux__ /* * check to make sure that swapfs has enough space so that anon * reservations can still succeed. 
anon_resvmem() checks that the @@ -3319,6 +3308,7 @@ arc_available_memory(void) lowest = n; r = FMR_PAGES_PP_MAXIMUM; } +#endif #if defined(__i386) /* @@ -3357,12 +3347,11 @@ arc_available_memory(void) r = FMR_ZIO_ARENA; } } -#endif /* __linux__ */ -#else +#else /* _KERNEL */ /* Every 100 calls, free a small amount */ if (spa_get_random(100) == 0) lowest = -1024; -#endif +#endif /* _KERNEL */ last_free_memory = lowest; last_free_reason = r; @@ -3480,7 +3469,7 @@ arc_reclaim_thread(void) to_free = (arc_c >> arc_shrink_shift) - free_memory; if (to_free > 0) { #ifdef _KERNEL - to_free = MAX(to_free, ptob(needfree)); + to_free = MAX(to_free, arc_need_free); #endif arc_shrink(to_free); } @@ -3507,9 +3496,11 @@ arc_reclaim_thread(void) /* * We're either no longer overflowing, or we * can't evict anything more, so we should wake - * up any threads before we go to sleep. + * up any threads before we go to sleep and clear + * arc_need_free since nothing more can be done. */ cv_broadcast(&arc_reclaim_waiters_cv); + arc_need_free = 0; /* * Block until signaled, or after one second (we @@ -3713,7 +3704,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc) ARCSTAT_BUMP(arcstat_memory_indirect_count); } else { arc_no_grow = B_TRUE; - arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz); + arc_need_free = ptob(sc->nr_to_scan); ARCSTAT_BUMP(arcstat_memory_direct_count); } @@ -5288,6 +5279,10 @@ arc_tuning_update(void) /* Valid range: 1 - N ticks */ if (zfs_arc_min_prefetch_lifespan) arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan; + + /* Valid range: 0 - */ + if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free)) + arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem)); } void @@ -5329,6 +5324,10 @@ arc_init(void) * swapping out pages when it is preferable to shrink the arc. 
*/ spl_register_shrinker(&arc_shrinker); + + /* Set to 1/64 of all memory or a minimum of 512K */ + arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024)); + arc_need_free = 0; #endif /* Set min cache to allow safe operation of arc_adapt() */ @@ -7064,4 +7063,7 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup"); module_param(l2arc_norw, int, 0644); MODULE_PARM_DESC(l2arc_norw, "No reads during writes"); +module_param(zfs_arc_sys_free, ulong, 0644); +MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes"); + #endif -- cgit v1.2.3