diff options
author | Matthew Ahrens <[email protected]> | 2020-08-17 20:04:04 -0700 |
---|---|---|
committer | GitHub <[email protected]> | 2020-08-17 20:04:04 -0700 |
commit | 85ec5cbae228defb4332da4cf0ebb64d53aea157 (patch) | |
tree | f26f4f1e35fb32052c5294bdee661e629d3e1f48 /module/zfs/arc.c | |
parent | 994de7e4b748465f175b7cc48995b5c44adf2200 (diff) |
Include scatter_chunk_waste in arc_size
The ARC caches data in scatter ABD's, which are collections of pages,
which are typically 4K. Therefore, the space used to cache each block
is rounded up to a multiple of 4K. The ABD subsystem tracks this wasted
memory in the `scatter_chunk_waste` kstat. However, the ARC's `size` is
not aware of the memory used by this round-up; it only accounts for the
size that it requested from the ABD subsystem.
Therefore, the ARC is effectively using more memory than it is aware of,
due to the `scatter_chunk_waste`. This impacts observability, e.g.
`arcstat` will show that the ARC is using less memory than it
effectively is. It also impacts how the ARC responds to memory
pressure. As the amount of `scatter_chunk_waste` changes, it appears to
the ARC as memory pressure, so it needs to resize `arc_c`.
If the sector size (`1<<ashift`) is the same as the page size (or
larger), there won't be any waste. If the (compressed) block size is
relatively large compared to the page size, the amount of
`scatter_chunk_waste` will be small, so the problematic effects are
minimal.
However, if using 512B sectors (`ashift=9`), and the (compressed) block
size is small (e.g. `compression=on` with the default `volblocksize=8k`
or a decreased `recordsize`), the amount of `scatter_chunk_waste` can be
very large. On a production system, with `arc_size` at a constant 50%
of memory, `scatter_chunk_waste` has been observed to be 10-30% of
memory.
This commit adds `scatter_chunk_waste` to `arc_size`, and adds a new
`waste` field to `arcstat`. As a result, the ARC's memory usage is more
observable, and `arc_c` does not need to be adjusted as frequently.
Reviewed-by: Pavel Zakharov <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Ryan Moeller <[email protected]>
Signed-off-by: Matthew Ahrens <[email protected]>
Closes #10701
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r-- | module/zfs/arc.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 06c2d5fac..f63f92b86 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -583,6 +583,7 @@ arc_stats_t arc_stats = { { "arc_sys_free", KSTAT_DATA_UINT64 }, { "arc_raw_size", KSTAT_DATA_UINT64 }, { "cached_only_in_progress", KSTAT_DATA_UINT64 }, + { "abd_chunk_waste_size", KSTAT_DATA_UINT64 }, }; #define ARCSTAT_MAX(stat, val) { \ @@ -685,6 +686,7 @@ aggsum_t astat_dnode_size; aggsum_t astat_bonus_size; aggsum_t astat_hdr_size; aggsum_t astat_l2_hdr_size; +aggsum_t astat_abd_chunk_waste_size; hrtime_t arc_growtime; list_t arc_prune_list; @@ -2611,9 +2613,18 @@ arc_space_consume(uint64_t space, arc_space_type_t type) case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, space); break; + case ARC_SPACE_ABD_CHUNK_WASTE: + /* + * Note: this includes space wasted by all scatter ABD's, not + * just those allocated by the ARC. But the vast majority of + * scatter ABD's come from the ARC, because other users are + * very short-lived. + */ + aggsum_add(&astat_abd_chunk_waste_size, space); + break; } - if (type != ARC_SPACE_DATA) + if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) aggsum_add(&arc_meta_used, space); aggsum_add(&arc_size, space); @@ -2648,9 +2659,12 @@ arc_space_return(uint64_t space, arc_space_type_t type) case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, -space); break; + case ARC_SPACE_ABD_CHUNK_WASTE: + aggsum_add(&astat_abd_chunk_waste_size, -space); + break; } - if (type != ARC_SPACE_DATA) { + if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) { ASSERT(aggsum_compare(&arc_meta_used, space) >= 0); /* * We use the upper bound here rather than the precise value @@ -7090,6 +7104,8 @@ arc_kstat_update(kstat_t *ksp, int rw) ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size); ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size); ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size); + ARCSTAT(arcstat_abd_chunk_waste_size) = + 
aggsum_value(&astat_abd_chunk_waste_size); as->arcstat_memory_all_bytes.value.ui64 = arc_all_memory(); @@ -7329,6 +7345,7 @@ arc_state_init(void) aggsum_init(&astat_bonus_size, 0); aggsum_init(&astat_dnode_size, 0); aggsum_init(&astat_dbuf_size, 0); + aggsum_init(&astat_abd_chunk_waste_size, 0); arc_anon->arcs_state = ARC_STATE_ANON; arc_mru->arcs_state = ARC_STATE_MRU; @@ -7381,6 +7398,7 @@ arc_state_fini(void) aggsum_fini(&astat_bonus_size); aggsum_fini(&astat_dnode_size); aggsum_fini(&astat_dbuf_size); + aggsum_fini(&astat_abd_chunk_waste_size); } uint64_t |