author    Brian Behlendorf <[email protected]>    2015-03-17 15:08:22 -0700
committer Brian Behlendorf <[email protected]>    2015-03-20 10:35:20 -0700
commit    bc88866657979c5658441e201e19df365c67ddfe (patch)
tree      2725fb8c50d648642b2d3a9f10b0114aeb8aff0b
parent    2cbb06b561f500732de2214eb590149d0c4f3cf5 (diff)
Fix arc_adjust_meta() behavior
The goal of this function is to evict enough meta data buffers from the ARC in order to enforce the arc_meta_limit. Achieving this is slightly more complicated than it appears because it is common for data buffers to have holds on meta data buffers. In addition, dnode meta data buffers will be held by the dnodes in the block, preventing them from being freed. This means we can't simply traverse the ARC and expect to always find enough unheld meta data buffers to release.

Therefore, this function has been updated to make alternating passes over the ARC, releasing data buffers and then newly unheld meta data buffers. This ensures forward progress is maintained and arc_meta_used will decrease. Normally this is sufficient, but if required the ARC will call the registered prune callbacks, causing dentries and inodes to be dropped from the VFS cache. This will make dnode meta data buffers available for reclaim.

The total number of restarts is limited by zfs_arc_meta_adjust_restarts to prevent spinning in the rare case where all meta data is pinned.

Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Pavel Snajdr <[email protected]>
Issue #3160
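For readers who want the shape of the algorithm without walking the diff below, here is a minimal stand-alone C model of the alternating-pass loop described above. The sizes, the evict() stub, and its effect on pinned metadata are invented for illustration; only the control-flow shape (alternate data/metadata passes, escalate the prune request, bound the retries) mirrors the patch.

#include <stdio.h>

enum buf_type { BUFC_DATA, BUFC_METADATA };

static long meta_used = 1000;	/* pretend arc_meta_used */
static long meta_limit = 600;	/* pretend arc_meta_limit */
static long meta_pinned = 300;	/* metadata still held by dnodes */

/*
 * Toy eviction: a data pass releases holds (making more metadata
 * evictable), a metadata pass frees whatever is currently unheld.
 */
static void
evict(enum buf_type type)
{
	if (type == BUFC_DATA) {
		if (meta_pinned >= 50)
			meta_pinned -= 50;
	} else {
		long unheld = meta_used - meta_pinned;
		meta_used -= (unheld > 100) ? 100 : unheld;
	}
}

int
main(void)
{
	enum buf_type type = BUFC_DATA;
	unsigned long restarts = 4096;	/* zfs_arc_meta_adjust_restarts */
	long prune = 0;

	while (meta_used > meta_limit && restarts-- > 0) {
		evict(type);
		if (type == BUFC_DATA) {
			/* Next pass: target newly unheld metadata. */
			type = BUFC_METADATA;
		} else {
			/* Escalate the prune request, then retry data. */
			type = BUFC_DATA;
			prune += 10000;	/* zfs_arc_meta_prune default */
			/* the real code calls arc_do_user_prune(prune) here */
		}
	}
	printf("meta_used=%ld (limit %ld), cumulative prune request %ld\n",
	    meta_used, meta_limit, prune);
	return (0);
}

With the toy defaults shown, the model settles below the limit after a handful of passes; the real function is bounded the same way by zfs_arc_meta_adjust_restarts, so it cannot spin when every metadata buffer is pinned.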
-rw-r--r--    man/man5/zfs-module-parameters.5    14
-rw-r--r--    module/zfs/arc.c                    89
2 files changed, 80 insertions, 23 deletions
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 4b3dc3666..fe31e292a 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -414,6 +414,20 @@ Default value: \fB10,000\fR.
.sp
.ne 2
.na
+\fBzfs_arc_meta_adjust_restarts\fR (ulong)
+.ad
+.RS 12n
+The number of restart passes to make while scanning the ARC attempting
+to free buffers in order to stay below the \fBzfs_arc_meta_limit\fR.
+This value should not need to be tuned but is available to facilitate
+performance analysis.
+.sp
+Default value: \fB4096\fR.
+.RE
+
+.sp
+.ne 2
+.na
\fBzfs_arc_min\fR (ulong)
.ad
.RS 12n
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index f9f0008c0..bdf116c35 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -221,6 +221,11 @@ unsigned long zfs_arc_max = 0;
unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
+/*
+ * Limit the number of restarts in arc_adjust_meta()
+ */
+unsigned long zfs_arc_meta_adjust_restarts = 4096;
+
/* The 6 states: */
static arc_state_t ARC_anon;
static arc_state_t ARC_mru;
@@ -2195,15 +2200,30 @@ arc_do_user_evicts(void)
}
/*
- * Evict only meta data objects from the cache leaving the data objects.
- * This is only used to enforce the tunable arc_meta_limit, if we are
- * unable to evict enough buffers notify the user via the prune callback.
+ * The goal of this function is to evict enough meta data buffers from the
+ * ARC in order to enforce the arc_meta_limit. Achieving this is slightly
+ * more complicated than it appears because it is common for data buffers
+ * to have holds on meta data buffers. In addition, dnode meta data buffers
+ * will be held by the dnodes in the block preventing them from being freed.
+ * This means we can't simply traverse the ARC and expect to always find
+ * enough unheld meta data buffers to release.
+ *
+ * Therefore, this function has been updated to make alternating passes
+ * over the ARC releasing data buffers and then newly unheld meta data
+ * buffers. This ensures forward progress is maintained and arc_meta_used
+ * will decrease. Normally this is sufficient, but if required the ARC
+ * will call the registered prune callbacks causing dentries and inodes to
+ * be dropped from the VFS cache. This will make dnode meta data buffers
+ * available for reclaim.
*/
static void
arc_adjust_meta(void)
{
- int64_t adjustmnt, delta;
+ int64_t adjustmnt, delta, prune = 0;
+ arc_buf_contents_t type = ARC_BUFC_DATA;
+ unsigned long restarts = zfs_arc_meta_adjust_restarts;
+restart:
/*
* This slightly differs than the way we evict from the mru in
* arc_adjust because we don't have a "target" value (i.e. no
@@ -2214,9 +2234,9 @@ arc_adjust_meta(void)
*/
adjustmnt = arc_meta_used - arc_meta_limit;
- if (adjustmnt > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
- delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustmnt);
- arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA);
+ if (adjustmnt > 0 && arc_mru->arcs_lsize[type] > 0) {
+ delta = MIN(arc_mru->arcs_lsize[type], adjustmnt);
+ arc_evict(arc_mru, 0, delta, FALSE, type);
adjustmnt -= delta;
}
@@ -2230,31 +2250,50 @@ arc_adjust_meta(void)
* simply decrement the amount of data evicted from the MRU.
*/
- if (adjustmnt > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
- delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustmnt);
- arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA);
+ if (adjustmnt > 0 && arc_mfu->arcs_lsize[type] > 0) {
+ delta = MIN(arc_mfu->arcs_lsize[type], adjustmnt);
+ arc_evict(arc_mfu, 0, delta, FALSE, type);
}
- adjustmnt = arc_mru->arcs_lsize[ARC_BUFC_METADATA] +
- arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] - arc_meta_limit;
+ adjustmnt = arc_meta_used - arc_meta_limit;
- if (adjustmnt > 0 && arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] > 0) {
+ if (adjustmnt > 0 && arc_mru_ghost->arcs_lsize[type] > 0) {
delta = MIN(adjustmnt,
- arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA]);
- arc_evict_ghost(arc_mru_ghost, 0, delta, ARC_BUFC_METADATA);
+ arc_mru_ghost->arcs_lsize[type]);
+ arc_evict_ghost(arc_mru_ghost, 0, delta, type);
+ adjustmnt -= delta;
}
- adjustmnt = arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] +
- arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA] - arc_meta_limit;
-
- if (adjustmnt > 0 && arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA] > 0) {
+ if (adjustmnt > 0 && arc_mfu_ghost->arcs_lsize[type] > 0) {
delta = MIN(adjustmnt,
- arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA]);
- arc_evict_ghost(arc_mfu_ghost, 0, delta, ARC_BUFC_METADATA);
+ arc_mfu_ghost->arcs_lsize[type]);
+ arc_evict_ghost(arc_mfu_ghost, 0, delta, type);
}
- if (arc_meta_used > arc_meta_limit)
- arc_do_user_prune(zfs_arc_meta_prune);
+ /*
+ * If after attempting to make the requested adjustment to the ARC
+ * the meta limit is still being exceeded then request that the
+ * higher layers drop some cached objects which have holds on ARC
+ * meta buffers. Requests to the upper layers will be made with
+ * increasingly large scan sizes until the ARC is below the limit.
+ */
+ if (arc_meta_used > arc_meta_limit) {
+ if (type == ARC_BUFC_DATA) {
+ type = ARC_BUFC_METADATA;
+ } else {
+ type = ARC_BUFC_DATA;
+
+ if (zfs_arc_meta_prune) {
+ prune += zfs_arc_meta_prune;
+ arc_do_user_prune(prune);
+ }
+ }
+
+ if (restarts > 0) {
+ restarts--;
+ goto restart;
+ }
+ }
}
/*
@@ -5609,6 +5648,10 @@ MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
module_param(zfs_arc_meta_prune, int, 0644);
MODULE_PARM_DESC(zfs_arc_meta_prune, "Meta objects to scan for prune");
+module_param(zfs_arc_meta_adjust_restarts, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_meta_adjust_restarts,
+ "Limit number of restarts in arc_adjust_meta");
+
module_param(zfs_arc_grow_retry, int, 0644);
MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size");
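
As a quick way to confirm that the new tunable described in the man-page hunk is live on a running system, a user-space check like the sketch below can read it back. This program is not part of the patch; it only assumes the standard /sys/module/zfs/parameters/ location that module_param() exposes for a parameter registered with mode 0644.

#include <stdio.h>

int
main(void)
{
	/* Standard sysfs path created by module_param() for the zfs module. */
	const char *path =
	    "/sys/module/zfs/parameters/zfs_arc_meta_adjust_restarts";
	unsigned long restarts;
	FILE *fp = fopen(path, "r");

	if (fp == NULL) {
		perror("fopen");
		return (1);
	}
	if (fscanf(fp, "%lu", &restarts) == 1)
		printf("zfs_arc_meta_adjust_restarts = %lu\n", restarts);
	fclose(fp);
	return (0);
}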