-rw-r--r--  include/sys/arc.h                  4
-rw-r--r--  man/man5/zfs-module-parameters.5  31
-rw-r--r--  module/zfs/arc.c                  81
-rw-r--r--  module/zfs/dbuf.c                 14
-rw-r--r--  module/zfs/dnode.c                 4
5 files changed, 110 insertions(+), 24 deletions(-)
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 8f0f6cb55..d8a85e830 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -134,7 +134,9 @@ typedef enum arc_space_type {
ARC_SPACE_META,
ARC_SPACE_HDRS,
ARC_SPACE_L2HDRS,
- ARC_SPACE_OTHER,
+ ARC_SPACE_DBUF,
+ ARC_SPACE_DNODE,
+ ARC_SPACE_BONUS,
ARC_SPACE_NUMTYPES
} arc_space_type_t;
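
The effect of retiring ARC_SPACE_OTHER is that every consumer must now tag
its accounting calls with the specific bucket the allocation belongs to. A
minimal sketch of the pattern, mirroring the dnode.c hunks at the end of this
diff (illustrative only, error handling omitted):

    dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
    arc_space_consume(sizeof (dnode_t), ARC_SPACE_DNODE); /* was ARC_SPACE_OTHER */
    /* ... dnode in use ... */
    arc_space_return(sizeof (dnode_t), ARC_SPACE_DNODE);
    kmem_cache_free(dnode_cache, dn);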
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index cd92851de..41fc20deb 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -384,6 +384,37 @@ Default value: \fB2\fR.
.sp
.ne 2
.na
+\fBzfs_arc_dnode_limit\fR (ulong)
+.ad
+.RS 12n
+When the number of bytes consumed by dnodes in the ARC exceeds this number of
+bytes, try to unpin some of that memory in response to demand for non-metadata.
+This value acts as a floor on the amount of dnode metadata that is retained.
+
+See also \fBzfs_arc_meta_prune\fR which serves a similar purpose but is used
+when the amount of metadata in the ARC exceeds \fBzfs_arc_meta_limit\fR rather
+than in response to overall demand for non-metadata.
+
+.sp
+Default value: \fB10\fR% of \fBzfs_arc_meta_limit\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBzfs_arc_dnode_reduce_percent\fR (ulong)
+.ad
+.RS 12n
+Percentage of ARC dnodes to try to scan in response to demand for non-metadata
+when the number of bytes consumed by dnodes exceeds \fBzfs_arc_dnode_limit\fR.
+
+.sp
+Default value: \fB10\fR% of the number of dnodes in the ARC.
+.RE
+
+.sp
+.ne 2
+.na
\fBzfs_arc_average_blocksize\fR (int)
.ad
.RS 12n
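
To make the interaction of the two new tunables concrete, the following
stand-alone user-space sketch reproduces the arithmetic the eviction path
performs (the constants are illustrative assumptions, not defaults from any
particular system, and the real sizeof (dnode_t) is platform-dependent):

    #include <stdio.h>

    int
    main(void)
    {
            unsigned long long meta_limit = 4ULL << 30;        /* assume 4 GiB */
            unsigned long long dnode_limit = meta_limit / 10;  /* default: 10% */
            unsigned long long dnode_size = 600ULL << 20;      /* assumed usage */
            unsigned long long dnode_sz = 1024;        /* assumed sizeof (dnode_t) */
            unsigned long long reduce_percent = 10;    /* zfs_arc_dnode_reduce_percent */

            if (dnode_size > dnode_limit) {
                    unsigned long long n = (dnode_size - dnode_limit) /
                        dnode_sz / reduce_percent;
                    printf("ask the prune callbacks to scan ~%llu dnodes\n", n);
            }
            return (0);
    }

With these numbers the excess is roughly 190 MiB, so on the order of 19,500
dnode-sized objects would be offered to the shrinker per eviction pass.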
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 2dbca8da9..6d8bd48a3 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -231,6 +231,8 @@ unsigned long zfs_arc_max = 0;
unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
unsigned long zfs_arc_meta_min = 0;
+unsigned long zfs_arc_dnode_limit = 0;
+unsigned long zfs_arc_dnode_reduce_percent = 10;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
@@ -328,13 +330,17 @@ typedef struct arc_stats {
*/
kstat_named_t arcstat_metadata_size;
/*
- * Number of bytes consumed by various buffers and structures
- * not actually backed with ARC buffers. This includes bonus
- * buffers (allocated directly via zio_buf_* functions),
- * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t
- * cache), and dnode_t structures (allocated via dnode_t cache).
+ * Number of bytes consumed by dmu_buf_impl_t objects.
*/
- kstat_named_t arcstat_other_size;
+ kstat_named_t arcstat_dbuf_size;
+ /*
+ * Number of bytes consumed by dnode_t objects.
+ */
+ kstat_named_t arcstat_dnode_size;
+ /*
+ * Number of bytes consumed by bonus buffers.
+ */
+ kstat_named_t arcstat_bonus_size;
/*
* Total number of bytes consumed by ARC buffers residing in the
* arc_anon state. This includes *all* buffers in the arc_anon
@@ -473,6 +479,7 @@ typedef struct arc_stats {
kstat_named_t arcstat_prune;
kstat_named_t arcstat_meta_used;
kstat_named_t arcstat_meta_limit;
+ kstat_named_t arcstat_dnode_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_sync_wait_for_async;
@@ -517,7 +524,9 @@ static arc_stats_t arc_stats = {
{ "hdr_size", KSTAT_DATA_UINT64 },
{ "data_size", KSTAT_DATA_UINT64 },
{ "metadata_size", KSTAT_DATA_UINT64 },
- { "other_size", KSTAT_DATA_UINT64 },
+ { "dbuf_size", KSTAT_DATA_UINT64 },
+ { "dnode_size", KSTAT_DATA_UINT64 },
+ { "bonus_size", KSTAT_DATA_UINT64 },
{ "anon_size", KSTAT_DATA_UINT64 },
{ "anon_evictable_data", KSTAT_DATA_UINT64 },
{ "anon_evictable_metadata", KSTAT_DATA_UINT64 },
@@ -570,6 +579,7 @@ static arc_stats_t arc_stats = {
{ "arc_prune", KSTAT_DATA_UINT64 },
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
+ { "arc_dnode_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
@@ -641,9 +651,13 @@ static arc_state_t *arc_l2c_only;
#define arc_tempreserve ARCSTAT(arcstat_tempreserve)
#define arc_loaned_bytes ARCSTAT(arcstat_loaned_bytes)
#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */
+#define arc_dnode_limit ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
+#define arc_dbuf_size ARCSTAT(arcstat_dbuf_size) /* dbuf metadata */
+#define arc_dnode_size ARCSTAT(arcstat_dnode_size) /* dnode metadata */
+#define arc_bonus_size ARCSTAT(arcstat_bonus_size) /* bonus buffer metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
@@ -803,6 +817,7 @@ static void arc_access(arc_buf_hdr_t *, kmutex_t *);
static boolean_t arc_is_overflowing(void);
static void arc_buf_watch(arc_buf_t *);
static void arc_tuning_update(void);
+static void arc_prune_async(int64_t);
static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
@@ -1680,8 +1695,14 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
case ARC_SPACE_META:
ARCSTAT_INCR(arcstat_metadata_size, space);
break;
- case ARC_SPACE_OTHER:
- ARCSTAT_INCR(arcstat_other_size, space);
+ case ARC_SPACE_BONUS:
+ ARCSTAT_INCR(arcstat_bonus_size, space);
+ break;
+ case ARC_SPACE_DNODE:
+ ARCSTAT_INCR(arcstat_dnode_size, space);
+ break;
+ case ARC_SPACE_DBUF:
+ ARCSTAT_INCR(arcstat_dbuf_size, space);
break;
case ARC_SPACE_HDRS:
ARCSTAT_INCR(arcstat_hdr_size, space);
@@ -1711,8 +1732,14 @@ arc_space_return(uint64_t space, arc_space_type_t type)
case ARC_SPACE_META:
ARCSTAT_INCR(arcstat_metadata_size, -space);
break;
- case ARC_SPACE_OTHER:
- ARCSTAT_INCR(arcstat_other_size, -space);
+ case ARC_SPACE_BONUS:
+ ARCSTAT_INCR(arcstat_bonus_size, -space);
+ break;
+ case ARC_SPACE_DNODE:
+ ARCSTAT_INCR(arcstat_dnode_size, -space);
+ break;
+ case ARC_SPACE_DBUF:
+ ARCSTAT_INCR(arcstat_dbuf_size, -space);
break;
case ARC_SPACE_HDRS:
ARCSTAT_INCR(arcstat_hdr_size, -space);
@@ -2599,6 +2626,18 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* we're evicting all available buffers.
*/
while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+ int sublist_idx = multilist_get_random_index(ml);
+ uint64_t scan_evicted = 0;
+
+ /*
+ * Try to reduce pinned dnodes with a floor of arc_dnode_limit.
+ * Request that 10% of the LRUs be scanned by the superblock
+ * shrinker (tunable via zfs_arc_dnode_reduce_percent).
+ */
+ if (type == ARC_BUFC_DATA && arc_dnode_size > arc_dnode_limit)
+ arc_prune_async((arc_dnode_size - arc_dnode_limit) /
+ sizeof (dnode_t) / zfs_arc_dnode_reduce_percent);
+
/*
* Start eviction using a randomly selected sublist,
* this is to try and evenly balance eviction across all
@@ -2606,9 +2645,6 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* (e.g. index 0) would cause evictions to favor certain
* sublists over others.
*/
- int sublist_idx = multilist_get_random_index(ml);
- uint64_t scan_evicted = 0;
-
for (i = 0; i < num_sublists; i++) {
uint64_t bytes_remaining;
uint64_t bytes_evicted;
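
For context, arc_prune_async() (declared above, implemented elsewhere in
arc.c) hands the requested count to the prune callbacks that consumers
registered with arc_add_prune_callback(); on Linux these callbacks ask the
superblock shrinker to scan the dentry/inode LRUs, dropping holds that pin
dnodes so the data eviction here can make progress. A hedged sketch of the
registration side, with hypothetical function and variable names:

    /* Sketch only: the real callback type is arc_prune_func_t, and
     * registration happens in filesystem (e.g. ZPL) setup code. */
    static void
    my_prune_cb(int64_t nr_to_scan, void *priv)
    {
            /* e.g. ask the VFS to scan nr_to_scan objects on its LRUs */
    }

    static arc_prune_t *my_prune;

    void
    my_register(void)
    {
            my_prune = arc_add_prune_callback(my_prune_cb, NULL);
    }

    void
    my_unregister(void)
    {
            arc_remove_prune_callback(my_prune);
    }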
@@ -5329,6 +5365,7 @@ arc_tuning_update(void)
arc_c = arc_c_max;
arc_p = (arc_c >> 1);
arc_meta_limit = MIN(arc_meta_limit, (3 * arc_c_max) / 4);
+ arc_dnode_limit = arc_meta_limit / 10;
}
/* Valid range: 32M - <arc_c_max> */
@@ -5345,6 +5382,7 @@ arc_tuning_update(void)
(zfs_arc_meta_min <= arc_c_max)) {
arc_meta_min = zfs_arc_meta_min;
arc_meta_limit = MAX(arc_meta_limit, arc_meta_min);
+ arc_dnode_limit = arc_meta_limit / 10;
}
/* Valid range: <arc_meta_min> - <arc_c_max> */
@@ -5353,6 +5391,12 @@ arc_tuning_update(void)
(zfs_arc_meta_limit <= arc_c_max))
arc_meta_limit = zfs_arc_meta_limit;
+ /* Valid range: <arc_meta_min> - <arc_c_max> */
+ if ((zfs_arc_dnode_limit) && (zfs_arc_dnode_limit != arc_dnode_limit) &&
+ (zfs_arc_dnode_limit >= zfs_arc_meta_min) &&
+ (zfs_arc_dnode_limit <= arc_c_max))
+ arc_dnode_limit = zfs_arc_dnode_limit;
+
/* Valid range: 1 - N */
if (zfs_arc_grow_retry)
arc_grow_retry = zfs_arc_grow_retry;
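
Because the default assignment appears in three separate hunks, the effective
precedence for arc_dnode_limit is easier to see collapsed into one hedged
summary (not literal code from this patch):

    arc_dnode_limit = arc_meta_limit / 10;          /* default: 10% of meta limit */
    if (zfs_arc_dnode_limit != 0 &&
        zfs_arc_dnode_limit >= zfs_arc_meta_min &&
        zfs_arc_dnode_limit <= arc_c_max)
            arc_dnode_limit = zfs_arc_dnode_limit;  /* valid override wins */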
@@ -5451,6 +5495,8 @@ arc_init(void)
arc_meta_max = 0;
/* Set limit to 3/4 of arc_c_max with a floor of arc_meta_min */
arc_meta_limit = MAX((3 * arc_c_max) / 4, arc_meta_min);
+ /* Default dnode limit is 10% of overall meta limit */
+ arc_dnode_limit = arc_meta_limit / 10;
/* Apply user specified tunings */
arc_tuning_update();
@@ -7204,4 +7250,11 @@ MODULE_PARM_DESC(zfs_arc_lotsfree_percent,
module_param(zfs_arc_sys_free, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
+module_param(zfs_arc_dnode_limit, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc");
+
+module_param(zfs_arc_dnode_reduce_percent, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent,
+ "Percentage of excess dnodes to try to unpin");
+
#endif
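
With other_size gone, tools that report ARC overhead should read the three
new counters instead. A small user-space sketch (assumes the
/proc/spl/kstat/zfs/arcstats interface provided by ZFS on Linux):

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            FILE *f = fopen("/proc/spl/kstat/zfs/arcstats", "r");
            char line[256], name[64];
            unsigned int type;
            unsigned long long val;

            if (f == NULL) {
                    perror("arcstats");
                    return (1);
            }
            while (fgets(line, sizeof (line), f) != NULL) {
                    if (sscanf(line, "%63s %u %llu", name, &type, &val) != 3)
                            continue;       /* skip kstat header lines */
                    if (strcmp(name, "dbuf_size") == 0 ||
                        strcmp(name, "dnode_size") == 0 ||
                        strcmp(name, "bonus_size") == 0)
                            printf("%-12s %llu bytes\n", name, val);
            }
            fclose(f);
            return (0);
    }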
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 61cc83e41..af2f20d63 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -738,7 +738,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
ASSERT3U(bonuslen, <=, db->db.db_size);
db->db.db_data = zio_buf_alloc(max_bonuslen);
- arc_space_consume(max_bonuslen, ARC_SPACE_OTHER);
+ arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
if (bonuslen < max_bonuslen)
bzero(db->db.db_data, max_bonuslen);
if (bonuslen)
@@ -969,7 +969,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
dnode_t *dn = DB_DNODE(db);
int bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
dr->dt.dl.dr_data = zio_buf_alloc(bonuslen);
- arc_space_consume(bonuslen, ARC_SPACE_OTHER);
+ arc_space_consume(bonuslen, ARC_SPACE_BONUS);
bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
int size = db->db.db_size;
@@ -1867,7 +1867,7 @@ dbuf_clear(dmu_buf_impl_t *db)
int slots = DB_DNODE(db)->dn_num_slots;
int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
zio_buf_free(db->db.db_data, bonuslen);
- arc_space_return(bonuslen, ARC_SPACE_OTHER);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
}
db->db.db_data = NULL;
db->db_state = DB_UNCACHED;
@@ -2032,7 +2032,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db.db_offset = DMU_BONUS_BLKID;
db->db_state = DB_UNCACHED;
/* the bonus dbuf is not placed in the hash table */
- arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
return (db);
} else if (blkid == DMU_SPILL_BLKID) {
db->db.db_size = (blkptr != NULL) ?
@@ -2066,7 +2066,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
db->db_state = DB_UNCACHED;
mutex_exit(&dn->dn_dbufs_mtx);
- arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
if (parent && parent != dn->dn_dbuf)
dbuf_add_ref(parent, db);
@@ -2143,7 +2143,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
ASSERT(db->db_data_pending == NULL);
kmem_cache_free(dbuf_cache, db);
- arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
}
typedef struct dbuf_prefetch_arg {
@@ -2983,7 +2983,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
int slots = DB_DNODE(db)->dn_num_slots;
int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
zio_buf_free(*datap, bonuslen);
- arc_space_return(bonuslen, ARC_SPACE_OTHER);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
}
db->db_data_pending = NULL;
drp = &db->db_last_dirty;
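
Note the subtlety the dbuf.c hunks preserve: with multi-slot (large) dnodes
the bonus length is a function of the slot count, so the byte count charged
at allocation time must be recomputed identically at free time or the new
bonus_size bucket would drift. Schematically (condensed from the hunks
above, not literal code):

    int slots = DB_DNODE(db)->dn_num_slots;
    int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);

    /* allocation side (dbuf_read_impl) */
    db->db.db_data = zio_buf_alloc(bonuslen);
    arc_space_consume(bonuslen, ARC_SPACE_BONUS);

    /* free side (dbuf_clear, dbuf_sync_leaf) */
    zio_buf_free(db->db.db_data, bonuslen);
    arc_space_return(bonuslen, ARC_SPACE_BONUS);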
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 975bd5fb8..8015f54ed 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -475,7 +475,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dnh->dnh_dnode = dn;
mutex_exit(&os->os_lock);
- arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dnode_t), ARC_SPACE_DNODE);
return (dn);
}
@@ -531,7 +531,7 @@ dnode_destroy(dnode_t *dn)
dmu_zfetch_fini(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn);
- arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
+ arc_space_return(sizeof (dnode_t), ARC_SPACE_DNODE);
if (complete_os_eviction)
dmu_objset_evict_done(os);