aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorGeorge Amanakis <[email protected]>2021-11-11 21:52:16 +0100
committerGitHub <[email protected]>2021-11-11 12:52:16 -0800
commitc9d62d1356380669a8cd7ca1979d2d38e5e9777f (patch)
tree698ba85225bc160e2d6d31c8dd0c6ac1ee080d5c /module
parent420b44488ff91dc0f67c24faae5d580122b08cfb (diff)
Introduce a tunable to exclude special class buffers from L2ARC
Special allocation class or dedup vdevs may have roughly the same performance as L2ARC vdevs. Introduce a new tunable to exclude those buffers from being cacheable on L2ARC. Reviewed-by: Don Brady <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: George Amanakis <[email protected]> Closes #11761 Closes #12285
Diffstat (limited to 'module')
-rw-r--r--module/zfs/arc.c12
-rw-r--r--module/zfs/dbuf.c71
-rw-r--r--module/zfs/dmu.c2
-rw-r--r--module/zfs/dmu_objset.c34
4 files changed, 112 insertions, 7 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index f0330150f..79e2d4381 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -870,6 +870,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
/*
+ * l2arc_exclude_special : A zfs module parameter that controls whether buffers
+ * present on special or dedup vdevs are eligible for caching in L2ARC.
+ * If set to 1, exclude dbufs on special or dedup vdevs from being
+ * cached to L2ARC.
+ */
+int l2arc_exclude_special = 0;
+
+/*
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
* metadata and data are cached from ARC into L2ARC.
*/
@@ -11097,6 +11105,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
"Cache only MFU data from ARC into L2ARC");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
+ "If set to 1 exclude dbufs on special vdevs from being cached to "
+ "L2ARC.");
+
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 289247c6e..fe54da425 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -53,6 +53,7 @@
#include <cityhash.h>
#include <sys/spa_impl.h>
#include <sys/wmsum.h>
+#include <sys/vdev_impl.h>
kstat_t *dbuf_ksp;
@@ -599,6 +600,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
}
}
+/*
+ * Report whether the buffer behind this dbuf may be cached in L2ARC.
+ *
+ * The objset's secondary cache policy is evaluated first: the dbuf qualifies
+ * when the policy is ZFS_CACHE_ALL, or when it is ZFS_CACHE_METADATA and the
+ * dbuf holds metadata.  With l2arc_exclude_special set to 0 that policy alone
+ * decides the result; the block pointer is deliberately not consulted, so a
+ * dbuf whose block pointer is NULL or a hole stays eligible.
+ *
+ * With l2arc_exclude_special set, buffers located on a child of the pool's
+ * root vdev whose allocation bias is special or dedup are excluded from
+ * L2ARC.  A dbuf without an allocated block pointer cannot be attributed to
+ * any vdev and is reported as not cacheable in that mode.
+ */
+boolean_t
+dbuf_is_l2cacheable(dmu_buf_impl_t *db)
+{
+	zfs_cache_type_t cache = db->db_objset->os_secondary_cache;
+	blkptr_t *bp = db->db_blkptr;
+	vdev_t *rvd, *vd;
+	uint64_t vdev;
+
+	if (cache != ZFS_CACHE_ALL &&
+	    !(cache == ZFS_CACHE_METADATA && dbuf_is_metadata(db)))
+		return (B_FALSE);
+
+	/* Tunable disabled: block placement does not matter. */
+	if (l2arc_exclude_special == 0)
+		return (B_TRUE);
+
+	/* No allocated block, hence no vdev to classify. */
+	if (bp == NULL || BP_IS_HOLE(bp))
+		return (B_FALSE);
+
+	vdev = DVA_GET_VDEV(bp->blk_dva);
+	rvd = db->db_objset->os_spa->spa_root_vdev;
+
+	/* An id that does not resolve to a child vdev is not excluded. */
+	if (vdev >= rvd->vdev_children)
+		return (B_TRUE);
+	vd = rvd->vdev_child[vdev];
+	if (vd == NULL)
+		return (B_TRUE);
+
+	if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL ||
+	    vd->vdev_alloc_bias == VDEV_BIAS_DEDUP)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Report whether a block of dnode dn at the given indirection level, described
+ * by bp, may be cached in L2ARC.
+ *
+ * The objset's secondary cache policy is evaluated first: the block qualifies
+ * when the policy is ZFS_CACHE_ALL, or when it is ZFS_CACHE_METADATA and the
+ * block is either an indirect block (level > 0) or belongs to a dnode of a
+ * metadata object type.  With l2arc_exclude_special set to 0 that policy
+ * alone decides the result and bp is not consulted, so a NULL or hole bp does
+ * not make the block ineligible.
+ *
+ * With l2arc_exclude_special set, blocks located on a child of the pool's
+ * root vdev whose allocation bias is special or dedup are excluded.  A NULL
+ * or hole bp cannot be attributed to any vdev and yields B_FALSE in that mode.
+ */
+static inline boolean_t
+dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
+{
+	zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache;
+	vdev_t *rvd, *vd;
+	uint64_t vdev;
+
+	if (cache != ZFS_CACHE_ALL &&
+	    !(cache == ZFS_CACHE_METADATA && (level > 0 ||
+	    DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type))))
+		return (B_FALSE);
+
+	/* Tunable disabled: block placement does not matter. */
+	if (l2arc_exclude_special == 0)
+		return (B_TRUE);
+
+	/* No allocated block, hence no vdev to classify. */
+	if (bp == NULL || BP_IS_HOLE(bp))
+		return (B_FALSE);
+
+	vdev = DVA_GET_VDEV(bp->blk_dva);
+	rvd = dn->dn_objset->os_spa->spa_root_vdev;
+
+	/* An id that does not resolve to a child vdev is not excluded. */
+	if (vdev >= rvd->vdev_children)
+		return (B_TRUE);
+	vd = rvd->vdev_child[vdev];
+	if (vd == NULL)
+		return (B_TRUE);
+
+	if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL ||
+	    vd->vdev_alloc_bias == VDEV_BIAS_DEDUP)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
/*
* This function *must* return indices evenly distributed between all
@@ -1527,7 +1590,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
DTRACE_SET_STATE(db, "read issued");
mutex_exit(&db->db_mtx);
- if (DBUF_IS_L2CACHEABLE(db))
+ if (dbuf_is_l2cacheable(db))
aflags |= ARC_FLAG_L2CACHE;
dbuf_add_ref(db, NULL);
@@ -3370,7 +3433,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
dpa->dpa_arg = arg;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+ if (dnode_level_is_l2cacheable(&bp, dn, level))
dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
/*
@@ -3388,7 +3451,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
zbookmark_phys_t zb;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+ if (dnode_level_is_l2cacheable(&bp, dn, level))
iter_aflags |= ARC_FLAG_L2CACHE;
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
@@ -4986,7 +5049,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
children_ready_cb = dbuf_write_children_ready;
dr->dr_zio = arc_write(pio, os->os_spa, txg,
- &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
+ &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
&zp, dbuf_write_ready,
children_ready_cb, dbuf_write_physdone,
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index f12c5eda8..eee3e70bb 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1839,7 +1839,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
- zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
+ zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index af107fb8a..b30a9d619 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -63,6 +63,8 @@
#include <sys/dmu_recv.h>
#include <sys/zfs_project.h>
#include "zfs_namecheck.h"
+#include <sys/vdev_impl.h>
+#include <sys/arc.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
multilist_get_num_sublists(ml));
}
+/*
+ * Report whether the objset's own buffer (the block referenced by os_rootbp)
+ * may be cached in L2ARC.
+ *
+ * The buffer qualifies under the secondary cache policy when that policy is
+ * ZFS_CACHE_ALL or ZFS_CACHE_METADATA.  With l2arc_exclude_special set to 0
+ * the policy alone decides the result and os_rootbp is not consulted, so a
+ * NULL or hole root block pointer does not make the buffer ineligible.
+ *
+ * With l2arc_exclude_special set, a buffer located on a child of the pool's
+ * root vdev whose allocation bias is special or dedup is excluded.  A NULL or
+ * hole os_rootbp cannot be attributed to any vdev and yields B_FALSE in that
+ * mode.
+ */
+static inline boolean_t
+dmu_os_is_l2cacheable(objset_t *os)
+{
+	zfs_cache_type_t cache = os->os_secondary_cache;
+	blkptr_t *bp = os->os_rootbp;
+	vdev_t *rvd, *vd;
+	uint64_t vdev;
+
+	if (cache != ZFS_CACHE_ALL && cache != ZFS_CACHE_METADATA)
+		return (B_FALSE);
+
+	/* Tunable disabled: block placement does not matter. */
+	if (l2arc_exclude_special == 0)
+		return (B_TRUE);
+
+	/* No allocated block, hence no vdev to classify. */
+	if (bp == NULL || BP_IS_HOLE(bp))
+		return (B_FALSE);
+
+	vdev = DVA_GET_VDEV(bp->blk_dva);
+	rvd = os->os_spa->spa_root_vdev;
+
+	/* An id that does not resolve to a child vdev is not excluded. */
+	if (vdev >= rvd->vdev_children)
+		return (B_TRUE);
+	vd = rvd->vdev_child[vdev];
+	if (vd == NULL)
+		return (B_TRUE);
+
+	if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL ||
+	    vd->vdev_alloc_bias == VDEV_BIAS_DEDUP)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
/*
* Instantiates the objset_t in-memory structure corresponding to the
* objset_phys_t that's pointed to by the specified blkptr_t.
@@ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
- if (DMU_OS_IS_L2CACHEABLE(os))
+ if (dmu_os_is_l2cacheable(os))
aflags |= ARC_FLAG_L2CACHE;
if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
@@ -1663,7 +1693,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
}
zio = arc_write(pio, os->os_spa, tx->tx_txg,
- blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
+ blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);