aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r--module/zfs/Makefile.in1
-rw-r--r--module/zfs/arc.c87
-rw-r--r--module/zfs/dbuf.c4
-rw-r--r--module/zfs/dbuf_stats.c230
-rw-r--r--module/zfs/dmu.c18
5 files changed, 332 insertions, 8 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index 6f0f6ef05..5552436ad 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -8,6 +8,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/arc.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bplist.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bpobj.o
$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf.o
+$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf_stats.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bptree.o
$(MODULE)-objs += @top_srcdir@/module/zfs/ddt.o
$(MODULE)-objs += @top_srcdir@/module/zfs/ddt_zap.o
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 366f5bf9b..9098988fd 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -232,6 +232,7 @@ typedef struct arc_state {
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
uint64_t arcs_size; /* total amount of data in this state */
kmutex_t arcs_mtx;
+ arc_state_type_t arcs_state;
} arc_state_t;
/* The 6 states: */
@@ -534,6 +535,11 @@ struct arc_buf_hdr {
/* updated atomically */
clock_t b_arc_access;
+ uint32_t b_mru_hits;
+ uint32_t b_mru_ghost_hits;
+ uint32_t b_mfu_hits;
+ uint32_t b_mfu_ghost_hits;
+ uint32_t b_l2_hits;
/* self protecting */
refcount_t b_refcnt;
@@ -709,7 +715,8 @@ struct l2arc_buf_hdr {
/* compression applied to buffer data */
enum zio_compress b_compress;
/* real alloc'd buffer size depending on b_compress applied */
- int b_asize;
+ uint32_t b_asize;
+ uint32_t b_hits;
/* temporary buffer holder for in-flight compressed data */
void *b_tmp_cdata;
};
@@ -1138,6 +1145,54 @@ remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
}
/*
+ * Returns detailed information about a specific arc buffer.  When the
+ * state_index argument is set the function will calculate the arc header
+ * list position for its arc state.  Since this requires a linear traversal
+ * callers are strongly encouraged not to do this.  However, it can be
+ * helpful for targeted analysis so the functionality is provided.
+ *
+ * abi is zeroed and then filled from the buffer's header.  abi_state_index
+ * is -1 unless state_index was requested and the header was actually found
+ * on its state's list.
+ */
+void
+arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
+{
+	arc_buf_hdr_t *hdr = ab->b_hdr;
+	arc_state_t *state = hdr->b_state;
+
+	memset(abi, 0, sizeof (arc_buf_info_t));
+	abi->abi_flags = hdr->b_flags;
+	abi->abi_datacnt = hdr->b_datacnt;
+	abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON;
+	abi->abi_state_contents = hdr->b_type;
+	abi->abi_state_index = -1;
+	abi->abi_size = hdr->b_size;
+	abi->abi_access = hdr->b_arc_access;
+	abi->abi_mru_hits = hdr->b_mru_hits;
+	abi->abi_mru_ghost_hits = hdr->b_mru_ghost_hits;
+	abi->abi_mfu_hits = hdr->b_mfu_hits;
+	abi->abi_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
+	abi->abi_holds = refcount_count(&hdr->b_refcnt);
+
+	/* The L2ARC fields stay zero when the header has no L2 header. */
+	if (hdr->b_l2hdr) {
+		abi->abi_l2arc_dattr = hdr->b_l2hdr->b_daddr;
+		abi->abi_l2arc_asize = hdr->b_l2hdr->b_asize;
+		abi->abi_l2arc_compress = hdr->b_l2hdr->b_compress;
+		abi->abi_l2arc_hits = hdr->b_l2hdr->b_hits;
+	}
+
+	if (state && state_index && list_link_active(&hdr->b_arc_node)) {
+		list_t *list = &state->arcs_list[hdr->b_type];
+		arc_buf_hdr_t *h;
+		int found = 0;
+
+		mutex_enter(&state->arcs_mtx);
+		for (h = list_head(list); h != NULL; h = list_next(list, h)) {
+			abi->abi_state_index++;
+			if (h == hdr) {
+				found = 1;
+				break;
+			}
+		}
+		mutex_exit(&state->arcs_mtx);
+
+		/*
+		 * The link check above is made without arcs_mtx held, so the
+		 * header may have left this list before the walk started.
+		 * Report "unknown" rather than the list length in that case.
+		 */
+		if (!found)
+			abi->abi_state_index = -1;
+	}
+}
+
+/*
* Move the supplied buffer to the indicated state. The mutex
* for the buffer must be held by the caller.
*/
@@ -1298,6 +1353,11 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
hdr->b_spa = spa_load_guid(spa);
hdr->b_state = arc_anon;
hdr->b_arc_access = 0;
+ hdr->b_mru_hits = 0;
+ hdr->b_mru_ghost_hits = 0;
+ hdr->b_mfu_hits = 0;
+ hdr->b_mfu_ghost_hits = 0;
+ hdr->b_l2_hits = 0;
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
@@ -2670,6 +2730,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
ASSERT(list_link_active(&buf->b_arc_node));
} else {
buf->b_flags &= ~ARC_PREFETCH;
+ atomic_inc_32(&buf->b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
}
buf->b_arc_access = now;
@@ -2691,6 +2752,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
arc_change_state(arc_mfu, buf, hash_lock);
}
+ atomic_inc_32(&buf->b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
} else if (buf->b_state == arc_mru_ghost) {
arc_state_t *new_state;
@@ -2713,6 +2775,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
buf->b_arc_access = ddi_get_lbolt();
arc_change_state(new_state, buf, hash_lock);
+ atomic_inc_32(&buf->b_mru_ghost_hits);
ARCSTAT_BUMP(arcstat_mru_ghost_hits);
} else if (buf->b_state == arc_mfu) {
/*
@@ -2728,6 +2791,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
ASSERT(refcount_count(&buf->b_refcnt) == 0);
ASSERT(list_link_active(&buf->b_arc_node));
}
+ atomic_inc_32(&buf->b_mfu_hits);
ARCSTAT_BUMP(arcstat_mfu_hits);
buf->b_arc_access = ddi_get_lbolt();
} else if (buf->b_state == arc_mfu_ghost) {
@@ -2751,6 +2815,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
arc_change_state(new_state, buf, hash_lock);
+ atomic_inc_32(&buf->b_mfu_ghost_hits);
ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
} else if (buf->b_state == arc_l2c_only) {
/*
@@ -3134,6 +3199,7 @@ top:
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
+ atomic_inc_32(&hdr->b_l2hdr->b_hits);
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_PUSHPAGE);
@@ -3469,6 +3535,11 @@ arc_release(arc_buf_t *buf, void *tag)
nhdr->b_buf = buf;
nhdr->b_state = arc_anon;
nhdr->b_arc_access = 0;
+ nhdr->b_mru_hits = 0;
+ nhdr->b_mru_ghost_hits = 0;
+ nhdr->b_mfu_hits = 0;
+ nhdr->b_mfu_ghost_hits = 0;
+ nhdr->b_l2_hits = 0;
nhdr->b_flags = flags & ARC_L2_WRITING;
nhdr->b_l2hdr = NULL;
nhdr->b_datacnt = 1;
@@ -3485,6 +3556,11 @@ arc_release(arc_buf_t *buf, void *tag)
if (hdr->b_state != arc_anon)
arc_change_state(arc_anon, hdr, hash_lock);
hdr->b_arc_access = 0;
+ hdr->b_mru_hits = 0;
+ hdr->b_mru_ghost_hits = 0;
+ hdr->b_mfu_hits = 0;
+ hdr->b_mfu_ghost_hits = 0;
+ hdr->b_l2_hits = 0;
if (hash_lock)
mutex_exit(hash_lock);
@@ -3902,6 +3978,13 @@ arc_init(void)
list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+ arc_anon->arcs_state = ARC_STATE_ANON;
+ arc_mru->arcs_state = ARC_STATE_MRU;
+ arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
+ arc_mfu->arcs_state = ARC_STATE_MFU;
+ arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
+ arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
+
buf_init();
arc_thread_exit = 0;
@@ -4785,6 +4868,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
l2hdr->b_compress = ZIO_COMPRESS_OFF;
l2hdr->b_asize = ab->b_size;
l2hdr->b_tmp_cdata = ab->b_buf->b_data;
+ l2hdr->b_hits = 0;
buf_sz = ab->b_size;
ab->b_l2hdr = l2hdr;
@@ -5317,6 +5401,7 @@ l2arc_stop(void)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(arc_read);
EXPORT_SYMBOL(arc_buf_remove_ref);
+EXPORT_SYMBOL(arc_buf_info);
EXPORT_SYMBOL(arc_getbuf_func);
EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index d655d6621..44e9419b7 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -317,6 +317,8 @@ retry:
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
+
+ dbuf_stats_init(h);
}
void
@@ -325,6 +327,8 @@ dbuf_fini(void)
dbuf_hash_table_t *h = &dbuf_hash_table;
int i;
+ dbuf_stats_destroy();
+
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
#if defined(_KERNEL) && defined(HAVE_SPL)
diff --git a/module/zfs/dbuf_stats.c b/module/zfs/dbuf_stats.c
new file mode 100644
index 000000000..ef760eaba
--- /dev/null
+++ b/module/zfs/dbuf_stats.c
@@ -0,0 +1,230 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
+
+/*
+ * Calculate the index of the arc header for the state, disabled by default.
+ * The value is passed as the state_index argument of arc_buf_info() for
+ * every dbuf that is reported, and is registered as a module parameter at
+ * the bottom of this file.
+ */
+int zfs_dbuf_state_index = 0;
+
+/*
+ * ==========================================================================
+ * Dbuf Hash Read Routines
+ * ==========================================================================
+ */
+/*
+ * State shared by the "dbufs" kstat callbacks in this file.
+ */
+typedef struct dbuf_stats_t {
+ kmutex_t lock; /* installed as the kstat's ks_lock */
+ kstat_t *kstat; /* handle from kstat_create(), NULL on failure */
+ dbuf_hash_table_t *hash; /* dbuf hash table being reported */
+ int idx; /* hash bucket selected by the addr callback */
+} dbuf_stats_t;
+
+/* The single instance backing the "dbufs" kstat. */
+static dbuf_stats_t dbuf_stats_hash_table;
+
+/*
+ * Header callback registered with kstat_set_raw_ops(): formats the two
+ * column-heading lines of the "dbufs" kstat into buf.
+ *
+ * buf/size describe the destination buffer.  Output that does not fit is
+ * truncated; snprintf() itself guarantees NUL termination inside the
+ * buffer, so no terminator is stored by hand (its return value is the
+ * untruncated length and is not a safe index into buf).
+ *
+ * Always returns 0.
+ */
+static int
+dbuf_stats_hash_table_headers(char *buf, size_t size)
+{
+	/* Nothing can be stored, not even the terminator. */
+	if (size == 0)
+		return (0);
+
+	(void) snprintf(buf, size,
+	    "%-88s | %-124s | %s\n"
+	    "%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
+	    "%-5s %-5s %-6s %-8s %-6s %-8s %-12s "
+	    "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
+	    "%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
+	    "dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
+	    "blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
+	    "atype", "index", "flags", "count", "asize", "access", "mru", "gmru",
+	    "mfu", "gmfu", "l2", "l2_dattr", "l2_asize", "l2_comp", "aholds",
+	    "dtype", "btype", "data_bs", "meta_bs", "bsize",
+	    "lvls", "dholds", "blocks", "dsize");
+
+	return (0);
+}
+
+/*
+ * Format one line describing db (dbuf, arc buffer and dnode columns) into
+ * buf, which has room for size bytes.
+ *
+ * The arc columns are left zeroed when the dbuf has no arc buffer, and the
+ * dnode columns are left zeroed when DB_DNODE() yields no dnode.
+ *
+ * Returns the number of characters stored in buf, not counting the
+ * terminating NUL.  The result is always smaller than size (or 0 when
+ * size is 0), so the caller can safely advance buf and shrink size by it.
+ */
+int
+__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
+{
+	arc_buf_info_t abi = { 0 };
+	dmu_object_info_t doi = { 0 };
+	dnode_t *dn = DB_DNODE(db);
+	int nwritten;
+
+	if (size == 0)
+		return (0);
+
+	if (db->db_buf)
+		arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);
+
+	if (dn)
+		__dmu_object_info_from_dnode(dn, &doi);
+
+	nwritten = snprintf(buf, size,
+	    "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
+	    "%-5d %-5d %-6lld 0x%-6x %-6lu %-8llu %-12llu "
+	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
+	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
+	    /* dmu_buf_impl_t */
+	    /* dn may be NULL; fall back to the dbuf's own objset pointer. */
+	    spa_name(dn ? dn->dn_objset->os_spa : db->db_objset->os_spa),
+	    (u_longlong_t)dmu_objset_id(db->db_objset),
+	    (longlong_t)db->db.db_object,
+	    (longlong_t)db->db_level,
+	    (longlong_t)db->db_blkid,
+	    (u_longlong_t)db->db.db_offset,
+	    (u_longlong_t)db->db.db_size,
+	    !!dbuf_is_metadata(db),
+	    db->db_state,
+	    (ulong_t)refcount_count(&db->db_holds),
+	    /* arc_buf_info_t */
+	    abi.abi_state_type,
+	    abi.abi_state_contents,
+	    (longlong_t)abi.abi_state_index,
+	    abi.abi_flags,
+	    (ulong_t)abi.abi_datacnt,
+	    (u_longlong_t)abi.abi_size,
+	    (u_longlong_t)abi.abi_access,
+	    (ulong_t)abi.abi_mru_hits,
+	    (ulong_t)abi.abi_mru_ghost_hits,
+	    (ulong_t)abi.abi_mfu_hits,
+	    (ulong_t)abi.abi_mfu_ghost_hits,
+	    (ulong_t)abi.abi_l2arc_hits,
+	    (u_longlong_t)abi.abi_l2arc_dattr,
+	    (u_longlong_t)abi.abi_l2arc_asize,
+	    abi.abi_l2arc_compress,
+	    (ulong_t)abi.abi_holds,
+	    /* dmu_object_info_t */
+	    doi.doi_type,
+	    doi.doi_bonus_type,
+	    (ulong_t)doi.doi_data_block_size,
+	    (ulong_t)doi.doi_metadata_block_size,
+	    (u_longlong_t)doi.doi_bonus_size,
+	    (ulong_t)doi.doi_indirection,
+	    (ulong_t)(dn ? refcount_count(&dn->dn_holds) : 0),
+	    (u_longlong_t)doi.doi_fill_count,
+	    (u_longlong_t)doi.doi_max_offset);
+
+	/*
+	 * snprintf() reports the length the full line would have had.  On
+	 * truncation only size - 1 characters (plus the NUL it always
+	 * stores) are in buf, so clamp before handing the count back.
+	 */
+	if (nwritten < 0)
+		return (0);
+	if ((size_t)nwritten >= size)
+		return ((int)(size - 1));
+
+	return (nwritten);
+}
+
+/*
+ * Data callback registered with kstat_set_raw_ops(): formats one line per
+ * dbuf found in hash bucket dsh->idx into buf.
+ *
+ * buf/size describe the scratch buffer; data is the dbuf_stats_t returned
+ * by the addr callback.
+ *
+ * Returns 0 on success, or ENOMEM when fewer than 512 bytes remain before
+ * formatting an entry.
+ */
+static int
+dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
+{
+	dbuf_stats_t *dsh = (dbuf_stats_t *)data;
+	dbuf_hash_table_t *h = dsh->hash;
+	dmu_buf_impl_t *db;
+	int length, error = 0;
+
+	ASSERT3S(dsh->idx, >=, 0);
+	ASSERT3S(dsh->idx, <=, h->hash_table_mask);
+	memset(buf, 0, size);
+
+	/*
+	 * The bucket mutex is held for the whole walk.  Dropping it while an
+	 * entry is formatted would let that entry be unlinked (or freed once
+	 * db_mtx is released) before db->db_hash_next is read to advance.
+	 * db_mtx is still taken after the bucket mutex, as before.
+	 */
+	mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
+	for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
+		/*
+		 * Returning ENOMEM will cause the data and header functions
+		 * to be called with larger scratch buffers.
+		 */
+		if (size < 512) {
+			error = ENOMEM;
+			break;
+		}
+
+		mutex_enter(&db->db_mtx);
+		length = __dbuf_stats_hash_table_data(buf, size, db);
+		mutex_exit(&db->db_mtx);
+
+		/*
+		 * Never advance past the end of the buffer or let the
+		 * unsigned size wrap, whatever length was reported.
+		 */
+		if (length < 0)
+			length = 0;
+		else if ((size_t)length >= size)
+			length = (int)(size - 1);
+
+		buf += length;
+		size -= length;
+	}
+	mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
+
+	return (error);
+}
+
+/*
+ * Address callback registered with kstat_set_raw_ops().  Record n as the
+ * hash bucket to report and return the stats object, or return NULL when n
+ * lies beyond the last bucket (hash_table_mask).
+ */
+static void *
+dbuf_stats_hash_table_addr(kstat_t *ksp, loff_t n)
+{
+	dbuf_stats_t *stats = ksp->ks_private;
+
+	ASSERT(MUTEX_HELD(&stats->lock));
+
+	/* No such bucket. */
+	if (!(n <= stats->hash->hash_table_mask))
+		return (NULL);
+
+	stats->idx = n;
+	return (stats);
+}
+
+/*
+ * Set up the file-scope stats object for the given hash table and create
+ * and install the raw "dbufs" kstat that exposes it.  If the kstat cannot
+ * be created the stats object keeps a NULL kstat handle.
+ */
+static void
+dbuf_stats_hash_table_init(dbuf_hash_table_t *hash)
+{
+	dbuf_stats_t *stats = &dbuf_stats_hash_table;
+
+	mutex_init(&stats->lock, NULL, MUTEX_DEFAULT, NULL);
+	stats->hash = hash;
+
+	stats->kstat = kstat_create("zfs", 0, "dbufs", "misc",
+	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+	if (stats->kstat == NULL)
+		return;
+
+	stats->kstat->ks_lock = &stats->lock;
+	stats->kstat->ks_ndata = UINT32_MAX;
+	stats->kstat->ks_private = stats;
+	kstat_set_raw_ops(stats->kstat, dbuf_stats_hash_table_headers,
+	    dbuf_stats_hash_table_data, dbuf_stats_hash_table_addr);
+	kstat_install(stats->kstat);
+}
+
+/*
+ * Undo dbuf_stats_hash_table_init(): delete the kstat if one was created,
+ * then destroy the stats lock.
+ */
+static void
+dbuf_stats_hash_table_destroy(void)
+{
+	dbuf_stats_t *stats = &dbuf_stats_hash_table;
+
+	if (stats->kstat != NULL)
+		kstat_delete(stats->kstat);
+
+	mutex_destroy(&stats->lock);
+}
+
+/*
+ * Entry point for setting up dbuf statistics for the given hash table.
+ * Currently this only sets up the hash table kstat.
+ */
+void
+dbuf_stats_init(dbuf_hash_table_t *hash)
+{
+ dbuf_stats_hash_table_init(hash);
+}
+
+/*
+ * Entry point for tearing down dbuf statistics.  Currently this only
+ * tears down the hash table kstat.
+ */
+void
+dbuf_stats_destroy(void)
+{
+ dbuf_stats_hash_table_destroy();
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Register zfs_dbuf_state_index as an int module parameter, mode 0644. */
+module_param(zfs_dbuf_state_index, int, 0644);
+MODULE_PARM_DESC(zfs_dbuf_state_index, "Calculate arc header index");
+#endif
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index cbf4790b1..4ec9cb46a 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1815,16 +1815,11 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
}
void
-dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
+__dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
{
- dnode_phys_t *dnp;
+ dnode_phys_t *dnp = dn->dn_phys;
int i;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- mutex_enter(&dn->dn_mtx);
-
- dnp = dn->dn_phys;
-
doi->doi_data_block_size = dn->dn_datablksz;
doi->doi_metadata_block_size = dn->dn_indblkshift ?
1ULL << dn->dn_indblkshift : 0;
@@ -1839,6 +1834,15 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
doi->doi_fill_count = 0;
for (i = 0; i < dnp->dn_nblkptr; i++)
doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill;
+}
+
+void
+dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
+{
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ mutex_enter(&dn->dn_mtx);
+
+ __dmu_object_info_from_dnode(dn, doi);
mutex_exit(&dn->dn_mtx);
rw_exit(&dn->dn_struct_rwlock);