diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/Makefile.in | 1 | ||||
-rw-r--r-- | module/zfs/arc.c | 87 | ||||
-rw-r--r-- | module/zfs/dbuf.c | 4 | ||||
-rw-r--r-- | module/zfs/dbuf_stats.c | 230 | ||||
-rw-r--r-- | module/zfs/dmu.c | 18 |
5 files changed, 332 insertions, 8 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 6f0f6ef05..5552436ad 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -8,6 +8,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/arc.o $(MODULE)-objs += @top_srcdir@/module/zfs/bplist.o $(MODULE)-objs += @top_srcdir@/module/zfs/bpobj.o $(MODULE)-objs += @top_srcdir@/module/zfs/dbuf.o +$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf_stats.o $(MODULE)-objs += @top_srcdir@/module/zfs/bptree.o $(MODULE)-objs += @top_srcdir@/module/zfs/ddt.o $(MODULE)-objs += @top_srcdir@/module/zfs/ddt_zap.o diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 366f5bf9b..9098988fd 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -232,6 +232,7 @@ typedef struct arc_state { uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */ uint64_t arcs_size; /* total amount of data in this state */ kmutex_t arcs_mtx; + arc_state_type_t arcs_state; } arc_state_t; /* The 6 states: */ @@ -534,6 +535,11 @@ struct arc_buf_hdr { /* updated atomically */ clock_t b_arc_access; + uint32_t b_mru_hits; + uint32_t b_mru_ghost_hits; + uint32_t b_mfu_hits; + uint32_t b_mfu_ghost_hits; + uint32_t b_l2_hits; /* self protecting */ refcount_t b_refcnt; @@ -709,7 +715,8 @@ struct l2arc_buf_hdr { /* compression applied to buffer data */ enum zio_compress b_compress; /* real alloc'd buffer size depending on b_compress applied */ - int b_asize; + uint32_t b_asize; + uint32_t b_hits; /* temporary buffer holder for in-flight compressed data */ void *b_tmp_cdata; }; @@ -1138,6 +1145,54 @@ remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) } /* + * Returns detailed information about a specific arc buffer. When the + * state_index argument is set the function will calculate the arc header + * list position for its arc state. Since this requires a linear traversal + * callers are strongly encourage not to do this. However, it can be helpful + * for targeted analysis so the functionality is provided. + */ +void +arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index) +{ + arc_buf_hdr_t *hdr = ab->b_hdr; + arc_state_t *state = hdr->b_state; + + memset(abi, 0, sizeof(arc_buf_info_t)); + abi->abi_flags = hdr->b_flags; + abi->abi_datacnt = hdr->b_datacnt; + abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON; + abi->abi_state_contents = hdr->b_type; + abi->abi_state_index = -1; + abi->abi_size = hdr->b_size; + abi->abi_access = hdr->b_arc_access; + abi->abi_mru_hits = hdr->b_mru_hits; + abi->abi_mru_ghost_hits = hdr->b_mru_ghost_hits; + abi->abi_mfu_hits = hdr->b_mfu_hits; + abi->abi_mfu_ghost_hits = hdr->b_mfu_ghost_hits; + abi->abi_holds = refcount_count(&hdr->b_refcnt); + + if (hdr->b_l2hdr) { + abi->abi_l2arc_dattr = hdr->b_l2hdr->b_daddr; + abi->abi_l2arc_asize = hdr->b_l2hdr->b_asize; + abi->abi_l2arc_compress = hdr->b_l2hdr->b_compress; + abi->abi_l2arc_hits = hdr->b_l2hdr->b_hits; + } + + if (state && state_index && list_link_active(&hdr->b_arc_node)) { + list_t *list = &state->arcs_list[hdr->b_type]; + arc_buf_hdr_t *h; + + mutex_enter(&state->arcs_mtx); + for (h = list_head(list); h != NULL; h = list_next(list, h)) { + abi->abi_state_index++; + if (h == hdr) + break; + } + mutex_exit(&state->arcs_mtx); + } +} + +/* * Move the supplied buffer to the indicated state. The mutex * for the buffer must be held by the caller. */ @@ -1298,6 +1353,11 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type) hdr->b_spa = spa_load_guid(spa); hdr->b_state = arc_anon; hdr->b_arc_access = 0; + hdr->b_mru_hits = 0; + hdr->b_mru_ghost_hits = 0; + hdr->b_mfu_hits = 0; + hdr->b_mfu_ghost_hits = 0; + hdr->b_l2_hits = 0; buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; buf->b_data = NULL; @@ -2670,6 +2730,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) ASSERT(list_link_active(&buf->b_arc_node)); } else { buf->b_flags &= ~ARC_PREFETCH; + atomic_inc_32(&buf->b_mru_hits); ARCSTAT_BUMP(arcstat_mru_hits); } buf->b_arc_access = now; @@ -2691,6 +2752,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); arc_change_state(arc_mfu, buf, hash_lock); } + atomic_inc_32(&buf->b_mru_hits); ARCSTAT_BUMP(arcstat_mru_hits); } else if (buf->b_state == arc_mru_ghost) { arc_state_t *new_state; @@ -2713,6 +2775,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) buf->b_arc_access = ddi_get_lbolt(); arc_change_state(new_state, buf, hash_lock); + atomic_inc_32(&buf->b_mru_ghost_hits); ARCSTAT_BUMP(arcstat_mru_ghost_hits); } else if (buf->b_state == arc_mfu) { /* @@ -2728,6 +2791,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) ASSERT(refcount_count(&buf->b_refcnt) == 0); ASSERT(list_link_active(&buf->b_arc_node)); } + atomic_inc_32(&buf->b_mfu_hits); ARCSTAT_BUMP(arcstat_mfu_hits); buf->b_arc_access = ddi_get_lbolt(); } else if (buf->b_state == arc_mfu_ghost) { @@ -2751,6 +2815,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); arc_change_state(new_state, buf, hash_lock); + atomic_inc_32(&buf->b_mfu_ghost_hits); ARCSTAT_BUMP(arcstat_mfu_ghost_hits); } else if (buf->b_state == arc_l2c_only) { /* @@ -3134,6 +3199,7 @@ top: DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_hits); + atomic_inc_32(&hdr->b_l2hdr->b_hits); cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_PUSHPAGE); @@ -3469,6 +3535,11 @@ arc_release(arc_buf_t *buf, void *tag) nhdr->b_buf = buf; nhdr->b_state = arc_anon; nhdr->b_arc_access = 0; + nhdr->b_mru_hits = 0; + nhdr->b_mru_ghost_hits = 0; + nhdr->b_mfu_hits = 0; + nhdr->b_mfu_ghost_hits = 0; + nhdr->b_l2_hits = 0; nhdr->b_flags = flags & ARC_L2_WRITING; nhdr->b_l2hdr = NULL; nhdr->b_datacnt = 1; @@ -3485,6 +3556,11 @@ arc_release(arc_buf_t *buf, void *tag) if (hdr->b_state != arc_anon) arc_change_state(arc_anon, hdr, hash_lock); hdr->b_arc_access = 0; + hdr->b_mru_hits = 0; + hdr->b_mru_ghost_hits = 0; + hdr->b_mfu_hits = 0; + hdr->b_mfu_ghost_hits = 0; + hdr->b_l2_hits = 0; if (hash_lock) mutex_exit(hash_lock); @@ -3902,6 +3978,13 @@ arc_init(void) list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); + arc_anon->arcs_state = ARC_STATE_ANON; + arc_mru->arcs_state = ARC_STATE_MRU; + arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST; + arc_mfu->arcs_state = ARC_STATE_MFU; + arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST; + arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY; + buf_init(); arc_thread_exit = 0; @@ -4785,6 +4868,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, l2hdr->b_compress = ZIO_COMPRESS_OFF; l2hdr->b_asize = ab->b_size; l2hdr->b_tmp_cdata = ab->b_buf->b_data; + l2hdr->b_hits = 0; buf_sz = ab->b_size; ab->b_l2hdr = l2hdr; @@ -5317,6 +5401,7 @@ l2arc_stop(void) #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(arc_read); EXPORT_SYMBOL(arc_buf_remove_ref); +EXPORT_SYMBOL(arc_buf_info); EXPORT_SYMBOL(arc_getbuf_func); EXPORT_SYMBOL(arc_add_prune_callback); EXPORT_SYMBOL(arc_remove_prune_callback); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index d655d6621..44e9419b7 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -317,6 +317,8 @@ retry: for (i = 0; i < DBUF_MUTEXES; i++) mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); + + dbuf_stats_init(h); } void @@ -325,6 +327,8 @@ dbuf_fini(void) dbuf_hash_table_t *h = &dbuf_hash_table; int i; + dbuf_stats_destroy(); + for (i = 0; i < DBUF_MUTEXES; i++) mutex_destroy(&h->hash_mutexes[i]); #if defined(_KERNEL) && defined(HAVE_SPL) diff --git a/module/zfs/dbuf_stats.c b/module/zfs/dbuf_stats.c new file mode 100644 index 000000000..ef760eaba --- /dev/null +++ b/module/zfs/dbuf_stats.c @@ -0,0 +1,230 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include <sys/zfs_context.h> +#include <sys/dbuf.h> +#include <sys/dmu_objset.h> + +/* + * Calculate the index of the arc header for the state, disabled by default. + */ +int zfs_dbuf_state_index = 0; + +/* + * ========================================================================== + * Dbuf Hash Read Routines + * ========================================================================== + */ +typedef struct dbuf_stats_t { + kmutex_t lock; + kstat_t *kstat; + dbuf_hash_table_t *hash; + int idx; +} dbuf_stats_t; + +static dbuf_stats_t dbuf_stats_hash_table; + +static int +dbuf_stats_hash_table_headers(char *buf, size_t size) +{ + size = snprintf(buf, size - 1, + "%-88s | %-124s | %s\n" + "%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | " + "%-5s %-5s %-6s %-8s %-6s %-8s %-12s " + "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | " + "%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n", + "dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level", + "blkid", "offset", "dbsize", "meta", "state", "dbholds", "list", + "atype", "index", "flags", "count", "asize", "access", "mru", "gmru", + "mfu", "gmfu", "l2", "l2_dattr", "l2_asize", "l2_comp", "aholds", + "dtype", "btype", "data_bs", "meta_bs", "bsize", + "lvls", "dholds", "blocks", "dsize"); + buf[size] = '\0'; + + return (0); +} + +int +__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db) +{ + arc_buf_info_t abi = { 0 }; + dmu_object_info_t doi = { 0 }; + dnode_t *dn = DB_DNODE(db); + + if (db->db_buf) + arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index); + + if (dn) + __dmu_object_info_from_dnode(dn, &doi); + + size = snprintf(buf, size - 1, + "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | " + "%-5d %-5d %-6lld 0x%-6x %-6lu %-8llu %-12llu " + "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | " + "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n", + /* dmu_buf_impl_t */ + spa_name(dn->dn_objset->os_spa), + (u_longlong_t)dmu_objset_id(db->db_objset), + (longlong_t)db->db.db_object, + (longlong_t)db->db_level, + (longlong_t)db->db_blkid, + (u_longlong_t)db->db.db_offset, + (u_longlong_t)db->db.db_size, + !!dbuf_is_metadata(db), + db->db_state, + (ulong_t)refcount_count(&db->db_holds), + /* arc_buf_info_t */ + abi.abi_state_type, + abi.abi_state_contents, + (longlong_t)abi.abi_state_index, + abi.abi_flags, + (ulong_t)abi.abi_datacnt, + (u_longlong_t)abi.abi_size, + (u_longlong_t)abi.abi_access, + (ulong_t)abi.abi_mru_hits, + (ulong_t)abi.abi_mru_ghost_hits, + (ulong_t)abi.abi_mfu_hits, + (ulong_t)abi.abi_mfu_ghost_hits, + (ulong_t)abi.abi_l2arc_hits, + (u_longlong_t)abi.abi_l2arc_dattr, + (u_longlong_t)abi.abi_l2arc_asize, + abi.abi_l2arc_compress, + (ulong_t)abi.abi_holds, + /* dmu_object_info_t */ + doi.doi_type, + doi.doi_bonus_type, + (ulong_t)doi.doi_data_block_size, + (ulong_t)doi.doi_metadata_block_size, + (u_longlong_t)doi.doi_bonus_size, + (ulong_t)doi.doi_indirection, + (ulong_t)refcount_count(&dn->dn_holds), + (u_longlong_t)doi.doi_fill_count, + (u_longlong_t)doi.doi_max_offset); + buf[size] = '\0'; + + return (size); +} + +static int +dbuf_stats_hash_table_data(char *buf, size_t size, void *data) +{ + dbuf_stats_t *dsh = (dbuf_stats_t *)data; + dbuf_hash_table_t *h = dsh->hash; + dmu_buf_impl_t *db; + int length, error = 0; + + ASSERT3S(dsh->idx, >=, 0); + ASSERT3S(dsh->idx, <=, h->hash_table_mask); + memset(buf, 0, size); + + mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx)); + for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) { + /* + * Returning ENOMEM will cause the data and header functions + * to be called with a larger scratch buffers. + */ + if (size < 512) { + error = ENOMEM; + break; + } + + mutex_enter(&db->db_mtx); + mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx)); + + length = __dbuf_stats_hash_table_data(buf, size, db); + buf += length; + size -= length; + + mutex_exit(&db->db_mtx); + mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx)); + } + mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx)); + + return (error); +} + +static void * +dbuf_stats_hash_table_addr(kstat_t *ksp, loff_t n) +{ + dbuf_stats_t *dsh = ksp->ks_private; + + ASSERT(MUTEX_HELD(&dsh->lock)); + + if (n <= dsh->hash->hash_table_mask) { + dsh->idx = n; + return (dsh); + } + + return (NULL); +} + +static void +dbuf_stats_hash_table_init(dbuf_hash_table_t *hash) +{ + dbuf_stats_t *dsh = &dbuf_stats_hash_table; + kstat_t *ksp; + + mutex_init(&dsh->lock, NULL, MUTEX_DEFAULT, NULL); + dsh->hash = hash; + + ksp = kstat_create("zfs", 0, "dbufs", "misc", + KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); + dsh->kstat = ksp; + + if (ksp) { + ksp->ks_lock = &dsh->lock; + ksp->ks_ndata = UINT32_MAX; + ksp->ks_private = dsh; + kstat_set_raw_ops(ksp, dbuf_stats_hash_table_headers, + dbuf_stats_hash_table_data, dbuf_stats_hash_table_addr); + kstat_install(ksp); + } +} + +static void +dbuf_stats_hash_table_destroy(void) +{ + dbuf_stats_t *dsh = &dbuf_stats_hash_table; + kstat_t *ksp; + + ksp = dsh->kstat; + if (ksp) + kstat_delete(ksp); + + mutex_destroy(&dsh->lock); +} + +void +dbuf_stats_init(dbuf_hash_table_t *hash) +{ + dbuf_stats_hash_table_init(hash); +} + +void +dbuf_stats_destroy(void) +{ + dbuf_stats_hash_table_destroy(); +} + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(zfs_dbuf_state_index, int, 0644); +MODULE_PARM_DESC(zfs_dbuf_state_index, "Calculate arc header index"); +#endif diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index cbf4790b1..4ec9cb46a 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1815,16 +1815,11 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) } void -dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) +__dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) { - dnode_phys_t *dnp; + dnode_phys_t *dnp = dn->dn_phys; int i; - rw_enter(&dn->dn_struct_rwlock, RW_READER); - mutex_enter(&dn->dn_mtx); - - dnp = dn->dn_phys; - doi->doi_data_block_size = dn->dn_datablksz; doi->doi_metadata_block_size = dn->dn_indblkshift ? 1ULL << dn->dn_indblkshift : 0; @@ -1839,6 +1834,15 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) doi->doi_fill_count = 0; for (i = 0; i < dnp->dn_nblkptr; i++) doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill; +} + +void +dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) +{ + rw_enter(&dn->dn_struct_rwlock, RW_READER); + mutex_enter(&dn->dn_mtx); + + __dmu_object_info_from_dnode(dn, doi); mutex_exit(&dn->dn_mtx); rw_exit(&dn->dn_struct_rwlock); |