aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorGiuseppe Di Natale <[email protected]>2018-01-29 10:24:52 -0800
committerBrian Behlendorf <[email protected]>2018-01-29 10:24:52 -0800
commit5e021f56d3437d3523904652fe3cc23ea1f4cb70 (patch)
tree0953de761a9f65b79e7123197076fbc77560687a /module
parent0735ecb33485e91a78357a274e47c2782858d8b9 (diff)
Add dbuf hash and dbuf cache kstats
Introduce kstats about the dbuf hash and dbuf cache to make it easier to inspect state. This should help with debugging and understanding of these portions of the codebase. Correct format of dbuf kstat file. Introduce a dbc column to dbufs kstat to indicate if a dbuf is in the dbuf cache. Introduce field filtering in the dbufstat python script. Introduce a no header option to the dbufstat python script. Introduce a test case to test basic mru->mfu list movement in the ARC. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Giuseppe Di Natale <[email protected]> Closes #6906
Diffstat (limited to 'module')
-rw-r--r--module/zfs/dbuf.c194
-rw-r--r--module/zfs/dbuf_stats.c23
2 files changed, 192 insertions, 25 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 517a284de..87b9ba461 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -48,6 +48,87 @@
#include <sys/callb.h>
#include <sys/abd.h>
+/*
+ * Handle for the "dbufstats" kstat. It is assigned from kstat_create() in
+ * dbuf_init() and released with kstat_delete() in dbuf_fini().
+ */
+kstat_t *dbuf_ksp;
+
+/*
+ * Backing storage for the "dbufstats" kstat.
+ *
+ * Every member must be a kstat_named_t (or an array of them): dbuf_init()
+ * derives the number of kstat entries from
+ * sizeof (dbuf_stats) / sizeof (kstat_named_t).
+ */
+typedef struct dbuf_stats {
+ /*
+ * Various statistics about the size of the dbuf cache.
+ *
+ * cache_count is adjusted with DBUF_STAT_BUMP/BUMPDOWN as dbufs
+ * enter and leave the cache, cache_size_bytes is filled in from
+ * dbuf_cache_size by dbuf_kstat_update(), and cache_size_bytes_max
+ * is a high-water mark maintained with DBUF_STAT_MAX.
+ */
+ kstat_named_t cache_count;
+ kstat_named_t cache_size_bytes;
+ kstat_named_t cache_size_bytes_max;
+ /*
+ * Statistics regarding the bounds on the dbuf cache size.
+ *
+ * All three are filled in by dbuf_kstat_update() from
+ * dbuf_cache_target_bytes(), dbuf_cache_lowater_bytes() and
+ * dbuf_cache_hiwater_bytes().
+ */
+ kstat_named_t cache_target_bytes;
+ kstat_named_t cache_lowater_bytes;
+ kstat_named_t cache_hiwater_bytes;
+ /*
+ * Total number of dbuf cache evictions that have occurred.
+ * Bumped in dbuf_evict_one().
+ */
+ kstat_named_t cache_total_evicts;
+ /*
+ * The distribution of dbuf levels in the dbuf cache and
+ * the total size of all dbufs at each level.
+ *
+ * Both arrays are indexed by db_level. dbuf_init() sets the
+ * per-entry names to "cache_level_%d" and "cache_level_%d_bytes".
+ */
+ kstat_named_t cache_levels[DN_MAX_LEVELS];
+ kstat_named_t cache_levels_bytes[DN_MAX_LEVELS];
+ /*
+ * Statistics about the dbuf hash table.
+ *
+ * hash_hits and hash_misses are bumped from dbuf_read().
+ * hash_collisions is bumped by dbuf_hash_insert() when the target
+ * bucket already holds at least one dbuf. hash_elements is filled
+ * in from dbuf_hash_count by dbuf_kstat_update(), and
+ * hash_elements_max is its high-water mark.
+ */
+ kstat_named_t hash_hits;
+ kstat_named_t hash_misses;
+ kstat_named_t hash_collisions;
+ kstat_named_t hash_elements;
+ kstat_named_t hash_elements_max;
+ /*
+ * Number of sublists containing more than one dbuf in the dbuf
+ * hash table. Keep track of the longest hash chain.
+ *
+ * hash_chains is bumped when dbuf_hash_insert() finds exactly one
+ * existing entry in the bucket, and bumped down when
+ * dbuf_hash_remove() leaves a bucket with a single entry.
+ * hash_chain_max records the largest number of existing entries
+ * walked by dbuf_hash_insert() before inserting.
+ */
+ kstat_named_t hash_chains;
+ kstat_named_t hash_chain_max;
+ /*
+ * Number of times a dbuf_create() discovers that a dbuf was
+ * already created and in the dbuf hash table.
+ */
+ kstat_named_t hash_insert_race;
+} dbuf_stats_t;
+
+/*
+ * Names and data types for every dbufstats entry.
+ *
+ * Only element 0 of each per-level array is given a placeholder name here;
+ * dbuf_init() overwrites the name and data type of every per-level element.
+ */
+dbuf_stats_t dbuf_stats = {
+	.cache_count		= { "cache_count", KSTAT_DATA_UINT64 },
+	.cache_size_bytes	= { "cache_size_bytes", KSTAT_DATA_UINT64 },
+	.cache_size_bytes_max	= { "cache_size_bytes_max", KSTAT_DATA_UINT64 },
+	.cache_target_bytes	= { "cache_target_bytes", KSTAT_DATA_UINT64 },
+	.cache_lowater_bytes	= { "cache_lowater_bytes", KSTAT_DATA_UINT64 },
+	.cache_hiwater_bytes	= { "cache_hiwater_bytes", KSTAT_DATA_UINT64 },
+	.cache_total_evicts	= { "cache_total_evicts", KSTAT_DATA_UINT64 },
+	.cache_levels		= { { "cache_levels_N", KSTAT_DATA_UINT64 } },
+	.cache_levels_bytes	=
+	    { { "cache_levels_bytes_N", KSTAT_DATA_UINT64 } },
+	.hash_hits		= { "hash_hits", KSTAT_DATA_UINT64 },
+	.hash_misses		= { "hash_misses", KSTAT_DATA_UINT64 },
+	.hash_collisions	= { "hash_collisions", KSTAT_DATA_UINT64 },
+	.hash_elements		= { "hash_elements", KSTAT_DATA_UINT64 },
+	.hash_elements_max	= { "hash_elements_max", KSTAT_DATA_UINT64 },
+	.hash_chains		= { "hash_chains", KSTAT_DATA_UINT64 },
+	.hash_chain_max		= { "hash_chain_max", KSTAT_DATA_UINT64 },
+	.hash_insert_race	= { "hash_insert_race", KSTAT_DATA_UINT64 }
+};
+
+/*
+ * Helpers for updating the counters in dbuf_stats.
+ *
+ * Each macro expands to a single statement WITHOUT a trailing semicolon, so
+ * call sites supply their own ';' and the macros stay safe inside unbraced
+ * if/else bodies.
+ *
+ * DBUF_STAT_INCR(stat, val)	atomically add val to the counter
+ * DBUF_STAT_DECR(stat, val)	atomically subtract val from the counter
+ * DBUF_STAT_BUMP(stat)		atomically add one
+ * DBUF_STAT_BUMPDOWN(stat)	atomically subtract one
+ * DBUF_STAT_MAX(stat, v)	raise the counter to v if v is larger
+ */
+#define	DBUF_STAT_INCR(stat, val) \
+	atomic_add_64(&dbuf_stats.stat.value.ui64, (val))
+#define	DBUF_STAT_DECR(stat, val) \
+	DBUF_STAT_INCR(stat, -(val))
+#define	DBUF_STAT_BUMP(stat) \
+	DBUF_STAT_INCR(stat, 1)
+#define	DBUF_STAT_BUMPDOWN(stat) \
+	DBUF_STAT_INCR(stat, -1)
+/*
+ * v is evaluated exactly once into _v. Callers pass values that can change
+ * concurrently (e.g. dbuf_hash_count), and re-reading v between the
+ * comparison and the compare-and-swap could store a value smaller than the
+ * one that was compared. The loop retries while _v still exceeds the
+ * current value and the compare-and-swap lost a race.
+ */
+#define	DBUF_STAT_MAX(stat, v) do { \
+	uint64_t _v = (v); \
+	uint64_t _m; \
+	while (_v > (_m = dbuf_stats.stat.value.ui64) && \
+	    (_m != atomic_cas_64(&dbuf_stats.stat.value.ui64, _m, _v))) \
+		continue; \
+} while (0)
+
struct dbuf_hold_impl_data {
/* Function arguments */
dnode_t *dh_dn;
@@ -272,13 +353,15 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
int level = db->db_level;
uint64_t blkid, hv, idx;
dmu_buf_impl_t *dbf;
+ uint32_t i;
blkid = db->db_blkid;
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
mutex_enter(DBUF_HASH_MUTEX(h, idx));
- for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) {
+ for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
+ dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) {
@@ -289,11 +372,20 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
}
}
+ if (i > 0) {
+ DBUF_STAT_BUMP(hash_collisions);
+ if (i == 1)
+ DBUF_STAT_BUMP(hash_chains);
+
+ DBUF_STAT_MAX(hash_chain_max, i);
+ }
+
mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_inc_64(&dbuf_hash_count);
+ DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count);
return (NULL);
}
@@ -328,6 +420,9 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
}
*dbp = db->db_hash_next;
db->db_hash_next = NULL;
+ if (h->hash_table[idx] &&
+ h->hash_table[idx]->db_hash_next == NULL)
+ DBUF_STAT_BUMPDOWN(hash_chains);
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_hash_count);
}
@@ -469,28 +564,32 @@ dbuf_cache_target_bytes(void)
arc_target_bytes() >> dbuf_cache_max_shift);
}
-static inline boolean_t
-dbuf_cache_above_hiwater(void)
+/*
+ * Return the upper bound on the dbuf cache size in bytes: the cache target
+ * from dbuf_cache_target_bytes() plus dbuf_cache_hiwater_pct percent of
+ * that target.
+ */
+static inline uint64_t
+dbuf_cache_hiwater_bytes(void)
{
uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
+ return (dbuf_cache_target +
+ (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100);
+}
- uint64_t dbuf_cache_hiwater_bytes =
- (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100;
+/*
+ * Return the lower bound on the dbuf cache size in bytes: the cache target
+ * from dbuf_cache_target_bytes() minus dbuf_cache_lowater_pct percent of
+ * that target.
+ */
+static inline uint64_t
+dbuf_cache_lowater_bytes(void)
+{
+	uint64_t target = dbuf_cache_target_bytes();
+	uint64_t slack = (target * dbuf_cache_lowater_pct) / 100;
+
+	return (target - slack);
+}
- return (refcount_count(&dbuf_cache_size) >
- dbuf_cache_target + dbuf_cache_hiwater_bytes);
+/*
+ * Report whether the current dbuf cache size (the count held in
+ * dbuf_cache_size) exceeds the bound from dbuf_cache_hiwater_bytes().
+ */
+static inline boolean_t
+dbuf_cache_above_hiwater(void)
+{
+ return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
}
+/*
+ * Report whether the current dbuf cache size (the count held in
+ * dbuf_cache_size) exceeds the bound from dbuf_cache_lowater_bytes().
+ */
static inline boolean_t
dbuf_cache_above_lowater(void)
{
- uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
-
- uint64_t dbuf_cache_lowater_bytes =
- (dbuf_cache_target * dbuf_cache_lowater_pct) / 100;
-
- return (refcount_count(&dbuf_cache_size) >
- dbuf_cache_target - dbuf_cache_lowater_bytes);
+ return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
}
/*
@@ -525,7 +624,14 @@ dbuf_evict_one(void)
multilist_sublist_unlock(mls);
(void) refcount_remove_many(&dbuf_cache_size,
db->db.db_size, db);
+ DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
dbuf_destroy(db);
+ DBUF_STAT_MAX(cache_size_bytes_max,
+ refcount_count(&dbuf_cache_size));
+ DBUF_STAT_BUMP(cache_total_evicts);
} else {
multilist_sublist_unlock(mls);
}
@@ -618,7 +724,24 @@ dbuf_evict_notify(void)
}
}
+/*
+ * Update callback for the dbufstats kstat; dbuf_init() installs it as
+ * ks_update.
+ *
+ * ksp: the kstat being updated; its ks_data points at a dbuf_stats_t.
+ * rw:  KSTAT_WRITE for a write request; anything else is handled as a read.
+ *
+ * Returns SET_ERROR(EACCES) for writes. Otherwise fills in the entries that
+ * are computed on demand rather than maintained incrementally (current
+ * cache size, the target/hiwater/lowater bounds and the hash element count)
+ * and returns 0.
+ */
+static int
+dbuf_kstat_update(kstat_t *ksp, int rw)
+{
+	dbuf_stats_t *stats = ksp->ks_data;
+
+	/* The statistics cannot be written through the kstat. */
+	if (rw == KSTAT_WRITE)
+		return (SET_ERROR(EACCES));
+
+	stats->cache_size_bytes.value.ui64 = refcount_count(&dbuf_cache_size);
+	stats->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
+	stats->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
+	stats->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
+	stats->hash_elements.value.ui64 = dbuf_hash_count;
+
+	return (0);
+}
void
dbuf_init(void)
@@ -687,6 +810,26 @@ retry:
cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL);
dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
NULL, 0, &p0, TS_RUN, minclsyspri);
+
+ dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc",
+ KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+ if (dbuf_ksp != NULL) {
+ dbuf_ksp->ks_data = &dbuf_stats;
+ dbuf_ksp->ks_update = dbuf_kstat_update;
+ kstat_install(dbuf_ksp);
+
+ for (i = 0; i < DN_MAX_LEVELS; i++) {
+ snprintf(dbuf_stats.cache_levels[i].name,
+ KSTAT_STRLEN, "cache_level_%d", i);
+ dbuf_stats.cache_levels[i].data_type =
+ KSTAT_DATA_UINT64;
+ snprintf(dbuf_stats.cache_levels_bytes[i].name,
+ KSTAT_STRLEN, "cache_level_%d_bytes", i);
+ dbuf_stats.cache_levels_bytes[i].data_type =
+ KSTAT_DATA_UINT64;
+ }
+ }
}
void
@@ -725,6 +868,11 @@ dbuf_fini(void)
refcount_destroy(&dbuf_cache_size);
multilist_destroy(dbuf_cache);
+
+ if (dbuf_ksp != NULL) {
+ kstat_delete(dbuf_ksp);
+ dbuf_ksp = NULL;
+ }
}
/*
@@ -1268,6 +1416,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
+ DBUF_STAT_BUMP(hash_hits);
} else if (db->db_state == DB_UNCACHED) {
spa_t *spa = dn->dn_objset->os_spa;
boolean_t need_wait = B_FALSE;
@@ -1287,6 +1436,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
+ DBUF_STAT_BUMP(hash_misses);
if (!err && need_wait)
err = zio_wait(zio);
@@ -1305,6 +1455,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
+ DBUF_STAT_BUMP(hash_misses);
/* Skip the wait per the caller's request. */
mutex_enter(&db->db_mtx);
@@ -2231,6 +2382,10 @@ dbuf_destroy(dmu_buf_impl_t *db)
multilist_remove(dbuf_cache, db);
(void) refcount_remove_many(&dbuf_cache_size,
db->db.db_size, db);
+ DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
}
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@@ -2458,6 +2613,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
/* someone else inserted it first */
kmem_cache_free(dbuf_kmem_cache, db);
mutex_exit(&dn->dn_dbufs_mtx);
+ DBUF_STAT_BUMP(hash_insert_race);
return (odb);
}
avl_add(&dn->dn_dbufs, db);
@@ -2847,6 +3003,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
multilist_remove(dbuf_cache, dh->dh_db);
(void) refcount_remove_many(&dbuf_cache_size,
dh->dh_db->db.db_size, dh->dh_db);
+ DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
+ dh->dh_db->db.db_size);
}
(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
DBUF_VERIFY(dh->dh_db);
@@ -3118,6 +3278,12 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
multilist_insert(dbuf_cache, db);
(void) refcount_add_many(&dbuf_cache_size,
db->db.db_size, db);
+ DBUF_STAT_BUMP(cache_levels[db->db_level]);
+ DBUF_STAT_BUMP(cache_count);
+ DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
+ DBUF_STAT_MAX(cache_size_bytes_max,
+ refcount_count(&dbuf_cache_size));
mutex_exit(&db->db_mtx);
dbuf_evict_notify();
diff --git a/module/zfs/dbuf_stats.c b/module/zfs/dbuf_stats.c
index 985bbd3e9..6c26718f2 100644
--- a/module/zfs/dbuf_stats.c
+++ b/module/zfs/dbuf_stats.c
@@ -46,14 +46,14 @@ static int
dbuf_stats_hash_table_headers(char *buf, size_t size)
{
(void) snprintf(buf, size,
- "%-88s | %-124s | %s\n"
- "%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
- "%-5s %-5s %-8s %-6s %-8s %-12s "
- "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
- "%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
+ "%-96s | %-119s | %s\n"
+ "%-16s %-8s %-8s %-8s %-8s %-10s %-8s %-5s %-5s %-7s %3s | "
+ "%-5s %-5s %-9s %-6s %-8s %-12s "
+ "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-6s | "
+ "%-6s %-6s %-8s %-8s %-6s %-6s %-6s %-8s %-8s\n",
"dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
- "blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
- "atype", "flags", "count", "asize", "access",
+ "blkid", "offset", "dbsize", "meta", "state", "dbholds", "dbc",
+ "list", "atype", "flags", "count", "asize", "access",
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
"l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
"bsize", "lvls", "dholds", "blocks", "dsize");
@@ -75,10 +75,10 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
__dmu_object_info_from_dnode(dn, &doi);
nwritten = snprintf(buf, size,
- "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
- "%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
- "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
- "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
+ "%-16s %-8llu %-8lld %-8lld %-8lld %-10llu %-8llu %-5d %-5d "
+ "%-7lu %-3d | %-5d %-5d 0x%-7x %-6lu %-8llu %-12llu "
+ "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-6lu | "
+ "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-6lu %-8llu %-8llu\n",
/* dmu_buf_impl_t */
spa_name(dn->dn_objset->os_spa),
(u_longlong_t)dmu_objset_id(db->db_objset),
@@ -90,6 +90,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
!!dbuf_is_metadata(db),
db->db_state,
(ulong_t)refcount_count(&db->db_holds),
+ multilist_link_active(&db->db_cache_link),
/* arc_buf_info_t */
abi.abi_state_type,
abi.abi_state_contents,