about summary refs log tree commit diff stats
path: root/module/zfs/dbuf.c
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs/dbuf.c')
-rw-r--r-- module/zfs/dbuf.c 194
1 files changed, 180 insertions, 14 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 517a284de..87b9ba461 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -48,6 +48,87 @@
#include <sys/callb.h>
#include <sys/abd.h>
+kstat_t *dbuf_ksp;
+
+typedef struct dbuf_stats {
+ /*
+ * Number of dbufs in the dbuf cache, its current size, and its peak size.
+ */
+ kstat_named_t cache_count;
+ kstat_named_t cache_size_bytes;
+ kstat_named_t cache_size_bytes_max;
+ /*
+ * Target size of the dbuf cache and the low/high water marks around it.
+ */
+ kstat_named_t cache_target_bytes;
+ kstat_named_t cache_lowater_bytes;
+ kstat_named_t cache_hiwater_bytes;
+ /*
+ * Total number of dbufs evicted from the cache by dbuf_evict_one().
+ */
+ kstat_named_t cache_total_evicts;
+ /*
+ * Per-level count of cached dbufs (indexed by db_level) and
+ * the total size in bytes of the cached dbufs at each level.
+ */
+ kstat_named_t cache_levels[DN_MAX_LEVELS];
+ kstat_named_t cache_levels_bytes[DN_MAX_LEVELS];
+ /*
+ * Hash table statistics; hits and misses are counted in dbuf_read().
+ */
+ kstat_named_t hash_hits;
+ kstat_named_t hash_misses;
+ kstat_named_t hash_collisions;
+ kstat_named_t hash_elements;
+ kstat_named_t hash_elements_max;
+ /*
+ * hash_chains: buckets in the dbuf hash table holding more than one
+ * dbuf. hash_chain_max: longest chain walked by a hash insert.
+ */
+ kstat_named_t hash_chains;
+ kstat_named_t hash_chain_max;
+ /*
+ * Number of times dbuf_create() found that someone else had
+ * already inserted the same dbuf into the dbuf hash table.
+ */
+ kstat_named_t hash_insert_race;
+} dbuf_stats_t;
+
+dbuf_stats_t dbuf_stats = { /* positional: keep in dbuf_stats_t member order */
+ { "cache_count", KSTAT_DATA_UINT64 },
+ { "cache_size_bytes", KSTAT_DATA_UINT64 },
+ { "cache_size_bytes_max", KSTAT_DATA_UINT64 },
+ { "cache_target_bytes", KSTAT_DATA_UINT64 },
+ { "cache_lowater_bytes", KSTAT_DATA_UINT64 },
+ { "cache_hiwater_bytes", KSTAT_DATA_UINT64 },
+ { "cache_total_evicts", KSTAT_DATA_UINT64 },
+ { { "cache_levels_N", KSTAT_DATA_UINT64 } }, /* placeholder; renamed per level at init */
+ { { "cache_levels_bytes_N", KSTAT_DATA_UINT64 } }, /* placeholder; renamed per level at init */
+ { "hash_hits", KSTAT_DATA_UINT64 },
+ { "hash_misses", KSTAT_DATA_UINT64 },
+ { "hash_collisions", KSTAT_DATA_UINT64 },
+ { "hash_elements", KSTAT_DATA_UINT64 },
+ { "hash_elements_max", KSTAT_DATA_UINT64 },
+ { "hash_chains", KSTAT_DATA_UINT64 },
+ { "hash_chain_max", KSTAT_DATA_UINT64 },
+ { "hash_insert_race", KSTAT_DATA_UINT64 }
+};
+
+/*
+ * Atomic dbuf_stats updaters; callers supply the terminating semicolon.
+ */
+#define DBUF_STAT_INCR(stat, val) \
+ atomic_add_64(&dbuf_stats.stat.value.ui64, (val))
+#define DBUF_STAT_DECR(stat, val) DBUF_STAT_INCR(stat, -(val))
+#define DBUF_STAT_BUMP(stat) DBUF_STAT_INCR(stat, 1)
+#define DBUF_STAT_BUMPDOWN(stat) DBUF_STAT_INCR(stat, -1)
+#define DBUF_STAT_MAX(stat, v) do { \
+ uint64_t _m, _v = (v); /* evaluate (v) exactly once */ \
+ while (_v > (_m = dbuf_stats.stat.value.ui64) && \
+ (_m != atomic_cas_64(&dbuf_stats.stat.value.ui64, _m, _v))) \
+ continue; \
+} while (0)
+
struct dbuf_hold_impl_data {
/* Function arguments */
dnode_t *dh_dn;
@@ -272,13 +353,15 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
int level = db->db_level;
uint64_t blkid, hv, idx;
dmu_buf_impl_t *dbf;
+ uint32_t i;
blkid = db->db_blkid;
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
mutex_enter(DBUF_HASH_MUTEX(h, idx));
- for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) {
+ for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
+ dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) {
@@ -289,11 +372,20 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
}
}
+ if (i > 0) {
+ DBUF_STAT_BUMP(hash_collisions);
+ if (i == 1)
+ DBUF_STAT_BUMP(hash_chains);
+
+ DBUF_STAT_MAX(hash_chain_max, i);
+ }
+
mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_inc_64(&dbuf_hash_count);
+ DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count);
return (NULL);
}
@@ -328,6 +420,9 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
}
*dbp = db->db_hash_next;
db->db_hash_next = NULL;
+ if (h->hash_table[idx] &&
+ h->hash_table[idx]->db_hash_next == NULL)
+ DBUF_STAT_BUMPDOWN(hash_chains);
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_hash_count);
}
@@ -469,28 +564,32 @@ dbuf_cache_target_bytes(void)
arc_target_bytes() >> dbuf_cache_max_shift);
}
-static inline boolean_t
-dbuf_cache_above_hiwater(void)
+static inline uint64_t
+dbuf_cache_hiwater_bytes(void)
{
uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
+ return (dbuf_cache_target +
+ (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100);
+}
- uint64_t dbuf_cache_hiwater_bytes =
- (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100;
+static inline uint64_t
+dbuf_cache_lowater_bytes(void)
+{
+ uint64_t target = dbuf_cache_target_bytes();
+ uint64_t slack = (target * dbuf_cache_lowater_pct) / 100;
+ return (target - slack); /* target minus lowater_pct percent of it */
+}
- return (refcount_count(&dbuf_cache_size) >
- dbuf_cache_target + dbuf_cache_hiwater_bytes);
+static inline boolean_t
+dbuf_cache_above_hiwater(void)
+{
+ return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
}
static inline boolean_t
dbuf_cache_above_lowater(void)
{
- uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
-
- uint64_t dbuf_cache_lowater_bytes =
- (dbuf_cache_target * dbuf_cache_lowater_pct) / 100;
-
- return (refcount_count(&dbuf_cache_size) >
- dbuf_cache_target - dbuf_cache_lowater_bytes);
+ return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
}
/*
@@ -525,7 +624,14 @@ dbuf_evict_one(void)
multilist_sublist_unlock(mls);
(void) refcount_remove_many(&dbuf_cache_size,
db->db.db_size, db);
+ DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
dbuf_destroy(db);
+ DBUF_STAT_MAX(cache_size_bytes_max,
+ refcount_count(&dbuf_cache_size));
+ DBUF_STAT_BUMP(cache_total_evicts);
} else {
multilist_sublist_unlock(mls);
}
@@ -618,7 +724,24 @@ dbuf_evict_notify(void)
}
}
+static int
+dbuf_kstat_update(kstat_t *ksp, int rw)
+{
+ dbuf_stats_t *st = ksp->ks_data;
+
+ /* The dbuf kstat is read-only: reject any attempt to write it. */
+ if (rw == KSTAT_WRITE)
+ return (SET_ERROR(EACCES));
+
+ /* Refresh the stats that are sampled on read, not counted. */
+ st->hash_elements.value.ui64 = dbuf_hash_count;
+ st->cache_size_bytes.value.ui64 = refcount_count(&dbuf_cache_size);
+ st->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
+ st->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
+ st->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
+ return (0);
+}
void
dbuf_init(void)
@@ -687,6 +810,26 @@ retry:
cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL);
dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
NULL, 0, &p0, TS_RUN, minclsyspri);
+
+ dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc",
+ KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+ if (dbuf_ksp != NULL) {
+ dbuf_ksp->ks_data = &dbuf_stats;
+ dbuf_ksp->ks_update = dbuf_kstat_update;
+ kstat_install(dbuf_ksp);
+
+ for (i = 0; i < DN_MAX_LEVELS; i++) {
+ snprintf(dbuf_stats.cache_levels[i].name,
+ KSTAT_STRLEN, "cache_level_%d", i);
+ dbuf_stats.cache_levels[i].data_type =
+ KSTAT_DATA_UINT64;
+ snprintf(dbuf_stats.cache_levels_bytes[i].name,
+ KSTAT_STRLEN, "cache_level_%d_bytes", i);
+ dbuf_stats.cache_levels_bytes[i].data_type =
+ KSTAT_DATA_UINT64;
+ }
+ }
}
void
@@ -725,6 +868,11 @@ dbuf_fini(void)
refcount_destroy(&dbuf_cache_size);
multilist_destroy(dbuf_cache);
+
+ if (dbuf_ksp != NULL) {
+ kstat_delete(dbuf_ksp);
+ dbuf_ksp = NULL;
+ }
}
/*
@@ -1268,6 +1416,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
+ DBUF_STAT_BUMP(hash_hits);
} else if (db->db_state == DB_UNCACHED) {
spa_t *spa = dn->dn_objset->os_spa;
boolean_t need_wait = B_FALSE;
@@ -1287,6 +1436,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
+ DBUF_STAT_BUMP(hash_misses);
if (!err && need_wait)
err = zio_wait(zio);
@@ -1305,6 +1455,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
+ DBUF_STAT_BUMP(hash_misses);
/* Skip the wait per the caller's request. */
mutex_enter(&db->db_mtx);
@@ -2231,6 +2382,10 @@ dbuf_destroy(dmu_buf_impl_t *db)
multilist_remove(dbuf_cache, db);
(void) refcount_remove_many(&dbuf_cache_size,
db->db.db_size, db);
+ DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
}
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@@ -2458,6 +2613,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
/* someone else inserted it first */
kmem_cache_free(dbuf_kmem_cache, db);
mutex_exit(&dn->dn_dbufs_mtx);
+ DBUF_STAT_BUMP(hash_insert_race);
return (odb);
}
avl_add(&dn->dn_dbufs, db);
@@ -2847,6 +3003,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
multilist_remove(dbuf_cache, dh->dh_db);
(void) refcount_remove_many(&dbuf_cache_size,
dh->dh_db->db.db_size, dh->dh_db);
+ DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
+ dh->dh_db->db.db_size);
}
(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
DBUF_VERIFY(dh->dh_db);
@@ -3118,6 +3278,12 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
multilist_insert(dbuf_cache, db);
(void) refcount_add_many(&dbuf_cache_size,
db->db.db_size, db);
+ DBUF_STAT_BUMP(cache_levels[db->db_level]);
+ DBUF_STAT_BUMP(cache_count);
+ DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
+ DBUF_STAT_MAX(cache_size_bytes_max,
+ refcount_count(&dbuf_cache_size));
mutex_exit(&db->db_mtx);
dbuf_evict_notify();