aboutsummaryrefslogtreecommitdiffstats
path: root/cmd/zdb
diff options
context:
space:
mode:
authorRob Norris <[email protected]>2023-06-22 17:46:22 +1000
committerBrian Behlendorf <[email protected]>2024-08-16 12:03:35 -0700
commitcd69ba3d49cdb939cba87e7fd6814608532df92f (patch)
tree32d51c27ae62b5145e5b9fd99b00930ff3c22f95 /cmd/zdb
parentcbb9ef0a4c8e04358f7d5ddae0eb99d0f703ee21 (diff)
ddt: dedup log
Adds a log/journal to dedup. At the end of txg, instead of writing the entry directly to the ZAP, it is added to an in-memory tree and appended to an on-disk object. The on-disk object is only read at import, to reload the in-memory tree. Lookups first go to the log tree before going to the ZAP, so recently-used entries will remain close by in memory. This vastly reduces overhead from dedup IO, as it will not have to do so many read/update/write cycles on ZAP leaf nodes. A flushing facility is added at end of txg, to push logged entries out to the ZAP. There are actually two separate "logs" (in-memory tree and on-disk object), one active (receiving updated entries) and one flushing (writing out to disk). These are swapped (i.e. flushing begins) based on memory used by the in-memory log trees and time since we last flushed something. The flushing facility monitors the number of entries coming in and being flushed out, and calibrates itself to try to flush enough each txg to keep up with the ingest rate without competing too much with other IO. Multiple tunables are provided to control the flushing facility. All the histograms and stats are updated to accommodate the log as a separate entry store. zdb gains knowledge of how to count them and dump them. Documentation included! Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Co-authored-by: Allan Jude <[email protected]> Signed-off-by: Rob Norris <[email protected]> Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15895
Diffstat (limited to 'cmd/zdb')
-rw-r--r--cmd/zdb/zdb.c33
1 files changed, 32 insertions, 1 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 250052adf..c72df3909 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -1959,6 +1959,32 @@ dump_dedup_ratio(const ddt_stat_t *dds)
}
+/*
+ * Print a one-line summary for each of the two log trees attached to the
+ * given DDT (ddt->ddt_log[0] and ddt->ddt_log[1]). Logs whose tree holds no
+ * nodes are skipped. When dump_opt['D'] is 4 or higher, every entry in the
+ * log tree is additionally converted to its lightweight form and printed
+ * via dump_ddt_entry(), numbered from 0 within each log.
+ */
static void
+dump_ddt_log(ddt_t *ddt)
+{
+	for (int n = 0; n < 2; n++) {
+		ddt_log_t *ddl = &ddt->ddt_log[n];
+
+		uint64_t count = avl_numnodes(&ddl->ddl_tree);
+		if (count == 0)
+			continue;
+
+		/*
+		 * uint64_t is not 'unsigned long' on every platform, so
+		 * cast explicitly and print with %llu rather than %lu.
+		 */
+		printf(DMU_POOL_DDT_LOG ": %llu log entries\n",
+		    zio_checksum_table[ddt->ddt_checksum].ci_name, n,
+		    (unsigned long long)count);
+
+		/* Per-entry output is only produced at -DDDD and above. */
+		if (dump_opt['D'] < 4)
+			continue;
+
+		ddt_lightweight_entry_t ddlwe;
+		uint64_t index = 0;
+		for (ddt_log_entry_t *ddle = avl_first(&ddl->ddl_tree);
+		    ddle != NULL; ddle = AVL_NEXT(&ddl->ddl_tree, ddle)) {
+			DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &ddlwe);
+			dump_ddt_entry(ddt, &ddlwe, index++);
+		}
+	}
+}
+
+static void
dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
char name[DDT_NAMELEN];
@@ -2027,6 +2053,7 @@ dump_all_ddts(spa_t *spa)
dump_ddt(ddt, type, class);
}
}
+ dump_ddt_log(ddt);
}
ddt_get_dedup_stats(spa, &dds_total);
@@ -5743,7 +5770,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
(void *)(((uintptr_t)dde->dde_io) | (1 << v));
/* Consume a reference for this block. */
- VERIFY3U(ddt_phys_total_refcnt(ddt, dde), >, 0);
+ VERIFY3U(ddt_phys_total_refcnt(ddt, dde->dde_phys), >, 0);
ddt_phys_decref(dde->dde_phys, v);
/*
@@ -8120,6 +8147,10 @@ dump_mos_leaks(spa_t *spa)
/* FDT container */
mos_obj_refd(ddt->ddt_dir_object);
+
+ /* FDT log objects */
+ mos_obj_refd(ddt->ddt_log[0].ddl_object);
+ mos_obj_refd(ddt->ddt_log[1].ddl_object);
}
if (spa->spa_brt != NULL) {