aboutsummaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2020-07-14 17:51:05 -0700
committerGitHub <[email protected]>2020-07-14 17:51:05 -0700
commit6774931dfa9e90a88d77d54108eccc26cc2af893 (patch)
tree12c26da757773da0e5377ba0a1af5af0369fe677 /cmd
parent38e2e9ce8327284b2d516874308300aa96102fe9 (diff)
Extend zdb to print inconsistencies in livelists and metaslabs
Livelists and spacemaps are data structures that are logs of allocations and frees. Livelists entries are block pointers (blkptr_t). Spacemaps entries are ranges of numbers, most often used as to track allocated/freed regions of metaslabs/vdevs. These data structures can become self-inconsistent, for example if a block or range can be "double allocated" (two allocation records without an intervening free) or "double freed" (two free records without an intervening allocation). ZDB (as well as zfs running in the kernel) can detect these inconsistencies when loading livelists and metaslab. However, it generally halts processing when the error is detected. When analyzing an on-disk problem, we often want to know the entire set of inconsistencies, which is not possible with the current behavior. This commit adds a new flag, `zdb -y`, which analyzes the livelist and metaslab data structures and displays all of their inconsistencies. Note that this is different from the leak detection performed by `zdb -b`, which checks for inconsistencies between the spacemaps and the tree of block pointers, but assumes the spacemaps are self-consistent. The specific checks added are: Verify livelists by iterating through each sublivelists and: - report leftover FREEs - report double ALLOCs and double FREEs - record leftover ALLOCs together with their TXG [see Cross Check] Verify spacemaps by iterating over each metaslab and: - iterate over spacemap and then the metaslab's entries in the spacemap log, then report any double FREEs and double ALLOCs Verify that livelists are consistenet with spacemaps. The space referenced by livelists (after using the FREE's to cancel out corresponding ALLOCs) should be allocated, according to the spacemaps. Reviewed-by: Serapheim Dimitropoulos <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Co-authored-by: Sara Hartse <[email protected]> Signed-off-by: Matthew Ahrens <[email protected]> External-issue: DLPX-66031 Closes #10515
Diffstat (limited to 'cmd')
-rw-r--r--cmd/zdb/zdb.c654
-rw-r--r--cmd/ztest/ztest.c2
2 files changed, 599 insertions, 57 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index a329e4a83..59b17132f 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -69,6 +69,7 @@
#include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
+#include <sys/btree.h>
#include <zfs_comutil.h>
#include <libnvpair.h>
@@ -151,6 +152,571 @@ static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
boolean_t);
static void mos_obj_refd(uint64_t);
static void mos_obj_refd_multiple(uint64_t);
+static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
+ dmu_tx_t *tx);
+
+typedef struct sublivelist_verify {
+ /* all ALLOC'd blkptr_t in one sub-livelist */
+ zfs_btree_t sv_all_allocs;
+
+ /* all FREE'd blkptr_t in one sub-livelist */
+ zfs_btree_t sv_all_frees;
+
+ /* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
+ zfs_btree_t sv_pair;
+
+ /* ALLOC's without a matching FREE, accumulates across sub-livelists */
+ zfs_btree_t sv_leftover;
+} sublivelist_verify_t;
+
+static int
+livelist_compare(const void *larg, const void *rarg)
+{
+ const blkptr_t *l = larg;
+ const blkptr_t *r = rarg;
+
+ /* Sort them according to dva[0] */
+ uint64_t l_dva0_vdev, r_dva0_vdev;
+ l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
+ r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
+ if (l_dva0_vdev < r_dva0_vdev)
+ return (-1);
+ else if (l_dva0_vdev > r_dva0_vdev)
+ return (+1);
+
+ /* if vdevs are equal, sort by offsets. */
+ uint64_t l_dva0_offset;
+ uint64_t r_dva0_offset;
+ l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
+ r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
+ if (l_dva0_offset < r_dva0_offset) {
+ return (-1);
+ } else if (l_dva0_offset > r_dva0_offset) {
+ return (+1);
+ }
+
+ /*
+ * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
+ * it's possible the offsets are equal. In that case, sort by txg
+ */
+ if (l->blk_birth < r->blk_birth) {
+ return (-1);
+ } else if (l->blk_birth > r->blk_birth) {
+ return (+1);
+ }
+ return (0);
+}
+
+typedef struct sublivelist_verify_block {
+ dva_t svb_dva;
+
+ /*
+ * We need this to check if the block marked as allocated
+ * in the livelist was freed (and potentially reallocated)
+ * in the metaslab spacemaps at a later TXG.
+ */
+ uint64_t svb_allocated_txg;
+} sublivelist_verify_block_t;
+
+static void zdb_print_blkptr(const blkptr_t *bp, int flags);
+
+static int
+sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
+ dmu_tx_t *tx)
+{
+ ASSERT3P(tx, ==, NULL);
+ struct sublivelist_verify *sv = arg;
+ char blkbuf[BP_SPRINTF_LEN];
+ zfs_btree_index_t where;
+ if (free) {
+ zfs_btree_add(&sv->sv_pair, bp);
+ /* Check if the FREE is a duplicate */
+ if (zfs_btree_find(&sv->sv_all_frees, bp, &where) != NULL) {
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp,
+ free);
+ (void) printf("\tERROR: Duplicate FREE: %s\n", blkbuf);
+ } else {
+ zfs_btree_add_idx(&sv->sv_all_frees, bp, &where);
+ }
+ } else {
+ /* Check if the ALLOC has been freed */
+ if (zfs_btree_find(&sv->sv_pair, bp, &where) != NULL) {
+ zfs_btree_remove_idx(&sv->sv_pair, &where);
+ } else {
+ for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
+ if (DVA_IS_EMPTY(&bp->blk_dva[i]))
+ break;
+ sublivelist_verify_block_t svb = {
+ .svb_dva = bp->blk_dva[i],
+ .svb_allocated_txg = bp->blk_birth
+ };
+
+ if (zfs_btree_find(&sv->sv_leftover, &svb,
+ &where) == NULL) {
+ zfs_btree_add_idx(&sv->sv_leftover,
+ &svb, &where);
+ }
+ }
+ }
+ /* Check if the ALLOC is a duplicate */
+ if (zfs_btree_find(&sv->sv_all_allocs, bp, &where) != NULL) {
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp,
+ free);
+ (void) printf("\tERROR: Duplicate ALLOC: %s\n", blkbuf);
+ } else {
+ zfs_btree_add_idx(&sv->sv_all_allocs, bp, &where);
+ }
+ }
+ return (0);
+}
+
+static int
+sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
+{
+ int err;
+ char blkbuf[BP_SPRINTF_LEN];
+ struct sublivelist_verify *sv = args;
+
+ zfs_btree_create(&sv->sv_all_allocs, livelist_compare,
+ sizeof (blkptr_t));
+
+ zfs_btree_create(&sv->sv_all_frees, livelist_compare,
+ sizeof (blkptr_t));
+
+ zfs_btree_create(&sv->sv_pair, livelist_compare,
+ sizeof (blkptr_t));
+
+ err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
+ sv, NULL);
+
+ zfs_btree_clear(&sv->sv_all_allocs);
+ zfs_btree_destroy(&sv->sv_all_allocs);
+
+ zfs_btree_clear(&sv->sv_all_frees);
+ zfs_btree_destroy(&sv->sv_all_frees);
+
+ blkptr_t *e;
+ zfs_btree_index_t *cookie = NULL;
+ while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), e, B_TRUE);
+ (void) printf("\tERROR: Unmatched FREE: %s\n", blkbuf);
+ }
+ zfs_btree_destroy(&sv->sv_pair);
+
+ return (err);
+}
+
+static int
+livelist_block_compare(const void *larg, const void *rarg)
+{
+ const sublivelist_verify_block_t *l = larg;
+ const sublivelist_verify_block_t *r = rarg;
+
+ if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
+ return (-1);
+ else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
+ return (+1);
+
+ if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
+ return (-1);
+ else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
+ return (+1);
+
+ if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
+ return (-1);
+ else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
+ return (+1);
+
+ return (0);
+}
+
+/*
+ * Check for errors in a livelist while tracking all unfreed ALLOCs in the
+ * sublivelist_verify_t: sv->sv_leftover
+ */
+static void
+livelist_verify(dsl_deadlist_t *dl, void *arg)
+{
+ sublivelist_verify_t *sv = arg;
+ dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
+}
+
+/*
+ * Check for errors in the livelist entry and discard the intermediary
+ * data structures
+ */
+/* ARGSUSED */
+static int
+sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
+{
+ sublivelist_verify_t sv;
+ zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
+ sizeof (sublivelist_verify_block_t));
+ int err = sublivelist_verify_func(&sv, dle);
+ zfs_btree_clear(&sv.sv_leftover);
+ zfs_btree_destroy(&sv.sv_leftover);
+ return (err);
+}
+
+typedef struct metaslab_verify {
+ /*
+ * Tree containing all the leftover ALLOCs from the livelists
+ * that are part of this metaslab.
+ */
+ zfs_btree_t mv_livelist_allocs;
+
+ /*
+ * Metaslab information.
+ */
+ uint64_t mv_vdid;
+ uint64_t mv_msid;
+ uint64_t mv_start;
+ uint64_t mv_end;
+
+ /*
+ * What's currently allocated for this metaslab.
+ */
+ range_tree_t *mv_allocated;
+} metaslab_verify_t;
+
+typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);
+
+typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
+ void *arg);
+
+typedef struct unflushed_iter_cb_arg {
+ spa_t *uic_spa;
+ uint64_t uic_txg;
+ void *uic_arg;
+ zdb_log_sm_cb_t uic_cb;
+} unflushed_iter_cb_arg_t;
+
+static int
+iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
+{
+ unflushed_iter_cb_arg_t *uic = arg;
+ return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
+}
+
+static void
+iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
+{
+ if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
+ return;
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+ for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+ sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+ space_map_t *sm = NULL;
+ VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
+ sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
+
+ unflushed_iter_cb_arg_t uic = {
+ .uic_spa = spa,
+ .uic_txg = sls->sls_txg,
+ .uic_arg = arg,
+ .uic_cb = cb
+ };
+ VERIFY0(space_map_iterate(sm, space_map_length(sm),
+ iterate_through_spacemap_logs_cb, &uic));
+ space_map_close(sm);
+ }
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+}
+
+static void
+verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
+ uint64_t offset, uint64_t size)
+{
+ sublivelist_verify_block_t svb;
+ DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
+ DVA_SET_OFFSET(&svb.svb_dva, offset);
+ DVA_SET_ASIZE(&svb.svb_dva, size);
+ zfs_btree_index_t where;
+ uint64_t end_offset = offset + size;
+
+ /*
+ * Look for an exact match for spacemap entry in the livelist entries.
+ * Then, look for other livelist entries that fall within the range
+ * of the spacemap entry as it may have been condensed
+ */
+ sublivelist_verify_block_t *found =
+ zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
+ if (found == NULL) {
+ found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
+ }
+ for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
+ DVA_GET_OFFSET(&found->svb_dva) < end_offset;
+ found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
+ if (found->svb_allocated_txg <= txg) {
+ (void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
+ "from TXG %llx FREED at TXG %llx\n",
+ (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
+ (u_longlong_t)found->svb_allocated_txg,
+ (u_longlong_t)txg);
+ }
+ }
+}
+
+static int
+metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
+{
+ metaslab_verify_t *mv = arg;
+ uint64_t offset = sme->sme_offset;
+ uint64_t size = sme->sme_run;
+ uint64_t txg = sme->sme_txg;
+
+ if (sme->sme_type == SM_ALLOC) {
+ if (range_tree_contains(mv->mv_allocated,
+ offset, size)) {
+ (void) printf("ERROR: DOUBLE ALLOC: "
+ "%llu [%llx:%llx] "
+ "%llu:%llu LOG_SM\n",
+ (u_longlong_t)txg, (u_longlong_t)offset,
+ (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
+ (u_longlong_t)mv->mv_msid);
+ } else {
+ range_tree_add(mv->mv_allocated,
+ offset, size);
+ }
+ } else {
+ if (!range_tree_contains(mv->mv_allocated,
+ offset, size)) {
+ (void) printf("ERROR: DOUBLE FREE: "
+ "%llu [%llx:%llx] "
+ "%llu:%llu LOG_SM\n",
+ (u_longlong_t)txg, (u_longlong_t)offset,
+ (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
+ (u_longlong_t)mv->mv_msid);
+ } else {
+ range_tree_remove(mv->mv_allocated,
+ offset, size);
+ }
+ }
+
+ if (sme->sme_type != SM_ALLOC) {
+ /*
+ * If something is freed in the spacemap, verify that
+ * it is not listed as allocated in the livelist.
+ */
+ verify_livelist_allocs(mv, txg, offset, size);
+ }
+ return (0);
+}
+
+static int
+spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
+ uint64_t txg, void *arg)
+{
+ metaslab_verify_t *mv = arg;
+ uint64_t offset = sme->sme_offset;
+ uint64_t vdev_id = sme->sme_vdev;
+
+ vdev_t *vd = vdev_lookup_top(spa, vdev_id);
+
+ /* skip indirect vdevs */
+ if (!vdev_is_concrete(vd))
+ return (0);
+
+ if (vdev_id != mv->mv_vdid)
+ return (0);
+
+ metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+ if (ms->ms_id != mv->mv_msid)
+ return (0);
+
+ if (txg < metaslab_unflushed_txg(ms))
+ return (0);
+
+
+ ASSERT3U(txg, ==, sme->sme_txg);
+ return (metaslab_spacemap_validation_cb(sme, mv));
+}
+
+static void
+spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
+{
+ iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
+}
+
+static void
+spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
+{
+ if (sm == NULL)
+ return;
+
+ VERIFY0(space_map_iterate(sm, space_map_length(sm),
+ metaslab_spacemap_validation_cb, mv));
+}
+
+static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);
+
+/*
+ * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
+ * they are part of that metaslab (mv_msid).
+ */
+static void
+mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
+{
+ zfs_btree_index_t where;
+ sublivelist_verify_block_t *svb;
+ ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
+ for (svb = zfs_btree_first(&sv->sv_leftover, &where);
+ svb != NULL;
+ svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
+ if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
+ continue;
+
+ if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
+ (DVA_GET_OFFSET(&svb->svb_dva) +
+ DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
+ (void) printf("ERROR: Found block that crosses "
+ "metaslab boundary: <%llu:%llx:%llx>\n",
+ (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
+ continue;
+ }
+
+ if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
+ continue;
+
+ if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
+ continue;
+
+ if ((DVA_GET_OFFSET(&svb->svb_dva) +
+ DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
+ (void) printf("ERROR: Found block that crosses "
+ "metaslab boundary: <%llu:%llx:%llx>\n",
+ (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
+ continue;
+ }
+
+ zfs_btree_add(&mv->mv_livelist_allocs, svb);
+ }
+
+ for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
+ svb != NULL;
+ svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
+ zfs_btree_remove(&sv->sv_leftover, svb);
+ }
+}
+
+/*
+ * [Livelist Check]
+ * Iterate through all the sublivelists and:
+ * - report leftover frees
+ * - report double ALLOCs/FREEs
+ * - record leftover ALLOCs together with their TXG [see Cross Check]
+ *
+ * [Spacemap Check]
+ * for each metaslab:
+ * - iterate over spacemap and then the metaslab's entries in the
+ * spacemap log, then report any double FREEs and ALLOCs (do not
+ * blow up).
+ *
+ * [Cross Check]
+ * After finishing the Livelist Check phase and while being in the
+ * Spacemap Check phase, we find all the recorded leftover ALLOCs
+ * of the livelist check that are part of the metaslab that we are
+ * currently looking at in the Spacemap Check. We report any entries
+ * that are marked as ALLOCs in the livelists but have been actually
+ * freed (and potentially allocated again) after their TXG stamp in
+ * the spacemaps. Also report any ALLOCs from the livelists that
+ * belong to indirect vdevs (e.g. their vdev completed removal).
+ *
+ * Note that this will miss Log Spacemap entries that cancelled each other
+ * out before being flushed to the metaslab, so we are not guaranteed
+ * to match all erroneous ALLOCs.
+ */
+static void
+livelist_metaslab_validate(spa_t *spa)
+{
+ (void) printf("Verifying deleted livelist entries\n");
+
+ sublivelist_verify_t sv;
+ zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
+ sizeof (sublivelist_verify_block_t));
+ iterate_deleted_livelists(spa, livelist_verify, &sv);
+
+ (void) printf("Verifying metaslab entries\n");
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+
+ if (!vdev_is_concrete(vd))
+ continue;
+
+ for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
+ metaslab_t *m = vd->vdev_ms[mid];
+
+ (void) fprintf(stderr,
+ "\rverifying concrete vdev %llu, "
+ "metaslab %llu of %llu ...",
+ (longlong_t)vd->vdev_id,
+ (longlong_t)mid,
+ (longlong_t)vd->vdev_ms_count);
+
+ uint64_t shift, start;
+ range_seg_type_t type =
+ metaslab_calculate_range_tree_type(vd, m,
+ &start, &shift);
+ metaslab_verify_t mv;
+ mv.mv_allocated = range_tree_create(NULL,
+ type, NULL, start, shift);
+ mv.mv_vdid = vd->vdev_id;
+ mv.mv_msid = m->ms_id;
+ mv.mv_start = m->ms_start;
+ mv.mv_end = m->ms_start + m->ms_size;
+ zfs_btree_create(&mv.mv_livelist_allocs,
+ livelist_block_compare,
+ sizeof (sublivelist_verify_block_t));
+
+ mv_populate_livelist_allocs(&mv, &sv);
+
+ spacemap_check_ms_sm(m->ms_sm, &mv);
+ spacemap_check_sm_log(spa, &mv);
+
+ range_tree_vacate(mv.mv_allocated, NULL, NULL);
+ range_tree_destroy(mv.mv_allocated);
+ zfs_btree_clear(&mv.mv_livelist_allocs);
+ zfs_btree_destroy(&mv.mv_livelist_allocs);
+ }
+ }
+ (void) fprintf(stderr, "\n");
+
+ /*
+ * If there are any segments in the leftover tree after we walked
+ * through all the metaslabs in the concrete vdevs then this means
+ * that we have segments in the livelists that belong to indirect
+ * vdevs and are marked as allocated.
+ */
+ if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
+ zfs_btree_destroy(&sv.sv_leftover);
+ return;
+ }
+ (void) printf("ERROR: Found livelist blocks marked as allocated "
+ "for indirect vdevs:\n");
+
+ zfs_btree_index_t *where = NULL;
+ sublivelist_verify_block_t *svb;
+ while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
+ NULL) {
+ int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
+ ASSERT3U(vdev_id, <, rvd->vdev_children);
+ vdev_t *vd = rvd->vdev_child[vdev_id];
+ ASSERT(!vdev_is_concrete(vd));
+ (void) printf("<%d:%llx:%llx> TXG %llx\n",
+ vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
+ (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
+ (u_longlong_t)svb->svb_allocated_txg);
+ }
+ (void) printf("\n");
+ zfs_btree_destroy(&sv.sv_leftover);
+}
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -172,7 +738,7 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] "
+ "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
"[-I <inflight I/Os>]\n"
"\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
@@ -234,7 +800,9 @@ usage(void)
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
(void) fprintf(stderr, " -S simulate dedup to measure effect\n");
(void) fprintf(stderr, " -v verbose (applies to all "
- "others)\n\n");
+ "others)\n");
+ (void) fprintf(stderr, " -y perform livelist and metaslab "
+ "validation on any livelists being deleted\n\n");
(void) fprintf(stderr, " Below options are intended for use "
"with other options:\n");
(void) fprintf(stderr, " -A ignore assertions (-A), enable "
@@ -926,11 +1494,20 @@ dump_spacemap(objset_t *os, space_map_t *sm)
sizeof (word), &word, DMU_READ_PREFETCH));
if (sm_entry_is_debug(word)) {
- (void) printf("\t [%6llu] %s: txg %llu pass %llu\n",
- (u_longlong_t)entry_id,
- ddata[SM_DEBUG_ACTION_DECODE(word)],
- (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
- (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
+ uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
+ if (de_txg == 0) {
+ (void) printf(
+ "\t [%6llu] PADDING\n",
+ (u_longlong_t)entry_id);
+ } else {
+ (void) printf(
+ "\t [%6llu] %s: txg %llu pass %llu\n",
+ (u_longlong_t)entry_id,
+ ddata[SM_DEBUG_ACTION_DECODE(word)],
+ (u_longlong_t)de_txg,
+ (u_longlong_t)de_sync_pass);
+ }
entry_id++;
continue;
}
@@ -2214,6 +2791,11 @@ verify_dd_livelist(objset_t *os)
ASSERT(!dmu_objset_is_snapshot(os));
if (!dsl_deadlist_is_open(&dd->dd_livelist))
return (0);
+
+ /* Iterate through the livelist to check for duplicates */
+ dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight,
+ NULL);
+
dsl_pool_config_enter(dp, FTAG);
dsl_deadlist_space(&dd->dd_livelist, &ll_used,
&ll_comp, &ll_uncomp);
@@ -4652,50 +5234,6 @@ static metaslab_ops_t zdb_metaslab_ops = {
NULL /* alloc */
};
-typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme,
- uint64_t txg, void *arg);
-
-typedef struct unflushed_iter_cb_arg {
- spa_t *uic_spa;
- uint64_t uic_txg;
- void *uic_arg;
- zdb_log_sm_cb_t uic_cb;
-} unflushed_iter_cb_arg_t;
-
-static int
-iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
-{
- unflushed_iter_cb_arg_t *uic = arg;
- return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
-}
-
-static void
-iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
-{
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
-
- spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
- for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
- sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
- space_map_t *sm = NULL;
- VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
- sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
-
- unflushed_iter_cb_arg_t uic = {
- .uic_spa = spa,
- .uic_txg = sls->sls_txg,
- .uic_arg = arg,
- .uic_cb = cb
- };
-
- VERIFY0(space_map_iterate(sm, space_map_length(sm),
- iterate_through_spacemap_logs_cb, &uic));
- space_map_close(sm);
- }
- spa_config_exit(spa, SCL_CONFIG, FTAG);
-}
-
/* ARGSUSED */
static int
load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme,
@@ -5443,8 +5981,6 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
* Iterate over livelists which have been destroyed by the user but
* are still present in the MOS, waiting to be freed
*/
-typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);
-
static void
iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg)
{
@@ -5515,6 +6051,7 @@ dump_livelist_cb(dsl_deadlist_t *ll, void *arg)
ASSERT3P(arg, ==, NULL);
global_feature_count[SPA_FEATURE_LIVELIST]++;
dump_blkptr_list(ll, "Deleted Livelist");
+ dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL);
}
/*
@@ -6780,6 +7317,10 @@ dump_zpool(spa_t *spa)
dsl_pool_t *dp = spa_get_dsl(spa);
int rc = 0;
+ if (dump_opt['y']) {
+ livelist_metaslab_validate(spa);
+ }
+
if (dump_opt['S']) {
dump_simulated_ddt(spa);
return;
@@ -6925,7 +7466,7 @@ static int flagbits[256];
static char flagbitstr[16];
static void
-zdb_print_blkptr(blkptr_t *bp, int flags)
+zdb_print_blkptr(const blkptr_t *bp, int flags)
{
char blkbuf[BP_SPRINTF_LEN];
@@ -7537,7 +8078,7 @@ main(int argc, char **argv)
zfs_btree_verify_intensity = 3;
while ((c = getopt(argc, argv,
- "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XY")) != -1) {
+ "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYy")) != -1) {
switch (c) {
case 'b':
case 'c':
@@ -7556,6 +8097,7 @@ main(int argc, char **argv)
case 's':
case 'S':
case 'u':
+ case 'y':
dump_opt[c]++;
dump_all = 0;
break;
@@ -7698,7 +8240,7 @@ main(int argc, char **argv)
verbose = MAX(verbose, 1);
for (c = 0; c < 256; c++) {
- if (dump_all && strchr("AeEFklLOPRSX", c) == NULL)
+ if (dump_all && strchr("AeEFklLOPRSXy", c) == NULL)
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index ca38271cc..0a3653f7f 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -6469,7 +6469,7 @@ ztest_run_zdb(char *pool)
ztest_get_zdb_bin(bin, len);
(void) sprintf(zdb,
- "%s -bcc%s%s -G -d -Y -e -p %s %s",
+ "%s -bcc%s%s -G -d -Y -e -y -p %s %s",
bin,
ztest_opts.zo_verbose >= 3 ? "s" : "",
ztest_opts.zo_verbose >= 4 ? "v" : "",