author    Serapheim Dimitropoulos <[email protected]>  2019-01-30 09:54:27 -0800
committer Brian Behlendorf <[email protected]>  2019-01-30 09:54:27 -0800
commit    21e7cf5da89f55ce98ec1115726b150e19eefe89 (patch)
tree      0355140529f133643b1cdb9a9f3da417d5a3a2e7
parent    466f55334ac9b353948d4309bbcd44573e9a3024 (diff)
zdb -L should skip leak detection altogether
Currently the point of the -L option in zdb is to disable leak tracing
and the loading of space maps because they are expensive, yet still do
leak detection in terms of space. Unfortunately, there is a scenario
where this is a lie: if we are using zdb -L on a pool where a vdev is
being removed, zdb_claim_removing() will open the metaslab space maps
of that device.

This patch makes it so zdb -L skips leak detection altogether and
ensures that no space maps are loaded.

Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Serapheim Dimitropoulos <[email protected]>
Closes #8335
-rw-r--r--  cmd/zdb/zdb.c   251
-rw-r--r--  man/man8/zdb.8    4
2 files changed, 134 insertions(+), 121 deletions(-)
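
The core of the fix is visible in the hunks below: each leak-detection
entry point gains an early "if (dump_opt['L']) return;" guard, while the
helpers that must never run under -L gain ASSERT(!dump_opt['L']). A
minimal, compilable sketch of that guard pattern, assuming a simplified
per-option table (the function name and printf body are illustrative,
not the actual zdb source):

#include <stdio.h>

/* Simplified stand-in for zdb's global per-option verbosity table. */
static int dump_opt[256];

/*
 * Sketch of the guard pattern: with -L set, return before any space
 * maps would be opened.  (Illustrative; not the real zdb function.)
 */
static void
zdb_claim_removing_sketch(void)
{
	if (dump_opt['L'])
		return;

	/* ...would open the metaslab space maps of the removing vdev... */
	(void) printf("loading space maps for removing vdev\n");
}

int
main(void)
{
	dump_opt['L'] = 1;		/* simulate running "zdb -L" */
	zdb_claim_removing_sketch();	/* prints nothing: guard fires */
	return (0);
}

The design choice is to fail fast at the entry points rather than
threading the flag through every helper, which is why the deeper
helpers in the patch can simply assert that the flag is unset.
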
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index c6707325c..5ef69790d 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -799,12 +799,15 @@ dump_spacemap(objset_t *os, space_map_t *sm)
(void) printf(" smp_alloc = 0x%llx\n",
(longlong_t)sm->sm_phys->smp_alloc);
+ if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
+ return;
+
/*
* Print out the freelist entries in both encoded and decoded form.
*/
uint8_t mapshift = sm->sm_shift;
int64_t alloc = 0;
- uint64_t word;
+ uint64_t word, entry_id = 0;
for (uint64_t offset = 0; offset < space_map_length(sm);
offset += sizeof (word)) {
@@ -812,11 +815,12 @@ dump_spacemap(objset_t *os, space_map_t *sm)
sizeof (word), &word, DMU_READ_PREFETCH));
if (sm_entry_is_debug(word)) {
- (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
- (u_longlong_t)(offset / sizeof (word)),
+ (void) printf("\t [%6llu] %s: txg %llu pass %llu\n",
+ (u_longlong_t)entry_id,
ddata[SM_DEBUG_ACTION_DECODE(word)],
(u_longlong_t)SM_DEBUG_TXG_DECODE(word),
(u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ entry_id++;
continue;
}
@@ -854,7 +858,7 @@ dump_spacemap(objset_t *os, space_map_t *sm)
(void) printf("\t [%6llu] %c range:"
" %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
- (u_longlong_t)(offset / sizeof (word)),
+ (u_longlong_t)entry_id,
entry_type, (u_longlong_t)entry_off,
(u_longlong_t)(entry_off + entry_run),
(u_longlong_t)entry_run,
@@ -864,6 +868,7 @@ dump_spacemap(objset_t *os, space_map_t *sm)
alloc += entry_run;
else
alloc -= entry_run;
+ entry_id++;
}
if ((uint64_t)alloc != space_map_allocated(sm)) {
(void) printf("space_map_object alloc (%lld) INCONSISTENT "
@@ -929,11 +934,8 @@ dump_metaslab(metaslab_t *msp)
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}
- if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
- ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
-
- dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
- }
+ ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
+ dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
}
static void
@@ -3599,6 +3601,9 @@ claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
static void
zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
{
+ if (dump_opt['L'])
+ return;
+
if (spa->spa_vdev_removal == NULL)
return;
@@ -3708,6 +3713,8 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
int error;
int p;
+ ASSERT(!dump_opt['L']);
+
bzero(&ddb, sizeof (ddb));
while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
blkptr_t blk;
@@ -3731,12 +3738,10 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
zcb->zcb_dedup_blocks++;
}
}
- if (!dump_opt['L']) {
- ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
- ddt_enter(ddt);
- VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
- ddt_exit(ddt);
- }
+ ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+ ddt_enter(ddt);
+ VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
+ ddt_exit(ddt);
}
ASSERT(error == ENOENT);
@@ -3840,6 +3845,8 @@ zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
static void
zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
{
+ ASSERT(!dump_opt['L']);
+
vdev_t *rvd = spa->spa_root_vdev;
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
@@ -3936,6 +3943,8 @@ load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
static void
zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
{
+ ASSERT(!dump_opt['L']);
+
vdev_t *rvd = spa->spa_root_vdev;
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
@@ -3982,67 +3991,63 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
zcb->zcb_spa = spa;
- if (!dump_opt['L']) {
- dsl_pool_t *dp = spa->spa_dsl_pool;
- vdev_t *rvd = spa->spa_root_vdev;
+ if (dump_opt['L'])
+ return;
- /*
- * We are going to be changing the meaning of the metaslab's
- * ms_allocatable. Ensure that the allocator doesn't try to
- * use the tree.
- */
- spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
- spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
+ vdev_t *rvd = spa->spa_root_vdev;
- zcb->zcb_vd_obsolete_counts =
- umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
- UMEM_NOFAIL);
+ /*
+ * We are going to be changing the meaning of the metaslab's
+ * ms_allocatable. Ensure that the allocator doesn't try to
+ * use the tree.
+ */
+ spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
+ spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
- /*
- * For leak detection, we overload the ms_allocatable trees
- * to contain allocated segments instead of free segments.
- * As a result, we can't use the normal metaslab_load/unload
- * interfaces.
- */
- zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
- load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
+ zcb->zcb_vd_obsolete_counts =
+ umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
+ UMEM_NOFAIL);
- /*
- * On load_concrete_ms_allocatable_trees() we loaded all the
- * allocated entries from the ms_sm to the ms_allocatable for
- * each metaslab. If the pool has a checkpoint or is in the
- * middle of discarding a checkpoint, some of these blocks
- * may have been freed but their ms_sm may not have been
- * updated because they are referenced by the checkpoint. In
- * order to avoid false-positives during leak-detection, we
- * go through the vdev's checkpoint space map and exclude all
- * its entries from their relevant ms_allocatable.
- *
- * We also aggregate the space held by the checkpoint and add
- * it to zcb_checkpoint_size.
- *
- * Note that at this point we are also verifying that all the
- * entries on the checkpoint_sm are marked as allocated in
- * the ms_sm of their relevant metaslab.
- * [see comment in checkpoint_sm_exclude_entry_cb()]
- */
- zdb_leak_init_exclude_checkpoint(spa, zcb);
+ /*
+ * For leak detection, we overload the ms_allocatable trees
+ * to contain allocated segments instead of free segments.
+ * As a result, we can't use the normal metaslab_load/unload
+ * interfaces.
+ */
+ zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
+ load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
- /* for cleaner progress output */
- (void) fprintf(stderr, "\n");
+ /*
+ * On load_concrete_ms_allocatable_trees() we loaded all the
+ * allocated entries from the ms_sm to the ms_allocatable for
+ * each metaslab. If the pool has a checkpoint or is in the
+ * middle of discarding a checkpoint, some of these blocks
+ * may have been freed but their ms_sm may not have been
+ * updated because they are referenced by the checkpoint. In
+ * order to avoid false-positives during leak-detection, we
+ * go through the vdev's checkpoint space map and exclude all
+ * its entries from their relevant ms_allocatable.
+ *
+ * We also aggregate the space held by the checkpoint and add
+ * it to zcb_checkpoint_size.
+ *
+ * Note that at this point we are also verifying that all the
+ * entries on the checkpoint_sm are marked as allocated in
+ * the ms_sm of their relevant metaslab.
+ * [see comment in checkpoint_sm_exclude_entry_cb()]
+ */
+ zdb_leak_init_exclude_checkpoint(spa, zcb);
+ ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
- if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
- ASSERT(spa_feature_is_enabled(spa,
- SPA_FEATURE_DEVICE_REMOVAL));
- (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
- increment_indirect_mapping_cb, zcb, NULL);
- }
- } else {
- /*
- * If leak tracing is disabled, we still need to consider
- * any checkpointed space in our space verification.
- */
- zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
+ /* for cleaner progress output */
+ (void) fprintf(stderr, "\n");
+
+ if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
+ ASSERT(spa_feature_is_enabled(spa,
+ SPA_FEATURE_DEVICE_REMOVAL));
+ (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
+ increment_indirect_mapping_cb, zcb, NULL);
}
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@@ -4125,51 +4130,54 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
static boolean_t
zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
{
+ if (dump_opt['L'])
+ return (B_FALSE);
+
boolean_t leaks = B_FALSE;
- if (!dump_opt['L']) {
- vdev_t *rvd = spa->spa_root_vdev;
- for (unsigned c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- ASSERTV(metaslab_group_t *mg = vd->vdev_mg);
-
- if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
- leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
- }
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (unsigned c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ ASSERTV(metaslab_group_t *mg = vd->vdev_mg);
- for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- ASSERT3P(mg, ==, msp->ms_group);
+ if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
+ leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
+ }
- /*
- * ms_allocatable has been overloaded
- * to contain allocated segments. Now that
- * we finished traversing all blocks, any
- * block that remains in the ms_allocatable
- * represents an allocated block that we
- * did not claim during the traversal.
- * Claimed blocks would have been removed
- * from the ms_allocatable. For indirect
- * vdevs, space remaining in the tree
- * represents parts of the mapping that are
- * not referenced, which is not a bug.
- */
- if (vd->vdev_ops == &vdev_indirect_ops) {
- range_tree_vacate(msp->ms_allocatable,
- NULL, NULL);
- } else {
- range_tree_vacate(msp->ms_allocatable,
- zdb_leak, vd);
- }
+ for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ ASSERT3P(mg, ==, msp->ms_group);
- if (msp->ms_loaded)
- msp->ms_loaded = B_FALSE;
+ /*
+ * ms_allocatable has been overloaded
+ * to contain allocated segments. Now that
+ * we finished traversing all blocks, any
+ * block that remains in the ms_allocatable
+ * represents an allocated block that we
+ * did not claim during the traversal.
+ * Claimed blocks would have been removed
+ * from the ms_allocatable. For indirect
+ * vdevs, space remaining in the tree
+ * represents parts of the mapping that are
+ * not referenced, which is not a bug.
+ */
+ if (vd->vdev_ops == &vdev_indirect_ops) {
+ range_tree_vacate(msp->ms_allocatable,
+ NULL, NULL);
+ } else {
+ range_tree_vacate(msp->ms_allocatable,
+ zdb_leak, vd);
}
- }
- umem_free(zcb->zcb_vd_obsolete_counts,
- rvd->vdev_children * sizeof (uint32_t *));
- zcb->zcb_vd_obsolete_counts = NULL;
+ if (msp->ms_loaded) {
+ msp->ms_loaded = B_FALSE;
+ }
+ }
}
+
+ umem_free(zcb->zcb_vd_obsolete_counts,
+ rvd->vdev_children * sizeof (uint32_t *));
+ zcb->zcb_vd_obsolete_counts = NULL;
+
return (leaks);
}
@@ -4210,12 +4218,16 @@ dump_block_stats(spa_t *spa)
!dump_opt['L'] ? "nothing leaked " : "");
/*
- * Load all space maps as SM_ALLOC maps, then traverse the pool
- * claiming each block we discover. If the pool is perfectly
- * consistent, the space maps will be empty when we're done.
- * Anything left over is a leak; any block we can't claim (because
- * it's not part of any space map) is a double allocation,
- * reference to a freed block, or an unclaimed log block.
+ * When leak detection is enabled we load all space maps as SM_ALLOC
+ * maps, then traverse the pool claiming each block we discover. If
+ * the pool is perfectly consistent, the segment trees will be empty
+ * when we're done. Anything left over is a leak; any block we can't
+ * claim (because it's not part of any space map) is a double
+ * allocation, reference to a freed block, or an unclaimed log block.
+ *
+ * When leak detection is disabled (-L option) we still traverse the
+ * pool claiming each block we discover, but we skip opening any space
+ * maps.
*/
bzero(&zcb, sizeof (zdb_cb_t));
zdb_leak_init(spa, &zcb);
@@ -4296,11 +4308,10 @@ dump_block_stats(spa_t *spa)
total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
- if (total_found == total_alloc) {
- if (!dump_opt['L'])
- (void) printf("\n\tNo leaks (block sum matches space"
- " maps exactly)\n");
- } else {
+ if (total_found == total_alloc && !dump_opt['L']) {
+ (void) printf("\n\tNo leaks (block sum matches space"
+ " maps exactly)\n");
+ } else if (!dump_opt['L']) {
(void) printf("block traversal size %llu != alloc %llu "
"(%s %lld)\n",
(u_longlong_t)total_found,
@@ -5022,6 +5033,8 @@ verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
static void
verify_checkpoint_blocks(spa_t *spa)
{
+ ASSERT(!dump_opt['L']);
+
spa_t *checkpoint_spa;
char *checkpoint_pool;
nvlist_t *config = NULL;
diff --git a/man/man8/zdb.8 b/man/man8/zdb.8
index 79d6f8af7..141a7e853 100644
--- a/man/man8/zdb.8
+++ b/man/man8/zdb.8
@@ -10,7 +10,7 @@
.\"
.\"
.\" Copyright 2012, Richard Lowe.
-.\" Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
@@ -194,7 +194,7 @@ If the
option is also specified, also display the uberblocks on this device. Specify
multiple times to increase verbosity.
.It Fl L
-Disable leak tracing and the loading of space maps.
+Disable leak detection and the loading of space maps.
By default,
.Nm
verifies that all non-free blocks are referenced, which can be very expensive.
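
With this change in place, an invocation such as zdb -L tank (the pool
name is illustrative) still traverses the pool claiming each block it
discovers, but opens no space maps and skips the leak report entirely.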