aboutsummaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
authorSerapheim Dimitropoulos <[email protected]>2016-12-16 14:11:29 -0800
committerBrian Behlendorf <[email protected]>2018-06-26 10:07:42 -0700
commitd2734cce68cf740e015312314415f9034c67851c (patch)
treeb7a140a3cf2a19bb7c88f2d277f3b5a33c121cea /cmd
parent88eaf610d9c7056f0946e5090cba1e6288ff2b70 (diff)
OpenZFS 9166 - zfs storage pool checkpoint
Details about the motivation of this feature and its usage can be found in this blogpost: https://sdimitro.github.io/post/zpool-checkpoint/ A lightning talk of this feature can be found here: https://www.youtube.com/watch?v=fPQA8K40jAM Implementation details can be found in big block comment of spa_checkpoint.c Side-changes that are relevant to this commit but not explained elsewhere: * renames members of "struct metaslab trees to be shorter without losing meaning * space_map_{alloc,truncate}() accept a block size as a parameter. The reason is that in the current state all space maps that we allocate through the DMU use a global tunable (space_map_blksz) which defauls to 4KB. This is ok for metaslab space maps in terms of bandwirdth since they are scattered all over the disk. But for other space maps this default is probably not what we want. Examples are device removal's vdev_obsolete_sm or vdev_chedkpoint_sm from this review. Both of these have a 1:1 relationship with each vdev and could benefit from a bigger block size. Porting notes: * The part of dsl_scan_sync() which handles async destroys has been moved into the new dsl_process_async_destroys() function. * Remove "VERIFY(!(flags & FWRITE))" in "kernel.c" so zhack can write to block device backed pools. * ZTS: * Fix get_txg() in zpool_sync_001_pos due to "checkpoint_txg". * Don't use large dd block sizes on /dev/urandom under Linux in checkpoint_capacity. * Adopt Delphix-OS's setting of 4 (spa_asize_inflation = SPA_DVAS_PER_BP + 1) for the checkpoint_capacity test to speed its attempts to fill the pool * Create the base and nested pools with sync=disabled to speed up the "setup" phase. * Clear labels in test pool between checkpoint tests to avoid duplicate pool issues. * The import_rewind_device_replaced test has been marked as "known to fail" for the reasons listed in its DISCLAIMER. * New module parameters: zfs_spa_discard_memory_limit, zfs_remove_max_bytes_pause (not documented - debugging only) vdev_max_ms_count (formerly metaslabs_per_vdev) vdev_min_ms_count Authored by: Serapheim Dimitropoulos <[email protected]> Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: John Kennedy <[email protected]> Reviewed by: Dan Kimmel <[email protected]> Reviewed by: Brian Behlendorf <[email protected]> Approved by: Richard Lowe <[email protected]> Ported-by: Tim Chase <[email protected]> Signed-off-by: Tim Chase <[email protected]> OpenZFS-issue: https://illumos.org/issues/9166 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7159fdb8 Closes #7570
Diffstat (limited to 'cmd')
-rw-r--r--cmd/zdb/zdb.c853
-rw-r--r--cmd/zdb/zdb_il.c12
-rw-r--r--cmd/zpool/zpool_main.c215
-rw-r--r--cmd/ztest/ztest.c113
4 files changed, 1053 insertions, 140 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index faac43c79..d1e77cce7 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -131,7 +131,7 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage:\t%s [-AbcdDFGhiLMPsvX] [-e [-V] [-p <path> ...]] "
+ "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] "
"[-I <inflight I/Os>]\n"
"\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
"\t\t[<poolname> [<object> ...]]\n"
@@ -168,6 +168,8 @@ usage(void)
(void) fprintf(stderr, " -h pool history\n");
(void) fprintf(stderr, " -i intent logs\n");
(void) fprintf(stderr, " -l read label contents\n");
+ (void) fprintf(stderr, " -k examine the checkpointed state "
+ "of the pool\n");
(void) fprintf(stderr, " -L disable leak tracking (do not "
"load spacemaps)\n");
(void) fprintf(stderr, " -m metaslabs\n");
@@ -731,6 +733,22 @@ get_prev_obsolete_spacemap_refcount(spa_t *spa)
}
static int
+get_checkpoint_refcount(vdev_t *vd)
+{
+ int refcount = 0;
+
+ if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
+ zap_contains(spa_meta_objset(vd->vdev_spa),
+ vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
+ refcount++;
+
+ for (uint64_t c = 0; c < vd->vdev_children; c++)
+ refcount += get_checkpoint_refcount(vd->vdev_child[c]);
+
+ return (refcount);
+}
+
+static int
verify_spacemap_refcounts(spa_t *spa)
{
uint64_t expected_refcount = 0;
@@ -743,6 +761,7 @@ verify_spacemap_refcounts(spa_t *spa)
actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
+ actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
if (expected_refcount != actual_refcount) {
(void) printf("space map refcount mismatch: expected %lld != "
@@ -816,8 +835,8 @@ static void
dump_metaslab_stats(metaslab_t *msp)
{
char maxbuf[32];
- range_tree_t *rt = msp->ms_tree;
- avl_tree_t *t = &msp->ms_size_tree;
+ range_tree_t *rt = msp->ms_allocatable;
+ avl_tree_t *t = &msp->ms_allocatable_by_size;
int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
/* max sure nicenum has enough space */
@@ -853,7 +872,7 @@ dump_metaslab(metaslab_t *msp)
metaslab_load_wait(msp);
if (!msp->ms_loaded) {
VERIFY0(metaslab_load(msp));
- range_tree_stat_verify(msp->ms_tree);
+ range_tree_stat_verify(msp->ms_allocatable);
}
dump_metaslab_stats(msp);
metaslab_unload(msp);
@@ -2420,6 +2439,8 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
(void) printf("\trootbp = %s\n", blkbuf);
}
+ (void) printf("\tcheckpoint_txg = %llu\n",
+ (u_longlong_t)ub->ub_checkpoint_txg);
(void) printf("%s", footer ? footer : "");
}
@@ -3129,6 +3150,7 @@ static const char *zdb_ot_extname[] = {
typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_removing_size;
+ uint64_t zcb_checkpoint_size;
uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks;
uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
@@ -3229,7 +3251,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
}
VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
- refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
+ refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
@@ -3388,7 +3410,7 @@ claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
ASSERT(vdev_is_concrete(vd));
VERIFY0(metaslab_claim_impl(vd, offset, size,
- spa_first_txg(vd->vdev_spa)));
+ spa_min_claim_txg(vd->vdev_spa)));
}
static void
@@ -3453,70 +3475,6 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
spa_config_exit(spa, SCL_CONFIG, FTAG);
}
-/*
- * vm_idxp is an in-out parameter which (for indirect vdevs) is the
- * index in vim_entries that has the first entry in this metaslab. On
- * return, it will be set to the first entry after this metaslab.
- */
-static void
-zdb_leak_init_ms(metaslab_t *msp, uint64_t *vim_idxp)
-{
- metaslab_group_t *mg = msp->ms_group;
- vdev_t *vd = mg->mg_vd;
- vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
-
- mutex_enter(&msp->ms_lock);
- metaslab_unload(msp);
-
- /*
- * We don't want to spend the CPU manipulating the size-ordered
- * tree, so clear the range_tree ops.
- */
- msp->ms_tree->rt_ops = NULL;
-
- (void) fprintf(stderr,
- "\rloading vdev %llu of %llu, metaslab %llu of %llu ...",
- (longlong_t)vd->vdev_id,
- (longlong_t)rvd->vdev_children,
- (longlong_t)msp->ms_id,
- (longlong_t)vd->vdev_ms_count);
-
- /*
- * For leak detection, we overload the metaslab ms_tree to
- * contain allocated segments instead of free segments. As a
- * result, we can't use the normal metaslab_load/unload
- * interfaces.
- */
- if (vd->vdev_ops == &vdev_indirect_ops) {
- vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
- for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
- (*vim_idxp)++) {
- vdev_indirect_mapping_entry_phys_t *vimep =
- &vim->vim_entries[*vim_idxp];
- uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
- uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
- ASSERT3U(ent_offset, >=, msp->ms_start);
- if (ent_offset >= msp->ms_start + msp->ms_size)
- break;
-
- /*
- * Mappings do not cross metaslab boundaries,
- * because we create them by walking the metaslabs.
- */
- ASSERT3U(ent_offset + ent_len, <=,
- msp->ms_start + msp->ms_size);
- range_tree_add(msp->ms_tree, ent_offset, ent_len);
- }
- } else if (msp->ms_sm != NULL) {
- VERIFY0(space_map_load(msp->ms_sm, msp->ms_tree, SM_ALLOC));
- }
-
- if (!msp->ms_loaded) {
- msp->ms_loaded = B_TRUE;
- }
- mutex_exit(&msp->ms_lock);
-}
-
/* ARGSUSED */
static int
increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
@@ -3615,11 +3573,246 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
ASSERT(error == ENOENT);
}
+typedef struct checkpoint_sm_exclude_entry_arg {
+ vdev_t *cseea_vd;
+ uint64_t cseea_checkpoint_size;
+} checkpoint_sm_exclude_entry_arg_t;
+
+static int
+checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
+ void *arg)
+{
+ checkpoint_sm_exclude_entry_arg_t *cseea = arg;
+ vdev_t *vd = cseea->cseea_vd;
+ metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+ uint64_t end = offset + size;
+
+ ASSERT(type == SM_FREE);
+
+ /*
+ * Since the vdev_checkpoint_sm exists in the vdev level
+ * and the ms_sm space maps exist in the metaslab level,
+ * an entry in the checkpoint space map could theoretically
+ * cross the boundaries of the metaslab that it belongs.
+ *
+ * In reality, because of the way that we populate and
+ * manipulate the checkpoint's space maps currently,
+ * there shouldn't be any entries that cross metaslabs.
+ * Hence the assertion below.
+ *
+ * That said, there is no fundamental requirement that
+ * the checkpoint's space map entries should not cross
+ * metaslab boundaries. So if needed we could add code
+ * that handles metaslab-crossing segments in the future.
+ */
+ VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
+
+ /*
+ * By removing the entry from the allocated segments we
+ * also verify that the entry is there to begin with.
+ */
+ mutex_enter(&ms->ms_lock);
+ range_tree_remove(ms->ms_allocatable, offset, size);
+ mutex_exit(&ms->ms_lock);
+
+ cseea->cseea_checkpoint_size += size;
+ return (0);
+}
+
+static void
+zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
+{
+ spa_t *spa = vd->vdev_spa;
+ space_map_t *checkpoint_sm = NULL;
+ uint64_t checkpoint_sm_obj;
+
+ /*
+ * If there is no vdev_top_zap, we are in a pool whose
+ * version predates the pool checkpoint feature.
+ */
+ if (vd->vdev_top_zap == 0)
+ return;
+
+ /*
+ * If there is no reference of the vdev_checkpoint_sm in
+ * the vdev_top_zap, then one of the following scenarios
+ * is true:
+ *
+ * 1] There is no checkpoint
+ * 2] There is a checkpoint, but no checkpointed blocks
+ * have been freed yet
+ * 3] The current vdev is indirect
+ *
+ * In these cases we return immediately.
+ */
+ if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
+ VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
+ return;
+
+ VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
+ VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
+ &checkpoint_sm_obj));
+
+ checkpoint_sm_exclude_entry_arg_t cseea;
+ cseea.cseea_vd = vd;
+ cseea.cseea_checkpoint_size = 0;
+
+ VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
+ checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
+ space_map_update(checkpoint_sm);
+
+ VERIFY0(space_map_iterate(checkpoint_sm,
+ checkpoint_sm_exclude_entry_cb, &cseea));
+ space_map_close(checkpoint_sm);
+
+ zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
+}
+
+static void
+zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+ ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
+ zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
+ }
+}
+
+static void
+load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (uint64_t i = 0; i < rvd->vdev_children; i++) {
+ vdev_t *vd = rvd->vdev_child[i];
+
+ ASSERT3U(i, ==, vd->vdev_id);
+
+ if (vd->vdev_ops == &vdev_indirect_ops)
+ continue;
+
+ for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+
+ (void) fprintf(stderr,
+ "\rloading concrete vdev %llu, "
+ "metaslab %llu of %llu ...",
+ (longlong_t)vd->vdev_id,
+ (longlong_t)msp->ms_id,
+ (longlong_t)vd->vdev_ms_count);
+
+ mutex_enter(&msp->ms_lock);
+ metaslab_unload(msp);
+
+ /*
+ * We don't want to spend the CPU manipulating the
+ * size-ordered tree, so clear the range_tree ops.
+ */
+ msp->ms_allocatable->rt_ops = NULL;
+
+ if (msp->ms_sm != NULL) {
+ VERIFY0(space_map_load(msp->ms_sm,
+ msp->ms_allocatable, maptype));
+ }
+ if (!msp->ms_loaded)
+ msp->ms_loaded = B_TRUE;
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+}
+
+/*
+ * vm_idxp is an in-out parameter which (for indirect vdevs) is the
+ * index in vim_entries that has the first entry in this metaslab.
+ * On return, it will be set to the first entry after this metaslab.
+ */
+static void
+load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
+ uint64_t *vim_idxp)
+{
+ vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
+
+ mutex_enter(&msp->ms_lock);
+ metaslab_unload(msp);
+
+ /*
+ * We don't want to spend the CPU manipulating the
+ * size-ordered tree, so clear the range_tree ops.
+ */
+ msp->ms_allocatable->rt_ops = NULL;
+
+ for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
+ (*vim_idxp)++) {
+ vdev_indirect_mapping_entry_phys_t *vimep =
+ &vim->vim_entries[*vim_idxp];
+ uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
+ uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
+ ASSERT3U(ent_offset, >=, msp->ms_start);
+ if (ent_offset >= msp->ms_start + msp->ms_size)
+ break;
+
+ /*
+ * Mappings do not cross metaslab boundaries,
+ * because we create them by walking the metaslabs.
+ */
+ ASSERT3U(ent_offset + ent_len, <=,
+ msp->ms_start + msp->ms_size);
+ range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
+ }
+
+ if (!msp->ms_loaded)
+ msp->ms_loaded = B_TRUE;
+ mutex_exit(&msp->ms_lock);
+}
+
+static void
+zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (uint64_t c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+
+ ASSERT3U(c, ==, vd->vdev_id);
+
+ if (vd->vdev_ops != &vdev_indirect_ops)
+ continue;
+
+ /*
+ * Note: we don't check for mapping leaks on
+ * removing vdevs because their ms_allocatable's
+ * are used to look for leaks in allocated space.
+ */
+ zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
+
+ /*
+ * Normally, indirect vdevs don't have any
+ * metaslabs. We want to set them up for
+ * zio_claim().
+ */
+ VERIFY0(vdev_metaslab_init(vd, 0));
+
+ vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
+ uint64_t vim_idx = 0;
+ for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+
+ (void) fprintf(stderr,
+ "\rloading indirect vdev %llu, "
+ "metaslab %llu of %llu ...",
+ (longlong_t)vd->vdev_id,
+ (longlong_t)vd->vdev_ms[m]->ms_id,
+ (longlong_t)vd->vdev_ms_count);
+
+ load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
+ &vim_idx);
+ }
+ ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
+ }
+}
+
static void
zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
zcb->zcb_spa = spa;
- uint64_t c;
if (!dump_opt['L']) {
dsl_pool_t *dp = spa->spa_dsl_pool;
@@ -3627,7 +3820,7 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
/*
* We are going to be changing the meaning of the metaslab's
- * ms_tree. Ensure that the allocator doesn't try to
+ * ms_allocatable. Ensure that the allocator doesn't try to
* use the tree.
*/
spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
@@ -3637,38 +3830,37 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
UMEM_NOFAIL);
- for (c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- uint64_t vim_idx = 0;
-
- ASSERT3U(c, ==, vd->vdev_id);
-
- /*
- * Note: we don't check for mapping leaks on
- * removing vdevs because their ms_tree's are
- * used to look for leaks in allocated space.
- */
- if (vd->vdev_ops == &vdev_indirect_ops) {
- zcb->zcb_vd_obsolete_counts[c] =
- zdb_load_obsolete_counts(vd);
+ /*
+ * For leak detection, we overload the ms_allocatable trees
+ * to contain allocated segments instead of free segments.
+ * As a result, we can't use the normal metaslab_load/unload
+ * interfaces.
+ */
+ zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
+ load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
- /*
- * Normally, indirect vdevs don't have any
- * metaslabs. We want to set them up for
- * zio_claim().
- */
- VERIFY0(vdev_metaslab_init(vd, 0));
- }
+ /*
+ * On load_concrete_ms_allocatable_trees() we loaded all the
+ * allocated entries from the ms_sm to the ms_allocatable for
+ * each metaslab. If the pool has a checkpoint or is in the
+ * middle of discarding a checkpoint, some of these blocks
+ * may have been freed but their ms_sm may not have been
+ * updated because they are referenced by the checkpoint. In
+ * order to avoid false-positives during leak-detection, we
+ * go through the vdev's checkpoint space map and exclude all
+ * its entries from their relevant ms_allocatable.
+ *
+ * We also aggregate the space held by the checkpoint and add
+ * it to zcb_checkpoint_size.
+ *
+ * Note that at this point we are also verifying that all the
+ * entries on the checkpoint_sm are marked as allocated in
+ * the ms_sm of their relevant metaslab.
+ * [see comment in checkpoint_sm_exclude_entry_cb()]
+ */
+ zdb_leak_init_exclude_checkpoint(spa, zcb);
- for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
- zdb_leak_init_ms(vd->vdev_ms[m], &vim_idx);
- }
- if (vd->vdev_ops == &vdev_indirect_ops) {
- ASSERT3U(vim_idx, ==,
- vdev_indirect_mapping_num_entries(
- vd->vdev_indirect_mapping));
- }
- }
+ /* for cleaner progress output */
(void) fprintf(stderr, "\n");
if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
@@ -3677,12 +3869,16 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
increment_indirect_mapping_cb, zcb, NULL);
}
+ } else {
+ /*
+ * If leak tracing is disabled, we still need to consider
+ * any checkpointed space in our space verification.
+ */
+ zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
}
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
-
zdb_ddt_leak_init(spa, zcb);
-
spa_config_exit(spa, SCL_CONFIG, FTAG);
}
@@ -3709,7 +3905,7 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
for (uint64_t inner_offset = 0;
inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
inner_offset += 1 << vd->vdev_ashift) {
- if (range_tree_contains(msp->ms_tree,
+ if (range_tree_contains(msp->ms_allocatable,
offset + inner_offset, 1 << vd->vdev_ashift)) {
obsolete_bytes += 1 << vd->vdev_ashift;
}
@@ -3775,23 +3971,23 @@ zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
ASSERT3P(mg, ==, msp->ms_group);
/*
- * The ms_tree has been overloaded to
- * contain allocated segments. Now that we
- * finished traversing all blocks, any
- * block that remains in the ms_tree
+ * ms_allocatable has been overloaded
+ * to contain allocated segments. Now that
+ * we finished traversing all blocks, any
+ * block that remains in the ms_allocatable
* represents an allocated block that we
* did not claim during the traversal.
* Claimed blocks would have been removed
- * from the ms_tree. For indirect vdevs,
- * space remaining in the tree represents
- * parts of the mapping that are not
- * referenced, which is not a bug.
+ * from the ms_allocatable. For indirect
+ * vdevs, space remaining in the tree
+ * represents parts of the mapping that are
+ * not referenced, which is not a bug.
*/
if (vd->vdev_ops == &vdev_indirect_ops) {
- range_tree_vacate(msp->ms_tree,
+ range_tree_vacate(msp->ms_allocatable,
NULL, NULL);
} else {
- range_tree_vacate(msp->ms_tree,
+ range_tree_vacate(msp->ms_allocatable,
zdb_leak, vd);
}
@@ -3923,7 +4119,7 @@ dump_block_stats(spa_t *spa)
total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
- zcb.zcb_removing_size;
+ zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
if (total_found == total_alloc) {
if (!dump_opt['L'])
@@ -4332,6 +4528,390 @@ verify_device_removal_feature_counts(spa_t *spa)
return (ret);
}
+#define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
+/*
+ * Import the checkpointed state of the pool specified by the target
+ * parameter as readonly. The function also accepts a pool config
+ * as an optional parameter, else it attempts to infer the config by
+ * the name of the target pool.
+ *
+ * Note that the checkpointed state's pool name will be the name of
+ * the original pool with the above suffix appened to it. In addition,
+ * if the target is not a pool name (e.g. a path to a dataset) then
+ * the new_path parameter is populated with the updated path to
+ * reflect the fact that we are looking into the checkpointed state.
+ *
+ * The function returns a newly-allocated copy of the name of the
+ * pool containing the checkpointed state. When this copy is no
+ * longer needed it should be freed with free(3C). Same thing
+ * applies to the new_path parameter if allocated.
+ */
+static char *
+import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
+{
+ int error = 0;
+ char *poolname, *bogus_name = NULL;
+
+ /* If the target is not a pool, the extract the pool name */
+ char *path_start = strchr(target, '/');
+ if (path_start != NULL) {
+ size_t poolname_len = path_start - target;
+ poolname = strndup(target, poolname_len);
+ } else {
+ poolname = target;
+ }
+
+ if (cfg == NULL) {
+ error = spa_get_stats(poolname, &cfg, NULL, 0);
+ if (error != 0) {
+ fatal("Tried to read config of pool \"%s\" but "
+ "spa_get_stats() failed with error %d\n",
+ poolname, error);
+ }
+ }
+
+ if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1)
+ return (NULL);
+ fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
+
+ error = spa_import(bogus_name, cfg, NULL,
+ ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT);
+ if (error != 0) {
+ fatal("Tried to import pool \"%s\" but spa_import() failed "
+ "with error %d\n", bogus_name, error);
+ }
+
+ if (new_path != NULL && path_start != NULL) {
+ if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) {
+ if (path_start != NULL)
+ free(poolname);
+ return (NULL);
+ }
+ }
+
+ if (target != poolname)
+ free(poolname);
+
+ return (bogus_name);
+}
+
+typedef struct verify_checkpoint_sm_entry_cb_arg {
+ vdev_t *vcsec_vd;
+
+ /* the following fields are only used for printing progress */
+ uint64_t vcsec_entryid;
+ uint64_t vcsec_num_entries;
+} verify_checkpoint_sm_entry_cb_arg_t;
+
+#define ENTRIES_PER_PROGRESS_UPDATE 10000
+
+static int
+verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
+ void *arg)
+{
+ verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
+ vdev_t *vd = vcsec->vcsec_vd;
+ metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+ uint64_t end = offset + size;
+
+ ASSERT(type == SM_FREE);
+
+ if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
+ (void) fprintf(stderr,
+ "\rverifying vdev %llu, space map entry %llu of %llu ...",
+ (longlong_t)vd->vdev_id,
+ (longlong_t)vcsec->vcsec_entryid,
+ (longlong_t)vcsec->vcsec_num_entries);
+ }
+ vcsec->vcsec_entryid++;
+
+ /*
+ * See comment in checkpoint_sm_exclude_entry_cb()
+ */
+ VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
+
+ /*
+ * The entries in the vdev_checkpoint_sm should be marked as
+ * allocated in the checkpointed state of the pool, therefore
+ * their respective ms_allocateable trees should not contain them.
+ */
+ mutex_enter(&ms->ms_lock);
+ range_tree_verify(ms->ms_allocatable, offset, size);
+ mutex_exit(&ms->ms_lock);
+
+ return (0);
+}
+
+/*
+ * Verify that all segments in the vdev_checkpoint_sm are allocated
+ * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
+ * ms_allocatable).
+ *
+ * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
+ * each vdev in the current state of the pool to the metaslab space maps
+ * (ms_sm) of the checkpointed state of the pool.
+ *
+ * Note that the function changes the state of the ms_allocatable
+ * trees of the current spa_t. The entries of these ms_allocatable
+ * trees are cleared out and then repopulated from with the free
+ * entries of their respective ms_sm space maps.
+ */
+static void
+verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
+{
+ vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
+ vdev_t *current_rvd = current->spa_root_vdev;
+
+ load_concrete_ms_allocatable_trees(checkpoint, SM_FREE);
+
+ for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) {
+ vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c];
+ vdev_t *current_vd = current_rvd->vdev_child[c];
+
+ space_map_t *checkpoint_sm = NULL;
+ uint64_t checkpoint_sm_obj;
+
+ if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
+ /*
+ * Since we don't allow device removal in a pool
+ * that has a checkpoint, we expect that all removed
+ * vdevs were removed from the pool before the
+ * checkpoint.
+ */
+ ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
+ continue;
+ }
+
+ /*
+ * If the checkpoint space map doesn't exist, then nothing
+ * here is checkpointed so there's nothing to verify.
+ */
+ if (current_vd->vdev_top_zap == 0 ||
+ zap_contains(spa_meta_objset(current),
+ current_vd->vdev_top_zap,
+ VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
+ continue;
+
+ VERIFY0(zap_lookup(spa_meta_objset(current),
+ current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
+ sizeof (uint64_t), 1, &checkpoint_sm_obj));
+
+ VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
+ checkpoint_sm_obj, 0, current_vd->vdev_asize,
+ current_vd->vdev_ashift));
+ space_map_update(checkpoint_sm);
+
+ verify_checkpoint_sm_entry_cb_arg_t vcsec;
+ vcsec.vcsec_vd = ckpoint_vd;
+ vcsec.vcsec_entryid = 0;
+ vcsec.vcsec_num_entries =
+ space_map_length(checkpoint_sm) / sizeof (uint64_t);
+ VERIFY0(space_map_iterate(checkpoint_sm,
+ verify_checkpoint_sm_entry_cb, &vcsec));
+ dump_spacemap(current->spa_meta_objset, checkpoint_sm);
+ space_map_close(checkpoint_sm);
+ }
+
+ /*
+ * If we've added vdevs since we took the checkpoint, ensure
+ * that their checkpoint space maps are empty.
+ */
+ if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
+ for (uint64_t c = ckpoint_rvd->vdev_children;
+ c < current_rvd->vdev_children; c++) {
+ vdev_t *current_vd = current_rvd->vdev_child[c];
+ ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL);
+ }
+ }
+
+ /* for cleaner progress output */
+ (void) fprintf(stderr, "\n");
+}
+
+/*
+ * Verifies that all space that's allocated in the checkpoint is
+ * still allocated in the current version, by checking that everything
+ * in checkpoint's ms_allocatable (which is actually allocated, not
+ * allocatable/free) is not present in current's ms_allocatable.
+ *
+ * Note that the function changes the state of the ms_allocatable
+ * trees of both spas when called. The entries of all ms_allocatable
+ * trees are cleared out and then repopulated from their respective
+ * ms_sm space maps. In the checkpointed state we load the allocated
+ * entries, and in the current state we load the free entries.
+ */
+static void
+verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
+{
+ vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
+ vdev_t *current_rvd = current->spa_root_vdev;
+
+ load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC);
+ load_concrete_ms_allocatable_trees(current, SM_FREE);
+
+ for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) {
+ vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i];
+ vdev_t *current_vd = current_rvd->vdev_child[i];
+
+ if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
+ /*
+ * See comment in verify_checkpoint_vdev_spacemaps()
+ */
+ ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
+ continue;
+ }
+
+ for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) {
+ metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m];
+ metaslab_t *current_msp = current_vd->vdev_ms[m];
+
+ (void) fprintf(stderr,
+ "\rverifying vdev %llu of %llu, "
+ "metaslab %llu of %llu ...",
+ (longlong_t)current_vd->vdev_id,
+ (longlong_t)current_rvd->vdev_children,
+ (longlong_t)current_vd->vdev_ms[m]->ms_id,
+ (longlong_t)current_vd->vdev_ms_count);
+
+ /*
+ * We walk through the ms_allocatable trees that
+ * are loaded with the allocated blocks from the
+ * ms_sm spacemaps of the checkpoint. For each
+ * one of these ranges we ensure that none of them
+ * exists in the ms_allocatable trees of the
+ * current state which are loaded with the ranges
+ * that are currently free.
+ *
+ * This way we ensure that none of the blocks that
+ * are part of the checkpoint were freed by mistake.
+ */
+ range_tree_walk(ckpoint_msp->ms_allocatable,
+ (range_tree_func_t *)range_tree_verify,
+ current_msp->ms_allocatable);
+ }
+ }
+
+ /* for cleaner progress output */
+ (void) fprintf(stderr, "\n");
+}
+
+static void
+verify_checkpoint_blocks(spa_t *spa)
+{
+ spa_t *checkpoint_spa;
+ char *checkpoint_pool;
+ nvlist_t *config = NULL;
+ int error = 0;
+
+ /*
+ * We import the checkpointed state of the pool (under a different
+ * name) so we can do verification on it against the current state
+ * of the pool.
+ */
+ checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
+ NULL);
+ ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
+
+ error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
+ if (error != 0) {
+ fatal("Tried to open pool \"%s\" but spa_open() failed with "
+ "error %d\n", checkpoint_pool, error);
+ }
+
+ /*
+ * Ensure that ranges in the checkpoint space maps of each vdev
+ * are allocated according to the checkpointed state's metaslab
+ * space maps.
+ */
+ verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa);
+
+ /*
+ * Ensure that allocated ranges in the checkpoint's metaslab
+ * space maps remain allocated in the metaslab space maps of
+ * the current state.
+ */
+ verify_checkpoint_ms_spacemaps(checkpoint_spa, spa);
+
+ /*
+ * Once we are done, we get rid of the checkpointed state.
+ */
+ spa_close(checkpoint_spa, FTAG);
+ free(checkpoint_pool);
+}
+
+static void
+dump_leftover_checkpoint_blocks(spa_t *spa)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ for (uint64_t i = 0; i < rvd->vdev_children; i++) {
+ vdev_t *vd = rvd->vdev_child[i];
+
+ space_map_t *checkpoint_sm = NULL;
+ uint64_t checkpoint_sm_obj;
+
+ if (vd->vdev_top_zap == 0)
+ continue;
+
+ if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
+ VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
+ continue;
+
+ VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
+ VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
+ sizeof (uint64_t), 1, &checkpoint_sm_obj));
+
+ VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
+ checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
+ space_map_update(checkpoint_sm);
+ dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
+ space_map_close(checkpoint_sm);
+ }
+}
+
+static int
+verify_checkpoint(spa_t *spa)
+{
+ uberblock_t checkpoint;
+ int error;
+
+ if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
+ return (0);
+
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
+ sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
+
+ if (error == ENOENT) {
+ /*
+ * If the feature is active but the uberblock is missing
+ * then we must be in the middle of discarding the
+ * checkpoint.
+ */
+ (void) printf("\nPartially discarded checkpoint "
+ "state found:\n");
+ dump_leftover_checkpoint_blocks(spa);
+ return (0);
+ } else if (error != 0) {
+ (void) printf("lookup error %d when looking for "
+ "checkpointed uberblock in MOS\n", error);
+ return (error);
+ }
+ dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n");
+
+ if (checkpoint.ub_checkpoint_txg == 0) {
+ (void) printf("\nub_checkpoint_txg not set in checkpointed "
+ "uberblock\n");
+ error = 3;
+ }
+
+ if (error == 0)
+ verify_checkpoint_blocks(spa);
+
+ return (error);
+}
+
static void
dump_zpool(spa_t *spa)
{
@@ -4435,6 +5015,9 @@ dump_zpool(spa_t *spa)
if (dump_opt['h'])
dump_history(spa);
+ if (rc == 0 && !dump_opt['L'])
+ rc = verify_checkpoint(spa);
+
if (rc != 0) {
dump_debug_buffer();
exit(rc);
@@ -4879,6 +5462,7 @@ main(int argc, char **argv)
int rewind = ZPOOL_NEVER_REWIND;
char *spa_config_path_env;
boolean_t target_is_spa = B_TRUE;
+ nvlist_t *cfg = NULL;
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
@@ -4895,7 +5479,7 @@ main(int argc, char **argv)
spa_config_path = spa_config_path_env;
while ((c = getopt(argc, argv,
- "AbcCdDeEFGhiI:lLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
+ "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
switch (c) {
case 'b':
case 'c':
@@ -4920,6 +5504,7 @@ main(int argc, char **argv)
case 'A':
case 'e':
case 'F':
+ case 'k':
case 'L':
case 'P':
case 'q':
@@ -5029,7 +5614,7 @@ main(int argc, char **argv)
verbose = MAX(verbose, 1);
for (c = 0; c < 256; c++) {
- if (dump_all && strchr("AeEFlLOPRSX", c) == NULL)
+ if (dump_all && strchr("AeEFklLOPRSX", c) == NULL)
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
@@ -5081,6 +5666,17 @@ main(int argc, char **argv)
error = 0;
target = argv[0];
+ char *checkpoint_pool = NULL;
+ char *checkpoint_target = NULL;
+ if (dump_opt['k']) {
+ checkpoint_pool = import_checkpointed_state(target, cfg,
+ &checkpoint_target);
+
+ if (checkpoint_target != NULL)
+ target = checkpoint_target;
+
+ }
+
if (strpbrk(target, "/@") != NULL) {
size_t targetlen;
@@ -5097,7 +5693,6 @@ main(int argc, char **argv)
if (dump_opt['e']) {
importargs_t args = { 0 };
- nvlist_t *cfg = NULL;
args.paths = nsearch;
args.path = searchdirs;
@@ -5121,6 +5716,7 @@ main(int argc, char **argv)
(void) printf("\nConfiguration for import:\n");
dump_nvlist(cfg, 8);
}
+
error = spa_import(target_pool, cfg, NULL,
flags | ZFS_IMPORT_SKIP_MMP);
}
@@ -5130,7 +5726,18 @@ main(int argc, char **argv)
free(target_pool);
if (error == 0) {
- if (target_is_spa || dump_opt['R']) {
+ if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
+ ASSERT(checkpoint_pool != NULL);
+ ASSERT(checkpoint_target == NULL);
+
+ error = spa_open(checkpoint_pool, &spa, FTAG);
+ if (error != 0) {
+ fatal("Tried to open pool \"%s\" but "
+ "spa_open() failed with error %d\n",
+ checkpoint_pool, error);
+ }
+
+ } else if (target_is_spa || dump_opt['R']) {
/*
* Disable the activity check to allow examination of
* active pools.
@@ -5216,6 +5823,12 @@ main(int argc, char **argv)
zdb_read_block(argv[i], spa);
}
+ if (dump_opt['k']) {
+ free(checkpoint_pool);
+ if (!target_is_spa)
+ free(checkpoint_target);
+ }
+
if (os != NULL)
close_objset(os, FTAG);
else
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index 2db9c9c0c..c12178eff 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/
/*
@@ -42,6 +42,7 @@
#include <sys/resource.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
+#include <sys/spa_impl.h>
#include <sys/abd.h>
#include "zdb.h"
@@ -166,7 +167,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, void *arg)
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", tab_prefix,
!BP_IS_HOLE(bp) &&
- bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
+ bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
print_log_bp(bp, tab_prefix);
@@ -361,7 +362,7 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
if (claim_txg != 0)
claim = "already claimed";
- else if (bp->blk_birth >= spa_first_txg(zilog->zl_spa))
+ else if (bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa))
claim = "will claim";
else
claim = "won't claim";
@@ -416,6 +417,11 @@ dump_intent_log(zilog_t *zilog)
for (i = 0; i < TX_MAX_TYPE; i++)
zil_rec_info[i].zri_count = 0;
+ /* see comment in zil_claim() or zil_check_log_chain() */
+ if (zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 &&
+ zh->zh_claim_txg == 0)
+ return;
+
if (verbose >= 2) {
(void) printf("\n");
(void) zil_parse(zilog, print_log_block, print_log_record, NULL,
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index ad25a2f6e..5aca0d325 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -36,6 +36,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <getopt.h>
#include <libgen.h>
#include <libintl.h>
#include <libuutil.h>
@@ -76,6 +77,8 @@ static int zpool_do_add(int, char **);
static int zpool_do_remove(int, char **);
static int zpool_do_labelclear(int, char **);
+static int zpool_do_checkpoint(int, char **);
+
static int zpool_do_list(int, char **);
static int zpool_do_iostat(int, char **);
static int zpool_do_status(int, char **);
@@ -131,6 +134,7 @@ typedef enum {
HELP_ATTACH,
HELP_CLEAR,
HELP_CREATE,
+ HELP_CHECKPOINT,
HELP_DESTROY,
HELP_DETACH,
HELP_EXPORT,
@@ -254,6 +258,8 @@ static zpool_command_t command_table[] = {
{ NULL },
{ "labelclear", zpool_do_labelclear, HELP_LABELCLEAR },
{ NULL },
+ { "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT },
+ { NULL },
{ "list", zpool_do_list, HELP_LIST },
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
{ "status", zpool_do_status, HELP_STATUS },
@@ -306,6 +312,8 @@ get_usage(zpool_help_t idx)
return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
"\t [-O file-system-property=value] ... \n"
"\t [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
+ case HELP_CHECKPOINT:
+ return (gettext("\tcheckpoint [--discard] <pool> ...\n"));
case HELP_DESTROY:
return (gettext("\tdestroy [-f] <pool>\n"));
case HELP_DETACH:
@@ -316,15 +324,13 @@ get_usage(zpool_help_t idx)
return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
- "\timport [-d dir | -c cachefile] [-F [-n]] [-l] "
- "<pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]]\n"
- "\t <pool | id> [newpool]\n"));
+ "\t [--rewind-to-checkpoint] <pool | id> [newpool]\n"));
case HELP_IOSTAT:
return (gettext("\tiostat [[[-c [script1,script2,...]"
"[-lq]]|[-rw]] [-T d | u] [-ghHLpPvy]\n"
@@ -2454,6 +2460,79 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
/*
+ * zpool checkpoint <pool>
+ * checkpoint --discard <pool>
+ *
+ * -d Discard the checkpoint from a checkpointed
+ * --discard pool.
+ *
+ * Checkpoints the specified pool, by taking a "snapshot" of its
+ * current state. A pool can only have one checkpoint at a time.
+ */
+int
+zpool_do_checkpoint(int argc, char **argv)
+{
+ boolean_t discard;
+ char *pool;
+ zpool_handle_t *zhp;
+ int c, err;
+
+ struct option long_options[] = {
+ {"discard", no_argument, NULL, 'd'},
+ {0, 0, 0, 0}
+ };
+
+ discard = B_FALSE;
+ while ((c = getopt_long(argc, argv, ":d", long_options, NULL)) != -1) {
+ switch (c) {
+ case 'd':
+ discard = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("missing pool argument\n"));
+ usage(B_FALSE);
+ }
+
+ if (argc > 1) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
+
+ pool = argv[0];
+
+ if ((zhp = zpool_open(g_zfs, pool)) == NULL) {
+ /* As a special case, check for use of '/' in the name */
+ if (strchr(pool, '/') != NULL)
+ (void) fprintf(stderr, gettext("'zpool checkpoint' "
+ "doesn't work on datasets. To save the state "
+ "of a dataset from a specific point in time "
+ "please use 'zfs snapshot'\n"));
+ return (1);
+ }
+
+ if (discard)
+ err = (zpool_discard_checkpoint(zhp) != 0);
+ else
+ err = (zpool_checkpoint(zhp) != 0);
+
+ zpool_close(zhp);
+
+ return (err);
+}
+
+#define CHECKPOINT_OPT 1024
+
+/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] -a
@@ -2499,6 +2578,9 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* -s Scan using the default search path, the libblkid cache will
* not be consulted.
*
+ * --rewind-to-checkpoint
+ * Import the pool and revert back to the checkpoint.
+ *
* The import command scans for pools to import, and import pools based on pool
* name and GUID. The pool can also be renamed as part of the import process.
*/
@@ -2534,8 +2616,14 @@ zpool_do_import(int argc, char **argv)
importargs_t idata = { 0 };
char *endptr;
+ struct option long_options[] = {
+ {"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT},
+ {0, 0, 0, 0}
+ };
+
/* check options */
- while ((c = getopt(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX")) != -1) {
+ while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX",
+ long_options, NULL)) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
@@ -2622,6 +2710,9 @@ zpool_do_import(int argc, char **argv)
case 'X':
xtreme_rewind = B_TRUE;
break;
+ case CHECKPOINT_OPT:
+ flags |= ZFS_IMPORT_CHECKPOINT;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@@ -4977,6 +5068,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted,
switch (prop) {
case ZPOOL_PROP_EXPANDSZ:
+ case ZPOOL_PROP_CHECKPOINT:
if (value == 0)
(void) strlcpy(propval, "-", sizeof (propval));
else
@@ -5064,6 +5156,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
toplevel, format);
print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc,
scripted, toplevel, format);
+ print_one_column(ZPOOL_PROP_CHECKPOINT,
+ vs->vs_checkpoint_space, scripted, toplevel, format);
print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted,
B_TRUE, format);
print_one_column(ZPOOL_PROP_FRAGMENTATION,
@@ -5187,8 +5281,8 @@ zpool_do_list(int argc, char **argv)
int ret = 0;
list_cbdata_t cb = { 0 };
static char default_props[] =
- "name,size,allocated,free,expandsize,fragmentation,capacity,"
- "dedupratio,health,altroot";
+ "name,size,allocated,free,checkpoint,expandsize,fragmentation,"
+ "capacity,dedupratio,health,altroot";
char *props = default_props;
float interval = 0;
unsigned long count = 0;
@@ -5990,6 +6084,32 @@ typedef struct scrub_cbdata {
pool_scrub_cmd_t cb_scrub_cmd;
} scrub_cbdata_t;
+static boolean_t
+zpool_has_checkpoint(zpool_handle_t *zhp)
+{
+ nvlist_t *config, *nvroot;
+
+ config = zpool_get_config(zhp, NULL);
+
+ if (config != NULL) {
+ pool_checkpoint_stat_t *pcs = NULL;
+ uint_t c;
+
+ nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+ (void) nvlist_lookup_uint64_array(nvroot,
+ ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
+
+ if (pcs == NULL || pcs->pcs_state == CS_NONE)
+ return (B_FALSE);
+
+ assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS ||
+ pcs->pcs_state == CS_CHECKPOINT_DISCARDING);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
int
scrub_callback(zpool_handle_t *zhp, void *data)
{
@@ -6007,6 +6127,13 @@ scrub_callback(zpool_handle_t *zhp, void *data)
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
+ if (err == 0 && zpool_has_checkpoint(zhp) &&
+ cb->cb_type == POOL_SCAN_SCRUB) {
+ (void) printf(gettext("warning: will not scrub state that "
+ "belongs to the checkpoint of pool '%s'\n"),
+ zpool_get_name(zhp));
+ }
+
return (err != 0);
}
@@ -6212,6 +6339,40 @@ print_scan_status(pool_scan_stat_t *ps)
}
/*
+ * As we don't scrub checkpointed blocks, we want to warn the
+ * user that we skipped scanning some blocks if a checkpoint exists
+ * or existed at any time during the scan.
+ */
+static void
+print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs)
+{
+ if (ps == NULL || pcs == NULL)
+ return;
+
+ if (pcs->pcs_state == CS_NONE ||
+ pcs->pcs_state == CS_CHECKPOINT_DISCARDING)
+ return;
+
+ assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS);
+
+ if (ps->pss_state == DSS_NONE)
+ return;
+
+ if ((ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) &&
+ ps->pss_end_time < pcs->pcs_start_time)
+ return;
+
+ if (ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) {
+ (void) printf(gettext(" scan warning: skipped blocks "
+ "that are only referenced by the checkpoint.\n"));
+ } else {
+ assert(ps->pss_state == DSS_SCANNING);
+ (void) printf(gettext(" scan warning: skipping blocks "
+ "that are only referenced by the checkpoint.\n"));
+ }
+}
+
+/*
* Print out detailed removal status.
*/
static void
@@ -6317,6 +6478,39 @@ print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs)
}
static void
+print_checkpoint_status(pool_checkpoint_stat_t *pcs)
+{
+ time_t start;
+ char space_buf[7];
+
+ if (pcs == NULL || pcs->pcs_state == CS_NONE)
+ return;
+
+ (void) printf(gettext("checkpoint: "));
+
+ start = pcs->pcs_start_time;
+ zfs_nicenum(pcs->pcs_space, space_buf, sizeof (space_buf));
+
+ if (pcs->pcs_state == CS_CHECKPOINT_EXISTS) {
+ char *date = ctime(&start);
+
+ /*
+ * ctime() adds a newline at the end of the generated
+ * string, thus the weird format specifier and the
+ * strlen() call used to chop it off from the output.
+ */
+ (void) printf(gettext("created %.*s, consumes %s\n"),
+ (int)(strlen(date) - 1), date, space_buf);
+ return;
+ }
+
+ assert(pcs->pcs_state == CS_CHECKPOINT_DISCARDING);
+
+ (void) printf(gettext("discarding, %s remaining.\n"),
+ space_buf);
+}
+
+static void
print_error_log(zpool_handle_t *zhp)
{
nvlist_t *nverrlist = NULL;
@@ -6741,16 +6935,21 @@ status_callback(zpool_handle_t *zhp, void *data)
uint64_t nerr;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
+ pool_checkpoint_stat_t *pcs = NULL;
pool_scan_stat_t *ps = NULL;
pool_removal_stat_t *prs = NULL;
(void) nvlist_lookup_uint64_array(nvroot,
+ ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
+ (void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c);
- print_scan_status(ps);
-
(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);
+
+ print_scan_status(ps);
+ print_checkpoint_scan_warning(ps, pcs);
print_removal_status(zhp, prs);
+ print_checkpoint_status(pcs);
cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0,
cbp->cb_name_flags | VDEV_NAME_TYPE_ID);
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 83a8b4f3c..78ad7e8de 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -345,6 +345,7 @@ ztest_func_t ztest_reguid;
ztest_func_t ztest_spa_upgrade;
ztest_func_t ztest_device_removal;
ztest_func_t ztest_remap_blocks;
+ztest_func_t ztest_spa_checkpoint_create_discard;
ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
@@ -397,6 +398,7 @@ ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime),
ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes),
ZTI_INIT(ztest_remap_blocks, 1, &zopt_sometimes),
+ ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
@@ -446,6 +448,7 @@ static ztest_ds_t *ztest_ds;
static kmutex_t ztest_vdev_lock;
static boolean_t ztest_device_removal_active = B_FALSE;
+static kmutex_t ztest_checkpoint_lock;
/*
* The ztest_name_lock protects the pool and dataset namespace used by
@@ -2864,6 +2867,62 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
mutex_exit(&ztest_vdev_lock);
}
+static void
+ztest_spa_checkpoint(spa_t *spa)
+{
+ ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));
+
+ int error = spa_checkpoint(spa->spa_name);
+
+ switch (error) {
+ case 0:
+ case ZFS_ERR_DEVRM_IN_PROGRESS:
+ case ZFS_ERR_DISCARDING_CHECKPOINT:
+ case ZFS_ERR_CHECKPOINT_EXISTS:
+ break;
+ case ENOSPC:
+ ztest_record_enospc(FTAG);
+ break;
+ default:
+ fatal(0, "spa_checkpoint(%s) = %d", spa->spa_name, error);
+ }
+}
+
+static void
+ztest_spa_discard_checkpoint(spa_t *spa)
+{
+ ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));
+
+ int error = spa_checkpoint_discard(spa->spa_name);
+
+ switch (error) {
+ case 0:
+ case ZFS_ERR_DISCARDING_CHECKPOINT:
+ case ZFS_ERR_NO_CHECKPOINT:
+ break;
+ default:
+ fatal(0, "spa_discard_checkpoint(%s) = %d",
+ spa->spa_name, error);
+ }
+
+}
+
+/* ARGSUSED */
+void
+ztest_spa_checkpoint_create_discard(ztest_ds_t *zd, uint64_t id)
+{
+ spa_t *spa = ztest_spa;
+
+ mutex_enter(&ztest_checkpoint_lock);
+ if (ztest_random(2) == 0) {
+ ztest_spa_checkpoint(spa);
+ } else {
+ ztest_spa_discard_checkpoint(spa);
+ }
+ mutex_exit(&ztest_checkpoint_lock);
+}
+
+
static vdev_t *
vdev_lookup_by_path(vdev_t *vd, const char *path)
{
@@ -2953,6 +3012,8 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
case EEXIST: /* Generic zil_reset() error */
case EBUSY: /* Replay required */
case EACCES: /* Crypto key not loaded */
+ case ZFS_ERR_CHECKPOINT_EXISTS:
+ case ZFS_ERR_DISCARDING_CHECKPOINT:
break;
default:
fatal(0, "spa_vdev_remove() = %d", error);
@@ -2971,10 +3032,15 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
error = spa_vdev_add(spa, nvroot);
nvlist_free(nvroot);
- if (error == ENOSPC)
+ switch (error) {
+ case 0:
+ break;
+ case ENOSPC:
ztest_record_enospc("spa_vdev_add");
- else if (error != 0)
+ break;
+ default:
fatal(0, "spa_vdev_add() = %d", error);
+ }
}
mutex_exit(&ztest_vdev_lock);
@@ -3048,8 +3114,13 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
(ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
error = spa_vdev_add(spa, nvroot);
- if (error != 0)
+
+ switch (error) {
+ case 0:
+ break;
+ default:
fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
+ }
nvlist_free(nvroot);
} else {
/*
@@ -3061,8 +3132,16 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
(void) vdev_online(spa, guid, 0, NULL);
error = spa_vdev_remove(spa, guid, B_FALSE);
- if (error != 0 && error != EBUSY)
+
+ switch (error) {
+ case 0:
+ case EBUSY:
+ case ZFS_ERR_CHECKPOINT_EXISTS:
+ case ZFS_ERR_DISCARDING_CHECKPOINT:
+ break;
+ default:
fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
+ }
}
mutex_exit(&ztest_vdev_lock);
@@ -3166,7 +3245,6 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
--zs->zs_mirrors;
}
mutex_exit(&ztest_vdev_lock);
-
}
/*
@@ -3271,7 +3349,8 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_ALL, FTAG);
error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
if (error != 0 && error != ENODEV && error != EBUSY &&
- error != ENOTSUP)
+ error != ENOTSUP && error != ZFS_ERR_CHECKPOINT_EXISTS &&
+ error != ZFS_ERR_DISCARDING_CHECKPOINT)
fatal(0, "detach (%s) returned %d", oldpath, error);
goto out;
}
@@ -3362,6 +3441,10 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
if (error == EOVERFLOW || error == EBUSY)
expected_error = error;
+ if (error == ZFS_ERR_CHECKPOINT_EXISTS ||
+ error == ZFS_ERR_DISCARDING_CHECKPOINT)
+ expected_error = error;
+
/* XXX workaround 6690467 */
if (error != expected_error && expected_error != EBUSY) {
fatal(0, "attach (%s %llu, %s %llu, %d) "
@@ -3556,6 +3639,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
uint64_t top;
uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
+ mutex_enter(&ztest_checkpoint_lock);
mutex_enter(&ztest_vdev_lock);
spa_config_enter(spa, SCL_STATE, spa, RW_READER);
@@ -3566,8 +3650,9 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
* when the device removal completes).
*/
if (ztest_device_removal_active) {
- spa_config_exit(spa, SCL_STATE, FTAG);
+ spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
+ mutex_exit(&ztest_checkpoint_lock);
return;
}
@@ -3597,6 +3682,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) {
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
+ mutex_exit(&ztest_checkpoint_lock);
return;
}
ASSERT(psize > 0);
@@ -3622,6 +3708,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
}
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
+ mutex_exit(&ztest_checkpoint_lock);
return;
}
@@ -3656,6 +3743,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
}
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
+ mutex_exit(&ztest_checkpoint_lock);
return;
}
@@ -3686,6 +3774,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
spa_config_exit(spa, SCL_STATE, spa);
mutex_exit(&ztest_vdev_lock);
+ mutex_exit(&ztest_checkpoint_lock);
}
/*
@@ -5722,7 +5811,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
*/
fd = open(pathrand, O_RDWR);
- if (fd == -1) /* we hit a gap in the device namespace */
+ if (fd == -1) /* we hit a gap in the device namespace */
goto out;
fsize = lseek(fd, 0, SEEK_END);
@@ -6645,6 +6734,7 @@ ztest_run(ztest_shared_t *zs)
* Initialize parent/child shared state.
*/
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
zs->zs_thread_start = gethrtime();
@@ -6696,7 +6786,7 @@ ztest_run(ztest_shared_t *zs)
spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;
/*
- * Verify that we can safely inquire about about any object,
+ * Verify that we can safely inquire about any object,
* whether it's allocated or not. To make it interesting,
* we probe a 5-wide window around each power of two.
* This hits all edge cases, including zero and the max.
@@ -6804,6 +6894,7 @@ ztest_run(ztest_shared_t *zs)
mutex_destroy(&zcl.zcl_callbacks_lock);
(void) pthread_rwlock_destroy(&ztest_name_lock);
mutex_destroy(&ztest_vdev_lock);
+ mutex_destroy(&ztest_checkpoint_lock);
}
static void
@@ -6953,6 +7044,7 @@ ztest_import(ztest_shared_t *zs)
int error;
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
kernel_init(FREAD | FWRITE);
@@ -6984,6 +7076,7 @@ ztest_import(ztest_shared_t *zs)
(void) pthread_rwlock_destroy(&ztest_name_lock);
mutex_destroy(&ztest_vdev_lock);
+ mutex_destroy(&ztest_checkpoint_lock);
}
/*
@@ -6998,6 +7091,7 @@ ztest_init(ztest_shared_t *zs)
int i;
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
kernel_init(FREAD | FWRITE);
@@ -7049,6 +7143,7 @@ ztest_init(ztest_shared_t *zs)
(void) pthread_rwlock_destroy(&ztest_name_lock);
mutex_destroy(&ztest_vdev_lock);
+ mutex_destroy(&ztest_checkpoint_lock);
}
static void