aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorChristian Schwarz <[email protected]>2019-11-10 23:24:14 -0800
committerBrian Behlendorf <[email protected]>2020-02-11 13:19:12 -0800
commita73f361fdb2c0a7778e70b482e316054fc2d8630 (patch)
tree094642f07952d4149c2dab358a35910961d0c42e /module
parent7b49bbc8164a8a5cd31cf1ba7a6cd88269fec8d0 (diff)
Implement bookmark copying
This feature allows copying existing bookmarks using zfs bookmark fs#target fs#newbookmark There are some niche use cases for such functionality, e.g. when using bookmarks as markers for replication progress. Copying redaction bookmarks produces a normal bookmark that cannot be used for redacted send (we are not duplicating the redaction object). ZCP support for bookmarking (both creation and copying) will be implemented in a separate patch based on this work. Overview: - Terminology: - source = existing snapshot or bookmark - new/bmark = new bookmark - Implement bookmark copying in `dsl_bookmark.c` - create new bookmark node - copy source's `zbn_phys` to new's `zbn_phys` - zero-out redaction object id in copy - Extend existing bookmark ioctl nvlist schema to accept bookmarks as sources - => `dsl_bookmark_create_nvl_validate` is authoritative - use `dsl_dataset_is_before` check for both snapshot and bookmark sources - Adjust CLI - refactor shortname expansion logic in `zfs_do_bookmark` - Update man pages - warn about redaction bookmark handling - Add test cases - CLI - pyyzfs libzfs_core bindings Reviewed-by: Matt Ahrens <[email protected]> Reviewed-by: Paul Dagnelie <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Christian Schwarz <[email protected]> Closes #9571
Diffstat (limited to 'module')
-rw-r--r--module/zcommon/zfs_namecheck.c42
-rw-r--r--module/zfs/dsl_bookmark.c314
-rw-r--r--module/zfs/zfs_ioctl.c30
3 files changed, 318 insertions, 68 deletions
diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c
index 3076b8d8b..f8625042a 100644
--- a/module/zcommon/zfs_namecheck.c
+++ b/module/zcommon/zfs_namecheck.c
@@ -183,6 +183,8 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what)
{
const char *end;
+ EQUIV(why == NULL, what == NULL);
+
/*
* Make sure the name is not too long.
*/
@@ -311,6 +313,44 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
}
/*
+ * Assert path is a valid bookmark name
+ */
+int
+bookmark_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+ int ret = entity_namecheck(path, why, what);
+
+ if (ret == 0 && strchr(path, '#') == NULL) {
+ if (why != NULL) {
+ *why = NAME_ERR_NO_POUND;
+ *what = '#';
+ }
+ return (-1);
+ }
+
+ return (ret);
+}
+
+/*
+ * Assert path is a valid snapshot name
+ */
+int
+snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+ int ret = entity_namecheck(path, why, what);
+
+ if (ret == 0 && strchr(path, '@') == NULL) {
+ if (why != NULL) {
+ *why = NAME_ERR_NO_AT;
+ *what = '@';
+ }
+ return (-1);
+ }
+
+ return (ret);
+}
+
+/*
* mountpoint names must be of the following form:
*
* /[component][/]*[component][/]
@@ -420,6 +460,8 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
EXPORT_SYMBOL(entity_namecheck);
EXPORT_SYMBOL(pool_namecheck);
EXPORT_SYMBOL(dataset_namecheck);
+EXPORT_SYMBOL(bookmark_namecheck);
+EXPORT_SYMBOL(snapshot_namecheck);
EXPORT_SYMBOL(zfs_component_namecheck);
EXPORT_SYMBOL(dataset_nestcheck);
EXPORT_SYMBOL(get_dataset_depth);
diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c
index 4d5c601d6..5a73c5300 100644
--- a/module/zfs/dsl_bookmark.c
+++ b/module/zfs/dsl_bookmark.c
@@ -16,6 +16,7 @@
/*
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
+ * Copyright 2019, 2020 by Christian Schwarz. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -55,6 +56,9 @@ dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
}
/*
+ * When reading BOOKMARK_V1 bookmarks, the BOOKMARK_V2 fields are guaranteed
+ * to be zeroed.
+ *
* Returns ESRCH if bookmark is not found.
* Note, we need to use the ZAP rather than the AVL to look up bookmarks
* by name, because only the ZAP honors the casesensitivity setting.
@@ -116,6 +120,91 @@ dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname,
return (error);
}
+/*
+ * Validates that
+ * - bmark is a full dataset path of a bookmark (bookmark_namecheck)
+ * - source is a full path of a snaphot or bookmark
+ * ({bookmark,snapshot}_namecheck)
+ *
+ * Returns 0 if valid, -1 otherwise.
+ */
+static int
+dsl_bookmark_create_nvl_validate_pair(const char *bmark, const char *source)
+{
+ if (bookmark_namecheck(bmark, NULL, NULL) != 0)
+ return (-1);
+
+ int is_bmark, is_snap;
+ is_bmark = bookmark_namecheck(source, NULL, NULL) == 0;
+ is_snap = snapshot_namecheck(source, NULL, NULL) == 0;
+ if (!is_bmark && !is_snap)
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Check that the given nvlist corresponds to the following schema:
+ * { newbookmark -> source, ... }
+ * where
+ * - each pair passes dsl_bookmark_create_nvl_validate_pair
+ * - all newbookmarks are in the same pool
+ * - all newbookmarks have unique names
+ *
+ * Note that this function is only validates above schema. Callers must ensure
+ * that the bookmarks can be created, e.g. that sources exist.
+ *
+ * Returns 0 if the nvlist adheres to above schema.
+ * Returns -1 if it doesn't.
+ */
+int
+dsl_bookmark_create_nvl_validate(nvlist_t *bmarks)
+{
+ char *first;
+ size_t first_len;
+
+ first = NULL;
+ for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) {
+
+ char *bmark = nvpair_name(pair);
+ char *source;
+
+ /* list structure: values must be snapshots XOR bookmarks */
+ if (nvpair_value_string(pair, &source) != 0)
+ return (-1);
+ if (dsl_bookmark_create_nvl_validate_pair(bmark, source) != 0)
+ return (-1);
+
+ /* same pool check */
+ if (first == NULL) {
+ char *cp = strpbrk(bmark, "/#");
+ if (cp == NULL)
+ return (-1);
+ first = bmark;
+ first_len = cp - bmark;
+ }
+ if (strncmp(first, bmark, first_len) != 0)
+ return (-1);
+ switch (*(bmark + first_len)) {
+ case '/': /* fallthrough */
+ case '#':
+ break;
+ default:
+ return (-1);
+ }
+
+ /* unique newbookmark names; todo: O(n^2) */
+ for (nvpair_t *pair2 = nvlist_next_nvpair(bmarks, pair);
+ pair2 != NULL; pair2 = nvlist_next_nvpair(bmarks, pair2)) {
+ if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
+ return (-1);
+ }
+
+ }
+ return (0);
+}
+
typedef struct dsl_bookmark_create_redacted_arg {
const char *dbcra_bmark;
const char *dbcra_snap;
@@ -130,36 +219,85 @@ typedef struct dsl_bookmark_create_arg {
nvlist_t *dbca_errors;
} dsl_bookmark_create_arg_t;
+/*
+ * expects that newbm and source have been validated using
+ * dsl_bookmark_create_nvl_validate_pair
+ */
static int
-dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name,
- dmu_tx_t *tx)
+dsl_bookmark_create_check_impl(dsl_pool_t *dp,
+ const char *newbm, const char *source)
{
- dsl_pool_t *dp = dmu_tx_pool(tx);
- dsl_dataset_t *bmark_fs;
- char *shortname;
- int error;
- zfs_bookmark_phys_t bmark_phys = { 0 };
+ ASSERT0(dsl_bookmark_create_nvl_validate_pair(newbm, source));
+ /* defer source namecheck until we know it's a snapshot or bookmark */
- if (!snapds->ds_is_snapshot)
- return (SET_ERROR(EINVAL));
+ int error;
+ dsl_dataset_t *newbm_ds;
+ char *newbm_short;
+ zfs_bookmark_phys_t bmark_phys;
- error = dsl_bookmark_hold_ds(dp, bookmark_name,
- &bmark_fs, FTAG, &shortname);
+ error = dsl_bookmark_hold_ds(dp, newbm, &newbm_ds, FTAG, &newbm_short);
if (error != 0)
return (error);
- if (!dsl_dataset_is_before(bmark_fs, snapds, 0)) {
- dsl_dataset_rele(bmark_fs, FTAG);
- return (SET_ERROR(EINVAL));
+ /* Verify that the new bookmark does not already exist */
+ error = dsl_bookmark_lookup_impl(newbm_ds, newbm_short, &bmark_phys);
+ switch (error) {
+ case ESRCH:
+ /* happy path: new bmark doesn't exist, proceed after switch */
+ error = 0;
+ break;
+ case 0:
+ error = SET_ERROR(EEXIST);
+ goto eholdnewbmds;
+ default:
+ /* dsl_bookmark_lookup_impl already did SET_ERRROR */
+ goto eholdnewbmds;
}
- error = dsl_bookmark_lookup_impl(bmark_fs, shortname,
- &bmark_phys);
- dsl_dataset_rele(bmark_fs, FTAG);
- if (error == 0)
- return (SET_ERROR(EEXIST));
- if (error == ESRCH)
- return (0);
+ /* error is retval of the following if-cascade */
+ if (strchr(source, '@') != NULL) {
+ dsl_dataset_t *source_snap_ds;
+ ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0);
+ error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds);
+ if (error == 0) {
+ VERIFY(source_snap_ds->ds_is_snapshot);
+ /*
+ * Verify that source snapshot is an earlier point in
+ * newbm_ds's timeline (source may be newbm_ds's origin)
+ */
+ if (!dsl_dataset_is_before(newbm_ds, source_snap_ds, 0))
+ error = SET_ERROR(
+ ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
+ dsl_dataset_rele(source_snap_ds, FTAG);
+ }
+ } else if (strchr(source, '#') != NULL) {
+ zfs_bookmark_phys_t source_phys;
+ ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0);
+ /*
+ * Source must exists and be an earlier point in newbm_ds's
+ * timeline (newbm_ds's origin may be a snap of source's ds)
+ */
+ error = dsl_bookmark_lookup(dp, source, newbm_ds, &source_phys);
+ switch (error) {
+ case 0:
+ break; /* happy path */
+ case EXDEV:
+ error = SET_ERROR(ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
+ break;
+ default:
+ /* dsl_bookmark_lookup already did SET_ERRROR */
+ break;
+ }
+ } else {
+ /*
+ * dsl_bookmark_create_nvl_validate validates that source is
+ * either snapshot or bookmark
+ */
+ panic("unreachable code: %s", source);
+ }
+
+eholdnewbmds:
+ dsl_dataset_rele(newbm_ds, FTAG);
return (error);
}
@@ -167,33 +305,37 @@ static int
dsl_bookmark_create_check(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_arg_t *dbca = arg;
+ int rv = 0;
+ int schema_err = 0;
ASSERT3P(dbca, !=, NULL);
ASSERT3P(dbca->dbca_bmarks, !=, NULL);
+ /* dbca->dbca_errors is allowed to be NULL */
dsl_pool_t *dp = dmu_tx_pool(tx);
- int rv = 0;
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
+ if (dsl_bookmark_create_nvl_validate(dbca->dbca_bmarks) != 0)
+ rv = schema_err = SET_ERROR(EINVAL);
+
for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
- dsl_dataset_t *snapds;
- int error;
+ char *new = nvpair_name(pair);
- /* note: validity of nvlist checked by ioctl layer */
- error = dsl_dataset_hold(dp, fnvpair_value_string(pair),
- FTAG, &snapds);
+ int error = schema_err;
if (error == 0) {
- error = dsl_bookmark_create_check_impl(snapds,
- nvpair_name(pair), tx);
- dsl_dataset_rele(snapds, FTAG);
+ char *source = fnvpair_value_string(pair);
+ error = dsl_bookmark_create_check_impl(dp, new, source);
+ if (error != 0)
+ error = SET_ERROR(error);
}
+
if (error != 0) {
rv = error;
if (dbca->dbca_errors != NULL)
fnvlist_add_int32(dbca->dbca_errors,
- nvpair_name(pair), error);
+ new, error);
}
}
@@ -259,6 +401,10 @@ dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap)
}
}
+/*
+ * Add dsl_bookmark_node_t `dbn` to the given dataset and increment appropriate
+ * SPA feature counters.
+ */
void
dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn,
dmu_tx_t *tx)
@@ -308,7 +454,7 @@ dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn,
* list, and store the object number of the redaction list in redact_obj.
*/
static void
-dsl_bookmark_create_sync_impl(const char *bookmark, const char *snapshot,
+dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
dmu_tx_t *tx, uint64_t num_redact_snaps, uint64_t *redact_snaps, void *tag,
redaction_list_t **redaction_list)
{
@@ -381,7 +527,76 @@ dsl_bookmark_create_sync_impl(const char *bookmark, const char *snapshot,
dsl_dataset_rele(snapds, FTAG);
}
+
static void
+dsl_bookmark_create_sync_impl_book(
+ const char *new_name, const char *source_name, dmu_tx_t *tx)
+{
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *bmark_fs_source, *bmark_fs_new;
+ char *source_shortname, *new_shortname;
+ zfs_bookmark_phys_t source_phys;
+
+ VERIFY0(dsl_bookmark_hold_ds(dp, source_name, &bmark_fs_source, FTAG,
+ &source_shortname));
+ VERIFY0(dsl_bookmark_hold_ds(dp, new_name, &bmark_fs_new, FTAG,
+ &new_shortname));
+
+ /*
+ * create a copy of the source bookmark by copying most of its members
+ *
+ * Caveat: bookmarking a redaction bookmark yields a normal bookmark
+ * -----------------------------------------------------------------
+ * Reasoning:
+ * - The zbm_redaction_obj would be referred to by both source and new
+ * bookmark, but would be destroyed once either source or new is
+ * destroyed, resulting in use-after-free of the referrred object.
+ * - User expectation when issuing the `zfs bookmark` command is that
+ * a normal bookmark of the source is created
+ *
+ * Design Alternatives For Full Redaction Bookmark Copying:
+ * - reference-count the redaction object => would require on-disk
+ * format change for existing redaction objects
+ * - Copy the redaction object => cannot be done in syncing context
+ * because the redaction object might be too large
+ */
+
+ VERIFY0(dsl_bookmark_lookup_impl(bmark_fs_source, source_shortname,
+ &source_phys));
+ dsl_bookmark_node_t *new_dbn = dsl_bookmark_node_alloc(new_shortname);
+
+ memcpy(&new_dbn->dbn_phys, &source_phys, sizeof (source_phys));
+ new_dbn->dbn_phys.zbm_redaction_obj = 0;
+
+ /* update feature counters */
+ if (new_dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
+ spa_feature_incr(dp->dp_spa,
+ SPA_FEATURE_BOOKMARK_WRITTEN, tx);
+ }
+ /* no need for redaction bookmark counter; nulled zbm_redaction_obj */
+ /* dsl_bookmark_node_add bumps bookmarks and v2-bookmarks counter */
+
+ /*
+ * write new bookmark
+ *
+ * Note that dsl_bookmark_lookup_impl guarantees that, if source is a
+ * v1 bookmark, the v2-only fields are zeroed.
+ * And dsl_bookmark_node_add writes back a v1-sized bookmark if
+ * v2 bookmarks are disabled and/or v2-only fields are zeroed.
+ * => bookmark copying works on pre-bookmark-v2 pools
+ */
+ dsl_bookmark_node_add(bmark_fs_new, new_dbn, tx);
+
+ spa_history_log_internal_ds(bmark_fs_source, "bookmark", tx,
+ "name=%s creation_txg=%llu source_guid=%llu",
+ new_shortname, (longlong_t)new_dbn->dbn_phys.zbm_creation_txg,
+ (longlong_t)source_phys.zbm_guid);
+
+ dsl_dataset_rele(bmark_fs_source, FTAG);
+ dsl_dataset_rele(bmark_fs_new, FTAG);
+}
+
+void
dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_arg_t *dbca = arg;
@@ -391,8 +606,19 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
- dsl_bookmark_create_sync_impl(nvpair_name(pair),
- fnvpair_value_string(pair), tx, 0, NULL, NULL, NULL);
+
+ char *new = nvpair_name(pair);
+ char *source = fnvpair_value_string(pair);
+
+ if (strchr(source, '@') != NULL) {
+ dsl_bookmark_create_sync_impl_snap(new, source, tx,
+ 0, NULL, NULL, NULL);
+ } else if (strchr(source, '#') != NULL) {
+ dsl_bookmark_create_sync_impl_book(new, source, tx);
+ } else {
+ panic("unreachable code");
+ }
+
}
}
@@ -422,7 +648,6 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_redacted_arg_t *dbcra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
- dsl_dataset_t *snapds;
int rv = 0;
if (!spa_feature_is_enabled(dp->dp_spa,
@@ -436,13 +661,12 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
return (SET_ERROR(E2BIG));
- rv = dsl_dataset_hold(dp, dbcra->dbcra_snap,
- FTAG, &snapds);
- if (rv == 0) {
- rv = dsl_bookmark_create_check_impl(snapds, dbcra->dbcra_bmark,
- tx);
- dsl_dataset_rele(snapds, FTAG);
- }
+ if (dsl_bookmark_create_nvl_validate_pair(
+ dbcra->dbcra_bmark, dbcra->dbcra_snap) != 0)
+ return (SET_ERROR(EINVAL));
+
+ rv = dsl_bookmark_create_check_impl(dp,
+ dbcra->dbcra_bmark, dbcra->dbcra_snap);
return (rv);
}
@@ -450,9 +674,9 @@ static void
dsl_bookmark_create_redacted_sync(void *arg, dmu_tx_t *tx)
{
dsl_bookmark_create_redacted_arg_t *dbcra = arg;
- dsl_bookmark_create_sync_impl(dbcra->dbcra_bmark, dbcra->dbcra_snap, tx,
- dbcra->dbcra_numsnaps, dbcra->dbcra_snaps, dbcra->dbcra_tag,
- dbcra->dbcra_rl);
+ dsl_bookmark_create_sync_impl_snap(dbcra->dbcra_bmark,
+ dbcra->dbcra_snap, tx, dbcra->dbcra_numsnaps, dbcra->dbcra_snaps,
+ dbcra->dbcra_tag, dbcra->dbcra_rl);
}
int
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index b2517d84f..7ef54a098 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -37,6 +37,7 @@
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
*/
/*
@@ -3614,11 +3615,13 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
}
/*
- * Create bookmarks. Bookmark names are of the form <fs>#<bmark>.
- * All bookmarks must be in the same pool.
+ * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
+ * All bookmarks and snapshots must be in the same pool.
+ * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
*
* innvl: {
- * bookmark1 -> snapshot1, bookmark2 -> snapshot2
+ * new_bookmark1 -> existing_snapshot,
+ * new_bookmark2 -> existing_bookmark,
* }
*
* outnvl: bookmark -> error code (int32)
@@ -3632,25 +3635,6 @@ static const zfs_ioc_key_t zfs_keys_bookmark[] = {
static int
zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
- for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
- pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
- char *snap_name;
-
- /*
- * Verify the snapshot argument.
- */
- if (nvpair_value_string(pair, &snap_name) != 0)
- return (SET_ERROR(EINVAL));
-
-
- /* Verify that the keys (bookmarks) are unique */
- for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
- pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
- if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
- return (SET_ERROR(EINVAL));
- }
- }
-
return (dsl_bookmark_create(innvl, outnvl));
}
@@ -4164,7 +4148,7 @@ recursive_unmount(const char *fsname, void *arg)
* snapname is the snapshot to redact.
* innvl: {
* "bookname" -> (string)
- * name of the redaction bookmark to generate
+ * shortname of the redaction bookmark to generate
* "snapnv" -> (nvlist, values ignored)
* snapshots to redact snapname with respect to
* }