diff options
author | Christian Schwarz <[email protected]> | 2019-11-10 23:24:14 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2020-02-11 13:19:12 -0800 |
commit | a73f361fdb2c0a7778e70b482e316054fc2d8630 (patch) | |
tree | 094642f07952d4149c2dab358a35910961d0c42e /module | |
parent | 7b49bbc8164a8a5cd31cf1ba7a6cd88269fec8d0 (diff) |
Implement bookmark copying
This feature allows copying existing bookmarks using
zfs bookmark fs#target fs#newbookmark
There are some niche use cases for such functionality,
e.g. when using bookmarks as markers for replication progress.
Copying redaction bookmarks produces a normal bookmark that
cannot be used for redacted send (we are not duplicating
the redaction object).
ZCP support for bookmarking (both creation and copying) will be
implemented in a separate patch based on this work.
Overview:
- Terminology:
- source = existing snapshot or bookmark
- new/bmark = new bookmark
- Implement bookmark copying in `dsl_bookmark.c`
- create new bookmark node
- copy source's `zbn_phys` to new's `zbn_phys`
- zero-out redaction object id in copy
- Extend existing bookmark ioctl nvlist schema to accept
bookmarks as sources
- => `dsl_bookmark_create_nvl_validate` is authoritative
- use `dsl_dataset_is_before` check for both snapshot
and bookmark sources
- Adjust CLI
- refactor shortname expansion logic in `zfs_do_bookmark`
- Update man pages
- warn about redaction bookmark handling
- Add test cases
- CLI
- pyyzfs libzfs_core bindings
Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Paul Dagnelie <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Christian Schwarz <[email protected]>
Closes #9571
Diffstat (limited to 'module')
-rw-r--r-- | module/zcommon/zfs_namecheck.c | 42 | ||||
-rw-r--r-- | module/zfs/dsl_bookmark.c | 314 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 30 |
3 files changed, 318 insertions, 68 deletions
diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index 3076b8d8b..f8625042a 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -183,6 +183,8 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what) { const char *end; + EQUIV(why == NULL, what == NULL); + /* * Make sure the name is not too long. */ @@ -311,6 +313,44 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what) } /* + * Assert path is a valid bookmark name + */ +int +bookmark_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + int ret = entity_namecheck(path, why, what); + + if (ret == 0 && strchr(path, '#') == NULL) { + if (why != NULL) { + *why = NAME_ERR_NO_POUND; + *what = '#'; + } + return (-1); + } + + return (ret); +} + +/* + * Assert path is a valid snapshot name + */ +int +snapshot_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + int ret = entity_namecheck(path, why, what); + + if (ret == 0 && strchr(path, '@') == NULL) { + if (why != NULL) { + *why = NAME_ERR_NO_AT; + *what = '@'; + } + return (-1); + } + + return (ret); +} + +/* * mountpoint names must be of the following form: * * /[component][/]*[component][/] @@ -420,6 +460,8 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) EXPORT_SYMBOL(entity_namecheck); EXPORT_SYMBOL(pool_namecheck); EXPORT_SYMBOL(dataset_namecheck); +EXPORT_SYMBOL(bookmark_namecheck); +EXPORT_SYMBOL(snapshot_namecheck); EXPORT_SYMBOL(zfs_component_namecheck); EXPORT_SYMBOL(dataset_nestcheck); EXPORT_SYMBOL(get_dataset_depth); diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index 4d5c601d6..5a73c5300 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -16,6 +16,7 @@ /* * Copyright (c) 2013, 2018 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. + * Copyright 2019, 2020 by Christian Schwarz. All rights reserved. */ #include <sys/zfs_context.h> @@ -55,6 +56,9 @@ dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, } /* + * When reading BOOKMARK_V1 bookmarks, the BOOKMARK_V2 fields are guaranteed + * to be zeroed. + * * Returns ESRCH if bookmark is not found. * Note, we need to use the ZAP rather than the AVL to look up bookmarks * by name, because only the ZAP honors the casesensitivity setting. @@ -116,6 +120,91 @@ dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname, return (error); } +/* + * Validates that + * - bmark is a full dataset path of a bookmark (bookmark_namecheck) + * - source is a full path of a snaphot or bookmark + * ({bookmark,snapshot}_namecheck) + * + * Returns 0 if valid, -1 otherwise. + */ +static int +dsl_bookmark_create_nvl_validate_pair(const char *bmark, const char *source) +{ + if (bookmark_namecheck(bmark, NULL, NULL) != 0) + return (-1); + + int is_bmark, is_snap; + is_bmark = bookmark_namecheck(source, NULL, NULL) == 0; + is_snap = snapshot_namecheck(source, NULL, NULL) == 0; + if (!is_bmark && !is_snap) + return (-1); + + return (0); +} + +/* + * Check that the given nvlist corresponds to the following schema: + * { newbookmark -> source, ... } + * where + * - each pair passes dsl_bookmark_create_nvl_validate_pair + * - all newbookmarks are in the same pool + * - all newbookmarks have unique names + * + * Note that this function is only validates above schema. Callers must ensure + * that the bookmarks can be created, e.g. that sources exist. + * + * Returns 0 if the nvlist adheres to above schema. + * Returns -1 if it doesn't. + */ +int +dsl_bookmark_create_nvl_validate(nvlist_t *bmarks) +{ + char *first; + size_t first_len; + + first = NULL; + for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL); + pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) { + + char *bmark = nvpair_name(pair); + char *source; + + /* list structure: values must be snapshots XOR bookmarks */ + if (nvpair_value_string(pair, &source) != 0) + return (-1); + if (dsl_bookmark_create_nvl_validate_pair(bmark, source) != 0) + return (-1); + + /* same pool check */ + if (first == NULL) { + char *cp = strpbrk(bmark, "/#"); + if (cp == NULL) + return (-1); + first = bmark; + first_len = cp - bmark; + } + if (strncmp(first, bmark, first_len) != 0) + return (-1); + switch (*(bmark + first_len)) { + case '/': /* fallthrough */ + case '#': + break; + default: + return (-1); + } + + /* unique newbookmark names; todo: O(n^2) */ + for (nvpair_t *pair2 = nvlist_next_nvpair(bmarks, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(bmarks, pair2)) { + if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0) + return (-1); + } + + } + return (0); +} + typedef struct dsl_bookmark_create_redacted_arg { const char *dbcra_bmark; const char *dbcra_snap; @@ -130,36 +219,85 @@ typedef struct dsl_bookmark_create_arg { nvlist_t *dbca_errors; } dsl_bookmark_create_arg_t; +/* + * expects that newbm and source have been validated using + * dsl_bookmark_create_nvl_validate_pair + */ static int -dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name, - dmu_tx_t *tx) +dsl_bookmark_create_check_impl(dsl_pool_t *dp, + const char *newbm, const char *source) { - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *bmark_fs; - char *shortname; - int error; - zfs_bookmark_phys_t bmark_phys = { 0 }; + ASSERT0(dsl_bookmark_create_nvl_validate_pair(newbm, source)); + /* defer source namecheck until we know it's a snapshot or bookmark */ - if (!snapds->ds_is_snapshot) - return (SET_ERROR(EINVAL)); + int error; + dsl_dataset_t *newbm_ds; + char *newbm_short; + zfs_bookmark_phys_t bmark_phys; - error = dsl_bookmark_hold_ds(dp, bookmark_name, - &bmark_fs, FTAG, &shortname); + error = dsl_bookmark_hold_ds(dp, newbm, &newbm_ds, FTAG, &newbm_short); if (error != 0) return (error); - if (!dsl_dataset_is_before(bmark_fs, snapds, 0)) { - dsl_dataset_rele(bmark_fs, FTAG); - return (SET_ERROR(EINVAL)); + /* Verify that the new bookmark does not already exist */ + error = dsl_bookmark_lookup_impl(newbm_ds, newbm_short, &bmark_phys); + switch (error) { + case ESRCH: + /* happy path: new bmark doesn't exist, proceed after switch */ + error = 0; + break; + case 0: + error = SET_ERROR(EEXIST); + goto eholdnewbmds; + default: + /* dsl_bookmark_lookup_impl already did SET_ERRROR */ + goto eholdnewbmds; } - error = dsl_bookmark_lookup_impl(bmark_fs, shortname, - &bmark_phys); - dsl_dataset_rele(bmark_fs, FTAG); - if (error == 0) - return (SET_ERROR(EEXIST)); - if (error == ESRCH) - return (0); + /* error is retval of the following if-cascade */ + if (strchr(source, '@') != NULL) { + dsl_dataset_t *source_snap_ds; + ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0); + error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds); + if (error == 0) { + VERIFY(source_snap_ds->ds_is_snapshot); + /* + * Verify that source snapshot is an earlier point in + * newbm_ds's timeline (source may be newbm_ds's origin) + */ + if (!dsl_dataset_is_before(newbm_ds, source_snap_ds, 0)) + error = SET_ERROR( + ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR); + dsl_dataset_rele(source_snap_ds, FTAG); + } + } else if (strchr(source, '#') != NULL) { + zfs_bookmark_phys_t source_phys; + ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0); + /* + * Source must exists and be an earlier point in newbm_ds's + * timeline (newbm_ds's origin may be a snap of source's ds) + */ + error = dsl_bookmark_lookup(dp, source, newbm_ds, &source_phys); + switch (error) { + case 0: + break; /* happy path */ + case EXDEV: + error = SET_ERROR(ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR); + break; + default: + /* dsl_bookmark_lookup already did SET_ERRROR */ + break; + } + } else { + /* + * dsl_bookmark_create_nvl_validate validates that source is + * either snapshot or bookmark + */ + panic("unreachable code: %s", source); + } + +eholdnewbmds: + dsl_dataset_rele(newbm_ds, FTAG); return (error); } @@ -167,33 +305,37 @@ static int dsl_bookmark_create_check(void *arg, dmu_tx_t *tx) { dsl_bookmark_create_arg_t *dbca = arg; + int rv = 0; + int schema_err = 0; ASSERT3P(dbca, !=, NULL); ASSERT3P(dbca->dbca_bmarks, !=, NULL); + /* dbca->dbca_errors is allowed to be NULL */ dsl_pool_t *dp = dmu_tx_pool(tx); - int rv = 0; if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS)) return (SET_ERROR(ENOTSUP)); + if (dsl_bookmark_create_nvl_validate(dbca->dbca_bmarks) != 0) + rv = schema_err = SET_ERROR(EINVAL); + for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL); pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) { - dsl_dataset_t *snapds; - int error; + char *new = nvpair_name(pair); - /* note: validity of nvlist checked by ioctl layer */ - error = dsl_dataset_hold(dp, fnvpair_value_string(pair), - FTAG, &snapds); + int error = schema_err; if (error == 0) { - error = dsl_bookmark_create_check_impl(snapds, - nvpair_name(pair), tx); - dsl_dataset_rele(snapds, FTAG); + char *source = fnvpair_value_string(pair); + error = dsl_bookmark_create_check_impl(dp, new, source); + if (error != 0) + error = SET_ERROR(error); } + if (error != 0) { rv = error; if (dbca->dbca_errors != NULL) fnvlist_add_int32(dbca->dbca_errors, - nvpair_name(pair), error); + new, error); } } @@ -259,6 +401,10 @@ dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap) } } +/* + * Add dsl_bookmark_node_t `dbn` to the given dataset and increment appropriate + * SPA feature counters. + */ void dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn, dmu_tx_t *tx) @@ -308,7 +454,7 @@ dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn, * list, and store the object number of the redaction list in redact_obj. */ static void -dsl_bookmark_create_sync_impl(const char *bookmark, const char *snapshot, +dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, dmu_tx_t *tx, uint64_t num_redact_snaps, uint64_t *redact_snaps, void *tag, redaction_list_t **redaction_list) { @@ -381,7 +527,76 @@ dsl_bookmark_create_sync_impl(const char *bookmark, const char *snapshot, dsl_dataset_rele(snapds, FTAG); } + static void +dsl_bookmark_create_sync_impl_book( + const char *new_name, const char *source_name, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *bmark_fs_source, *bmark_fs_new; + char *source_shortname, *new_shortname; + zfs_bookmark_phys_t source_phys; + + VERIFY0(dsl_bookmark_hold_ds(dp, source_name, &bmark_fs_source, FTAG, + &source_shortname)); + VERIFY0(dsl_bookmark_hold_ds(dp, new_name, &bmark_fs_new, FTAG, + &new_shortname)); + + /* + * create a copy of the source bookmark by copying most of its members + * + * Caveat: bookmarking a redaction bookmark yields a normal bookmark + * ----------------------------------------------------------------- + * Reasoning: + * - The zbm_redaction_obj would be referred to by both source and new + * bookmark, but would be destroyed once either source or new is + * destroyed, resulting in use-after-free of the referrred object. + * - User expectation when issuing the `zfs bookmark` command is that + * a normal bookmark of the source is created + * + * Design Alternatives For Full Redaction Bookmark Copying: + * - reference-count the redaction object => would require on-disk + * format change for existing redaction objects + * - Copy the redaction object => cannot be done in syncing context + * because the redaction object might be too large + */ + + VERIFY0(dsl_bookmark_lookup_impl(bmark_fs_source, source_shortname, + &source_phys)); + dsl_bookmark_node_t *new_dbn = dsl_bookmark_node_alloc(new_shortname); + + memcpy(&new_dbn->dbn_phys, &source_phys, sizeof (source_phys)); + new_dbn->dbn_phys.zbm_redaction_obj = 0; + + /* update feature counters */ + if (new_dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) { + spa_feature_incr(dp->dp_spa, + SPA_FEATURE_BOOKMARK_WRITTEN, tx); + } + /* no need for redaction bookmark counter; nulled zbm_redaction_obj */ + /* dsl_bookmark_node_add bumps bookmarks and v2-bookmarks counter */ + + /* + * write new bookmark + * + * Note that dsl_bookmark_lookup_impl guarantees that, if source is a + * v1 bookmark, the v2-only fields are zeroed. + * And dsl_bookmark_node_add writes back a v1-sized bookmark if + * v2 bookmarks are disabled and/or v2-only fields are zeroed. + * => bookmark copying works on pre-bookmark-v2 pools + */ + dsl_bookmark_node_add(bmark_fs_new, new_dbn, tx); + + spa_history_log_internal_ds(bmark_fs_source, "bookmark", tx, + "name=%s creation_txg=%llu source_guid=%llu", + new_shortname, (longlong_t)new_dbn->dbn_phys.zbm_creation_txg, + (longlong_t)source_phys.zbm_guid); + + dsl_dataset_rele(bmark_fs_source, FTAG); + dsl_dataset_rele(bmark_fs_new, FTAG); +} + +void dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx) { dsl_bookmark_create_arg_t *dbca = arg; @@ -391,8 +606,19 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx) for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL); pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) { - dsl_bookmark_create_sync_impl(nvpair_name(pair), - fnvpair_value_string(pair), tx, 0, NULL, NULL, NULL); + + char *new = nvpair_name(pair); + char *source = fnvpair_value_string(pair); + + if (strchr(source, '@') != NULL) { + dsl_bookmark_create_sync_impl_snap(new, source, tx, + 0, NULL, NULL, NULL); + } else if (strchr(source, '#') != NULL) { + dsl_bookmark_create_sync_impl_book(new, source, tx); + } else { + panic("unreachable code"); + } + } } @@ -422,7 +648,6 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx) { dsl_bookmark_create_redacted_arg_t *dbcra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *snapds; int rv = 0; if (!spa_feature_is_enabled(dp->dp_spa, @@ -436,13 +661,12 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx) sizeof (redaction_list_phys_t)) / sizeof (uint64_t)) return (SET_ERROR(E2BIG)); - rv = dsl_dataset_hold(dp, dbcra->dbcra_snap, - FTAG, &snapds); - if (rv == 0) { - rv = dsl_bookmark_create_check_impl(snapds, dbcra->dbcra_bmark, - tx); - dsl_dataset_rele(snapds, FTAG); - } + if (dsl_bookmark_create_nvl_validate_pair( + dbcra->dbcra_bmark, dbcra->dbcra_snap) != 0) + return (SET_ERROR(EINVAL)); + + rv = dsl_bookmark_create_check_impl(dp, + dbcra->dbcra_bmark, dbcra->dbcra_snap); return (rv); } @@ -450,9 +674,9 @@ static void dsl_bookmark_create_redacted_sync(void *arg, dmu_tx_t *tx) { dsl_bookmark_create_redacted_arg_t *dbcra = arg; - dsl_bookmark_create_sync_impl(dbcra->dbcra_bmark, dbcra->dbcra_snap, tx, - dbcra->dbcra_numsnaps, dbcra->dbcra_snaps, dbcra->dbcra_tag, - dbcra->dbcra_rl); + dsl_bookmark_create_sync_impl_snap(dbcra->dbcra_bmark, + dbcra->dbcra_snap, tx, dbcra->dbcra_numsnaps, dbcra->dbcra_snaps, + dbcra->dbcra_tag, dbcra->dbcra_rl); } int diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index b2517d84f..7ef54a098 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -37,6 +37,7 @@ * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. */ /* @@ -3614,11 +3615,13 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) } /* - * Create bookmarks. Bookmark names are of the form <fs>#<bmark>. - * All bookmarks must be in the same pool. + * Create bookmarks. The bookmark names are of the form <fs>#<bmark>. + * All bookmarks and snapshots must be in the same pool. + * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail. * * innvl: { - * bookmark1 -> snapshot1, bookmark2 -> snapshot2 + * new_bookmark1 -> existing_snapshot, + * new_bookmark2 -> existing_bookmark, * } * * outnvl: bookmark -> error code (int32) @@ -3632,25 +3635,6 @@ static const zfs_ioc_key_t zfs_keys_bookmark[] = { static int zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL); - pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { - char *snap_name; - - /* - * Verify the snapshot argument. - */ - if (nvpair_value_string(pair, &snap_name) != 0) - return (SET_ERROR(EINVAL)); - - - /* Verify that the keys (bookmarks) are unique */ - for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair); - pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) { - if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0) - return (SET_ERROR(EINVAL)); - } - } - return (dsl_bookmark_create(innvl, outnvl)); } @@ -4164,7 +4148,7 @@ recursive_unmount(const char *fsname, void *arg) * snapname is the snapshot to redact. * innvl: { * "bookname" -> (string) - * name of the redaction bookmark to generate + * shortname of the redaction bookmark to generate * "snapnv" -> (nvlist, values ignored) * snapshots to redact snapname with respect to * } |