diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/Makefile.in | 1 | ||||
-rw-r--r-- | module/zfs/dmu_diff.c | 2 | ||||
-rw-r--r-- | module/zfs/dmu_send.c | 126 | ||||
-rw-r--r-- | module/zfs/dsl_bookmark.c | 459 | ||||
-rw-r--r-- | module/zfs/dsl_dataset.c | 48 | ||||
-rw-r--r-- | module/zfs/dsl_destroy.c | 6 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 2 | ||||
-rw-r--r-- | module/zfs/zfeature_common.c | 18 | ||||
-rw-r--r-- | module/zfs/zfs_ctldir.c | 4 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 216 |
10 files changed, 807 insertions, 75 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 9701ff2bb..56ecd4918 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -25,6 +25,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/dnode_sync.o $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_dataset.o $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_deadlist.o $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_deleg.o +$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_bookmark.o $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_dir.o $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_pool.o $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_prop.o diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index 8d7385539..a2130b131 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -187,7 +187,7 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name, return (error); } - if (!dsl_dataset_is_before(tosnap, fromsnap)) { + if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) { dsl_dataset_rele(fromsnap, FTAG); dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 15abdeacb..885da23ba 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -50,6 +50,7 @@ #include <sys/zfs_onexit.h> #include <sys/dmu_send.h> #include <sys/dsl_destroy.h> +#include <sys/dsl_bookmark.h> /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ int zfs_send_corrupt_data = B_FALSE; @@ -385,6 +386,12 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { return (0); + } else if (zb->zb_level == ZB_ZIL_LEVEL) { + /* + * If we are sending a non-snapshot (which is allowed on + * read-only pools), it may have a ZIL, which must be ignored. + */ + return (0); } else if (BP_IS_HOLE(bp) && zb->zb_object == DMU_META_DNODE_OBJECT) { uint64_t span = BP_SPAN(dnp, zb->zb_level); @@ -433,6 +440,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); + ASSERT0(zb->zb_level); if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { @@ -460,11 +468,12 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } /* - * Releases dp, ds, and fromds, using the specified tag. + * Releases dp using the specified tag. */ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, - dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off) + zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, + vnode_t *vp, offset_t *off) { objset_t *os; dmu_replay_record_t *drr; @@ -472,18 +481,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, int err; uint64_t fromtxg = 0; - if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) { - dsl_dataset_rele(fromds, tag); - dsl_dataset_rele(ds, tag); - dsl_pool_rele(dp, tag); - return (SET_ERROR(EXDEV)); - } - err = dmu_objset_from_ds(ds, &os); if (err != 0) { - if (fromds != NULL) - dsl_dataset_rele(fromds, tag); - dsl_dataset_rele(ds, tag); dsl_pool_rele(dp, tag); return (err); } @@ -499,9 +498,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, uint64_t version; if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { kmem_free(drr, sizeof (dmu_replay_record_t)); - if (fromds != NULL) - dsl_dataset_rele(fromds, tag); - dsl_dataset_rele(ds, tag); dsl_pool_rele(dp, tag); return (SET_ERROR(EINVAL)); } @@ -516,20 +512,20 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); - if (fromds != NULL && ds->ds_dir != fromds->ds_dir) + if (is_clone) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; - if (fromds != NULL) - drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; + if (fromzb != NULL) { + drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid; + fromtxg = fromzb->zbm_creation_txg; + } dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); - - if (fromds != NULL) { - fromtxg = fromds->ds_phys->ds_creation_txg; - dsl_dataset_rele(fromds, tag); - fromds = NULL; + if (!dsl_dataset_is_snapshot(ds)) { + (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", + sizeof (drr->drr_u.drr_begin.drr_toname)); } dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -543,7 +539,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, dsp->dsa_toguid = ds->ds_phys->ds_guid; ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); dsp->dsa_pending_op = PENDING_NONE; - dsp->dsa_incremental = (fromtxg != 0); + dsp->dsa_incremental = (fromzb != NULL); mutex_enter(&ds->ds_sendstream_lock); list_insert_head(&ds->ds_sendstreams, dsp); @@ -589,7 +585,6 @@ out: kmem_free(dsp, sizeof (dmu_sendarg_t)); dsl_dataset_long_rele(ds, FTAG); - dsl_dataset_rele(ds, tag); return (err); } @@ -614,15 +609,30 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, } if (fromsnap != 0) { + zfs_bookmark_phys_t zb; + boolean_t is_clone; + err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); if (err != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (err); } + if (!dsl_dataset_is_before(ds, fromds, 0)) + err = SET_ERROR(EXDEV); + zb.zbm_creation_time = fromds->ds_phys->ds_creation_time; + zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg; + zb.zbm_guid = fromds->ds_phys->ds_guid; + is_clone = (fromds->ds_dir != ds->ds_dir); + dsl_dataset_rele(fromds, FTAG); + err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, + outfd, vp, off); + } else { + err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, + outfd, vp, off); } - - return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off)); + dsl_dataset_rele(ds, FTAG); + return (err); } int @@ -631,33 +641,79 @@ dmu_send(const char *tosnap, const char *fromsnap, { dsl_pool_t *dp; dsl_dataset_t *ds; - dsl_dataset_t *fromds = NULL; int err; + boolean_t owned = B_FALSE; - if (strchr(tosnap, '@') == NULL) - return (SET_ERROR(EINVAL)); - if (fromsnap != NULL && strchr(fromsnap, '@') == NULL) + if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) return (SET_ERROR(EINVAL)); err = dsl_pool_hold(tosnap, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) { + /* + * We are sending a filesystem or volume. Ensure + * that it doesn't change by owning the dataset. + */ + err = dsl_dataset_own(dp, tosnap, FTAG, &ds); + owned = B_TRUE; + } else { + err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + } if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); } if (fromsnap != NULL) { - err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); + zfs_bookmark_phys_t zb; + boolean_t is_clone = B_FALSE; + int fsnamelen = strchr(tosnap, '@') - tosnap; + + /* + * If the fromsnap is in a different filesystem, then + * mark the send stream as a clone. + */ + if (strncmp(tosnap, fromsnap, fsnamelen) != 0 || + (fromsnap[fsnamelen] != '@' && + fromsnap[fsnamelen] != '#')) { + is_clone = B_TRUE; + } + + if (strchr(fromsnap, '@')) { + dsl_dataset_t *fromds; + err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); + if (err == 0) { + if (!dsl_dataset_is_before(ds, fromds, 0)) + err = SET_ERROR(EXDEV); + zb.zbm_creation_time = + fromds->ds_phys->ds_creation_time; + zb.zbm_creation_txg = + fromds->ds_phys->ds_creation_txg; + zb.zbm_guid = fromds->ds_phys->ds_guid; + is_clone = (ds->ds_dir != fromds->ds_dir); + dsl_dataset_rele(fromds, FTAG); + } + } else { + err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); + } if (err != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (err); } + err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, + outfd, vp, off); + } else { + err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, + outfd, vp, off); } - return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off)); + if (owned) + dsl_dataset_disown(ds, FTAG); + else + dsl_dataset_rele(ds, FTAG); + return (err); } int @@ -677,7 +733,7 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) * fromsnap must be an earlier snapshot from the same fs as tosnap, * or the origin's fs. */ - if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) + if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0)) return (SET_ERROR(EXDEV)); /* Get uncompressed size estimate of changed data. */ diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c new file mode 100644 index 000000000..2cae5cd4d --- /dev/null +++ b/module/zfs/dsl_bookmark.c @@ -0,0 +1,459 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> +#include <sys/dmu_impl.h> +#include <sys/dmu_tx.h> +#include <sys/arc.h> +#include <sys/zap.h> +#include <sys/zfeature.h> +#include <sys/spa.h> +#include <sys/dsl_bookmark.h> +#include <zfs_namecheck.h> + +static int +dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, + dsl_dataset_t **dsp, void *tag, char **shortnamep) +{ + char buf[MAXNAMELEN]; + char *hashp; + + if (strlen(fullname) >= MAXNAMELEN) + return (SET_ERROR(ENAMETOOLONG)); + hashp = strchr(fullname, '#'); + if (hashp == NULL) + return (SET_ERROR(EINVAL)); + + *shortnamep = hashp + 1; + if (zfs_component_namecheck(*shortnamep, NULL, NULL)) + return (SET_ERROR(EINVAL)); + (void) strlcpy(buf, fullname, hashp - fullname + 1); + return (dsl_dataset_hold(dp, buf, tag, dsp)); +} + +/* + * Returns ESRCH if bookmark is not found. + */ +static int +dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname, + zfs_bookmark_phys_t *bmark_phys) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t bmark_zapobj = ds->ds_bookmarks; + matchtype_t mt; + int err; + + if (bmark_zapobj == 0) + return (SET_ERROR(ESRCH)); + + if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + mt = MT_FIRST; + else + mt = MT_EXACT; + + err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t), + sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt, + NULL, 0, NULL); + + return (err == ENOENT ? ESRCH : err); +} + +/* + * If later_ds is non-NULL, this will return EXDEV if the the specified bookmark + * does not represents an earlier point in later_ds's timeline. + * + * Returns ENOENT if the dataset containing the bookmark does not exist. + * Returns ESRCH if the dataset exists but the bookmark was not found in it. + */ +int +dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname, + dsl_dataset_t *later_ds, zfs_bookmark_phys_t *bmp) +{ + char *shortname; + dsl_dataset_t *ds; + int error; + + error = dsl_bookmark_hold_ds(dp, fullname, &ds, FTAG, &shortname); + if (error != 0) + return (error); + + error = dsl_dataset_bmark_lookup(ds, shortname, bmp); + if (error == 0 && later_ds != NULL) { + if (!dsl_dataset_is_before(later_ds, ds, bmp->zbm_creation_txg)) + error = SET_ERROR(EXDEV); + } + dsl_dataset_rele(ds, FTAG); + return (error); +} + +typedef struct dsl_bookmark_create_arg { + nvlist_t *dbca_bmarks; + nvlist_t *dbca_errors; +} dsl_bookmark_create_arg_t; + +static int +dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name, + dmu_tx_t *tx) +{ + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *bmark_fs; + char *shortname; + int error; + zfs_bookmark_phys_t bmark_phys; + + if (!dsl_dataset_is_snapshot(snapds)) + return (SET_ERROR(EINVAL)); + + error = dsl_bookmark_hold_ds(dp, bookmark_name, + &bmark_fs, FTAG, &shortname); + if (error != 0) + return (error); + + if (!dsl_dataset_is_before(bmark_fs, snapds, 0)) { + dsl_dataset_rele(bmark_fs, FTAG); + return (SET_ERROR(EINVAL)); + } + + error = dsl_dataset_bmark_lookup(bmark_fs, shortname, + &bmark_phys); + dsl_dataset_rele(bmark_fs, FTAG); + if (error == 0) + return (SET_ERROR(EEXIST)); + if (error == ESRCH) + return (0); + return (error); +} + +static int +dsl_bookmark_create_check(void *arg, dmu_tx_t *tx) +{ + dsl_bookmark_create_arg_t *dbca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + int rv = 0; + nvpair_t *pair; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS)) + return (SET_ERROR(ENOTSUP)); + + for (pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL); + pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) { + dsl_dataset_t *snapds; + int error; + + /* note: validity of nvlist checked by ioctl layer */ + error = dsl_dataset_hold(dp, fnvpair_value_string(pair), + FTAG, &snapds); + if (error == 0) { + error = dsl_bookmark_create_check_impl(snapds, + nvpair_name(pair), tx); + dsl_dataset_rele(snapds, FTAG); + } + if (error != 0) { + fnvlist_add_int32(dbca->dbca_errors, + nvpair_name(pair), error); + rv = error; + } + } + + return (rv); +} + +static void +dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx) +{ + dsl_bookmark_create_arg_t *dbca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + nvpair_t *pair; + + ASSERT(spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS)); + + for (pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL); + pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) { + dsl_dataset_t *snapds, *bmark_fs; + zfs_bookmark_phys_t bmark_phys; + char *shortname; + + VERIFY0(dsl_dataset_hold(dp, fnvpair_value_string(pair), + FTAG, &snapds)); + VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair), + &bmark_fs, FTAG, &shortname)); + if (bmark_fs->ds_bookmarks == 0) { + bmark_fs->ds_bookmarks = + zap_create_norm(mos, U8_TEXTPREP_TOUPPER, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); + + dsl_dataset_zapify(bmark_fs, tx); + VERIFY0(zap_add(mos, bmark_fs->ds_object, + DS_FIELD_BOOKMARK_NAMES, + sizeof (bmark_fs->ds_bookmarks), 1, + &bmark_fs->ds_bookmarks, tx)); + } + + bmark_phys.zbm_guid = snapds->ds_phys->ds_guid; + bmark_phys.zbm_creation_txg = snapds->ds_phys->ds_creation_txg; + bmark_phys.zbm_creation_time = + snapds->ds_phys->ds_creation_time; + + VERIFY0(zap_add(mos, bmark_fs->ds_bookmarks, + shortname, sizeof (uint64_t), + sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t), + &bmark_phys, tx)); + + spa_history_log_internal_ds(bmark_fs, "bookmark", tx, + "name=%s creation_txg=%llu target_snap=%llu", + shortname, + (longlong_t)bmark_phys.zbm_creation_txg, + (longlong_t)snapds->ds_object); + + dsl_dataset_rele(bmark_fs, FTAG); + dsl_dataset_rele(snapds, FTAG); + } +} + +/* + * The bookmarks must all be in the same pool. + */ +int +dsl_bookmark_create(nvlist_t *bmarks, nvlist_t *errors) +{ + nvpair_t *pair; + dsl_bookmark_create_arg_t dbca; + + pair = nvlist_next_nvpair(bmarks, NULL); + if (pair == NULL) + return (0); + + dbca.dbca_bmarks = bmarks; + dbca.dbca_errors = errors; + + return (dsl_sync_task(nvpair_name(pair), dsl_bookmark_create_check, + dsl_bookmark_create_sync, &dbca, fnvlist_num_pairs(bmarks))); +} + +int +dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl) +{ + int err = 0; + zap_cursor_t zc; + zap_attribute_t attr; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + uint64_t bmark_zapobj = ds->ds_bookmarks; + if (bmark_zapobj == 0) + return (0); + + for (zap_cursor_init(&zc, dp->dp_meta_objset, bmark_zapobj); + zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_advance(&zc)) { + nvlist_t *out_props; + char *bmark_name = attr.za_name; + zfs_bookmark_phys_t bmark_phys; + + err = dsl_dataset_bmark_lookup(ds, bmark_name, &bmark_phys); + ASSERT3U(err, !=, ENOENT); + if (err != 0) + break; + + out_props = fnvlist_alloc(); + if (nvlist_exists(props, + zfs_prop_to_name(ZFS_PROP_GUID))) { + dsl_prop_nvlist_add_uint64(out_props, + ZFS_PROP_GUID, bmark_phys.zbm_guid); + } + if (nvlist_exists(props, + zfs_prop_to_name(ZFS_PROP_CREATETXG))) { + dsl_prop_nvlist_add_uint64(out_props, + ZFS_PROP_CREATETXG, bmark_phys.zbm_creation_txg); + } + if (nvlist_exists(props, + zfs_prop_to_name(ZFS_PROP_CREATION))) { + dsl_prop_nvlist_add_uint64(out_props, + ZFS_PROP_CREATION, bmark_phys.zbm_creation_time); + } + + fnvlist_add_nvlist(outnvl, bmark_name, out_props); + fnvlist_free(out_props); + } + zap_cursor_fini(&zc); + return (err); +} + +/* + * Retrieve the bookmarks that exist in the specified dataset, and the + * requested properties of each bookmark. + * + * The "props" nvlist specifies which properties are requested. + * See lzc_get_bookmarks() for the list of valid properties. + */ +int +dsl_get_bookmarks(const char *dsname, nvlist_t *props, nvlist_t *outnvl) +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + int err; + + err = dsl_pool_hold(dsname, FTAG, &dp); + if (err != 0) + return (err); + err = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + err = dsl_get_bookmarks_impl(ds, props, outnvl); + + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (err); +} + +typedef struct dsl_bookmark_destroy_arg { + nvlist_t *dbda_bmarks; + nvlist_t *dbda_success; + nvlist_t *dbda_errors; +} dsl_bookmark_destroy_arg_t; + +static int +dsl_dataset_bookmark_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t bmark_zapobj = ds->ds_bookmarks; + matchtype_t mt; + + if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + mt = MT_FIRST; + else + mt = MT_EXACT; + + return (zap_remove_norm(mos, bmark_zapobj, name, mt, tx)); +} + +static int +dsl_bookmark_destroy_check(void *arg, dmu_tx_t *tx) +{ + dsl_bookmark_destroy_arg_t *dbda = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + int rv = 0; + nvpair_t *pair; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS)) + return (0); + + for (pair = nvlist_next_nvpair(dbda->dbda_bmarks, NULL); + pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_bmarks, pair)) { + const char *fullname = nvpair_name(pair); + dsl_dataset_t *ds; + zfs_bookmark_phys_t bm; + int error; + char *shortname; + + error = dsl_bookmark_hold_ds(dp, fullname, &ds, + FTAG, &shortname); + if (error == ENOENT) { + /* ignore it; the bookmark is "already destroyed" */ + continue; + } + if (error == 0) { + error = dsl_dataset_bmark_lookup(ds, shortname, &bm); + dsl_dataset_rele(ds, FTAG); + if (error == ESRCH) { + /* + * ignore it; the bookmark is + * "already destroyed" + */ + continue; + } + } + if (error == 0) { + fnvlist_add_boolean(dbda->dbda_success, fullname); + } else { + fnvlist_add_int32(dbda->dbda_errors, fullname, error); + rv = error; + } + } + return (rv); +} + +static void +dsl_bookmark_destroy_sync(void *arg, dmu_tx_t *tx) +{ + dsl_bookmark_destroy_arg_t *dbda = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(dbda->dbda_success, NULL); + pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_success, pair)) { + dsl_dataset_t *ds; + char *shortname; + uint64_t zap_cnt; + + VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair), + &ds, FTAG, &shortname)); + VERIFY0(dsl_dataset_bookmark_remove(ds, shortname, tx)); + + /* + * If all of this dataset's bookmarks have been destroyed, + * free the zap object and decrement the feature's use count. + */ + VERIFY0(zap_count(mos, ds->ds_bookmarks, + &zap_cnt)); + if (zap_cnt == 0) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx)); + ds->ds_bookmarks = 0; + spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); + VERIFY0(zap_remove(mos, ds->ds_object, + DS_FIELD_BOOKMARK_NAMES, tx)); + } + + spa_history_log_internal_ds(ds, "remove bookmark", tx, + "name=%s", shortname); + + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * The bookmarks must all be in the same pool. + */ +int +dsl_bookmark_destroy(nvlist_t *bmarks, nvlist_t *errors) +{ + int rv; + dsl_bookmark_destroy_arg_t dbda; + nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL); + if (pair == NULL) + return (0); + + dbda.dbda_bmarks = bmarks; + dbda.dbda_errors = errors; + dbda.dbda_success = fnvlist_alloc(); + + rv = dsl_sync_task(nvpair_name(pair), dsl_bookmark_destroy_check, + dsl_bookmark_destroy_sync, &dbda, fnvlist_num_pairs(bmarks)); + fnvlist_free(dbda.dbda_success); + return (rv); +} diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index cae56534d..2fe6b858d 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -48,6 +48,7 @@ #include <sys/dsl_deadlist.h> #include <sys/dsl_destroy.h> #include <sys/dsl_userhold.h> +#include <sys/dsl_bookmark.h> #define SWITCH64(x, y) \ { \ @@ -404,6 +405,14 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev); } + if (doi.doi_type == DMU_OTN_ZAP_METADATA) { + int zaperr = zap_lookup(mos, ds->ds_object, + DS_FIELD_BOOKMARK_NAMES, + sizeof (ds->ds_bookmarks), 1, + &ds->ds_bookmarks); + if (zaperr != ENOENT) + VERIFY0(zaperr); + } } else { if (zfs_flags & ZFS_DEBUG_SNAPNAMES) err = dsl_dataset_get_snapname(ds); @@ -1734,6 +1743,8 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) dsl_dataset_t *ds; int64_t unused_refres_delta; int error; + nvpair_t *pair; + nvlist_t *proprequest, *bookmarks; error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); if (error != 0) @@ -1751,6 +1762,28 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EINVAL)); } + /* must not have any bookmarks after the most recent snapshot */ + proprequest = fnvlist_alloc(); + fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); + bookmarks = fnvlist_alloc(); + error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); + fnvlist_free(proprequest); + if (error != 0) + return (error); + for (pair = nvlist_next_nvpair(bookmarks, NULL); + pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { + nvlist_t *valuenv = + fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), + zfs_prop_to_name(ZFS_PROP_CREATETXG)); + uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); + if (createtxg > ds->ds_phys->ds_prev_snap_txg) { + fnvlist_free(bookmarks); + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EEXIST)); + } + } + fnvlist_free(bookmarks); + error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); if (error != 0) { dsl_dataset_rele(ds, FTAG); @@ -2972,9 +3005,12 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, * 'earlier' is before 'later'. Or 'earlier' could be the origin of * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's * filesystem. Or 'earlier' could be the origin's origin. + * + * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. */ boolean_t -dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, + uint64_t earlier_txg) { dsl_pool_t *dp = later->ds_dir->dd_pool; int error; @@ -2982,9 +3018,13 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) dsl_dataset_t *origin; ASSERT(dsl_pool_config_held(dp)); + ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); + + if (earlier_txg == 0) + earlier_txg = earlier->ds_phys->ds_creation_txg; - if (earlier->ds_phys->ds_creation_txg >= - later->ds_phys->ds_creation_txg) + if (dsl_dataset_is_snapshot(later) && + earlier_txg >= later->ds_phys->ds_creation_txg) return (B_FALSE); if (later->ds_dir == earlier->ds_dir) @@ -2998,7 +3038,7 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); if (error != 0) return (B_FALSE); - ret = dsl_dataset_is_before(origin, earlier); + ret = dsl_dataset_is_before(origin, earlier, earlier_txg); dsl_dataset_rele(origin, FTAG); return (ret); } diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index cbe89739f..113b7261b 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -816,6 +816,12 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx)); + if (ds->ds_bookmarks != 0) { + VERIFY0(zap_destroy(mos, + ds->ds_bookmarks, tx)); + spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); + } + spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); ASSERT0(ds->ds_phys->ds_next_clones_obj); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 88f9e34e3..02ccb13a2 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -428,7 +428,7 @@ spa_lookup(const char *name) * If it's a full dataset name, figure out the pool name and * just use that. */ - cp = strpbrk(search.spa_name, "/@"); + cp = strpbrk(search.spa_name, "/@#"); if (cp != NULL) *cp = '\0'; diff --git a/module/zfs/zfeature_common.c b/module/zfs/zfeature_common.c index 35e662ee2..2ea220245 100644 --- a/module/zfs/zfeature_common.c +++ b/module/zfs/zfeature_common.c @@ -187,8 +187,10 @@ zpool_feature_init(void) B_FALSE, NULL); { - static spa_feature_t hole_birth_deps[] = { SPA_FEATURE_ENABLED_TXG, - SPA_FEATURE_NONE }; + static const spa_feature_t hole_birth_deps[] = { + SPA_FEATURE_ENABLED_TXG, + SPA_FEATURE_NONE + }; zfeature_register(SPA_FEATURE_HOLE_BIRTH, "com.delphix:hole_birth", "hole_birth", "Retain hole birth txg for more precise zfs send", @@ -199,4 +201,16 @@ zpool_feature_init(void) "com.delphix:extensible_dataset", "extensible_dataset", "Enhanced dataset functionality, used by other features.", B_FALSE, B_FALSE, B_FALSE, NULL); + + { + static const spa_feature_t bookmarks_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + + zfeature_register(SPA_FEATURE_BOOKMARKS, + "com.delphix:bookmarks", "bookmarks", + "\"zfs bookmark\" command", + B_TRUE, B_FALSE, B_FALSE, bookmarks_deps); + } } diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 96520545a..59405de82 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -354,7 +354,7 @@ zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname) { objset_t *os = ITOZSB(ip)->z_os; - if (snapshot_namecheck(name, NULL, NULL) != 0) + if (zfs_component_namecheck(name, NULL, NULL) != 0) return (SET_ERROR(EILSEQ)); dmu_objset_name(os, zname); @@ -630,7 +630,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); - if (snapshot_namecheck(dirname, NULL, NULL) != 0) { + if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { error = SET_ERROR(EILSEQ); goto out; } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 453571598..409d2c737 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -180,6 +180,7 @@ #include <sys/dmu_send.h> #include <sys/dsl_destroy.h> +#include <sys/dsl_bookmark.h> #include <sys/dsl_userhold.h> #include <sys/zfeature.h> @@ -812,22 +813,9 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (SET_ERROR(EINVAL)); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nextpair) { - dsl_pool_t *dp; - dsl_dataset_t *ds; - - error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp); - if (error != 0) - break; nextpair = nvlist_next_nvpair(snaps, pair); - error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds); - if (error == 0) - dsl_dataset_rele(ds, FTAG); - dsl_pool_rele(dp, FTAG); - - if (error == 0) { - error = zfs_secpolicy_destroy_perms(nvpair_name(pair), - cr); - } else if (error == ENOENT) { + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); + if (error == ENOENT) { /* * Ignore any snapshots that don't exist (we consider * them "already destroyed"). Remove the name from the @@ -986,6 +974,76 @@ zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (error); } +/* + * Check for permission to create each snapshot in the nvlist. + */ +/* ARGSUSED */ +static int +zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + int error = 0; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(innvl, NULL); + pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { + char *name = nvpair_name(pair); + char *hashp = strchr(name, '#'); + + if (hashp == NULL) { + error = SET_ERROR(EINVAL); + break; + } + *hashp = '\0'; + error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_BOOKMARK, cr); + *hashp = '#'; + if (error != 0) + break; + } + return (error); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + nvpair_t *pair, *nextpair; + int error = 0; + + for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; + pair = nextpair) { + char *name = nvpair_name(pair); + char *hashp = strchr(name, '#'); + nextpair = nvlist_next_nvpair(innvl, pair); + + if (hashp == NULL) { + error = SET_ERROR(EINVAL); + break; + } + + *hashp = '\0'; + error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_DESTROY, cr); + *hashp = '#'; + if (error == ENOENT) { + /* + * Ignore any filesystems that don't exist (we consider + * their bookmarks "already destroyed"). Remove + * the name from the nvl here in case the filesystem + * is created between now and when we try to destroy + * the bookmark (in which case we don't want to + * destroy it since we haven't checked for permission). + */ + fnvlist_remove_nvpair(innvl, pair); + error = 0; + } + if (error != 0) + break; + } + + return (error); +} + /* ARGSUSED */ static int zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) @@ -2551,7 +2609,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl) while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); - char *valstr; if (!zfs_prop_user(propname) || nvpair_type(pair) != DATA_TYPE_STRING) @@ -2564,8 +2621,7 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl) if (strlen(propname) >= ZAP_MAXNAMELEN) return (SET_ERROR(ENAMETOOLONG)); - VERIFY(nvpair_value_string(pair, &valstr) == 0); - if (strlen(valstr) >= ZAP_MAXVALUELEN) + if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN) return (SET_ERROR(E2BIG)); } return (0); @@ -3242,7 +3298,8 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) * The snap name must contain an @, and the part after it must * contain only valid characters. */ - if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) + if (cp == NULL || + zfs_component_namecheck(cp + 1, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); /* @@ -3396,10 +3453,10 @@ zfs_destroy_unmount_origin(const char *fsname) * * outnvl: snapshot -> error code (int32) */ +/* ARGSUSED */ static int zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int error, poollen; nvlist_t *snaps; nvpair_t *pair; boolean_t defer; @@ -3408,25 +3465,110 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) return (SET_ERROR(EINVAL)); defer = nvlist_exists(innvl, "defer"); - poollen = strlen(poolname); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { + (void) zfs_unmount_snap(nvpair_name(pair)); + (void) zvol_remove_minor(nvpair_name(pair)); + } + + return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); +} + +/* + * Create bookmarks. Bookmark names are of the form <fs>#<bmark>. + * All bookmarks must be in the same pool. + * + * innvl: { + * bookmark1 -> snapshot1, bookmark2 -> snapshot2 + * } + * + * outnvl: bookmark -> error code (int32) + * + */ +/* ARGSUSED */ +static int +zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + nvpair_t *pair, *pair2; + + for (pair = nvlist_next_nvpair(innvl, NULL); + pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { + char *snap_name; + + /* + * Verify the snapshot argument. + */ + if (nvpair_value_string(pair, &snap_name) != 0) + return (SET_ERROR(EINVAL)); + + + /* Verify that the keys (bookmarks) are unique */ + for (pair2 = nvlist_next_nvpair(innvl, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) { + if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0) + return (SET_ERROR(EINVAL)); + } + } + + return (dsl_bookmark_create(innvl, outnvl)); +} + +/* + * innvl: { + * property 1, property 2, ... + * } + * + * outnvl: { + * bookmark name 1 -> { property 1, property 2, ... }, + * bookmark name 2 -> { property 1, property 2, ... } + * } + * + */ +static int +zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + return (dsl_get_bookmarks(fsname, innvl, outnvl)); +} + +/* + * innvl: { + * bookmark name 1, bookmark name 2 + * } + * + * outnvl: bookmark -> error code (int32) + * + */ +static int +zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl, + nvlist_t *outnvl) +{ + int error, poollen; + nvpair_t *pair; + + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(innvl, NULL); + pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { const char *name = nvpair_name(pair); + const char *cp = strchr(name, '#'); /* - * The snap must be in the specified pool. + * The bookmark name must contain an #, and the part after it + * must contain only valid characters. + */ + if (cp == NULL || + zfs_component_namecheck(cp + 1, NULL, NULL) != 0) + return (SET_ERROR(EINVAL)); + + /* + * The bookmark must be in the specified pool. */ if (strncmp(name, poolname, poollen) != 0 || - (name[poollen] != '/' && name[poollen] != '@')) + (name[poollen] != '/' && name[poollen] != '#')) return (SET_ERROR(EXDEV)); - - error = zfs_unmount_snap(name); - if (error != 0) - return (error); - (void) zvol_remove_minor(name); } - return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); + error = dsl_bookmark_destroy(innvl, outnvl); + return (error); } /* @@ -4097,7 +4239,8 @@ out: * zc_guid if set, estimate size of stream only. zc_cookie is ignored. * output size in zc_objset_type. * - * outputs: none + * outputs: + * zc_objset_type estimated size, if zc_guid is set */ static int zfs_ioc_send(zfs_cmd_t *zc) @@ -5272,6 +5415,19 @@ zfs_ioctl_init(void) zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE); + zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK, + zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS, + zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS, + zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks, + POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, |