diff options
author | Paul Dagnelie <[email protected]> | 2019-06-19 09:48:13 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-06-19 09:48:12 -0700 |
commit | 30af21b02569ac192f52ce6e6511015f8a8d5729 (patch) | |
tree | e5f1091c2d3a6e511bbd2414782e490c18e0f59c /lib | |
parent | c1b5801bb5af0055e5f3d263beaa07026103e212 (diff) |
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Prashanth Sreenivasa <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Chris Williamson <[email protected]>
Reviewed-by: Pavel Zakharov <[email protected]>
Reviewed-by: Sebastien Roy <[email protected]>
Reviewed-by: Prakash Surya <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
Closes #7958
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libzfs/libzfs_dataset.c | 55 | ||||
-rw-r--r-- | lib/libzfs/libzfs_iter.c | 12 | ||||
-rw-r--r-- | lib/libzfs/libzfs_mount.c | 20 | ||||
-rw-r--r-- | lib/libzfs/libzfs_sendrecv.c | 909 | ||||
-rw-r--r-- | lib/libzfs/libzfs_util.c | 1 | ||||
-rw-r--r-- | lib/libzfs_core/libzfs_core.c | 128 | ||||
-rw-r--r-- | lib/libzpool/Makefile.am | 2 | ||||
-rw-r--r-- | lib/libzpool/kernel.c | 1 |
8 files changed, 949 insertions, 179 deletions
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 3be205f1f..939d4ea36 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>. * Copyright (c) 2013 Martin Matuska. All rights reserved. @@ -597,7 +597,6 @@ zfs_bookmark_exists(const char *path) int err; boolean_t rv; - (void) strlcpy(fsname, path, sizeof (fsname)); pound = strchr(fsname, '#'); if (pound == NULL) @@ -2408,6 +2407,10 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, *val = zhp->zfs_dmustats.dds_inconsistent; break; + case ZFS_PROP_REDACTED: + *val = zhp->zfs_dmustats.dds_redacted; + break; + default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: @@ -2620,6 +2623,37 @@ zfs_get_clones_nvl(zfs_handle_t *zhp) return (value); } +static int +get_rsnaps_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) +{ + nvlist_t *value; + uint64_t *snaps; + uint_t nsnaps; + + if (nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &value) != 0) + return (-1); + if (nvlist_lookup_uint64_array(value, ZPROP_VALUE, &snaps, + &nsnaps) != 0) + return (-1); + if (nsnaps == 0) { + /* There's no redaction snapshots; pass a special value back */ + (void) snprintf(propbuf, proplen, "none"); + return (0); + } + propbuf[0] = '\0'; + for (int i = 0; i < nsnaps; i++) { + char buf[128]; + if (propbuf[0] != '\0') + (void) strlcat(propbuf, ",", proplen); + (void) snprintf(buf, sizeof (buf), "%llu", + (u_longlong_t)snaps[i]); + (void) strlcat(propbuf, buf, proplen); + } + + return (0); +} + /* * Accepts a property 
and value and checks that the value * matches the one found by the channel program. If they are @@ -2814,6 +2848,11 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, zcp_check(zhp, prop, 0, str); break; + case ZFS_PROP_REDACT_SNAPS: + if (get_rsnaps_string(zhp, propbuf, proplen) != 0) + return (-1); + break; + case ZFS_PROP_CLONES: if (get_clones_string(zhp, propbuf, proplen) != 0) return (-1); @@ -3333,6 +3372,9 @@ zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, return (0); } +/* + * propname must start with "written@" or "written#". + */ int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) @@ -3343,8 +3385,10 @@ zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - snapname = strchr(propname, '@') + 1; - if (strchr(snapname, '@')) { + assert(zfs_prop_written(propname)); + snapname = propname + strlen("written@"); + if (strchr(snapname, '@') != NULL || strchr(snapname, '#') != NULL) { + /* full snapshot or bookmark name specified */ (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); } else { /* snapname is the short name, append it to zhp's fsname */ @@ -3355,8 +3399,7 @@ zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, cp = strchr(zc.zc_value, '@'); if (cp != NULL) *cp = '\0'; - (void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value)); - (void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value)); + (void) strlcat(zc.zc_value, snapname - 1, sizeof (zc.zc_value)); } err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc); diff --git a/lib/libzfs/libzfs_iter.c b/lib/libzfs/libzfs_iter.c index 5e9a1ecae..d3ba58a9e 100644 --- a/lib/libzfs/libzfs_iter.c +++ b/lib/libzfs/libzfs_iter.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 
+ * Copyright (c) 2013, 2019 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2019 Datto Inc. */ @@ -212,10 +212,12 @@ zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data) /* Setup the requested properties nvlist. */ props = fnvlist_alloc(); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_GUID)); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATETXG)); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATION)); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_IVSET_GUID)); + for (zfs_prop_t p = 0; p < ZFS_NUM_PROPS; p++) { + if (zfs_prop_valid_for_type(p, ZFS_TYPE_BOOKMARK, B_FALSE)) { + fnvlist_add_boolean(props, zfs_prop_to_name(p)); + } + } + fnvlist_add_boolean(props, "redact_complete"); if ((err = lzc_get_bookmarks(zhp->zfs_name, props, &bmarks)) != 0) goto out; diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 649c232aa..39ca2be05 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -22,7 +22,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + * Copyright (c) 2014, 2019 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov <[email protected]> * Copyright 2017 RackTop Systems. * Copyright (c) 2018 Datto Inc. 
@@ -306,7 +306,7 @@ zfs_is_mounted(zfs_handle_t *zhp, char **where) */ static boolean_t zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen, - zprop_source_t *source) + zprop_source_t *source, int flags) { char sourceloc[MAXNAMELEN]; zprop_source_t sourcetype; @@ -329,6 +329,13 @@ zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen, getzoneid() == GLOBAL_ZONEID) return (B_FALSE); + if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && + getzoneid() == GLOBAL_ZONEID) + return (B_FALSE); + + if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE)) + return (B_FALSE); + if (source) *source = sourcetype; @@ -495,8 +502,10 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL)) (void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts)); - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, + flags)) { return (0); + } /* * Append default mount options which apply to the mount point. @@ -868,7 +877,7 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) zprop_source_t sourcetype; int ret; - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0)) return (0); for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) { @@ -1135,8 +1144,7 @@ remove_mountpoint(zfs_handle_t *zhp) char mountpoint[ZFS_MAXPROPLEN]; zprop_source_t source; - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), - &source)) + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), &source, 0)) return; if (source == ZPROP_SRC_DEFAULT || diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index f69a46430..313da7ef8 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>. * All rights reserved @@ -74,6 +74,9 @@ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *, const char *, nvlist_t *); +static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent, + uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids, + uint64_t num_redact_snaps, char *name); static int guid_to_name(libzfs_handle_t *, const char *, uint64_t, boolean_t, char *); @@ -89,6 +92,8 @@ typedef struct progress_arg { zfs_handle_t *pa_zhp; int pa_fd; boolean_t pa_parsable; + boolean_t pa_estimate; + int pa_verbosity; } progress_arg_t; typedef struct dataref { @@ -886,7 +891,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) "exist\n"), sd->fsname, sd->tosnap, sd->recursive ? 
dgettext(TEXT_DOMAIN, " recursively") : "", zhp->zfs_name, sd->tosnap); - rv = -1; + rv = EZFS_NOENT; } goto out; } @@ -1064,7 +1069,7 @@ typedef struct send_dump_data { char prevsnap[ZFS_MAX_DATASET_NAME_LEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; + boolean_t dryrun, parsable, progress, embed_data, std_out; boolean_t large_block, compress, raw, holds; int outfd; boolean_t err; @@ -1076,6 +1081,7 @@ typedef struct send_dump_data { nvlist_t *debugnv; char holdtag[ZFS_MAX_DATASET_NAME_LEN]; int cleanup_fd; + int verbosity; uint64_t size; } send_dump_data_t; @@ -1234,42 +1240,75 @@ gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } +int +zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written, + uint64_t *blocks_visited) +{ + zfs_cmd_t zc = { {0} }; + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_cookie = fd; + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) + return (errno); + if (bytes_written != NULL) + *bytes_written = zc.zc_cookie; + if (blocks_visited != NULL) + *blocks_visited = zc.zc_objset_type; + return (0); +} + static void * send_progress_thread(void *arg) { progress_arg_t *pa = arg; - zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = pa->pa_zhp; - libzfs_handle_t *hdl = zhp->zfs_hdl; - unsigned long long bytes; + uint64_t bytes; + uint64_t blocks; char buf[16]; time_t t; struct tm *tm; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - if (!pa->pa_parsable) - (void) fprintf(stderr, "TIME SENT SNAPSHOT %s\n", - zhp->zfs_name); + boolean_t firstloop = B_TRUE; /* * Print the progress from ZFS_IOC_SEND_PROGRESS every second. 
*/ for (;;) { + int err; (void) sleep(1); + if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes, + &blocks)) != 0) { + if (err == EINTR || err == ENOENT) + return ((void *)0); + return ((void *)(uintptr_t)err); + } - zc.zc_cookie = pa->pa_fd; - if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) - return ((void *)-1); + if (firstloop && !pa->pa_parsable) { + (void) fprintf(stderr, + "TIME %s %sSNAPSHOT %s\n", + pa->pa_estimate ? "BYTES" : " SENT", + pa->pa_verbosity >= 2 ? " BLOCKS " : "", + zhp->zfs_name); + firstloop = B_FALSE; + } (void) time(&t); tm = localtime(&t); - bytes = zc.zc_cookie; - if (pa->pa_parsable) { + if (pa->pa_verbosity >= 2 && pa->pa_parsable) { + (void) fprintf(stderr, + "%02d:%02d:%02d\t%llu\t%llu\t%s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + (u_longlong_t)bytes, (u_longlong_t)blocks, + zhp->zfs_name); + } else if (pa->pa_verbosity >= 2) { + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, + "%02d:%02d:%02d %5s %8llu %s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + buf, (u_longlong_t)blocks, zhp->zfs_name); + } else if (pa->pa_parsable) { (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, - bytes, zhp->zfs_name); + (u_longlong_t)bytes, zhp->zfs_name); } else { zfs_nicebytes(bytes, buf, sizeof (buf)); (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", @@ -1411,7 +1450,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); - if (sdd->verbose) { + if (sdd->verbosity != 0) { uint64_t size = 0; char fromds[ZFS_MAX_DATASET_NAME_LEN]; @@ -1440,6 +1479,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) pa.pa_zhp = zhp; pa.pa_fd = sdd->outfd; pa.pa_parsable = sdd->parsable; + pa.pa_estimate = B_FALSE; + pa.pa_verbosity = sdd->verbosity; if ((err = pthread_create(&tid, NULL, send_progress_thread, &pa)) != 0) { @@ -1452,8 +1493,18 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) fromorigin, sdd->outfd, flags, sdd->debugnv); if 
(sdd->progress) { + void *status = NULL; (void) pthread_cancel(tid); - (void) pthread_join(tid, NULL); + (void) pthread_join(tid, &status); + int error = (int)(uintptr_t)status; + if (error != 0 && status != PTHREAD_CANCELED) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "progress thread exited nonzero")); + return (zfs_standard_error(zhp->zfs_hdl, error, + errbuf)); + } } } @@ -1752,6 +1803,183 @@ zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token) } return (nv); } +static enum lzc_send_flags +lzc_flags_from_sendflags(const sendflags_t *flags) +{ + enum lzc_send_flags lzc_flags = 0; + if (flags->largeblock) + lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; + if (flags->embed_data) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + if (flags->compress) + lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if (flags->raw) + lzc_flags |= LZC_SEND_FLAG_RAW; + return (lzc_flags); +} + +static int +estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, + uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes, + const char *redactbook, char *errbuf) +{ + uint64_t size; + FILE *fout = flags->dryrun ? 
stdout : stderr; + progress_arg_t pa = { 0 }; + int err = 0; + pthread_t ptid; + + if (flags->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = fd; + pa.pa_parsable = flags->parsable; + pa.pa_estimate = B_TRUE; + pa.pa_verbosity = flags->verbosity; + + err = pthread_create(&ptid, NULL, + send_progress_thread, &pa); + if (err != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, + EZFS_THREADCREATEFAILED, errbuf)); + } + } + + err = lzc_send_space_resume_redacted(zhp->zfs_name, from, + lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes, + redactbook, fd, &size); + + if (flags->progress) { + void *status = NULL; + (void) pthread_cancel(ptid); + (void) pthread_join(ptid, &status); + int error = (int)(uintptr_t)status; + if (error != 0 && status != PTHREAD_CANCELED) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "progress thread exited " + "nonzero")); + return (zfs_standard_error(zhp->zfs_hdl, error, + errbuf)); + } + } + + if (err != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(err)); + return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, + errbuf)); + } + send_print_verbose(fout, zhp->zfs_name, from, size, + flags->parsable); + + if (flags->parsable) { + (void) fprintf(fout, "size\t%llu\n", (longlong_t)size); + } else { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + return (0); +} + +static boolean_t +redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid) +{ + for (int i = 0; i < num_snaps; i++) { + if (snaps[i] == guid) + return (B_TRUE); + } + return (B_FALSE); +} + +static boolean_t +redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1, + const uint64_t *snaps2, uint64_t num_snaps2) +{ + if (num_snaps1 != num_snaps2) + return (B_FALSE); + for (int i = 0; i < num_snaps1; i++) { + if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i])) + return 
(B_FALSE); + } + return (B_TRUE); +} + +/* + * Check that the list of redaction snapshots in the bookmark matches the send + * we're resuming, and return whether or not it's complete. + * + * Note that the caller needs to free the contents of *bookname with free() if + * this function returns successfully. + */ +static int +find_redact_book(libzfs_handle_t *hdl, const char *path, + const uint64_t *redact_snap_guids, int num_redact_snaps, + char **bookname) +{ + char errbuf[1024]; + int error = 0; + nvlist_t *props = fnvlist_alloc(); + nvlist_t *bmarks; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot resume send")); + + fnvlist_add_boolean(props, "redact_complete"); + fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS)); + error = lzc_get_bookmarks(path, props, &bmarks); + nvlist_free(props); + if (error != 0) { + if (error == ESRCH) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "nonexistent redaction bookmark provided")); + } else if (error == ENOENT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset to be sent no longer exists")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unknown error: %s"), strerror(error)); + } + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + nvpair_t *pair; + for (pair = nvlist_next_nvpair(bmarks, NULL); pair; + pair = nvlist_next_nvpair(bmarks, pair)) { + + nvlist_t *bmark = fnvpair_value_nvlist(pair); + nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, + zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS)); + uint_t len = 0; + uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist, + ZPROP_VALUE, &len); + if (redact_snaps_equal(redact_snap_guids, + num_redact_snaps, bmarksnaps, len)) { + break; + } + } + if (pair == NULL) { + fnvlist_free(bmarks); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no appropriate redaction bookmark exists")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + char *name = nvpair_name(pair); + nvlist_t *bmark = fnvpair_value_nvlist(pair); + nvlist_t 
*vallist = fnvlist_lookup_nvlist(bmark, "redact_complete"); + boolean_t complete = fnvlist_lookup_boolean_value(vallist, + ZPROP_VALUE); + if (!complete) { + fnvlist_free(bmarks); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incomplete redaction bookmark provided")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + *bookname = strndup(name, ZFS_MAX_DATASET_NAME_LEN); + ASSERT3P(*bookname, !=, NULL); + fnvlist_free(bmarks); + return (0); +} int zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, @@ -1765,7 +1993,10 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, int error = 0; char name[ZFS_MAX_DATASET_NAME_LEN]; enum lzc_send_flags lzc_flags = 0; - FILE *fout = (flags->verbose && flags->dryrun) ? stdout : stderr; + FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr; + uint64_t *redact_snap_guids = NULL; + int num_redact_snaps = 0; + char *redact_book = NULL; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot resume send")); @@ -1779,7 +2010,7 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, */ return (zfs_error(hdl, EZFS_FAULT, errbuf)); } - if (flags->verbose) { + if (flags->verbosity != 0) { (void) fprintf(fout, dgettext(TEXT_DOMAIN, "resume token contents:\n")); nvlist_print(fout, resume_nvl); @@ -1825,8 +2056,14 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, return (zfs_error(hdl, EZFS_BADPATH, errbuf)); } + if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps", + &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) { + num_redact_snaps = -1; + } + if (fromguid != 0) { - if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) { + if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE, + redact_snap_guids, num_redact_snaps, name) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source %#llx no longer exists"), (longlong_t)fromguid); @@ -1835,14 +2072,39 @@ zfs_send_resume(libzfs_handle_t *hdl, 
sendflags_t *flags, int outfd, fromname = name; } - if (flags->verbose) { - uint64_t size = 0; - error = lzc_send_space(zhp->zfs_name, fromname, - lzc_flags, &size); - if (error == 0) - size = MAX(0, (int64_t)(size - bytes)); - send_print_verbose(fout, zhp->zfs_name, fromname, - size, flags->parsable); + redact_snap_guids = NULL; + + if (nvlist_lookup_uint64_array(resume_nvl, + zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids, + (uint_t *)&num_redact_snaps) == 0) { + char path[ZFS_MAX_DATASET_NAME_LEN]; + + (void) strlcpy(path, toname, sizeof (path)); + char *at = strchr(path, '@'); + ASSERT3P(at, !=, NULL); + + *at = '\0'; + + if ((error = find_redact_book(hdl, path, redact_snap_guids, + num_redact_snaps, &redact_book)) != 0) { + return (error); + } + } + + if (flags->verbosity != 0) { + /* + * Some of these may have come from the resume token, set them + * here for size estimate purposes. + */ + sendflags_t tmpflags = *flags; + if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK) + tmpflags.largeblock = B_TRUE; + if (lzc_flags & LZC_SEND_FLAG_COMPRESS) + tmpflags.compress = B_TRUE; + if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA) + tmpflags.embed_data = B_TRUE; + error = estimate_size(zhp, fromname, outfd, &tmpflags, + resumeobj, resumeoff, bytes, redact_book, errbuf); } if (!flags->dryrun) { @@ -1856,21 +2118,36 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, pa.pa_zhp = zhp; pa.pa_fd = outfd; pa.pa_parsable = flags->parsable; + pa.pa_estimate = B_FALSE; + pa.pa_verbosity = flags->verbosity; error = pthread_create(&tid, NULL, send_progress_thread, &pa); if (error != 0) { + if (redact_book != NULL) + free(redact_book); zfs_close(zhp); return (error); } } - error = lzc_send_resume(zhp->zfs_name, fromname, outfd, - lzc_flags, resumeobj, resumeoff); + error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd, + lzc_flags, resumeobj, resumeoff, redact_book); + if (redact_book != NULL) + free(redact_book); if (flags->progress) { + void 
*status = NULL; (void) pthread_cancel(tid); - (void) pthread_join(tid, NULL); + (void) pthread_join(tid, &status); + int error = (int)(uintptr_t)status; + if (error != 0 && status != PTHREAD_CANCELED) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "progress thread exited nonzero")); + return (zfs_standard_error(hdl, error, errbuf)); + } } char errbuf[1024]; @@ -1886,6 +2163,12 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "source key must be loaded")); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case ESRCH: + if (lzc_exists(zhp->zfs_name)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source could not be found")); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EXDEV: case ENOENT: @@ -1906,16 +2189,149 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, default: return (zfs_standard_error(hdl, errno, errbuf)); } + } else { + if (redact_book != NULL) + free(redact_book); } - zfs_close(zhp); return (error); } /* - * Generate a send stream for the dataset identified by the argument zhp. + * This function informs the target system that the recursive send is complete. + * The record is also expected in the case of a send -p. + */ +static int +send_conclusion_record(int fd, zio_cksum_t *zc) +{ + dmu_replay_record_t drr = { 0 }; + drr.drr_type = DRR_END; + if (zc != NULL) + drr.drr_u.drr_end.drr_checksum = *zc; + if (write(fd, &drr, sizeof (drr)) == -1) { + return (errno); + } + return (0); +} + +/* + * This function is responsible for sending the records that contain the + * necessary information for the target system's libzfs to be able to set the + * properties of the filesystem being received, or to be able to prepare for + * a recursive receive. + * + * The "zhp" argument is the handle of the snapshot we are sending + * (the "tosnap"). 
The "from" argument is the short snapshot name (the part + * after the @) of the incremental source. + */ +static int +send_prelim_records(zfs_handle_t *zhp, const char *from, int fd, + boolean_t gather_props, boolean_t recursive, boolean_t verbose, + boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t backup, + boolean_t holds, boolean_t props, boolean_t doall, + nvlist_t **fssp, avl_tree_t **fsavlp) +{ + int err = 0; + char *packbuf = NULL; + size_t buflen = 0; + zio_cksum_t zc = { {0} }; + int featureflags = 0; + /* name of filesystem/volume that contains snapshot we are sending */ + char tofs[ZFS_MAX_DATASET_NAME_LEN]; + /* short name of snap we are sending */ + char *tosnap = ""; + + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp, + ZFS_PROP_VERSION) >= ZPL_VERSION_SA) { + featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; + } + + if (holds) + featureflags |= DMU_BACKUP_FEATURE_HOLDS; + + (void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN); + char *at = strchr(tofs, '@'); + if (at != NULL) { + *at = '\0'; + tosnap = at + 1; + } + + if (gather_props) { + nvlist_t *hdrnv = fnvlist_alloc(); + nvlist_t *fss = NULL; + + if (from != NULL) + fnvlist_add_string(hdrnv, "fromsnap", from); + fnvlist_add_string(hdrnv, "tosnap", tosnap); + if (!recursive) + fnvlist_add_boolean(hdrnv, "not_recursive"); + + if (raw) { + VERIFY0(nvlist_add_boolean(hdrnv, "raw")); + } + + if ((err = gather_nvlist(zhp->zfs_hdl, tofs, + from, tosnap, recursive, raw, doall, replicate, verbose, + backup, holds, props, &fss, fsavlp)) != 0) { + return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, + errbuf)); + } + fnvlist_add_nvlist(hdrnv, "fss", fss); + VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, + 0)); + if (fssp != NULL) { + *fssp = fss; + } else { + nvlist_free(fss); + } + nvlist_free(hdrnv); + } + + if (!dryrun) { + 
dmu_replay_record_t drr = { 0 }; + /* write first begin record */ + drr.drr_type = DRR_BEGIN; + drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; + DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. + drr_versioninfo, DMU_COMPOUNDSTREAM); + DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. + drr_versioninfo, featureflags); + if (snprintf(drr.drr_u.drr_begin.drr_toname, + sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs, + tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) { + return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, + errbuf)); + } + drr.drr_payloadlen = buflen; + + err = dump_record(&drr, packbuf, buflen, &zc, fd); + free(packbuf); + if (err != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(err)); + return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, + errbuf)); + } + err = send_conclusion_record(fd, &zc); + if (err != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(err)); + return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP, + errbuf)); + } + } + return (0); +} + +/* + * Generate a send stream. The "zhp" argument is the filesystem/volume + * that contains the snapshot to send. The "fromsnap" argument is the + * short name (the part after the '@') of the snapshot that is the + * incremental source to send from (if non-NULL). The "tosnap" argument + * is the short name of the snapshot to send. * * The content of the send stream is the snapshot identified by * 'tosnap'. 
Incremental streams are requested in two ways: @@ -1997,83 +2413,23 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, if (flags->replicate || flags->doall || flags->props || flags->holds || flags->backup) { - dmu_replay_record_t drr = { 0 }; - char *packbuf = NULL; - size_t buflen = 0; - zio_cksum_t zc; - - ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); - - if (flags->replicate || flags->props || flags->backup || - flags->holds) { - nvlist_t *hdrnv; - - VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); - if (fromsnap) { - VERIFY(0 == nvlist_add_string(hdrnv, - "fromsnap", fromsnap)); - } - VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); - if (!flags->replicate) { - VERIFY(0 == nvlist_add_boolean(hdrnv, - "not_recursive")); - } - if (flags->raw) { - VERIFY(0 == nvlist_add_boolean(hdrnv, "raw")); - } - - err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags->replicate, flags->raw, - flags->doall, flags->replicate, flags->verbose, - flags->backup, flags->holds, flags->props, &fss, - &fsavl); - if (err) - goto err_out; - VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); - err = nvlist_pack(hdrnv, &packbuf, &buflen, - NV_ENCODE_XDR, 0); - if (debugnvp) - *debugnvp = hdrnv; - else - nvlist_free(hdrnv); - if (err) - goto stderr_out; - } - - if (!flags->dryrun) { - /* write first begin record */ - drr.drr_type = DRR_BEGIN; - drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; - DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. - drr_versioninfo, DMU_COMPOUNDSTREAM); - DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. 
- drr_versioninfo, featureflags); - if (snprintf(drr.drr_u.drr_begin.drr_toname, - sizeof (drr.drr_u.drr_begin.drr_toname), - "%s@%s", zhp->zfs_name, tosnap) >= - sizeof (drr.drr_u.drr_begin.drr_toname)) { - err = EINVAL; - goto stderr_out; - } - drr.drr_payloadlen = buflen; - - err = dump_record(&drr, packbuf, buflen, &zc, outfd); - free(packbuf); - if (err != 0) - goto stderr_out; - - /* write end record */ - bzero(&drr, sizeof (drr)); - drr.drr_type = DRR_END; - drr.drr_u.drr_end.drr_checksum = zc; - err = write(outfd, &drr, sizeof (drr)); - if (err == -1) { - err = errno; - goto stderr_out; - } - - err = 0; + char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN]; + if (snprintf(full_tosnap_name, sizeof (full_tosnap_name), + "%s@%s", zhp->zfs_name, tosnap) >= + sizeof (full_tosnap_name)) { + err = EINVAL; + goto stderr_out; } + zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl, + full_tosnap_name, ZFS_TYPE_SNAPSHOT); + err = send_prelim_records(tosnap, fromsnap, outfd, + flags->replicate || flags->props || flags->holds, + flags->replicate, flags->verbosity > 0, flags->dryrun, + flags->raw, flags->replicate, flags->backup, flags->holds, + flags->props, flags->doall, &fss, &fsavl); + zfs_close(tosnap); + if (err != 0) + goto err_out; } /* dump each stream */ @@ -2088,7 +2444,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.fromorigin = flags->fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; - sdd.verbose = flags->verbose; + sdd.verbosity = flags->verbosity; sdd.parsable = flags->parsable; sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; @@ -2101,7 +2457,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.filter_cb_arg = cb_arg; if (debugnvp) sdd.debugnv = *debugnvp; - if (sdd.verbose && sdd.dryrun) + if (sdd.verbosity != 0 && sdd.dryrun) sdd.std_out = B_TRUE; fout = sdd.std_out ? 
stdout : stderr; @@ -2130,7 +2486,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.snapholds = NULL; } - if (flags->verbose || sdd.snapholds != NULL) { + if (flags->verbosity != 0 || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output * or to gather snapshot hold's before generating any data, @@ -2142,7 +2498,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, if (err != 0) goto stderr_out; - if (flags->verbose) { + if (flags->verbosity != 0) { if (flags->parsable) { (void) fprintf(fout, "size\t%llu\n", (longlong_t)sdd.size); @@ -2174,7 +2530,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } sdd.dryrun = B_FALSE; - sdd.verbose = B_FALSE; + sdd.verbosity = 0; } err = dump_filesystems(zhp, &sdd); @@ -2204,12 +2560,9 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, * there was some error, because it might not be totally * failed. */ - dmu_replay_record_t drr = { 0 }; - drr.drr_type = DRR_END; - if (write(outfd, &drr, sizeof (drr)) == -1) { - return (zfs_standard_error(zhp->zfs_hdl, - errno, errbuf)); - } + err = send_conclusion_record(outfd, NULL); + if (err != 0) + return (zfs_standard_error(zhp->zfs_hdl, err, errbuf)); } return (err || sdd.err); @@ -2231,43 +2584,238 @@ err_out: return (err); } +static int +get_dedup_fd(zfs_handle_t *zhp, dedup_arg_t *dda, int fd, pthread_t *tid, + int *outfd) +{ + int pipefd[2]; + char errbuf[1024]; + int err; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, + errbuf)); + } + dda->outputfd = fd; + dda->inputfd = pipefd[1]; + dda->dedup_hdl = zhp->zfs_hdl; + if ((err = pthread_create(tid, NULL, cksummer, dda)) != 0) { + (void) close(pipefd[0]); + (void) close(pipefd[1]); 
+ zfs_error_aux(zhp->zfs_hdl, strerror(err)); + return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, + errbuf)); + } + *outfd = pipefd[0]; + return (0); +} + +zfs_handle_t * +name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname) +{ + char dirname[ZFS_MAX_DATASET_NAME_LEN]; + (void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN); + char *c = strchr(dirname, '@'); + if (c != NULL) + *c = '\0'; + return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET)); +} + +/* + * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either + * an earlier snapshot in the same filesystem, or a snapshot before later's + * origin, or its origin's origin, etc. + */ +static boolean_t +snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later) +{ + boolean_t ret; + uint64_t later_txg = + (later->zfs_type == ZFS_TYPE_FILESYSTEM || + later->zfs_type == ZFS_TYPE_VOLUME ? + UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG)); + uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG); + + if (earlier_txg >= later_txg) + return (B_FALSE); + + zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl, + earlier->zfs_name); + zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl, + later->zfs_name); + + if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) { + zfs_close(earlier_dir); + zfs_close(later_dir); + return (B_TRUE); + } + + char clonename[ZFS_MAX_DATASET_NAME_LEN]; + if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename, + ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0) { + zfs_close(earlier_dir); + zfs_close(later_dir); + return (B_FALSE); + } + + zfs_handle_t *origin = zfs_open(earlier->zfs_hdl, clonename, + ZFS_TYPE_DATASET); + uint64_t origin_txg = zfs_prop_get_int(origin, ZFS_PROP_CREATETXG); + + /* + * If "earlier" is exactly the origin, then + * snapshot_is_before(earlier, origin) will return false (because + * they're the same).
+ */ + if (origin_txg == earlier_txg && + strcmp(origin->zfs_name, earlier->zfs_name) == 0) { + zfs_close(earlier_dir); + zfs_close(later_dir); + zfs_close(origin); + return (B_TRUE); + } + zfs_close(earlier_dir); + zfs_close(later_dir); + + ret = snapshot_is_before(earlier, origin); + zfs_close(origin); + return (ret); +} + +/* + * The "zhp" argument is the handle of the dataset to send (typically a + * snapshot). The "from" argument is the full name of the snapshot or + * bookmark that is the incremental source. + */ int -zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags) +zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, + const char *redactbook) { - int err = 0; + int err; libzfs_handle_t *hdl = zhp->zfs_hdl; - enum lzc_send_flags lzc_flags = 0; - FILE *fout = (flags.verbose && flags.dryrun) ? stdout : stderr; + int orig_fd = fd; + pthread_t ddtid, ptid; + progress_arg_t pa = { 0 }; + dedup_arg_t dda = { 0 }; + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); - if (flags.largeblock) - lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; - if (flags.embed_data) - lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; - if (flags.compress) - lzc_flags |= LZC_SEND_FLAG_COMPRESS; - if (flags.raw) - lzc_flags |= LZC_SEND_FLAG_RAW; + if (from != NULL && strchr(from, '@')) { + zfs_handle_t *from_zhp = zfs_open(hdl, from, + ZFS_TYPE_DATASET); + if (!snapshot_is_before(from_zhp, zhp)) { + zfs_close(from_zhp); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + } + zfs_close(from_zhp); + } - if (flags.verbose) { - uint64_t size = 0; - err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size); - if (err == 0) { - send_print_verbose(fout, zhp->zfs_name, from, size, - flags.parsable); - } else { - (void) fprintf(stderr, "Cannot estimate send size: " - "%s\n", 
strerror(errno)); + /* + * Send fs properties + */ + if (flags->props || flags->holds || flags->backup) { + /* + * Note: the header generated by send_prelim_records() + * assumes that the incremental source is in the same + * filesystem/volume as the target (which is a requirement + * when doing "zfs send -R"). But that isn't always the + * case here (e.g. send from snap in origin, or send from + * bookmark). We pass from=NULL, which will omit this + * information from the prelim records; it isn't used + * when receiving this type of stream. + */ + err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE, + flags->verbosity > 0, flags->dryrun, flags->raw, + flags->replicate, flags->backup, flags->holds, + flags->props, flags->doall, NULL, NULL); + if (err != 0) + return (err); + } + + /* + * Perform size estimate if verbose was specified. + */ + if (flags->verbosity != 0) { + err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook, + errbuf); + if (err != 0) + return (err); + } + + if (flags->dryrun) + return (0); + + /* + * If deduplication is requested, spawn a thread that will deduplicate + * the data coming out of the kernel. + */ + if (flags->dedup) { + err = get_dedup_fd(zhp, &dda, fd, &ddtid, &fd); + if (err != 0) + return (err); + } + + /* + * If progress reporting is requested, spawn a new thread to poll + * ZFS_IOC_SEND_PROGRESS at a regular interval. 
+ */ + if (flags->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = fd; + pa.pa_parsable = flags->parsable; + pa.pa_estimate = B_FALSE; + pa.pa_verbosity = flags->verbosity; + + err = pthread_create(&ptid, NULL, + send_progress_thread, &pa); + if (err != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + if (flags->dedup) { + (void) pthread_cancel(ddtid); + (void) close(fd); + (void) pthread_join(ddtid, NULL); + } + return (zfs_error(zhp->zfs_hdl, + EZFS_THREADCREATEFAILED, errbuf)); } } - if (flags.dryrun) - return (err); + err = lzc_send_redacted(zhp->zfs_name, from, fd, + lzc_flags_from_sendflags(flags), redactbook); - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "warning: cannot send '%s'"), zhp->zfs_name); + if (flags->progress) { + void *status = NULL; + if (err != 0) + (void) pthread_cancel(ptid); + (void) pthread_join(ptid, &status); + int error = (int)(uintptr_t)status; + if (error != 0 && status != PTHREAD_CANCELED) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "progress thread exited " + "nonzero")); + return (zfs_standard_error(hdl, error, errbuf)); + } + } + if (flags->dedup) { + if (err != 0) + (void) pthread_cancel(ddtid); + (void) close(fd); + (void) pthread_join(ddtid, NULL); + } - err = lzc_send(zhp->zfs_name, from, fd, lzc_flags); + if (flags->props || flags->holds || flags->backup) { + /* Write the final end record. 
*/ + err = send_conclusion_record(orig_fd, NULL); + if (err != 0) + return (zfs_standard_error(hdl, err, errbuf)); + } if (err != 0) { switch (errno) { case EXDEV: @@ -2621,8 +3169,38 @@ typedef struct guid_to_name_data { boolean_t bookmark_ok; char *name; char *skip; + uint64_t *redact_snap_guids; + uint64_t num_redact_snaps; } guid_to_name_data_t; +boolean_t +redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd) +{ + uint64_t *bmark_snaps; + uint_t bmark_num_snaps; + nvlist_t *nvl; + if (zhp->zfs_type != ZFS_TYPE_BOOKMARK) + return (B_FALSE); + + nvl = fnvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS)); + bmark_snaps = fnvlist_lookup_uint64_array(nvl, ZPROP_VALUE, + &bmark_num_snaps); + if (bmark_num_snaps != gtnd->num_redact_snaps) + return (B_FALSE); + int i = 0; + for (; i < bmark_num_snaps; i++) { + int j = 0; + for (; j < bmark_num_snaps; j++) { + if (bmark_snaps[i] == gtnd->redact_snap_guids[j]) + break; + } + if (j == bmark_num_snaps) + break; + } + return (i == bmark_num_snaps); +} + static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { @@ -2637,7 +3215,8 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) return (0); } - if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) { + if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid && + (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); @@ -2656,10 +3235,19 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) * progressively larger portions of the hierarchy. This allows one to send a * tree of datasets individually and guarantee that we will find the source * guid within that hierarchy, even if there are multiple matches elsewhere. + * + * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with + * the specified number of redaction snapshots. 
If num_redact_snaps isn't 0 or + * -1, then redact_snap_guids will be an array of the guids of the snapshots the + * redaction bookmark was created with. If num_redact_snaps is -1, then we will + * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the + * given guid. Note that a redaction bookmark can be returned if + * num_redact_snaps == -1. */ static int -guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, - boolean_t bookmark_ok, char *name) +guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent, + uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids, + uint64_t num_redact_snaps, char *name) { char pname[ZFS_MAX_DATASET_NAME_LEN]; guid_to_name_data_t gtnd; @@ -2668,6 +3256,8 @@ guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, gtnd.bookmark_ok = bookmark_ok; gtnd.name = name; gtnd.skip = NULL; + gtnd.redact_snap_guids = redact_snap_guids; + gtnd.num_redact_snaps = num_redact_snaps; /* * Search progressively larger portions of the hierarchy, starting @@ -2706,6 +3296,14 @@ guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, return (ENOENT); } +static int +guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, + boolean_t bookmark_ok, char *name) +{ + return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL, + -1, name)); +} + /* * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if * guid1 is after guid2. 
@@ -3506,6 +4104,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, 8), B_FALSE, NULL); break; + case DRR_OBJECT_RANGE: case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: @@ -3782,6 +4381,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, boolean_t toplevel = B_FALSE; boolean_t zoned = B_FALSE; boolean_t hastoken = B_FALSE; + boolean_t redacted; uint8_t *wkeydata = NULL; uint_t wkeylen = 0; @@ -4022,6 +4622,9 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) strcpy(name, destsnap); *strchr(name, '@') = '\0'; + redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_REDACTED; + if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) { zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp; @@ -4494,7 +5097,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * receive (indicated by stream_avl being non-NULL). */ cp = strchr(destsnap, '@'); - if (cp && (ioctl_err == 0 || !newfs)) { + if (cp && (ioctl_err == 0 || !newfs) && !redacted) { zfs_handle_t *h; *cp = '\0'; diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 19bb57ad4..455849596 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -468,6 +468,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case EREMOTEIO: zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap); break; + case ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE: case ZFS_ERR_IOC_CMD_UNAVAIL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " "module does not support this operation. 
A reboot may " diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 99fc84d04..9e8968879 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -632,13 +632,43 @@ int lzc_send(const char *snapname, const char *from, int fd, enum lzc_send_flags flags) { - return (lzc_send_resume(snapname, from, fd, flags, 0, 0)); + return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, + NULL)); +} + +int +lzc_send_redacted(const char *snapname, const char *from, int fd, + enum lzc_send_flags flags, const char *redactbook) +{ + return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, + redactbook)); } int lzc_send_resume(const char *snapname, const char *from, int fd, enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) { + return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj, + resumeoff, NULL)); +} + +/* + * snapname: The name of the "tosnap", or the snapshot whose contents we are + * sending. + * from: The name of the "fromsnap", or the incremental source. + * fd: File descriptor to write the stream to. + * flags: flags that determine features to be used by the stream. + * resumeobj: Object to resume from, for resuming send + * resumeoff: Offset to resume from, for resuming send. + * redactnv: nvlist of string -> boolean(ignored) containing the names of all + * the snapshots that we should redact with respect to. + * redactbook: Name of the redaction bookmark to create. 
+ */ +int +lzc_send_resume_redacted(const char *snapname, const char *from, int fd, + enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, + const char *redactbook) +{ nvlist_t *args; int err; @@ -658,6 +688,9 @@ lzc_send_resume(const char *snapname, const char *from, int fd, fnvlist_add_uint64(args, "resume_object", resumeobj); fnvlist_add_uint64(args, "resume_offset", resumeoff); } + if (redactbook != NULL) + fnvlist_add_string(args, "redactbook", redactbook); + err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); nvlist_free(args); return (err); @@ -676,11 +709,13 @@ lzc_send_resume(const char *snapname, const char *from, int fd, * are traversed, looking for blocks with a birth time since the creation TXG of * the snapshot this bookmark was created from. This will result in * significantly more I/O and be less efficient than a send space estimation on - * an equivalent snapshot. + * an equivalent snapshot. This process is also used if redact_snaps is + * non-null. */ int -lzc_send_space(const char *snapname, const char *from, - enum lzc_send_flags flags, uint64_t *spacep) +lzc_send_space_resume_redacted(const char *snapname, const char *from, + enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, + uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) { nvlist_t *args; nvlist_t *result; @@ -697,6 +732,16 @@ lzc_send_space(const char *snapname, const char *from, fnvlist_add_boolean(args, "compressok"); if (flags & LZC_SEND_FLAG_RAW) fnvlist_add_boolean(args, "rawok"); + if (resumeobj != 0 || resumeoff != 0) { + fnvlist_add_uint64(args, "resume_object", resumeobj); + fnvlist_add_uint64(args, "resume_offset", resumeoff); + fnvlist_add_uint64(args, "bytes", resume_bytes); + } + if (redactbook != NULL) + fnvlist_add_string(args, "redactbook", redactbook); + if (fd != -1) + fnvlist_add_int32(args, "fd", fd); + err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); nvlist_free(args); if (err == 0) @@ -705,6 
+750,14 @@ lzc_send_space(const char *snapname, const char *from, return (err); } +int +lzc_send_space(const char *snapname, const char *from, + enum lzc_send_flags flags, uint64_t *spacep) +{ + return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0, + NULL, -1, spacep)); +} + static int recv_read(int fd, void *buf, int ilen) { @@ -744,6 +797,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, char fsname[MAXPATHLEN]; char *atp; int error; + boolean_t payload = B_FALSE; ASSERT3S(g_refcount, >, 0); VERIFY3S(g_fd, !=, -1); @@ -774,13 +828,13 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, return (error); } else { drr = *begin_record; + payload = (begin_record->drr_payloadlen != 0); } /* - * Raw receives, resumable receives, and receives that include a - * wrapping key all use the new interface. + * All receives with a payload should use the new interface. */ - if (resumable || raw || wkeydata != NULL) { + if (resumable || raw || wkeydata != NULL || payload) { nvlist_t *outnvl = NULL; nvlist_t *innvl = fnvlist_alloc(); @@ -1118,19 +1172,33 @@ lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) * parameter is an nvlist of property names (with no values) that will be * returned for each bookmark. * - * The following are valid properties on bookmarks, all of which are numbers - * (represented as uint64 in the nvlist) + * The following are valid properties on bookmarks, most of which are numbers + * (represented as uint64 in the nvlist), except redact_snaps, which is a + * uint64 array, and redact_complete, which is a boolean * * "guid" - globally unique identifier of the snapshot it refers to * "createtxg" - txg when the snapshot it refers to was created * "creation" - timestamp when the snapshot it refers to was created * "ivsetguid" - IVset guid for identifying encrypted snapshots + * "redact_snaps" - list of guids of the redaction snapshots for the specified + * bookmark.
If the bookmark is not a redaction bookmark, the nvlist will + * not contain an entry for this value. If it is redacted with respect to + * no snapshots, it will contain value -> NULL uint64 array + * "redact_complete" - boolean value; true if the redaction bookmark is + * complete, false otherwise. * * The format of the returned nvlist as follows: * <short name of bookmark> -> { * <name of property> -> { * "value" -> uint64 * } + * ... + * "redact_snaps" -> { + * "value" -> uint64 array + * } + * "redact_complete" -> { + * "value" -> boolean value + * } * } */ int @@ -1140,6 +1208,33 @@ lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) } /* + * Get bookmark properties. + * + * Given a bookmark's full name, retrieve all properties for the bookmark. + * + * The format of the returned property list is as follows: + * { + * <name of property> -> { + * "value" -> uint64 + * } + * ... + * "redact_snaps" -> { + * "value" -> uint64 array + * } + */ +int +lzc_get_bookmark_props(const char *bookmark, nvlist_t **props) +{ + int error; + + nvlist_t *innvl = fnvlist_alloc(); + error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props); + fnvlist_free(innvl); + + return (error); +} + +/* * Destroys bookmarks. * * The keys in the bmarks nvlist are the bookmarks to be destroyed. @@ -1479,3 +1574,18 @@ lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate, return (error); } + +/* + * Create a redaction bookmark named bookname by redacting snapshot with respect + * to all the snapshots in snapnv. 
+ */ +int +lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv) +{ + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, "bookname", bookname); + fnvlist_add_nvlist(args, "snapnv", snapnv); + int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL); + fnvlist_free(args); + return (error); +} diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 91f47503a..2cf5cec65 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -59,6 +59,7 @@ KERNEL_C = \ dmu_object.c \ dmu_objset.c \ dmu_recv.c \ + dmu_redact.c \ dmu_send.c \ dmu_traverse.c \ dmu_tx.c \ @@ -86,6 +87,7 @@ KERNEL_C = \ metaslab.c \ mmp.c \ multilist.c \ + objlist.c \ pathname.c \ range_tree.c \ refcount.c \ diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 0f39e0d72..da45356a6 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ |