summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorSteven Hartland <[email protected]>2013-05-25 02:06:23 +0000
committerBrian Behlendorf <[email protected]>2013-11-04 11:17:48 -0800
commit95fd54a1c5b93bb2aa3e7dffc28c784b1e21a8bb (patch)
tree601d05db488f2e7ea24913dfa4028681928d64eb /lib
parent7bc7f25040e68d6094a6c46fc300a3c4d66d2970 (diff)
Illumos #3740
3740 Poor ZFS send / receive performance due to snapshot hold / release processing Reviewed by: Matthew Ahrens <[email protected]> Approved by: Christopher Siden <[email protected]> References: https://www.illumos.org/issues/3740 illumos/illumos-gate@a7a845e4bf22fd1b2a284729ccd95c7370a0438c Ported-by: Richard Yao <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Issue #1775 Porting notes: 1. 13fe019870c8779bf2f5b3ff731b512cf89133ef introduced a merge conflict in dsl_dataset_user_release_tmp where some variables were moved outside of the preprocessor directive. 2. dea9dfefdd747534b3846845629d2200f0616dad made the previous merge conflict worse by switching KM_SLEEP to KM_PUSHPAGE. This is notable because this commit refactors the code, adding a new KM_SLEEP allocation. It is not clear to me whether this should be converted to KM_PUSHPAGE. 3. We had a merge conflict in libzfs_sendrecv.c because of copyright notices. 4. Several small C99 compatibility fixes were made.
Diffstat (limited to 'lib')
-rw-r--r--lib/libzfs/libzfs_dataset.c91
-rw-r--r--lib/libzfs/libzfs_sendrecv.c148
-rw-r--r--lib/libzfs_core/libzfs_core.c47
3 files changed, 153 insertions, 133 deletions
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index ede0d91f8..4b4f8d8c9 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -26,6 +26,7 @@
* Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <ctype.h>
@@ -3153,18 +3154,14 @@ static int
zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
{
struct destroydata *dd = arg;
- zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, dd->snapname);
- szhp = make_dataset_handle(zhp->zfs_hdl, name);
- if (szhp) {
+ if (lzc_exists(name))
verify(nvlist_add_boolean(dd->nvl, name) == 0);
- zfs_close(szhp);
- }
if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
(void) zvol_remove_link(zhp->zfs_hdl, name);
@@ -3193,7 +3190,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0);
(void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd);
- if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) {
+ if (nvlist_empty(dd.nvl)) {
ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
zhp->zfs_name, snapname);
@@ -3219,7 +3216,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
if (ret == 0)
return (0);
- if (nvlist_next_nvpair(errlist, NULL) == NULL) {
+ if (nvlist_empty(errlist)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@@ -4421,18 +4418,14 @@ static int
zfs_hold_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
- zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
- szhp = make_dataset_handle(zhp->zfs_hdl, name);
- if (szhp) {
+ if (lzc_exists(name))
fnvlist_add_string(ha->nvl, name, ha->tag);
- zfs_close(szhp);
- }
if (ha->recursive)
rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
@@ -4442,14 +4435,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg)
int
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
- boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
+ boolean_t recursive, int cleanup_fd)
{
int ret;
struct holdarg ha;
- nvlist_t *errors;
- libzfs_handle_t *hdl = zhp->zfs_hdl;
- char errbuf[1024];
- nvpair_t *elem;
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
@@ -4457,26 +4446,44 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
ha.recursive = recursive;
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
- if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+ if (nvlist_empty(ha.nvl)) {
+ char errbuf[1024];
+
fnvlist_free(ha.nvl);
ret = ENOENT;
- if (!enoent_ok) {
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN,
- "cannot hold snapshot '%s@%s'"),
- zhp->zfs_name, snapname);
- (void) zfs_standard_error(hdl, ret, errbuf);
- }
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN,
+ "cannot hold snapshot '%s@%s'"),
+ zhp->zfs_name, snapname);
+ (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf);
return (ret);
}
- ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
+ ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl);
fnvlist_free(ha.nvl);
- if (ret == 0)
+ return (ret);
+}
+
+int
+zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
+{
+ int ret;
+ nvlist_t *errors;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ char errbuf[1024];
+ nvpair_t *elem;
+
+ errors = NULL;
+ ret = lzc_hold(holds, cleanup_fd, &errors);
+
+ if (ret == 0) {
+ /* There may be errors even in the success case. */
+ fnvlist_free(errors);
return (0);
+ }
- if (nvlist_next_nvpair(errors, NULL) == NULL) {
+ if (nvlist_empty(errors)) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot hold"));
@@ -4516,10 +4523,6 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
case EEXIST:
(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
break;
- case ENOENT:
- if (enoent_ok)
- return (ENOENT);
- /* FALLTHROUGH */
default:
(void) zfs_standard_error(hdl,
fnvpair_value_int32(elem), errbuf);
@@ -4530,30 +4533,21 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
return (ret);
}
-struct releasearg {
- nvlist_t *nvl;
- const char *snapname;
- const char *tag;
- boolean_t recursive;
-};
-
static int
zfs_release_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
- zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
- szhp = make_dataset_handle(zhp->zfs_hdl, name);
- if (szhp) {
+ if (lzc_exists(name)) {
nvlist_t *holds = fnvlist_alloc();
fnvlist_add_boolean(holds, ha->tag);
fnvlist_add_nvlist(ha->nvl, name, holds);
- zfs_close(szhp);
+ fnvlist_free(holds);
}
if (ha->recursive)
@@ -4568,7 +4562,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
{
int ret;
struct holdarg ha;
- nvlist_t *errors;
+ nvlist_t *errors = NULL;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
@@ -4579,7 +4573,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
ha.recursive = recursive;
(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
- if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+ if (nvlist_empty(ha.nvl)) {
fnvlist_free(ha.nvl);
ret = ENOENT;
(void) snprintf(errbuf, sizeof (errbuf),
@@ -4593,10 +4587,13 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
ret = lzc_release(ha.nvl, &errors);
fnvlist_free(ha.nvl);
- if (ret == 0)
+ if (ret == 0) {
+ /* There may be errors even in the success case. */
+ fnvlist_free(errors);
return (0);
+ }
- if (nvlist_next_nvpair(errors, NULL) == NULL) {
+ if (nvlist_empty(errors)) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot release"));
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 28751b215..6f0d27f5b 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -22,9 +22,10 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>.
* All rights reserved
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <assert.h>
@@ -799,6 +800,7 @@ typedef struct send_dump_data {
int outfd;
boolean_t err;
nvlist_t *fss;
+ nvlist_t *snapholds;
avl_tree_t *fsavl;
snapfilter_cb_t *filter_cb;
void *filter_cb_arg;
@@ -948,41 +950,19 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
return (0);
}
-static int
-hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
+static void
+gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
{
- zfs_handle_t *pzhp;
- int error = 0;
- char *thissnap;
-
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
- if (sdd->dryrun)
- return (0);
-
/*
- * zfs_send() only opens a cleanup_fd for sends that need it,
+ * zfs_send() only sets snapholds for sends that need them,
* e.g. replication and doall.
*/
- if (sdd->cleanup_fd == -1)
- return (0);
-
- thissnap = strchr(zhp->zfs_name, '@') + 1;
- *(thissnap - 1) = '\0';
- pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
- *(thissnap - 1) = '@';
-
- /*
- * It's OK if the parent no longer exists. The send code will
- * handle that error.
- */
- if (pzhp) {
- error = zfs_hold(pzhp, thissnap, sdd->holdtag,
- B_FALSE, B_TRUE, sdd->cleanup_fd);
- zfs_close(pzhp);
- }
+ if (sdd->snapholds == NULL)
+ return;
- return (error);
+ fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
}
static void *
@@ -1038,28 +1018,23 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
send_dump_data_t *sdd = arg;
progress_arg_t pa = { 0 };
pthread_t tid;
-
char *thissnap;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
+ err = 0;
thissnap = strchr(zhp->zfs_name, '@') + 1;
isfromsnap = (sdd->fromsnap != NULL &&
strcmp(sdd->fromsnap, thissnap) == 0);
if (!sdd->seenfrom && isfromsnap) {
- err = hold_for_send(zhp, sdd);
- if (err == 0) {
- sdd->seenfrom = B_TRUE;
- (void) strcpy(sdd->prevsnap, thissnap);
- sdd->prevsnap_obj = zfs_prop_get_int(zhp,
- ZFS_PROP_OBJSETID);
- } else if (err == ENOENT) {
- err = 0;
- }
+ gather_holds(zhp, sdd);
+ sdd->seenfrom = B_TRUE;
+ (void) strcpy(sdd->prevsnap, thissnap);
+ sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zfs_close(zhp);
- return (err);
+ return (0);
}
if (sdd->seento || !sdd->seenfrom) {
@@ -1110,14 +1085,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
return (0);
}
- err = hold_for_send(zhp, sdd);
- if (err) {
- if (err == ENOENT)
- err = 0;
- zfs_close(zhp);
- return (err);
- }
-
+ gather_holds(zhp, sdd);
fromorigin = sdd->prevsnap[0] == '\0' &&
(sdd->fromorigin || sdd->replicate);
@@ -1385,7 +1353,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
avl_tree_t *fsavl = NULL;
static uint64_t holdseq;
int spa_version;
- pthread_t tid;
+ pthread_t tid = 0;
int pipefd[2];
dedup_arg_t dda = { 0 };
int featureflags = 0;
@@ -1458,11 +1426,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
*debugnvp = hdrnv;
else
nvlist_free(hdrnv);
- if (err) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
+ if (err)
goto stderr_out;
- }
}
if (!flags->dryrun) {
@@ -1486,8 +1451,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
}
free(packbuf);
if (err == -1) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
err = errno;
goto stderr_out;
}
@@ -1498,8 +1461,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
drr.drr_u.drr_end.drr_checksum = zc;
err = write(outfd, &drr, sizeof (drr));
if (err == -1) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
err = errno;
goto stderr_out;
}
@@ -1511,7 +1472,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
- if (flags->dedup)
+ if (tid != 0)
sdd.outfd = pipefd[0];
else
sdd.outfd = outfd;
@@ -1548,36 +1509,71 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
err = errno;
goto stderr_out;
}
+ sdd.snapholds = fnvlist_alloc();
} else {
sdd.cleanup_fd = -1;
+ sdd.snapholds = NULL;
}
- if (flags->verbose) {
+ if (flags->verbose || sdd.snapholds != NULL) {
/*
* Do a verbose no-op dry run to get all the verbose output
- * before generating any data. Then do a non-verbose real
- * run to generate the streams.
+ * or to gather snapshot holds before generating any data,
+ * then do a non-verbose real run to generate the streams.
*/
sdd.dryrun = B_TRUE;
err = dump_filesystems(zhp, &sdd);
- sdd.dryrun = flags->dryrun;
- sdd.verbose = B_FALSE;
- if (flags->parsable) {
- (void) fprintf(stderr, "size\t%llu\n",
- (longlong_t)sdd.size);
- } else {
- char buf[16];
- zfs_nicenum(sdd.size, buf, sizeof (buf));
- (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
- "total estimated size is %s\n"), buf);
+
+ if (err != 0)
+ goto stderr_out;
+
+ if (flags->verbose) {
+ if (flags->parsable) {
+ (void) fprintf(stderr, "size\t%llu\n",
+ (longlong_t)sdd.size);
+ } else {
+ char buf[16];
+ zfs_nicenum(sdd.size, buf, sizeof (buf));
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+ "total estimated size is %s\n"), buf);
+ }
+ }
+
+ /* Ensure no snaps found is treated as an error. */
+ if (!sdd.seento) {
+ err = ENOENT;
+ goto err_out;
}
+
+ /* Skip the second run if dryrun was requested. */
+ if (flags->dryrun)
+ goto err_out;
+
+ if (sdd.snapholds != NULL) {
+ err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
+ if (err != 0)
+ goto stderr_out;
+
+ fnvlist_free(sdd.snapholds);
+ sdd.snapholds = NULL;
+ }
+
+ sdd.dryrun = B_FALSE;
+ sdd.verbose = B_FALSE;
}
+
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
- if (flags->dedup) {
- (void) close(pipefd[0]);
+ /* Ensure no snaps found is treated as an error. */
+ if (err == 0 && !sdd.seento)
+ err = ENOENT;
+
+ if (tid != 0) {
+ if (err != 0)
+ (void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
+ (void) close(pipefd[0]);
}
if (sdd.cleanup_fd != -1) {
@@ -1605,9 +1601,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
stderr_out:
err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
err_out:
+ fsavl_destroy(fsavl);
+ nvlist_free(fss);
+ fnvlist_free(sdd.snapholds);
+
if (sdd.cleanup_fd != -1)
VERIFY(0 == close(sdd.cleanup_fd));
- if (flags->dedup) {
+ if (tid != 0) {
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index 44a2070d6..d3918a4ed 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
/*
@@ -254,8 +255,11 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
* marked for deferred destruction, and will be destroyed when the last hold
* or clone is removed/destroyed.
*
+ * The return value will be ENOENT if none of the snapshots existed.
+ *
* The return value will be 0 if all snapshots were destroyed (or marked for
- * later destruction if 'defer' is set) or didn't exist to begin with.
+ * later destruction if 'defer' is set) or didn't exist to begin with and
+ * at least one snapshot was destroyed.
*
* Otherwise the return value will be the errno of a (unspecified) snapshot
* that failed, no snapshots will be destroyed, and the errlist will have an
@@ -286,7 +290,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
nvlist_free(args);
return (error);
-
}
int
@@ -346,11 +349,22 @@ lzc_exists(const char *dataset)
* uncleanly, the holds will be released when the pool is next opened
* or imported.
*
- * The return value will be 0 if all holds were created. Otherwise the return
- * value will be the errno of a (unspecified) hold that failed, no holds will
- * be created, and the errlist will have an entry for each hold that
- * failed (name = snapshot). The value in the errlist will be the error
- * code (int32).
+ * Holds for snapshots which don't exist will be skipped and have an entry
+ * added to errlist, but will not cause an overall failure, except in the
+ * case that all holds were skipped.
+ *
+ * The return value will be ENOENT if none of the snapshots for the requested
+ * holds existed.
+ *
+ * The return value will be 0 if the nvl holds was empty or all holds, for
+ * snapshots that existed, were successfully created and at least one hold
+ * was created.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed and no holds will be created.
+ *
+ * In all cases the errlist will have an entry for each hold that failed
+ * (name = snapshot), with its value being the error code (int32).
*/
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
@@ -387,11 +401,20 @@ lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
* The snapshots must all be in the same pool.
* The value is a nvlist whose keys are the holds to remove.
*
- * The return value will be 0 if all holds were removed.
- * Otherwise the return value will be the errno of a (unspecified) release
- * that failed, no holds will be released, and the errlist will have an
- * entry for each snapshot that has failed releases (name = snapshot).
- * The value in the errlist will be the error code (int32) of a failed release.
+ * Holds which failed to release because they didn't exist will have an entry
+ * added to errlist, but will not cause an overall failure, except in the
+ * case that all releases were skipped.
+ *
+ * The return value will be ENOENT if none of the specified holds existed.
+ *
+ * The return value will be 0 if the nvl holds was empty or all holds that
+ * existed were successfully removed and at least one hold was removed.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed to release and no holds will be released.
+ *
+ * In all cases the errlist will have an entry for each hold that failed to
+ * release.
*/
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)