diff options
author | Steven Hartland <[email protected]> | 2013-05-25 02:06:23 +0000 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2013-11-04 11:17:48 -0800 |
commit | 95fd54a1c5b93bb2aa3e7dffc28c784b1e21a8bb (patch) | |
tree | 601d05db488f2e7ea24913dfa4028681928d64eb /lib | |
parent | 7bc7f25040e68d6094a6c46fc300a3c4d66d2970 (diff) |
Illumos #3740
3740 Poor ZFS send / receive performance due to snapshot
hold / release processing
Reviewed by: Matthew Ahrens <[email protected]>
Approved by: Christopher Siden <[email protected]>
References:
https://www.illumos.org/issues/3740
illumos/illumos-gate@a7a845e4bf22fd1b2a284729ccd95c7370a0438c
Ported-by: Richard Yao <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #1775
Porting notes:
1. 13fe019870c8779bf2f5b3ff731b512cf89133ef introduced a merge conflict
in dsl_dataset_user_release_tmp where some variables were moved
outside of the preprocessor directive.
2. dea9dfefdd747534b3846845629d2200f0616dad made the previous merge
conflict worse by switching KM_SLEEP to KM_PUSHPAGE. This is notable
because this commit refactors the code, adding a new KM_SLEEP
allocation. It is not clear to me whether this should be converted
to KM_PUSHPAGE.
3. We had a merge conflict in libzfs_sendrecv.c because of copyright
notices.
4. Several small C99 compatibility fixes were made.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libzfs/libzfs_dataset.c | 91 | ||||
-rw-r--r-- | lib/libzfs/libzfs_sendrecv.c | 148 | ||||
-rw-r--r-- | lib/libzfs_core/libzfs_core.c | 47 |
3 files changed, 153 insertions, 133 deletions
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index ede0d91f8..4b4f8d8c9 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -26,6 +26,7 @@ * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <ctype.h> @@ -3153,18 +3154,14 @@ static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) verify(nvlist_add_boolean(dd->nvl, name) == 0); - zfs_close(szhp); - } if (zhp->zfs_type == ZFS_TYPE_VOLUME) { (void) zvol_remove_link(zhp->zfs_hdl, name); @@ -3193,7 +3190,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { + if (nvlist_empty(dd.nvl)) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); @@ -3219,7 +3216,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) if (ret == 0) return (0); - if (nvlist_next_nvpair(errlist, NULL) == NULL) { + if (nvlist_empty(errlist)) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); @@ -4421,18 +4418,14 @@ static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if 
(lzc_exists(name)) fnvlist_add_string(ha->nvl, name, ha->tag); - zfs_close(szhp); - } if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); @@ -4442,14 +4435,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg) int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t enoent_ok, int cleanup_fd) + boolean_t recursive, int cleanup_fd) { int ret; struct holdarg ha; - nvlist_t *errors; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - nvpair_t *elem; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; @@ -4457,26 +4446,44 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, ha.recursive = recursive; (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { + char errbuf[1024]; + fnvlist_free(ha.nvl); ret = ENOENT; - if (!enoent_ok) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot hold snapshot '%s@%s'"), - zhp->zfs_name, snapname); - (void) zfs_standard_error(hdl, ret, errbuf); - } + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); return (ret); } - ret = lzc_hold(ha.nvl, cleanup_fd, &errors); + ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); fnvlist_free(ha.nvl); - if (ret == 0) + return (ret); +} + +int +zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) +{ + int ret; + nvlist_t *errors; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; + + errors = NULL; + ret = lzc_hold(holds, cleanup_fd, &errors); + + if (ret == 0) { + /* There may be errors even in the success case. 
*/ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold")); @@ -4516,10 +4523,6 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, case EEXIST: (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); break; - case ENOENT: - if (enoent_ok) - return (ENOENT); - /* FALLTHROUGH */ default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); @@ -4530,30 +4533,21 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, return (ret); } -struct releasearg { - nvlist_t *nvl; - const char *snapname; - const char *tag; - boolean_t recursive; -}; - static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) { nvlist_t *holds = fnvlist_alloc(); fnvlist_add_boolean(holds, ha->tag); fnvlist_add_nvlist(ha->nvl, name, holds); - zfs_close(szhp); + fnvlist_free(holds); } if (ha->recursive) @@ -4568,7 +4562,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, { int ret; struct holdarg ha; - nvlist_t *errors; + nvlist_t *errors = NULL; nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4579,7 +4573,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, ha.recursive = recursive; (void) zfs_release_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { fnvlist_free(ha.nvl); ret = ENOENT; (void) snprintf(errbuf, sizeof (errbuf), @@ -4593,10 +4587,13 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, ret = lzc_release(ha.nvl, &errors); fnvlist_free(ha.nvl); - if (ret == 0) + if (ret == 0) { + 
/* There may be errors even in the success case. */ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release")); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 28751b215..6f0d27f5b 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -22,9 +22,10 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>. * All rights reserved + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <assert.h> @@ -799,6 +800,7 @@ typedef struct send_dump_data { int outfd; boolean_t err; nvlist_t *fss; + nvlist_t *snapholds; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; @@ -948,41 +950,19 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, return (0); } -static int -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) +static void +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) { - zfs_handle_t *pzhp; - int error = 0; - char *thissnap; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - if (sdd->dryrun) - return (0); - /* - * zfs_send() only opens a cleanup_fd for sends that need it, + * zfs_send() only sets snapholds for sends that need them, * e.g. replication and doall. */ - if (sdd->cleanup_fd == -1) - return (0); - - thissnap = strchr(zhp->zfs_name, '@') + 1; - *(thissnap - 1) = '\0'; - pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); - *(thissnap - 1) = '@'; - - /* - * It's OK if the parent no longer exists. The send code will - * handle that error. 
- */ - if (pzhp) { - error = zfs_hold(pzhp, thissnap, sdd->holdtag, - B_FALSE, B_TRUE, sdd->cleanup_fd); - zfs_close(pzhp); - } + if (sdd->snapholds == NULL) + return; - return (error); + fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } static void * @@ -1038,28 +1018,23 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; - char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; + err = 0; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { - err = hold_for_send(zhp, sdd); - if (err == 0) { - sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->prevsnap, thissnap); - sdd->prevsnap_obj = zfs_prop_get_int(zhp, - ZFS_PROP_OBJSETID); - } else if (err == ENOENT) { - err = 0; - } + gather_holds(zhp, sdd); + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); - return (err); + return (0); } if (sdd->seento || !sdd->seenfrom) { @@ -1110,14 +1085,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) return (0); } - err = hold_for_send(zhp, sdd); - if (err) { - if (err == ENOENT) - err = 0; - zfs_close(zhp); - return (err); - } - + gather_holds(zhp, sdd); fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); @@ -1385,7 +1353,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; - pthread_t tid; + pthread_t tid = 0; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; @@ -1458,11 +1426,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, *debugnvp = hdrnv; else nvlist_free(hdrnv); - if (err) { - fsavl_destroy(fsavl); - nvlist_free(fss); + if (err) goto stderr_out; - } } if (!flags->dryrun) { @@ -1486,8 +1451,6 @@ 
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } free(packbuf); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1498,8 +1461,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1511,7 +1472,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - if (flags->dedup) + if (tid != 0) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; @@ -1548,36 +1509,71 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, err = errno; goto stderr_out; } + sdd.snapholds = fnvlist_alloc(); } else { sdd.cleanup_fd = -1; + sdd.snapholds = NULL; } - if (flags->verbose) { + if (flags->verbose || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output - * before generating any data. Then do a non-verbose real - * run to generate the streams. + * or to gather snapshot hold's before generating any data, + * then do a non-verbose real run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); - sdd.dryrun = flags->dryrun; - sdd.verbose = B_FALSE; - if (flags->parsable) { - (void) fprintf(stderr, "size\t%llu\n", - (longlong_t)sdd.size); - } else { - char buf[16]; - zfs_nicenum(sdd.size, buf, sizeof (buf)); - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "total estimated size is %s\n"), buf); + + if (err != 0) + goto stderr_out; + + if (flags->verbose) { + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + } + + /* Ensure no snaps found is treated as an error. 
*/ + if (!sdd.seento) { + err = ENOENT; + goto err_out; } + + /* Skip the second run if dryrun was requested. */ + if (flags->dryrun) + goto err_out; + + if (sdd.snapholds != NULL) { + err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); + if (err != 0) + goto stderr_out; + + fnvlist_free(sdd.snapholds); + sdd.snapholds = NULL; + } + + sdd.dryrun = B_FALSE; + sdd.verbose = B_FALSE; } + err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (flags->dedup) { - (void) close(pipefd[0]); + /* Ensure no snaps found is treated as an error. */ + if (err == 0 && !sdd.seento) + err = ENOENT; + + if (tid != 0) { + if (err != 0) + (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); + (void) close(pipefd[0]); } if (sdd.cleanup_fd != -1) { @@ -1605,9 +1601,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: + fsavl_destroy(fsavl); + nvlist_free(fss); + fnvlist_free(sdd.snapholds); + if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); - if (flags->dedup) { + if (tid != 0) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 44a2070d6..d3918a4ed 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -254,8 +255,11 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) * marked for deferred destruction, and will be destroyed when the last hold * or clone is removed/destroyed. * + * The return value will be ENOENT if none of the snapshots existed. + * * The return value will be 0 if all snapshots were destroyed (or marked for - * later destruction if 'defer' is set) or didn't exist to begin with. 
+ * later destruction if 'defer' is set) or didn't exist to begin with and + * at least one snapshot was destroyed. * * Otherwise the return value will be the errno of a (unspecified) snapshot * that failed, no snapshots will be destroyed, and the errlist will have an @@ -286,7 +290,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) nvlist_free(args); return (error); - } int @@ -346,11 +349,22 @@ lzc_exists(const char *dataset) * uncleanly, the holds will be released when the pool is next opened * or imported. * - * The return value will be 0 if all holds were created. Otherwise the return - * value will be the errno of a (unspecified) hold that failed, no holds will - * be created, and the errlist will have an entry for each hold that - * failed (name = snapshot). The value in the errlist will be the error - * code (int32). + * Holds for snapshots which don't exist will be skipped and have an entry + * added to errlist, but will not cause an overall failure, except in the + * case that all holds where skipped. + * + * The return value will be ENOENT if none of the snapshots for the requested + * holds existed. + * + * The return value will be 0 if the nvl holds was empty or all holds, for + * snapshots that existed, were succesfully created and at least one hold + * was created. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed and no holds will be created. + * + * In all cases the errlist will have an entry for each hold that failed + * (name = snapshot), with its value being the error code (int32). */ int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) @@ -387,11 +401,20 @@ lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) * The snapshots must all be in the same pool. * The value is a nvlist whose keys are the holds to remove. * - * The return value will be 0 if all holds were removed. 
- * Otherwise the return value will be the errno of a (unspecified) release - * that failed, no holds will be released, and the errlist will have an - * entry for each snapshot that has failed releases (name = snapshot). - * The value in the errlist will be the error code (int32) of a failed release. + * Holds which failed to release because they didn't exist will have an entry + * added to errlist, but will not cause an overall failure, except in the + * case that all releases where skipped. + * + * The return value will be ENOENT if none of the specified holds existed. + * + * The return value will be 0 if the nvl holds was empty or all holds that + * existed, were successfully removed and at least one hold was removed. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed to release and no holds will be released. + * + * In all cases the errlist will have an entry for each hold that failed to + * to release. */ int lzc_release(nvlist_t *holds, nvlist_t **errlist) |