summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMax Grossman <max.grossman@delphix.com>2015-04-08 11:37:13 -0700
committerBrian Behlendorf <behlendorf1@llnl.gov>2015-05-13 09:03:59 -0700
commit5dc8b7365ff1932bfd969bc71cd49db9b3a6dc87 (patch)
tree03bbb6a4ffbe7b71a5d4f69e3cec42c75deb62ad
parent19b3b1d2a256ecac1f27278c593983f772322f09 (diff)
Illumos 5765 - add support for estimating send stream size with lzc_send_space when source is a bookmark
5765 add support for estimating send stream size with lzc_send_space when source is a bookmark Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Steven Hartland <killing@multiplay.co.uk> Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com> Approved by: Albert Lee <trisk@nexenta.com> References: https://www.illumos.org/issues/5765 https://github.com/illumos/illumos-gate/commit/643da460 Porting notes: * Unused variable 'recordsize' in dmu_send_estimate() dropped Ported-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3397
-rw-r--r--include/sys/dmu_send.h4
-rw-r--r--include/sys/dsl_dataset.h3
-rw-r--r--lib/libzfs_core/libzfs_core.c22
-rw-r--r--module/zfs/dmu_send.c106
-rw-r--r--module/zfs/zfs_ioctl.c52
5 files changed, 147 insertions, 40 deletions
diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h
index 3a8dc89ab..2442a1f8a 100644
--- a/include/sys/dmu_send.h
+++ b/include/sys/dmu_send.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
@@ -42,6 +42,8 @@ int dmu_send(const char *tosnap, const char *fromsnap,
int outfd, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
+int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
+ uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok,
int outfd, struct vnode *vp, offset_t *off);
diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 1985ce824..d6da5dcfd 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -201,6 +201,9 @@ dsl_dataset_phys(dsl_dataset_t *ds)
*/
#define MAX_TAG_PREFIX_LEN 17
+#define dsl_dataset_is_snapshot(ds) \
+ (dsl_dataset_phys(ds)->ds_num_children != 0)
+
#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index 69df5579b..b706e6f6b 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -485,18 +485,30 @@ lzc_send(const char *snapname, const char *from, int fd,
}
/*
- * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
+ * "from" can be NULL, a snapshot, or a bookmark.
+ *
+ * If from is NULL, a full (non-incremental) stream will be estimated. This
+ * is calculated very efficiently.
+ *
+ * If from is a snapshot, lzc_send_space uses the deadlists attached to
+ * each snapshot to efficiently estimate the stream size.
+ *
+ * If from is a bookmark, the indirect blocks in the destination snapshot
+ * are traversed, looking for blocks with a birth time since the creation TXG of
+ * the snapshot this bookmark was created from. This will result in
+ * significantly more I/O and be less efficient than a send space estimation on
+ * an equivalent snapshot.
*/
int
-lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
+lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
int err;
args = fnvlist_alloc();
- if (fromsnap != NULL)
- fnvlist_add_string(args, "fromsnap", fromsnap);
+ if (from != NULL)
+ fnvlist_add_string(args, "from", from);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index e26d344d6..7fd29c68d 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -836,11 +836,45 @@ dmu_send(const char *tosnap, const char *fromsnap,
return (err);
}
+static int
+dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
+ uint64_t *sizep)
+{
+ int err;
+ /*
+ * Assume that space (both on-disk and in-stream) is dominated by
+ * data. We will adjust for indirect blocks and the copies property,
+ * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
+ */
+
+ /*
+ * Subtract out approximate space used by indirect blocks.
+ * Assume most space is used by data blocks (non-indirect, non-dnode).
+ * Assume all blocks are recordsize. Assume ditto blocks and
+ * internal fragmentation counter out compression.
+ *
+ * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
+ * block, which we observe in practice.
+ */
+ uint64_t recordsize;
+ err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
+ if (err != 0)
+ return (err);
+ size -= size / recordsize * sizeof (blkptr_t);
+
+ /* Add in the space for the record associated with each block. */
+ size += size / recordsize * sizeof (dmu_replay_record_t);
+
+ *sizep = size;
+
+ return (0);
+}
+
int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{
int err;
- uint64_t size, recordsize;
+ uint64_t size;
ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
ASSERT(dsl_pool_config_held(dp));
@@ -867,32 +901,60 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
return (err);
}
- /*
- * Assume that space (both on-disk and in-stream) is dominated by
- * data. We will adjust for indirect blocks and the copies property,
- * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
- */
+ err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+ return (err);
+}
+/*
+ * Simple callback used to traverse the blocks of a snapshot and sum their
+ * uncompressed size
+ */
+/* ARGSUSED */
+static int
+dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+ uint64_t *spaceptr = arg;
+ if (bp != NULL && !BP_IS_HOLE(bp)) {
+ *spaceptr += BP_GET_UCSIZE(bp);
+ }
+ return (0);
+}
+
+/*
+ * Given a desination snapshot and a TXG, calculate the approximate size of a
+ * send stream sent from that TXG. from_txg may be zero, indicating that the
+ * whole snapshot will be sent.
+ */
+int
+dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
+ uint64_t *sizep)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ int err;
+ uint64_t size = 0;
+
+ ASSERT(dsl_pool_config_held(dp));
+
+ /* tosnap must be a snapshot */
+ if (!dsl_dataset_is_snapshot(ds))
+ return (SET_ERROR(EINVAL));
+
+ /* verify that from_txg is before the provided snapshot was taken */
+ if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
+ return (SET_ERROR(EXDEV));
+ }
/*
- * Subtract out approximate space used by indirect blocks.
- * Assume most space is used by data blocks (non-indirect, non-dnode).
- * Assume all blocks are recordsize. Assume ditto blocks and
- * internal fragmentation counter out compression.
- *
- * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
- * block, which we observe in practice.
+ * traverse the blocks of the snapshot with birth times after
+ * from_txg, summing their uncompressed size
*/
- err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
- if (err != 0)
+ err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
+ dmu_calculate_send_traversal, &size);
+ if (err)
return (err);
- size -= size / recordsize * sizeof (blkptr_t);
-
- /* Add in the space for the record associated with each block. */
- size += size / recordsize * sizeof (dmu_replay_record_t);
- *sizep = size;
-
- return (0);
+ err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+ return (err);
}
typedef struct dmu_recv_begin_arg {
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index f5137d09e..7a890cf3f 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -5245,7 +5245,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
*
* innvl: {
- * (optional) "fromsnap" -> full snap name to send an incremental from
+ * (optional) "from" -> full snap or bookmark name to send an incremental
+ * from
* }
*
* outnvl: {
@@ -5256,7 +5257,6 @@ static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
dsl_pool_t *dp;
- dsl_dataset_t *fromsnap = NULL;
dsl_dataset_t *tosnap;
int error;
char *fromname;
@@ -5272,27 +5272,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
- error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+ error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
- error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
- if (error != 0) {
- dsl_dataset_rele(tosnap, FTAG);
- dsl_pool_rele(dp, FTAG);
- return (error);
+ if (strchr(fromname, '@') != NULL) {
+ /*
+ * If from is a snapshot, hold it and use the more
+ * efficient dmu_send_estimate to estimate send space
+ * size using deadlists.
+ */
+ dsl_dataset_t *fromsnap;
+ error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+ if (error != 0)
+ goto out;
+ error = dmu_send_estimate(tosnap, fromsnap, &space);
+ dsl_dataset_rele(fromsnap, FTAG);
+ } else if (strchr(fromname, '#') != NULL) {
+ /*
+ * If from is a bookmark, fetch the creation TXG of the
+ * snapshot it was created from and use that to find
+ * blocks that were born after it.
+ */
+ zfs_bookmark_phys_t frombm;
+
+ error = dsl_bookmark_lookup(dp, fromname, tosnap,
+ &frombm);
+ if (error != 0)
+ goto out;
+ error = dmu_send_estimate_from_txg(tosnap,
+ frombm.zbm_creation_txg, &space);
+ } else {
+ /*
+ * from is not properly formatted as a snapshot or
+ * bookmark
+ */
+ error = SET_ERROR(EINVAL);
+ goto out;
}
+ } else {
+ // If estimating the size of a full send, use dmu_send_estimate
+ error = dmu_send_estimate(tosnap, NULL, &space);
}
- error = dmu_send_estimate(tosnap, fromsnap, &space);
fnvlist_add_uint64(outnvl, "space", space);
- if (fromsnap != NULL)
- dsl_dataset_rele(fromsnap, FTAG);
+out:
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
-
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void