author	Brian Behlendorf <[email protected]>	2016-06-29 11:26:30 -0700
committer	Brian Behlendorf <[email protected]>	2016-06-29 13:42:23 -0700
commit	5c27b296055301f13103ca0aa98c2ded01dcd9a0 (patch)
tree	4813f9cb944c53140de7a12710f0ca7b12e3ec27 /module
parent	669cf0ab298dd66e512b37f6c4a42aee2d767b80 (diff)
parent	0dab2e84fcecff2806287efacb7c6205f346f69d (diff)
Merge branch 'illumos-2605'
Adds support for resuming interrupted zfs send streams and includes all related send/recv bug fixes from upstream OpenZFS. Unlike the upstream implementation, this branch does not change the existing ioctl interface. Instead, a new ZFS_IOC_RECV_NEW ioctl was added to support resuming zfs send streams. This was done by applying the original upstream patch and then reverting the ioctl changes in a follow-up patch. For this reason there are a handful of commits between the relevant patches on this branch which are not interoperable. Splitting the work this way makes it easier to extract the new ZFS_IOC_RECV_NEW ioctl and submit it upstream.

Signed-off-by: Brian Behlendorf <[email protected]>
Closes #4742
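Resumability is negotiated entirely inside the existing stream format: a resumed send sets DMU_BACKUP_FEATURE_RESUMING in the BEGIN record's feature flags and attaches a small nvlist payload naming the resume point (see the dmu_send_impl() hunks in dmu_send.c below). A minimal standalone sketch of the flag test follows; the flag value and the simplified feature-flag accessor are mirrored from zfs_ioctl.h as assumptions and should be verified against the tree.

#include <stdio.h>
#include <stdint.h>

#define	DMU_BACKUP_FEATURE_RESUMING	(1 << 20)	/* assumed value */
/* simplified stand-in for BF64_GET(vi, 0, 24) */
#define	DMU_GET_FEATUREFLAGS(vi)	((uint64_t)(vi) & 0xffffff)

int
main(void)
{
	uint64_t drr_versioninfo = DMU_BACKUP_FEATURE_RESUMING;

	if (DMU_GET_FEATUREFLAGS(drr_versioninfo) &
	    DMU_BACKUP_FEATURE_RESUMING)
		printf("stream resumes an interrupted receive\n");
	return (0);
}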
Diffstat (limited to 'module')
-rw-r--r--module/zcommon/zfs_fletcher.c14
-rw-r--r--module/zcommon/zfs_namecheck.c46
-rw-r--r--module/zcommon/zfs_prop.c4
-rw-r--r--module/zfs/dmu_objset.c34
-rw-r--r--module/zfs/dmu_send.c739
-rw-r--r--module/zfs/dmu_traverse.c81
-rw-r--r--module/zfs/dsl_bookmark.c4
-rw-r--r--module/zfs/dsl_dataset.c220
-rw-r--r--module/zfs/dsl_deleg.c6
-rw-r--r--module/zfs/dsl_destroy.c12
-rw-r--r--module/zfs/dsl_dir.c27
-rw-r--r--module/zfs/dsl_prop.c4
-rw-r--r--module/zfs/dsl_scan.c6
-rw-r--r--module/zfs/dsl_userhold.c6
-rw-r--r--module/zfs/spa.c24
-rw-r--r--module/zfs/spa_history.c6
-rw-r--r--module/zfs/zfs_ctldir.c56
-rw-r--r--module/zfs/zfs_ioctl.c390
-rw-r--r--module/zfs/zfs_vfsops.c2
-rw-r--r--module/zfs/zil.c2
-rw-r--r--module/zfs/zpl_inode.c2
-rw-r--r--module/zfs/zvol.c2
22 files changed, 1301 insertions, 386 deletions
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c
index 2c2d01d5c..e76c5b8a5 100644
--- a/module/zcommon/zfs_fletcher.c
+++ b/module/zcommon/zfs_fletcher.c
@@ -334,7 +334,12 @@ fletcher_4_impl_get(void)
void
fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
- const fletcher_4_ops_t *ops = fletcher_4_impl_get();
+ const fletcher_4_ops_t *ops;
+
+ if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t)))
+ ops = fletcher_4_impl_get();
+ else
+ ops = &fletcher_4_scalar_ops;
ops->init(zcp);
ops->compute(buf, size, zcp);
@@ -345,7 +350,12 @@ fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
void
fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
- const fletcher_4_ops_t *ops = fletcher_4_impl_get();
+ const fletcher_4_ops_t *ops;
+
+ if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t)))
+ ops = fletcher_4_impl_get();
+ else
+ ops = &fletcher_4_scalar_ops;
ops->init(zcp);
ops->compute_byteswap(buf, size, zcp);
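The two hunks above gate the optimized fletcher-4 implementations on buffer size: the vectorized paths consume four 32-bit words per iteration, so any size that is not a multiple of 16 bytes must fall back to the scalar implementation. A standalone sketch of the dispatch test, with IS_P2ALIGNED reproduced from sys/sysmacros.h so it builds anywhere:

#include <stdio.h>
#include <stdint.h>

#define	IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)

int
main(void)
{
	uint64_t sizes[] = { 512, 520, 16, 20 };
	int i;

	for (i = 0; i < 4; i++) {
		printf("size %llu -> %s\n", (unsigned long long)sizes[i],
		    IS_P2ALIGNED(sizes[i], 4 * sizeof (uint32_t)) ?
		    "fletcher_4_impl_get()" : "&fletcher_4_scalar_ops");
	}
	return (0);
}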
diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c
index ff724be58..b58071bed 100644
--- a/module/zcommon/zfs_namecheck.c
+++ b/module/zcommon/zfs_namecheck.c
@@ -69,7 +69,7 @@ zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what)
{
const char *loc;
- if (strlen(path) >= MAXNAMELEN) {
+ if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
@@ -140,27 +140,8 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
/*
* Make sure the name is not too long.
- *
- * ZFS_MAXNAMELEN is the maximum dataset length used in the userland
- * which is the same as MAXNAMELEN used in the kernel.
- * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all
- * places using MAXNAMELEN.
- *
- * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name
- * length is 20 bytes. This 20 bytes is broken down as follows to
- * provide a maximum safe <pool>/<dataset>[@snapshot] length of only
- * 18 bytes. To ensure bytes are left for <dataset>[@snapshot] the
- * <pool> portition is futher limited to 9 bytes. For 2.6.27 and
- * newer kernels this limit is set to MAXNAMELEN.
- *
- * <pool>/<dataset> + <partition> + <newline>
- * (18) + (1) + (1)
*/
-#ifdef HAVE_KOBJ_NAME_LEN
- if (strlen(path) > 18) {
-#else
- if (strlen(path) >= MAXNAMELEN) {
-#endif /* HAVE_KOBJ_NAME_LEN */
+ if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
@@ -289,7 +270,7 @@ mountpoint_namecheck(const char *path, namecheck_err_t *why)
while (*end != '/' && *end != '\0')
end++;
- if (end - start >= MAXNAMELEN) {
+ if (end - start >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
@@ -314,27 +295,8 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
/*
* Make sure the name is not too long.
- *
- * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland
- * which is the same as MAXNAMELEN used in the kernel.
- * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all
- * places using MAXNAMELEN.
- *
- * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name
- * length is 20 bytes. This 20 bytes is broken down as follows to
- * provide a maximum safe <pool>/<dataset>[@snapshot] length of only
- * 18 bytes. To ensure bytes are left for <dataset>[@snapshot] the
- * <pool> portition is futher limited to 8 bytes. For 2.6.27 and
- * newer kernels this limit is set to MAXNAMELEN.
- *
- * <pool>/<dataset> + <partition> + <newline>
- * (18) + (1) + (1)
*/
-#ifdef HAVE_KOBJ_NAME_LEN
- if (strlen(pool) > 8) {
-#else
- if (strlen(pool) >= MAXNAMELEN) {
-#endif /* HAVE_KOBJ_NAME_LEN */
+ if (strlen(pool) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
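All three checks above move from the kernel's MAXNAMELEN to the new ZFS_MAX_DATASET_NAME_LEN constant, decoupling name limits from kernel headers, and the stale HAVE_KOBJ_NAME_LEN special cases for pre-2.6.27 kernels are dropped. Note the comparison is >=, not >, so one byte is always reserved for the terminating NUL. A minimal sketch (the 256-byte value is an assumption mirrored from sys/fs/zfs.h):

#include <stdio.h>
#include <string.h>

#define	ZFS_MAX_DATASET_NAME_LEN	256	/* assumed, per sys/fs/zfs.h */

static int
name_too_long(const char *path)
{
	/* >= reserves one byte for the terminating NUL */
	return (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN);
}

int
main(void)
{
	char long_name[ZFS_MAX_DATASET_NAME_LEN + 1];

	memset(long_name, 'a', sizeof (long_name) - 1);
	long_name[sizeof (long_name) - 1] = '\0';

	printf("%d %d\n", name_too_long("tank/fs"),
	    name_too_long(long_name));	/* prints: 0 1 */
	return (0);
}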
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 1dbeab084..1d68ca29e 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -375,6 +375,10 @@ zfs_prop_init(void)
zprop_register_string(ZFS_PROP_SELINUX_ROOTCONTEXT, "rootcontext",
"none", PROP_DEFAULT, ZFS_TYPE_DATASET, "<selinux rootcontext>",
"ROOTCONTEXT");
+ zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ "receive_resume_token",
+ NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<string token>", "RESUMETOK");
/* readonly number properties */
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index cdc897726..22ca84d96 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -405,6 +405,17 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
* checksum/compression/copies.
*/
if (ds != NULL) {
+ boolean_t needlock = B_FALSE;
+
+ /*
+ * Note: it's valid to open the objset if the dataset is
+ * long-held, in which case the pool_config lock will not
+ * be held.
+ */
+ if (!dsl_pool_config_held(dmu_objset_pool(os))) {
+ needlock = B_TRUE;
+ dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+ }
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os);
@@ -461,6 +472,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
dnodesize_changed_cb, os);
}
}
+ if (needlock)
+ dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (err != 0) {
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
&os->os_phys_buf));
@@ -520,6 +533,13 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
{
int err = 0;
+ /*
+ * We shouldn't be doing anything with dsl_dataset_t's unless the
+ * pool_config lock is held, or the dataset is long-held.
+ */
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
+ dsl_dataset_long_held(ds));
+
mutex_enter(&ds->ds_opening_lock);
if (ds->ds_objset == NULL) {
objset_t *os;
@@ -651,7 +671,7 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
{
dsl_pool_t *dp;
dsl_dataset_t *ds, *newds;
- char name[MAXNAMELEN];
+ char name[ZFS_MAX_DATASET_NAME_LEN];
ds = os->os_dsl_dataset;
VERIFY3P(ds, !=, NULL);
@@ -875,6 +895,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
if (strchr(doca->doca_name, '@') != NULL)
return (SET_ERROR(EINVAL));
+ if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN)
+ return (SET_ERROR(ENAMETOOLONG));
+
error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
if (error != 0)
return (error);
@@ -961,6 +984,9 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
if (strchr(doca->doca_clone, '@') != NULL)
return (SET_ERROR(EINVAL));
+ if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN)
+ return (SET_ERROR(ENAMETOOLONG));
+
error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
if (error != 0)
return (error);
@@ -1000,7 +1026,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
const char *tail;
dsl_dataset_t *origin, *ds;
uint64_t obj;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
@@ -2027,7 +2053,7 @@ dmu_objset_get_user(objset_t *os)
/*
* Determine name of filesystem, given name of snapshot.
- * buf must be at least MAXNAMELEN bytes
+ * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes
*/
int
dmu_fsname(const char *snapname, char *buf)
@@ -2035,7 +2061,7 @@ dmu_fsname(const char *snapname, char *buf)
char *atp = strchr(snapname, '@');
if (atp == NULL)
return (SET_ERROR(EINVAL));
- if (atp - snapname >= MAXNAMELEN)
+ if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strlcpy(buf, snapname, atp - snapname + 1);
return (0);
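For illustration, a standalone sketch of the dmu_fsname() logic above: copy everything before the '@' into buf, which must be at least ZFS_MAX_DATASET_NAME_LEN bytes. The kernel's strlcpy is replaced with an equivalent bounded snprintf so the sketch builds on any libc.

#include <stdio.h>
#include <string.h>
#include <errno.h>

#define	ZFS_MAX_DATASET_NAME_LEN	256	/* assumed, per sys/fs/zfs.h */

static int
fsname(const char *snapname, char *buf)
{
	const char *atp = strchr(snapname, '@');

	if (atp == NULL)
		return (EINVAL);
	if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN)
		return (ENAMETOOLONG);
	/* copies (atp - snapname) chars plus the terminating NUL */
	(void) snprintf(buf, atp - snapname + 1, "%s", snapname);
	return (0);
}

int
main(void)
{
	char buf[ZFS_MAX_DATASET_NAME_LEN];

	if (fsname("tank/home@monday", buf) == 0)
		printf("filesystem: %s\n", buf);	/* tank/home */
	return (0);
}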
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 901386a5a..80f7dc1aa 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -20,10 +20,11 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright 2014 HybridCluster. All rights reserved.
+ * Copyright 2016 RackTop Systems.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
@@ -62,14 +63,18 @@
int zfs_send_corrupt_data = B_FALSE;
int zfs_send_queue_length = 16 * 1024 * 1024;
int zfs_recv_queue_length = 16 * 1024 * 1024;
+/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
+int zfs_send_set_freerecords_bit = B_TRUE;
static char *dmu_recv_tag = "dmu_recv_tag";
-static const char *recv_clone_name = "%recv";
+const char *recv_clone_name = "%recv";
#define BP_SPAN(datablkszsec, indblkshift, level) \
(((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
(level) * (indblkshift - SPA_BLKPTRSHIFT)))
+static void byteswap_record(dmu_replay_record_t *drr);
+
struct send_thread_arg {
bqueue_t q;
dsl_dataset_t *ds; /* Dataset to traverse */
@@ -77,6 +82,7 @@ struct send_thread_arg {
int flags; /* flags to pass to traverse_dataset */
int error_code;
boolean_t cancel;
+ zbookmark_phys_t resume;
};
struct send_block_record {
@@ -99,8 +105,21 @@ dump_bytes_cb(void *arg)
{
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
dmu_sendarg_t *dsp = dbi->dbi_dsp;
- dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
+ dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os);
ssize_t resid; /* have to get resid to get detailed errno */
+
+ /*
+ * The code does not rely on this (len being a multiple of 8). We keep
+ * this assertion because of the corresponding assertion in
+ * receive_read(). Keeping this assertion ensures that we do not
+ * inadvertently break backwards compatibility (causing the assertion
+ * in receive_read() to trigger on old software).
+ *
+ * Removing the assertions could be rolled into a new feature that uses
+ * data that isn't 8-byte aligned; if the assertions were removed, a
+ * feature flag would have to be added.
+ */
+
ASSERT0(dbi->dbi_len % 8);
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
@@ -169,6 +188,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
return (0);
}
+/*
+ * Fill in the drr_free struct, or perform aggregation if the previous record is
+ * also a free record, and the two are adjacent.
+ *
+ * Note that we send free records even for a full send, because we want to be
+ * able to receive a full send as a clone, which requires a list of all the free
+ * and freeobject records that were generated on the source.
+ */
static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
uint64_t length)
@@ -180,7 +207,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
* that the receiving system doesn't have any dbufs in the range
* being freed. This is always true because there is a one-record
* constraint: we only send one WRITE record for any given
- * object+offset. We know that the one-record constraint is
+ * object,offset. We know that the one-record constraint is
* true because we always send data in increasing order by
* object,offset.
*
@@ -192,15 +219,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
(object == dsp->dsa_last_data_object &&
offset > dsp->dsa_last_data_offset));
- /*
- * If we are doing a non-incremental send, then there can't
- * be any data in the dataset we're receiving into. Therefore
- * a free record would simply be a no-op. Save space by not
- * sending it to begin with.
- */
- if (!dsp->dsa_incremental)
- return (0);
-
if (length != -1ULL && offset + length < offset)
length = -1ULL;
@@ -378,10 +396,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
{
struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
- /* See comment in dump_free(). */
- if (!dsp->dsa_incremental)
- return (0);
-
/*
* If there is a pending op, but it's not PENDING_FREEOBJECTS,
* push it out, since free block aggregation can only be done for
@@ -428,6 +442,19 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
{
struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+ if (object < dsp->dsa_resume_object) {
+ /*
+ * Note: when resuming, we will visit all the dnodes in
+ * the block of dnodes that we are resuming from. In
+ * this case it's unnecessary to send the dnodes prior to
+ * the one we are resuming from. We should be at most one
+ * block's worth of dnodes behind the resume point.
+ */
+ ASSERT3U(dsp->dsa_resume_object - object, <,
+ 1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT));
+ return (0);
+ }
+
if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
return (dump_freeobjects(dsp, object, 1));
@@ -509,6 +536,9 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
uint64_t record_size;
int err = 0;
+ ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+ zb->zb_object >= sta->resume.zb_object);
+
if (sta->cancel)
return (SET_ERROR(EINTR));
@@ -545,8 +575,10 @@ send_traverse_thread(void *arg)
struct send_block_record *data;
if (st_arg->ds != NULL) {
- err = traverse_dataset(st_arg->ds, st_arg->fromtxg,
- st_arg->flags, send_cb, arg);
+ err = traverse_dataset_resume(st_arg->ds,
+ st_arg->fromtxg, &st_arg->resume,
+ st_arg->flags, send_cb, st_arg);
+
if (err != EINTR)
st_arg->error_code = err;
}
@@ -575,6 +607,9 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
ASSERT3U(zb->zb_level, >=, 0);
+ ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+ zb->zb_object >= dsa->dsa_resume_object);
+
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
@@ -637,6 +672,10 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
uint64_t offset;
ASSERT0(zb->zb_level);
+ ASSERT(zb->zb_object > dsa->dsa_resume_object ||
+ (zb->zb_object == dsa->dsa_resume_object &&
+ zb->zb_blkid * blksz >= dsa->dsa_resume_offset));
+
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
&aflags, zb) != 0) {
@@ -697,8 +736,10 @@ get_next_record(bqueue_t *bq, struct send_block_record *data)
*/
static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
- zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok,
- boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off)
+ zfs_bookmark_phys_t *ancestor_zb,
+ boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, int outfd,
+ uint64_t resumeobj, uint64_t resumeoff,
+ vnode_t *vp, offset_t *off)
{
objset_t *os;
dmu_replay_record_t *drr;
@@ -707,6 +748,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
uint64_t fromtxg = 0;
uint64_t featureflags = 0;
struct send_thread_arg to_arg;
+ void *payload = NULL;
+ size_t payload_len = 0;
struct send_block_record *to_data;
err = dmu_objset_from_ds(to_ds, &os);
@@ -721,6 +764,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
DMU_SUBSTREAM);
+ bzero(&to_arg, sizeof (to_arg));
+
#ifdef _KERNEL
if (dmu_objset_type(os) == DMU_OST_ZFS) {
uint64_t version;
@@ -746,6 +791,10 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
}
+ if (resumeobj != 0 || resumeoff != 0) {
+ featureflags |= DMU_BACKUP_FEATURE_RESUMING;
+ }
+
DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
featureflags);
@@ -757,6 +806,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
+ if (zfs_send_set_freerecords_bit)
+ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
if (ancestor_zb != NULL) {
drr->drr_u.drr_begin.drr_fromguid =
@@ -779,8 +830,9 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
dsp->dsa_pending_op = PENDING_NONE;
- dsp->dsa_incremental = (ancestor_zb != NULL);
dsp->dsa_featureflags = featureflags;
+ dsp->dsa_resume_object = resumeobj;
+ dsp->dsa_resume_offset = resumeoff;
mutex_enter(&to_ds->ds_sendstream_lock);
list_insert_head(&to_ds->ds_sendstreams, dsp);
@@ -789,7 +841,26 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsl_dataset_long_hold(to_ds, FTAG);
dsl_pool_rele(dp, tag);
- if (dump_record(dsp, NULL, 0) != 0) {
+ if (resumeobj != 0 || resumeoff != 0) {
+ dmu_object_info_t to_doi;
+ nvlist_t *nvl;
+ err = dmu_object_info(os, resumeobj, &to_doi);
+ if (err != 0)
+ goto out;
+ SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
+ resumeoff / to_doi.doi_data_block_size);
+
+ nvl = fnvlist_alloc();
+ fnvlist_add_uint64(nvl, "resume_object", resumeobj);
+ fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+ payload = fnvlist_pack(nvl, &payload_len);
+ drr->drr_payloadlen = payload_len;
+ fnvlist_free(nvl);
+ }
+
+ err = dump_record(dsp, payload, payload_len);
+ fnvlist_pack_free(payload, payload_len);
+ if (err != 0) {
err = dsp->dsa_err;
goto out;
}
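The hunk above is where a resumed send advertises its resume point: dmu_send_impl() packs { resume_object, resume_offset } into an nvlist and attaches it as the BEGIN record's payload, which the receiver later unpacks and cross-checks against its saved ZAP state (see resume_check() below). A userland sketch of the same packing, assuming libnvpair's fnvlist interfaces (link with -lnvpair); the object and offset values are hypothetical:

#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *nvl = fnvlist_alloc();
	size_t len;
	char *buf;

	/* hypothetical resume point: object 193, offset 1 MiB */
	fnvlist_add_uint64(nvl, "resume_object", 193);
	fnvlist_add_uint64(nvl, "resume_offset", 1048576);

	buf = fnvlist_pack(nvl, &len);
	printf("BEGIN payload: %zu packed bytes\n", len);

	fnvlist_pack_free(buf, len);
	fnvlist_free(nvl);
	return (0);
}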
@@ -899,19 +970,19 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, outfd, 0, 0, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, outfd, 0, 0, vp, off);
}
dsl_dataset_rele(ds, FTAG);
return (err);
}
int
-dmu_send(const char *tosnap, const char *fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
- int outfd, vnode_t *vp, offset_t *off)
+dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
+ vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
@@ -978,10 +1049,12 @@ dmu_send(const char *tosnap, const char *fromsnap,
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok,
+ outfd, resumeobj, resumeoff, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok,
+ outfd, resumeobj, resumeoff, vp, off);
}
if (owned)
dsl_dataset_disown(ds, FTAG);
@@ -1221,6 +1294,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* already checked */
ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+ ASSERT(!(featureflags & DMU_BACKUP_FEATURE_RESUMING));
if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM ||
@@ -1233,6 +1307,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
spa_version(dp->dp_spa) < SPA_VERSION_SA)
return (SET_ERROR(ENOTSUP));
+ if (drba->drba_cookie->drc_resumable &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EXTENSIBLE_DATASET))
+ return (SET_ERROR(ENOTSUP));
+
/*
* The receiving code doesn't know how to translate a WRITE_EMBEDDED
* record to a plain WRITE record, so the pool must have the
@@ -1269,7 +1347,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* target fs already exists; recv into temp clone */
/* Can't recv a clone into an existing fs */
- if (flags & DRR_FLAG_CLONE) {
+ if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1278,7 +1356,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
} else if (error == ENOENT) {
/* target fs does not exist; must be a full backup or clone */
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
/*
* If it's a non-clone incremental, we are missing the
@@ -1288,8 +1366,17 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
drba->drba_origin))
return (SET_ERROR(ENOENT));
+ /*
+ * If we're receiving a full send as a clone, and it doesn't
+ * contain all the necessary free records and freeobject
+ * records, reject it.
+ */
+ if (fromguid == 0 && drba->drba_origin &&
+ !(flags & DRR_FLAG_FREERECORDS))
+ return (SET_ERROR(EINVAL));
+
/* Open the parent of tofs */
- ASSERT3U(strlen(tofs), <, MAXNAMELEN);
+ ASSERT3U(strlen(tofs), <, sizeof (buf));
(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
error = dsl_dataset_hold(dp, buf, FTAG, &ds);
if (error != 0)
@@ -1327,7 +1414,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
- if (dsl_dataset_phys(origin)->ds_guid != fromguid) {
+ if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
+ fromguid != 0) {
dsl_dataset_rele(origin, FTAG);
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(ENODEV));
@@ -1345,15 +1433,16 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
{
dmu_recv_begin_arg_t *drba = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
const char *tofs = drba->drba_cookie->drc_tofs;
dsl_dataset_t *ds, *newds;
uint64_t dsobj;
int error;
- uint64_t crflags;
+ uint64_t crflags = 0;
- crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
- DS_FLAG_CI_DATASET : 0;
+ if (drrb->drr_flags & DRR_FLAG_CI_DATA)
+ crflags |= DS_FLAG_CI_DATASET;
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
if (error == 0) {
@@ -1391,6 +1480,32 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
}
VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
+ if (drba->drba_cookie->drc_resumable) {
+ uint64_t one = 1;
+ uint64_t zero = 0;
+
+ dsl_dataset_zapify(newds, tx);
+ if (drrb->drr_fromguid != 0) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_FROMGUID,
+ 8, 1, &drrb->drr_fromguid, tx));
+ }
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TOGUID,
+ 8, 1, &drrb->drr_toguid, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TONAME,
+ 1, strlen(drrb->drr_toname) + 1, drrb->drr_toname, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OBJECT,
+ 8, 1, &one, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OFFSET,
+ 8, 1, &zero, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES,
+ 8, 1, &zero, tx));
+ if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_EMBED_DATA) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK,
+ 8, 1, &one, tx));
+ }
+ }
+
dmu_buf_will_dirty(newds->ds_dbuf, tx);
dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
@@ -1408,56 +1523,194 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
spa_history_log_internal_ds(newds, "receive", tx, "");
}
+static int
+dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
+{
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+ int error;
+ uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+ dsl_dataset_t *ds;
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ uint64_t val;
+
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+
+ /* already checked */
+ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+ ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING);
+
+ if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM ||
+ drrb->drr_type >= DMU_OST_NUMTYPES)
+ return (SET_ERROR(EINVAL));
+
+ /* Verify pool version supports SA if SA_SPILL feature set */
+ if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
+ spa_version(dp->dp_spa) < SPA_VERSION_SA)
+ return (SET_ERROR(ENOTSUP));
+
+ /*
+ * The receiving code doesn't know how to translate a WRITE_EMBEDDED
+ * record to a plain WRITE record, so the pool must have the
+ * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
+ * records. Same with WRITE_EMBEDDED records that use LZ4 compression.
+ */
+ if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
+ return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
+ return (SET_ERROR(ENOTSUP));
+
+ (void) snprintf(recvname, sizeof (recvname), "%s/%s",
+ tofs, recv_clone_name);
+
+ if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ /* %recv does not exist; continue in tofs */
+ error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ if (error != 0)
+ return (error);
+ }
+
+ /* check that ds is marked inconsistent */
+ if (!DS_IS_INCONSISTENT(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /* check that there is resuming data, and that the toguid matches */
+ if (!dsl_dataset_is_zapified(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+ error = zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val);
+ if (error != 0 || drrb->drr_toguid != val) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Check if the receive is still running. If so, it will be owned.
+ * Note that nothing else can own the dataset (e.g. after the receive
+ * fails) because it will be marked inconsistent.
+ */
+ if (dsl_dataset_has_owner(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EBUSY));
+ }
+
+ /* There should not be any snapshots of this fs yet. */
+ if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Note: resume point will be checked when we process the first WRITE
+ * record.
+ */
+
+ /* check that the origin matches */
+ val = 0;
+ (void) zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val);
+ if (drrb->drr_fromguid != val) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
+
+static void
+dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
+{
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ dsl_dataset_t *ds;
+ uint64_t dsobj;
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+
+ (void) snprintf(recvname, sizeof (recvname), "%s/%s",
+ tofs, recv_clone_name);
+
+ if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ /* %recv does not exist; continue in tofs */
+ VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds));
+ drba->drba_cookie->drc_newfs = B_TRUE;
+ }
+
+ /* clear the inconsistent flag so that we can own it */
+ ASSERT(DS_IS_INCONSISTENT(ds));
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ dsobj = ds->ds_object;
+ dsl_dataset_rele(ds, FTAG);
+
+ VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds));
+
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
+
+ ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)));
+
+ drba->drba_cookie->drc_ds = ds;
+
+ spa_history_log_internal_ds(ds, "resume receive", tx, "");
+}
+
/*
* NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
* succeeds; otherwise we will leak the holds on the datasets.
*/
int
-dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
- boolean_t force, char *origin, dmu_recv_cookie_t *drc)
+dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
+ boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc)
{
dmu_recv_begin_arg_t drba = { 0 };
- dmu_replay_record_t *drr;
bzero(drc, sizeof (dmu_recv_cookie_t));
- drc->drc_drrb = drrb;
+ drc->drc_drr_begin = drr_begin;
+ drc->drc_drrb = &drr_begin->drr_u.drr_begin;
drc->drc_tosnap = tosnap;
drc->drc_tofs = tofs;
drc->drc_force = force;
+ drc->drc_resumable = resumable;
drc->drc_cred = CRED();
- if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
+ if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
drc->drc_byteswap = B_TRUE;
- else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
- return (SET_ERROR(EINVAL));
-
- drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
- drr->drr_type = DRR_BEGIN;
- drr->drr_u.drr_begin = *drc->drc_drrb;
- if (drc->drc_byteswap) {
- fletcher_4_incremental_byteswap(drr,
+ fletcher_4_incremental_byteswap(drr_begin,
sizeof (dmu_replay_record_t), &drc->drc_cksum);
- } else {
- fletcher_4_incremental_native(drr,
+ byteswap_record(drr_begin);
+ } else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) {
+ fletcher_4_incremental_native(drr_begin,
sizeof (dmu_replay_record_t), &drc->drc_cksum);
- }
- kmem_free(drr, sizeof (dmu_replay_record_t));
-
- if (drc->drc_byteswap) {
- drrb->drr_magic = BSWAP_64(drrb->drr_magic);
- drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
- drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
- drrb->drr_type = BSWAP_32(drrb->drr_type);
- drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
- drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
+ } else {
+ return (SET_ERROR(EINVAL));
}
drba.drba_origin = origin;
drba.drba_cookie = drc;
drba.drba_cred = CRED();
- return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
- &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ if (DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_RESUMING) {
+ return (dsl_sync_task(tofs,
+ dmu_recv_resume_begin_check, dmu_recv_resume_begin_sync,
+ &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ } else {
+ return (dsl_sync_task(tofs,
+ dmu_recv_begin_check, dmu_recv_begin_sync,
+ &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ }
}
struct receive_record_arg {
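Note the ordering in the rewritten dmu_recv_begin() above: the BEGIN record is checksummed while still in wire order, and only then does byteswap_record() rewrite it, so the running fletcher-4 state matches what the sender emitted regardless of endianness. For reference, a simplified standalone version of the scalar fletcher-4 core from zfs_fletcher.c that both sides run:

#include <stdio.h>
#include <stdint.h>

static void
fletcher_4(const void *buf, uint64_t size, uint64_t zcp[4])
{
	const uint32_t *ip = buf;
	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
	uint64_t a = 0, b = 0, c = 0, d = 0;

	for (; ip < ipend; ip++) {
		a += ip[0];
		b += a;
		c += b;
		d += c;
	}
	zcp[0] = a; zcp[1] = b; zcp[2] = c; zcp[3] = d;
}

int
main(void)
{
	uint32_t data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint64_t ck[4];

	fletcher_4(data, sizeof (data), ck);
	printf("%llx %llx %llx %llx\n", (unsigned long long)ck[0],
	    (unsigned long long)ck[1], (unsigned long long)ck[2],
	    (unsigned long long)ck[3]);
	return (0);
}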
@@ -1469,6 +1722,7 @@ struct receive_record_arg {
*/
arc_buf_t *write_buf;
int payload_size;
+ uint64_t bytes_read; /* bytes read from stream when record created */
boolean_t eos_marker; /* Marks the end of the stream */
bqueue_node_t node;
};
@@ -1477,6 +1731,7 @@ struct receive_writer_arg {
objset_t *os;
boolean_t byteswap;
bqueue_t q;
+
/*
* These three args are used to signal to the main thread that we're
* done.
@@ -1484,15 +1739,34 @@ struct receive_writer_arg {
kmutex_t mutex;
kcondvar_t cv;
boolean_t done;
+
int err;
/* A map from guid to dataset to help handle dedup'd streams. */
avl_tree_t *guid_to_ds_map;
+ boolean_t resumable;
+ uint64_t last_object, last_offset;
+ uint64_t bytes_read; /* bytes read when current record created */
+};
+
+struct objlist {
+ list_t list; /* List of struct receive_objnode. */
+ /*
+ * Last object looked up. Used to assert that objects are being looked
+ * up in ascending order.
+ */
+ uint64_t last_lookup;
+};
+
+struct receive_objnode {
+ list_node_t node;
+ uint64_t object;
};
struct receive_arg {
objset_t *os;
vnode_t *vp; /* The vnode to read the stream from */
uint64_t voff; /* The current offset in the stream */
+ uint64_t bytes_read;
/*
* A record that has had its payload read in, but hasn't yet been handed
* off to the worker thread.
@@ -1505,12 +1779,7 @@ struct receive_arg {
int err;
boolean_t byteswap;
/* Sorted list of objects not to issue prefetches for. */
- list_t ignore_obj_list;
-};
-
-struct receive_ign_obj_node {
- list_node_t node;
- uint64_t object;
+ struct objlist ignore_objlist;
};
typedef struct guid_map_entry {
@@ -1553,7 +1822,10 @@ receive_read(struct receive_arg *ra, int len, void *buf)
{
int done = 0;
- /* some things will require 8-byte alignment, so everything must */
+ /*
+ * The code doesn't rely on this (lengths being multiples of 8). See
+ * comment in dump_bytes.
+ */
ASSERT0(len % 8);
while (done < len) {
@@ -1564,14 +1836,21 @@ receive_read(struct receive_arg *ra, int len, void *buf)
ra->voff, UIO_SYSSPACE, FAPPEND,
RLIM64_INFINITY, CRED(), &resid);
- if (resid == len - done)
- ra->err = SET_ERROR(EINVAL);
+ if (resid == len - done) {
+ /*
+ * Note: ECKSUM indicates that the receive
+ * was interrupted and can potentially be resumed.
+ */
+ ra->err = SET_ERROR(ECKSUM);
+ }
ra->voff += len - done - resid;
done = len - resid;
if (ra->err != 0)
return (ra->err);
}
+ ra->bytes_read += len;
+
ASSERT3U(done, ==, len);
return (0);
}
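A truncated read here, like the END-record checksum mismatch later in this diff, now maps to ECKSUM rather than EINVAL, which userland interprets as "stream interrupted, possibly resumable". A sketch of that short-read contract; the ECKSUM-to-EBADE mapping is assumed to match what the SPL does on Linux:

#include <stdio.h>
#include <unistd.h>
#include <errno.h>

#ifndef ECKSUM
#define	ECKSUM	EBADE	/* assumed SPL mapping on Linux */
#endif

static int
read_full(int fd, void *buf, size_t len)
{
	size_t done = 0;

	while (done < len) {
		ssize_t n = read(fd, (char *)buf + done, len - done);

		if (n < 0)
			return (errno);
		if (n == 0)		/* stream truncated mid-record */
			return (ECKSUM);
		done += n;
	}
	return (0);
}

int
main(void)
{
	char hdr[8];

	/* a pipe that closes early yields ECKSUM, not EINVAL */
	printf("read_full: %d\n", read_full(STDIN_FILENO, hdr, sizeof (hdr)));
	return (0);
}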
@@ -1675,6 +1954,43 @@ deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
}
}
+static void
+save_resume_state(struct receive_writer_arg *rwa,
+ uint64_t object, uint64_t offset, dmu_tx_t *tx)
+{
+ int txgoff = dmu_tx_get_txg(tx) & TXG_MASK;
+
+ if (!rwa->resumable)
+ return;
+
+ /*
+ * We use ds_resume_bytes[] != 0 to indicate that we need to
+ * update this on disk, so it must not be 0.
+ */
+ ASSERT(rwa->bytes_read != 0);
+
+ /*
+ * We only resume from write records, which have a valid
+ * (non-meta-dnode) object number.
+ */
+ ASSERT(object != 0);
+
+ /*
+ * For resuming to work correctly, we must receive records in order,
+ * sorted by object,offset. This is checked by the callers, but
+ * assert it here for good measure.
+ */
+ ASSERT3U(object, >=, rwa->os->os_dsl_dataset->ds_resume_object[txgoff]);
+ ASSERT(object != rwa->os->os_dsl_dataset->ds_resume_object[txgoff] ||
+ offset >= rwa->os->os_dsl_dataset->ds_resume_offset[txgoff]);
+ ASSERT3U(rwa->bytes_read, >=,
+ rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff]);
+
+ rwa->os->os_dsl_dataset->ds_resume_object[txgoff] = object;
+ rwa->os->os_dsl_dataset->ds_resume_offset[txgoff] = offset;
+ rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
+}
+
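save_resume_state() above records the receive high-water mark per open transaction group: the txg & TXG_MASK index selects one of the TXG_SIZE in-flight slots, and syncing context later flushes the slot for the syncing txg into the on-disk resume ZAP entries. A tiny demo of the slot arithmetic, with the constants taken from sys/txg.h:

#include <stdio.h>
#include <stdint.h>

#define	TXG_SIZE	4		/* from sys/txg.h */
#define	TXG_MASK	(TXG_SIZE - 1)

int
main(void)
{
	uint64_t txg;

	/* consecutive txgs reuse the four in-flight slots round-robin */
	for (txg = 100; txg < 108; txg++)
		printf("txg %llu -> slot %llu\n", (unsigned long long)txg,
		    (unsigned long long)(txg & TXG_MASK));
	return (0);
}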
noinline static int
receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
void *data)
@@ -1773,6 +2089,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
dmu_buf_rele(db, FTAG);
}
dmu_tx_commit(tx);
+
return (0);
}
@@ -1782,13 +2099,14 @@ receive_freeobjects(struct receive_writer_arg *rwa,
struct drr_freeobjects *drrfo)
{
uint64_t obj;
+ int next_err = 0;
if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
return (SET_ERROR(EINVAL));
for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj;
- obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
- (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+ obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
+ next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
dmu_object_info_t doi;
int err;
@@ -1804,7 +2122,8 @@ receive_freeobjects(struct receive_writer_arg *rwa,
if (err != 0)
return (err);
}
-
+ if (next_err != ESRCH)
+ return (next_err);
return (0);
}
@@ -1820,6 +2139,18 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
!DMU_OT_IS_VALID(drrw->drr_type))
return (SET_ERROR(EINVAL));
+ /*
+ * For resuming to work, records must be in increasing order
+ * by (object, offset).
+ */
+ if (drrw->drr_object < rwa->last_object ||
+ (drrw->drr_object == rwa->last_object &&
+ drrw->drr_offset < rwa->last_offset)) {
+ return (SET_ERROR(EINVAL));
+ }
+ rwa->last_object = drrw->drr_object;
+ rwa->last_offset = drrw->drr_offset;
+
if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
@@ -1842,8 +2173,17 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
return (SET_ERROR(EINVAL));
dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+
+ /*
+ * Note: If the receive fails, we want the resume stream to start
+ * with the same record that we last successfully received (as opposed
+ * to the next record), so that we can verify that we are
+ * resuming from the correct location.
+ */
+ save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
dmu_tx_commit(tx);
dmu_buf_rele(bonus, FTAG);
+
return (0);
}
@@ -1902,43 +2242,48 @@ receive_write_byref(struct receive_writer_arg *rwa,
dmu_write(rwa->os, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
dmu_buf_rele(dbp, FTAG);
+
+ /* See comment in receive_write. */
+ save_resume_state(rwa, drrwbr->drr_object, drrwbr->drr_offset, tx);
dmu_tx_commit(tx);
return (0);
}
static int
receive_write_embedded(struct receive_writer_arg *rwa,
- struct drr_write_embedded *drrwnp, void *data)
+ struct drr_write_embedded *drrwe, void *data)
{
dmu_tx_t *tx;
int err;
- if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
+ if (drrwe->drr_offset + drrwe->drr_length < drrwe->drr_offset)
return (EINVAL);
- if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE)
+ if (drrwe->drr_psize > BPE_PAYLOAD_SIZE)
return (EINVAL);
- if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES)
+ if (drrwe->drr_etype >= NUM_BP_EMBEDDED_TYPES)
return (EINVAL);
- if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
+ if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (EINVAL);
tx = dmu_tx_create(rwa->os);
- dmu_tx_hold_write(tx, drrwnp->drr_object,
- drrwnp->drr_offset, drrwnp->drr_length);
+ dmu_tx_hold_write(tx, drrwe->drr_object,
+ drrwe->drr_offset, drrwe->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
- dmu_write_embedded(rwa->os, drrwnp->drr_object,
- drrwnp->drr_offset, data, drrwnp->drr_etype,
- drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
+ dmu_write_embedded(rwa->os, drrwe->drr_object,
+ drrwe->drr_offset, data, drrwe->drr_etype,
+ drrwe->drr_compression, drrwe->drr_lsize, drrwe->drr_psize,
rwa->byteswap ^ ZFS_HOST_BYTEORDER, tx);
+ /* See comment in receive_write. */
+ save_resume_state(rwa, drrwe->drr_object, drrwe->drr_offset, tx);
dmu_tx_commit(tx);
return (0);
}
@@ -2012,10 +2357,16 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
- char name[MAXNAMELEN];
- dsl_dataset_name(drc->drc_ds, name);
- dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
- (void) dsl_destroy_head(name);
+ if (drc->drc_resumable) {
+ /* wait for our resume state to be written to disk */
+ txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ } else {
+ char name[ZFS_MAX_DATASET_NAME_LEN];
+ dsl_dataset_name(drc->drc_ds, name);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ (void) dsl_destroy_head(name);
+ }
}
static void
@@ -2044,12 +2395,17 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
if (len != 0) {
ASSERT3U(len, <=, SPA_MAXBLOCKSIZE);
- ra->rrd->payload = buf;
- ra->rrd->payload_size = len;
- err = receive_read(ra, len, ra->rrd->payload);
+ err = receive_read(ra, len, buf);
if (err != 0)
return (err);
- receive_cksum(ra, len, ra->rrd->payload);
+ receive_cksum(ra, len, buf);
+
+ /* note: rrd is NULL when reading the begin record's payload */
+ if (ra->rrd != NULL) {
+ ra->rrd->payload = buf;
+ ra->rrd->payload_size = len;
+ ra->rrd->bytes_read = ra->bytes_read;
+ }
}
ra->prev_cksum = ra->cksum;
@@ -2057,6 +2413,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
err = receive_read(ra, sizeof (ra->next_rrd->header),
&ra->next_rrd->header);
+ ra->next_rrd->bytes_read = ra->bytes_read;
if (err != 0) {
kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
ra->next_rrd = NULL;
@@ -2096,6 +2453,70 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
return (0);
}
+static void
+objlist_create(struct objlist *list)
+{
+ list_create(&list->list, sizeof (struct receive_objnode),
+ offsetof(struct receive_objnode, node));
+ list->last_lookup = 0;
+}
+
+static void
+objlist_destroy(struct objlist *list)
+{
+ struct receive_objnode *n;
+
+ for (n = list_remove_head(&list->list);
+ n != NULL; n = list_remove_head(&list->list)) {
+ kmem_free(n, sizeof (*n));
+ }
+ list_destroy(&list->list);
+}
+
+/*
+ * This function looks through the objlist to see if the specified object number
+ * is contained in the objlist. In the process, it will remove all object
+ * numbers in the list that are smaller than the specified object number. Thus,
+ * any lookup of an object number smaller than a previously looked up object
+ * number will always return false; therefore, all lookups should be done in
+ * ascending order.
+ */
+static boolean_t
+objlist_exists(struct objlist *list, uint64_t object)
+{
+ struct receive_objnode *node = list_head(&list->list);
+ ASSERT3U(object, >=, list->last_lookup);
+ list->last_lookup = object;
+ while (node != NULL && node->object < object) {
+ VERIFY3P(node, ==, list_remove_head(&list->list));
+ kmem_free(node, sizeof (*node));
+ node = list_head(&list->list);
+ }
+ return (node != NULL && node->object == object);
+}
+
+/*
+ * The objlist is a list of object numbers stored in ascending order. However,
+ * the insertion of new object numbers does not seek out the correct location to
+ * store a new object number; instead, it appends it to the list for simplicity.
+ * Thus, any users must take care to only insert new object numbers in ascending
+ * order.
+ */
+static void
+objlist_insert(struct objlist *list, uint64_t object)
+{
+ struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP);
+ node->object = object;
+#ifdef ZFS_DEBUG
+ {
+ struct receive_objnode *last_object = list_tail(&list->list);
+ uint64_t last_objnum = (last_object != NULL ? last_object->object : 0);
+ ASSERT3U(node->object, >, last_objnum);
+ }
+#endif
+ list_insert_tail(&list->list, node);
+}
+
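The objlist above generalizes the old ignore_obj_list: inserts append (callers guarantee ascending order), and each lookup discards every queued number below the probed object, so the receive path's prefetch filtering stays amortized O(1). A self-contained sketch of the same contract:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct objnode {
	struct objnode *next;
	uint64_t object;
};

static struct objnode *head;

static void
objlist_insert(uint64_t object)
{
	struct objnode *n = malloc(sizeof (*n));
	struct objnode **tail = &head;

	n->object = object;
	n->next = NULL;
	while (*tail != NULL)		/* append; caller keeps order */
		tail = &(*tail)->next;
	*tail = n;
}

static int
objlist_exists(uint64_t object)
{
	/* drop everything below the probe; lookups must ascend */
	while (head != NULL && head->object < object) {
		struct objnode *n = head;

		head = head->next;
		free(n);
	}
	return (head != NULL && head->object == object);
}

int
main(void)
{
	objlist_insert(3);
	objlist_insert(7);
	printf("%d %d %d\n", objlist_exists(2), objlist_exists(3),
	    objlist_exists(7));		/* prints: 0 1 1 */
	return (0);
}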
/*
* Issue the prefetch reads for any necessary indirect blocks.
*
@@ -2118,13 +2539,7 @@ static void
receive_read_prefetch(struct receive_arg *ra,
uint64_t object, uint64_t offset, uint64_t length)
{
- struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list);
- while (node != NULL && node->object < object) {
- VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list));
- kmem_free(node, sizeof (*node));
- node = list_head(&ra->ignore_obj_list);
- }
- if (node == NULL || node->object > object) {
+ if (!objlist_exists(&ra->ignore_objlist, object)) {
dmu_prefetch(ra->os, object, 1, offset, length,
ZIO_PRIORITY_SYNC_READ);
}
@@ -2157,20 +2572,7 @@ receive_read_record(struct receive_arg *ra)
*/
if (err == ENOENT ||
(err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
- struct receive_ign_obj_node *node =
- kmem_zalloc(sizeof (*node),
- KM_SLEEP);
- node->object = drro->drr_object;
-#ifdef ZFS_DEBUG
- {
- struct receive_ign_obj_node *last_object =
- list_tail(&ra->ignore_obj_list);
- uint64_t last_objnum = (last_object != NULL ?
- last_object->object : 0);
- ASSERT3U(node->object, >, last_objnum);
- }
-#endif
- list_insert_tail(&ra->ignore_obj_list, node);
+ objlist_insert(&ra->ignore_objlist, drro->drr_object);
err = 0;
}
return (err);
@@ -2236,7 +2638,7 @@ receive_read_record(struct receive_arg *ra)
{
struct drr_end *drre = &ra->rrd->header.drr_u.drr_end;
if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum))
- return (SET_ERROR(EINVAL));
+ return (SET_ERROR(ECKSUM));
return (0);
}
case DRR_SPILL:
@@ -2263,6 +2665,10 @@ receive_process_record(struct receive_writer_arg *rwa,
{
int err;
+ /* Processing in order, therefore bytes_read should be increasing. */
+ ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read);
+ rwa->bytes_read = rrd->bytes_read;
+
switch (rrd->header.drr_type) {
case DRR_OBJECT:
{
@@ -2357,6 +2763,32 @@ receive_writer_thread(void *arg)
mutex_exit(&rwa->mutex);
}
+static int
+resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
+{
+ uint64_t val;
+ objset_t *mos = dmu_objset_pool(ra->os)->dp_meta_objset;
+ uint64_t dsobj = dmu_objset_id(ra->os);
+ uint64_t resume_obj, resume_off;
+
+ if (nvlist_lookup_uint64(begin_nvl,
+ "resume_object", &resume_obj) != 0 ||
+ nvlist_lookup_uint64(begin_nvl,
+ "resume_offset", &resume_off) != 0) {
+ return (SET_ERROR(EINVAL));
+ }
+ VERIFY0(zap_lookup(mos, dsobj,
+ DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val));
+ if (resume_obj != val)
+ return (SET_ERROR(EINVAL));
+ VERIFY0(zap_lookup(mos, dsobj,
+ DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val));
+ if (resume_off != val)
+ return (SET_ERROR(EINVAL));
+
+ return (0);
+}
+
/*
* Read in the stream's records, one by one, and apply them to the pool. There
* are two threads involved; the thread that calls this function will spin up a
@@ -2377,7 +2809,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
struct receive_arg *ra;
struct receive_writer_arg *rwa;
int featureflags;
- struct receive_ign_obj_node *n;
+ uint32_t payloadlen;
+ void *payload;
+ nvlist_t *begin_nvl = NULL;
ra = kmem_zalloc(sizeof (*ra), KM_SLEEP);
rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP);
@@ -2386,8 +2820,14 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
ra->cksum = drc->drc_cksum;
ra->vp = vp;
ra->voff = *voffp;
- list_create(&ra->ignore_obj_list, sizeof (struct receive_ign_obj_node),
- offsetof(struct receive_ign_obj_node, node));
+
+ if (dsl_dataset_is_zapified(drc->drc_ds)) {
+ (void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset,
+ drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES,
+ sizeof (ra->bytes_read), 1, &ra->bytes_read);
+ }
+
+ objlist_create(&ra->ignore_objlist);
/* these were verified in dmu_recv_begin */
ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -2438,9 +2878,29 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
drc->drc_guid_to_ds_map = rwa->guid_to_ds_map;
}
- err = receive_read_payload_and_next_header(ra, 0, NULL);
- if (err)
+ payloadlen = drc->drc_drr_begin->drr_payloadlen;
+ payload = NULL;
+ if (payloadlen != 0)
+ payload = kmem_alloc(payloadlen, KM_SLEEP);
+
+ err = receive_read_payload_and_next_header(ra, payloadlen, payload);
+ if (err != 0) {
+ if (payloadlen != 0)
+ kmem_free(payload, payloadlen);
goto out;
+ }
+ if (payloadlen != 0) {
+ err = nvlist_unpack(payload, payloadlen, &begin_nvl, KM_SLEEP);
+ kmem_free(payload, payloadlen);
+ if (err != 0)
+ goto out;
+ }
+
+ if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+ err = resume_check(ra, begin_nvl);
+ if (err != 0)
+ goto out;
+ }
(void) bqueue_init(&rwa->q, zfs_recv_queue_length,
offsetof(struct receive_record_arg, node));
@@ -2448,6 +2908,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL);
rwa->os = ra->os;
rwa->byteswap = drc->drc_byteswap;
+ rwa->resumable = drc->drc_resumable;
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
TS_RUN, minclsyspri);
@@ -2461,7 +2922,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
* We can leave this loop in 3 ways: First, if rwa->err is
* non-zero. In that case, the writer thread will free the rrd we just
* pushed. Second, if we're interrupted; in that case, either it's the
- * first loop and ra->rrd was never allocated, or it's later, and ra.rrd
+ * first loop and ra->rrd was never allocated, or it's later and ra->rrd
* has been handed off to the writer thread who will free it. Finally,
* if receive_read_record fails or we're at the end of the stream, then
* we free ra->rrd and exit.
@@ -2506,24 +2967,21 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
err = rwa->err;
out:
+ nvlist_free(begin_nvl);
if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
zfs_onexit_fd_rele(cleanup_fd);
if (err != 0) {
/*
- * destroy what we created, so we don't leave it in the
- * inconsistent restoring state.
+ * Clean up references. If receive is not resumable,
+ * destroy what we created, so we don't leave it in
+ * the inconsistent state.
*/
dmu_recv_cleanup_ds(drc);
}
*voffp = ra->voff;
-
- for (n = list_remove_head(&ra->ignore_obj_list); n != NULL;
- n = list_remove_head(&ra->ignore_obj_list)) {
- kmem_free(n, sizeof (*n));
- }
- list_destroy(&ra->ignore_obj_list);
+ objlist_destroy(&ra->ignore_objlist);
kmem_free(ra, sizeof (*ra));
kmem_free(rwa, sizeof (*rwa));
return (err);
@@ -2674,6 +3132,20 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ if (dsl_dataset_has_resume_receive_state(ds)) {
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OBJECT, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OFFSET, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_BYTES, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TONAME, tx);
+ }
}
drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
@@ -2722,16 +3194,13 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
int error;
#ifdef _KERNEL
- char *name;
-
/*
* We will be destroying the ds; make sure its origin is unmounted if
* necessary.
*/
- name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ char name[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(drc->drc_ds, name);
zfs_destroy_unmount_origin(name);
- kmem_free(name, MAXNAMELEN);
#endif
error = dsl_sync_task(drc->drc_tofs,
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index 44ba74181..0df12fac8 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -47,6 +47,7 @@ typedef struct prefetch_data {
int pd_flags;
boolean_t pd_cancel;
boolean_t pd_exited;
+ zbookmark_phys_t pd_resume;
} prefetch_data_t;
typedef struct traverse_data {
@@ -323,30 +324,29 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
uint32_t flags = ARC_FLAG_WAIT;
int32_t i;
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
- dnode_phys_t *cdnp;
+ dnode_phys_t *child_dnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
goto post;
- cdnp = buf->b_data;
+ child_dnp = buf->b_data;
- for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) {
- prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
+ prefetch_dnode_metadata(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
}
/* recursively visitbp() blocks below this */
- for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) {
- err = traverse_dnode(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
+ err = traverse_dnode(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
if (err != 0)
break;
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
- dnode_phys_t *mdnp, *gdnp, *udnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
@@ -354,11 +354,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
goto post;
osp = buf->b_data;
- mdnp = &osp->os_meta_dnode;
- gdnp = &osp->os_groupused_dnode;
- udnp = &osp->os_userused_dnode;
-
- prefetch_dnode_metadata(td, mdnp, zb->zb_objset,
+ prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
/*
* See the block comment above for the goal of this variable.
@@ -370,21 +366,21 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
td->td_realloc_possible = B_FALSE;
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- prefetch_dnode_metadata(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
- prefetch_dnode_metadata(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
- err = traverse_dnode(td, mdnp, zb->zb_objset,
+ err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
}
@@ -416,9 +412,15 @@ post:
* Set the bookmark to the first level-0 block that we need
* to visit. This way, the resuming code does not need to
* deal with resuming from indirect blocks.
+ *
+ * Note, if zb_level <= 0, dnp may be NULL, so we don't want
+ * to dereference it.
*/
- td->td_resume->zb_blkid = zb->zb_blkid <<
- (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+ td->td_resume->zb_blkid = zb->zb_blkid;
+ if (zb->zb_level > 0) {
+ td->td_resume->zb_blkid <<= zb->zb_level *
+ (dnp->dn_indblkshift - SPA_BLKPTRSHIFT);
+ }
td->td_paused = B_TRUE;
}
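The fix above matters for two reasons: at zb_level <= 0 there may be no dnode_phys_t to dereference for dn_indblkshift, and for indirect blocks the blkid must be scaled to the first level-0 block the indirect spans. With 128K indirect blocks (dn_indblkshift == 17) each indirect holds 1024 block pointers, so a level-L block covers 1024^L level-0 blocks. A worked example under those assumed shifts:

#include <stdio.h>
#include <stdint.h>

#define	SPA_BLKPTRSHIFT	7	/* 128-byte block pointers */

int
main(void)
{
	uint64_t zb_blkid = 5;
	int indblkshift = 17;	/* 128K indirects: 1024 ptrs each */
	int zb_level;

	for (zb_level = 0; zb_level <= 2; zb_level++) {
		uint64_t l0 = zb_blkid;

		if (zb_level > 0)	/* skip the shift at level 0 */
			l0 <<= zb_level * (indblkshift - SPA_BLKPTRSHIFT);
		printf("level %d blkid %llu -> first L0 blkid %llu\n",
		    zb_level, (unsigned long long)zb_blkid,
		    (unsigned long long)l0);
	}
	return (0);
}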
@@ -450,6 +452,10 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
int j, err = 0;
zbookmark_phys_t czb;
+ if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL &&
+ object < td->td_resume->zb_object)
+ return (0);
+
if (td->td_flags & TRAVERSE_PRE) {
SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
ZB_DNODE_BLKID);
@@ -527,6 +533,7 @@ traverse_prefetch_thread(void *arg)
td.td_func = traverse_prefetcher;
td.td_arg = td_main->td_pfd;
td.td_pfd = NULL;
+ td.td_resume = &td_main->td_pfd->pd_resume;
SET_BOOKMARK(&czb, td.td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
@@ -556,12 +563,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
ASSERT(ds == NULL || objset == ds->ds_object);
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
- /*
- * The data prefetching mechanism (the prefetch thread) is incompatible
- * with resuming from a bookmark.
- */
- ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));
-
td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP);
pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP);
czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP);
@@ -586,6 +587,8 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
}
pd->pd_flags = flags;
+ if (resume != NULL)
+ pd->pd_resume = *resume;
mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);
@@ -638,11 +641,19 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
* in syncing context).
*/
int
-traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
- blkptr_cb_t func, void *arg)
+traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start,
+ zbookmark_phys_t *resume,
+ int flags, blkptr_cb_t func, void *arg)
{
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
- &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg));
+ &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg));
+}
+
+int
+traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start,
+ int flags, blkptr_cb_t func, void *arg)
+{
+ return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg));
}
int
@@ -675,7 +686,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
/* visit each dataset */
for (obj = 1; err == 0;
- err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
+ err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) {
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c
index 447a3a2dc..5a7f034ce 100644
--- a/module/zfs/dsl_bookmark.c
+++ b/module/zfs/dsl_bookmark.c
@@ -34,10 +34,10 @@ static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
dsl_dataset_t **dsp, void *tag, char **shortnamep)
{
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
char *hashp;
- if (strlen(fullname) >= MAXNAMELEN)
+ if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
hashp = strchr(fullname, '#');
if (hashp == NULL)
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 9c275b234..5b7de74dc 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -25,6 +25,7 @@
* Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
*/
#include <sys/dmu_objset.h>
@@ -52,6 +53,9 @@
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>
#include <sys/policy.h>
+#include <sys/dmu_send.h>
+#include <sys/zio_compress.h>
+#include <zfs_fletcher.h>
/*
* The SPA supports block sizes up to 16MB. However, very large blocks
@@ -75,6 +79,8 @@ int zfs_max_recordsize = 1 * 1024 * 1024;
extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
+extern int spa_asize_inflation;
+
/*
* Figure out how much of this delta should be propagated to the dsl_dir
* layer. If there's a refreservation, that space has already been
@@ -664,22 +670,38 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name)
dsl_dir_name(ds->ds_dir, name);
VERIFY0(dsl_dataset_get_snapname(ds));
if (ds->ds_snapname[0]) {
- (void) strcat(name, "@");
+ VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN),
+ <, ZFS_MAX_DATASET_NAME_LEN);
/*
* We use a "recursive" mutex so that we
* can call dprintf_ds() with ds_lock held.
*/
if (!MUTEX_HELD(&ds->ds_lock)) {
mutex_enter(&ds->ds_lock);
- (void) strcat(name, ds->ds_snapname);
+ VERIFY3U(strlcat(name, ds->ds_snapname,
+ ZFS_MAX_DATASET_NAME_LEN), <,
+ ZFS_MAX_DATASET_NAME_LEN);
mutex_exit(&ds->ds_lock);
} else {
- (void) strcat(name, ds->ds_snapname);
+ VERIFY3U(strlcat(name, ds->ds_snapname,
+ ZFS_MAX_DATASET_NAME_LEN), <,
+ ZFS_MAX_DATASET_NAME_LEN);
}
}
}
}
+int
+dsl_dataset_namelen(dsl_dataset_t *ds)
+{
+ int len;
+ VERIFY0(dsl_dataset_get_snapname(ds));
+ mutex_enter(&ds->ds_lock);
+ len = dsl_dir_namelen(ds->ds_dir) + 1 + strlen(ds->ds_snapname);
+ mutex_exit(&ds->ds_lock);
+ return (len);
+}
+
void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
@@ -704,6 +726,7 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
{
boolean_t gotit = FALSE;
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
mutex_enter(&ds->ds_lock);
if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
ds->ds_owner = tag;
@@ -714,6 +737,16 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
return (gotit);
}
+boolean_t
+dsl_dataset_has_owner(dsl_dataset_t *ds)
+{
+ boolean_t rv;
+ mutex_enter(&ds->ds_lock);
+ rv = (ds->ds_owner != NULL);
+ mutex_exit(&ds->ds_lock);
+ return (rv);
+}
+
static void
dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
{
@@ -1238,10 +1271,10 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
int error = 0;
dsl_dataset_t *ds;
char *name, *atp;
- char dsname[MAXNAMELEN];
+ char dsname[ZFS_MAX_DATASET_NAME_LEN];
name = nvpair_name(pair);
- if (strlen(name) >= MAXNAMELEN)
+ if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN)
error = SET_ERROR(ENAMETOOLONG);
if (error == 0) {
atp = strchr(name, '@');
@@ -1414,7 +1447,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
dsl_dataset_t *ds;
char *name, *atp;
- char dsname[MAXNAMELEN];
+ char dsname[ZFS_MAX_DATASET_NAME_LEN];
name = nvpair_name(pair);
atp = strchr(name, '@');
@@ -1461,7 +1494,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
suspended = fnvlist_alloc();
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
- char fsname[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
char *snapname = nvpair_name(pair);
char *atp;
void *cookie;
@@ -1615,6 +1648,21 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid;
+ if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) {
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1,
+ &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx));
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1,
+ &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx));
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1,
+ &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx));
+ ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0;
+ ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0;
+ ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
+ }
+
dmu_objset_sync(ds->ds_objset, zio, tx);
for (f = 0; f < SPA_FEATURES; f++) {
@@ -1655,7 +1703,7 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
- char buf[ZFS_MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
za.za_first_integer, FTAG, &clone));
dsl_dir_name(clone->ds_dir, buf);
@@ -1670,6 +1718,78 @@ fail:
nvlist_free(propval);
}
+static void
+get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+ if (dsl_dataset_has_resume_receive_state(ds)) {
+ char *str;
+ void *packed;
+ uint8_t *compressed;
+ uint64_t val;
+ nvlist_t *token_nv = fnvlist_alloc();
+ size_t packed_size, compressed_size;
+ zio_cksum_t cksum;
+ char *propval;
+ char buf[MAXNAMELEN];
+ int i;
+
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "fromguid", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "object", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "offset", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "bytes", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "toguid", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {
+ fnvlist_add_string(token_nv, "toname", buf);
+ }
+ if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_EMBEDOK) == 0) {
+ fnvlist_add_boolean(token_nv, "embedok");
+ }
+ packed = fnvlist_pack(token_nv, &packed_size);
+ fnvlist_free(token_nv);
+ compressed = kmem_alloc(packed_size, KM_SLEEP);
+
+ compressed_size = gzip_compress(packed, compressed,
+ packed_size, packed_size, 6);
+
+ fletcher_4_native(compressed, compressed_size, &cksum);
+
+ str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP);
+ for (i = 0; i < compressed_size; i++) {
+ (void) sprintf(str + i * 2, "%02x", compressed[i]);
+ }
+ str[compressed_size * 2] = '\0';
+ propval = kmem_asprintf("%u-%llx-%llx-%s",
+ ZFS_SEND_RESUME_TOKEN_VERSION,
+ (longlong_t)cksum.zc_word[0],
+ (longlong_t)packed_size, str);
+ dsl_prop_nvlist_add_string(nv,
+ ZFS_PROP_RECEIVE_RESUME_TOKEN, propval);
+ kmem_free(packed, packed_size);
+ kmem_free(str, compressed_size * 2 + 1);
+ kmem_free(compressed, packed_size);
+ strfree(propval);
+ }
+}
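The token built above is therefore "version-cksumword0-packedsize-hexpayload", with the payload being a zlib-compressed packed nvlist. A userland sketch of the inverse, assuming zlib and libnvpair are available (error handling trimmed; this is an illustration, not the libzfs decoder):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <libnvpair.h>

/* illustrative decoder for the "%u-%llx-%llx-%s" token format above */
static nvlist_t *
decode_resume_token(const char *token)
{
	unsigned int version;
	unsigned long long cksum, packed_size;
	const char *hex;
	unsigned char *compressed;
	char *packed;
	uLongf dlen;
	size_t clen, i;
	nvlist_t *nvl = NULL;

	if (sscanf(token, "%u-%llx-%llx-", &version, &cksum,
	    &packed_size) != 3)
		return (NULL);

	hex = strrchr(token, '-') + 1;
	clen = strlen(hex) / 2;
	compressed = malloc(clen);
	packed = malloc(packed_size);
	dlen = packed_size;

	for (i = 0; i < clen; i++)
		(void) sscanf(hex + 2 * i, "%2hhx", &compressed[i]);

	/* gzip_compress() writes a zlib stream, so uncompress() undoes it */
	if (uncompress((Bytef *)packed, &dlen, (const Bytef *)compressed,
	    clen) == Z_OK)
		(void) nvlist_unpack(packed, dlen, &nvl, 0);

	free(compressed);
	free(packed);
	return (nvl);	/* holds "toguid", "toname", "object", "offset", ... */
}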
+
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
@@ -1693,7 +1813,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
get_clones_stat(ds, nv);
} else {
if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds->ds_prev, buf);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
}
@@ -1743,6 +1863,32 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
}
}
+ if (!dsl_dataset_is_snapshot(ds)) {
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+ dsl_dataset_t *recv_ds;
+
+ /*
+ * A failed "newfs" (e.g. full) resumable receive leaves
+ * the stats set on this dataset. Check here for the prop.
+ */
+ get_receive_resume_stats(ds, nv);
+
+ /*
+ * A failed incremental resumable receive leaves the
+ * stats set on our child named "%recv". Check the child
+ * for the prop.
+ */
+ dsl_dataset_name(ds, recvname);
+ if (strlcat(recvname, "/", sizeof (recvname)) <
+ sizeof (recvname) &&
+ strlcat(recvname, recv_clone_name, sizeof (recvname)) <
+ sizeof (recvname) &&
+ dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) {
+ get_receive_resume_stats(recv_ds, nv);
+ dsl_dataset_rele(recv_ds, FTAG);
+ }
+ }
}
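Concretely: if an incremental resumable receive into pool/home fails partway, its resume state lands on the hidden child pool/home/%recv, and the hold above is what picks it up.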
void
@@ -1863,7 +2009,7 @@ dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
/* dataset name + 1 for the "@" + the new snapshot name must fit */
if (dsl_dir_namelen(hds->ds_dir) + 1 +
- strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
+ strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN)
error = SET_ERROR(ENAMETOOLONG);
return (error);
@@ -1970,7 +2116,8 @@ dsl_dataset_rename_snapshot(const char *fsname,
* only one long hold on the dataset. We're not allowed to change anything here
* so we don't permanently release the long hold or regular hold here. We want
* to do this only when syncing to avoid the dataset unexpectedly going away
- * when we release the long hold.
+ * when we release the long hold. Allow a long hold to exist for volumes; this
+ * may occur when asynchronously registering the minor with the kernel.
*/
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
@@ -1985,7 +2132,7 @@ dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
dsl_dataset_long_rele(ds, owner);
}
- held = dsl_dataset_long_held(ds);
+ held = (dsl_dataset_long_held(ds) && (ds->ds_owner != zvol_tag));
if (owner != NULL)
dsl_dataset_long_hold(ds, owner);
@@ -2095,7 +2242,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds, *clone;
uint64_t cloneobj;
- char namebuf[ZFS_MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));
@@ -2648,7 +2795,7 @@ promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag)
* Promote a clone.
*
* If it fails due to a conflicting snapshot name, "conflsnap" will be filled
- * in with the name. (It must be at least MAXNAMELEN bytes long.)
+ * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.)
*/
int
dsl_dataset_promote(const char *name, char *conflsnap)
@@ -2685,6 +2832,11 @@ int
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
{
+ /*
+ * "slack" factor for received datasets with refquota set on them.
+ * See the bottom of this function for details on its use.
+ */
+ uint64_t refquota_slack = DMU_MAX_ACCESS * spa_asize_inflation;
int64_t unused_refres_delta;
/* they should both be heads */
@@ -2727,10 +2879,22 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
return (SET_ERROR(ENOSPC));
- /* clone can't be over the head's refquota */
+ /*
+ * The clone can't be too much over the head's refquota.
+ *
+ * To ensure that the entire refquota can be used, we allow one
+	 * transaction to exceed the refquota. Therefore, this check
+ * needs to also allow for the space referenced to be more than the
+ * refquota. The maximum amount of space that one transaction can use
+ * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this
+ * overage ensures that we are able to receive a filesystem that
+ * exceeds the refquota on the source system.
+ *
+	 * That overage is the refquota_slack we use below.
+ */
if (origin_head->ds_quota != 0 &&
dsl_dataset_phys(clone)->ds_referenced_bytes >
- origin_head->ds_quota)
+ origin_head->ds_quota + refquota_slack)
return (SET_ERROR(EDQUOT));
return (0);
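For a sense of scale: with spa_asize_inflation at its default of 24 and DMU_MAX_ACCESS taken as 64 MiB (both figures are this note's assumptions, not guaranteed by the patch), refquota_slack comes to 24 * 64 MiB = 1.5 GiB of tolerated overage.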
@@ -2745,8 +2909,13 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
int64_t unused_refres_delta;
ASSERT(clone->ds_reserved == 0);
+ /*
+ * NOTE: On DEBUG kernels there could be a race between this and
+ * the check function if spa_asize_inflation is adjusted...
+ */
ASSERT(origin_head->ds_quota == 0 ||
- dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota);
+ dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota +
+ DMU_MAX_ACCESS * spa_asize_inflation);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
/*
@@ -3391,6 +3560,23 @@ dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);
}
+boolean_t
+dsl_dataset_is_zapified(dsl_dataset_t *ds)
+{
+ dmu_object_info_t doi;
+
+ dmu_object_info_from_db(ds->ds_dbuf, &doi);
+ return (doi.doi_type == DMU_OTN_ZAP_METADATA);
+}
+
+boolean_t
+dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)
+{
+ return (dsl_dataset_is_zapified(ds) &&
+ zap_contains(ds->ds_dir->dd_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
#if defined(_LP64)
module_param(zfs_max_recordsize, int, 0644);
diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c
index 952422be2..eb39cff57 100644
--- a/module/zfs/dsl_deleg.c
+++ b/module/zfs/dsl_deleg.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
/*
@@ -330,7 +330,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
baseza = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
- source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP);
+ source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
@@ -370,7 +370,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
nvlist_free(sp_nvp);
}
- kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
+ kmem_free(source, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(baseza, sizeof (zap_attribute_t));
kmem_free(basezc, sizeof (zap_cursor_t));
kmem_free(za, sizeof (zap_attribute_t));
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index d7c34c9a4..716081ba3 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -978,9 +978,17 @@ dsl_destroy_inconsistent(const char *dsname, void *arg)
objset_t *os;
if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
- boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+ boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+
+ /*
+ * If the dataset is inconsistent because a resumable receive
+ * has failed, then do not destroy it.
+ */
+ if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
+ need_destroy = B_FALSE;
+
dmu_objset_rele(os, FTAG);
- if (inconsistent)
+ if (need_destroy)
(void) dsl_destroy_head(dsname);
}
return (0);
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 8983e0793..ae67b362e 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -299,13 +299,14 @@ dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
dmu_buf_rele(dd->dd_dbuf, tag);
}
-/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
+/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */
void
dsl_dir_name(dsl_dir_t *dd, char *buf)
{
if (dd->dd_parent) {
dsl_dir_name(dd->dd_parent, buf);
- (void) strcat(buf, "/");
+ VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <,
+ ZFS_MAX_DATASET_NAME_LEN);
} else {
buf[0] = '\0';
}
@@ -315,10 +316,12 @@ dsl_dir_name(dsl_dir_t *dd, char *buf)
* dprintf_dd() with dd_lock held
*/
mutex_enter(&dd->dd_lock);
- (void) strcat(buf, dd->dd_myname);
+ VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
+ <, ZFS_MAX_DATASET_NAME_LEN);
mutex_exit(&dd->dd_lock);
} else {
- (void) strcat(buf, dd->dd_myname);
+ VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
+ <, ZFS_MAX_DATASET_NAME_LEN);
}
}
@@ -367,12 +370,12 @@ getcomponent(const char *path, char *component, const char **nextp)
if (p != NULL &&
(p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
return (SET_ERROR(EINVAL));
- if (strlen(path) >= MAXNAMELEN)
+ if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strcpy(component, path);
p = NULL;
} else if (p[0] == '/') {
- if (p - path >= MAXNAMELEN)
+ if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strncpy(component, path, p - path);
component[p - path] = '\0';
@@ -384,7 +387,7 @@ getcomponent(const char *path, char *component, const char **nextp)
*/
if (strchr(path, '/'))
return (SET_ERROR(EINVAL));
- if (p - path >= MAXNAMELEN)
+ if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strncpy(component, path, p - path);
component[p - path] = '\0';
@@ -412,7 +415,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t *dd;
uint64_t ddobj;
- buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
err = getcomponent(name, buf, &next);
if (err != 0)
goto error;
@@ -479,7 +482,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
*tailp = next;
*ddp = dd;
error:
- kmem_free(buf, MAXNAMELEN);
+ kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN);
return (err);
}
@@ -974,7 +977,7 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds));
@@ -1691,11 +1694,11 @@ static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
int *deltap = arg;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds, namebuf);
- if (strlen(namebuf) + *deltap >= MAXNAMELEN)
+ if (strlen(namebuf) + *deltap >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
return (0);
}
diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c
index 361473275..66e899a57 100644
--- a/module/zfs/dsl_prop.c
+++ b/module/zfs/dsl_prop.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright 2015, Joyent, Inc.
*/
@@ -1095,7 +1095,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err = 0;
- char setpoint[MAXNAMELEN];
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN];
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 72163521e..6c5f1f0b5 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -1115,7 +1115,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
* rootbp's birth time is < cur_min_txg. Then we will
* add the next snapshots/clones to the work queue.
*/
- char *dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname);
zfs_dbgmsg("scanning dataset %llu (%s) is unnecessary because "
"cur_min_txg (%llu) >= max_txg (%llu)",
@@ -1146,7 +1146,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx);
- dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
+ dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname);
zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
"pausing=%u",
@@ -1154,7 +1154,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
(longlong_t)scn->scn_phys.scn_cur_min_txg,
(longlong_t)scn->scn_phys.scn_cur_max_txg,
(int)scn->scn_pausing);
- kmem_free(dsname, ZFS_MAXNAMELEN);
+ kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
if (scn->scn_pausing)
goto out;
diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c
index 1b234ed48..a6d1aa937 100644
--- a/module/zfs/dsl_userhold.c
+++ b/module/zfs/dsl_userhold.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -181,7 +181,7 @@ dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
}
typedef struct zfs_hold_cleanup_arg {
- char zhca_spaname[MAXNAMELEN];
+ char zhca_spaname[ZFS_MAX_DATASET_NAME_LEN];
uint64_t zhca_spa_load_guid;
nvlist_t *zhca_holds;
} zfs_hold_cleanup_arg_t;
@@ -580,7 +580,7 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
error = dsl_dataset_hold_obj_string(tmpdp,
nvpair_name(pair), FTAG, &ds);
if (error == 0) {
- char name[MAXNAMELEN];
+ char name[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds, name);
dsl_pool_config_exit(tmpdp, FTAG);
dsl_dataset_rele(ds, FTAG);
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index d1aefe585..26181af84 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -361,8 +361,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
break;
}
- strval = kmem_alloc(
- MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
+ strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN,
KM_SLEEP);
dsl_dataset_name(ds, strval);
dsl_dataset_rele(ds, FTAG);
@@ -375,8 +374,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, prop, strval, intval, src);
if (strval != NULL)
- kmem_free(strval,
- MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
+ kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN);
break;
@@ -2018,6 +2016,16 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
return (0);
}
+/* ARGSUSED */
+int
+verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
+{
+ if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN)
+ return (SET_ERROR(ENAMETOOLONG));
+
+ return (0);
+}
+
static int
spa_load_verify(spa_t *spa)
{
@@ -2032,6 +2040,14 @@ spa_load_verify(spa_t *spa)
if (policy.zrp_request & ZPOOL_NEVER_REWIND)
return (0);
+ dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
+ error = dmu_objset_find_dp(spa->spa_dsl_pool,
+ spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL,
+ DS_FIND_CHILDREN);
+ dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
+ if (error != 0)
+ return (error);
+
rio = zio_root(spa, NULL, &sle,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c
index 01aa4641e..cf6fc224a 100644
--- a/module/zfs/spa_history.c
+++ b/module/zfs/spa_history.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@@ -493,7 +493,7 @@ spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
va_list adx;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
nvlist_t *nvl = fnvlist_alloc();
ASSERT(tx != NULL);
@@ -512,7 +512,7 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
va_list adx;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
nvlist_t *nvl = fnvlist_alloc();
ASSERT(tx != NULL);
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
index e47cfc878..d279d1828 100644
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -749,12 +749,13 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid,
return (ENOENT);
cookie = spl_fstrans_mark();
- snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
while (error == 0) {
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
- error = dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
- snapname, &id, &pos, &case_conflict);
+ error = dmu_snapshot_list_next(zsb->z_os,
+ ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
+ &case_conflict);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto out;
@@ -767,7 +768,7 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid,
snprintf(full_path, path_len - 1, "%s/.zfs/snapshot/%s",
zsb->z_mntopts->z_mntpoint, snapname);
out:
- kmem_free(snapname, MAXNAMELEN);
+ kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
spl_fstrans_unmark(cookie);
return (error);
@@ -854,14 +855,14 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
ZFS_ENTER(zsb);
- to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
error = dmu_snapshot_realname(zsb->z_os, snm, real,
- MAXNAMELEN, NULL);
+ ZFS_MAX_DATASET_NAME_LEN, NULL);
if (error == 0) {
snm = real;
} else if (error != ENOTSUP) {
@@ -871,9 +872,11 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
dmu_objset_name(zsb->z_os, fsname);
- error = zfsctl_snapshot_name(ITOZSB(sdip), snm, MAXNAMELEN, from);
+ error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
+ ZFS_MAX_DATASET_NAME_LEN, from);
if (error == 0)
- error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, MAXNAMELEN, to);
+ error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
+ ZFS_MAX_DATASET_NAME_LEN, to);
if (error == 0)
error = zfs_secpolicy_rename_perms(from, to, cr);
if (error != 0)
@@ -903,10 +906,10 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
rw_exit(&zfs_snapshot_lock);
out:
- kmem_free(from, MAXNAMELEN);
- kmem_free(to, MAXNAMELEN);
- kmem_free(real, MAXNAMELEN);
- kmem_free(fsname, MAXNAMELEN);
+ kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
ZFS_EXIT(zsb);
@@ -929,12 +932,12 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
ZFS_ENTER(zsb);
- snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
error = dmu_snapshot_realname(zsb->z_os, name, real,
- MAXNAMELEN, NULL);
+ ZFS_MAX_DATASET_NAME_LEN, NULL);
if (error == 0) {
name = real;
} else if (error != ENOTSUP) {
@@ -942,7 +945,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
}
}
- error = zfsctl_snapshot_name(ITOZSB(dip), name, MAXNAMELEN, snapname);
+ error = zfsctl_snapshot_name(ITOZSB(dip), name,
+ ZFS_MAX_DATASET_NAME_LEN, snapname);
if (error == 0)
error = zfs_secpolicy_destroy_perms(snapname, cr);
if (error != 0)
@@ -952,8 +956,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
if ((error == 0) || (error == ENOENT))
error = dsl_destroy_snapshot(snapname, B_FALSE);
out:
- kmem_free(snapname, MAXNAMELEN);
- kmem_free(real, MAXNAMELEN);
+ kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
ZFS_EXIT(zsb);
@@ -975,7 +979,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
if (!zfs_admin_snapshot)
return (EACCES);
- dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
error = SET_ERROR(EILSEQ);
@@ -997,7 +1001,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
0, cr, NULL, NULL);
}
out:
- kmem_free(dsname, MAXNAMELEN);
+ kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
return (error);
}
@@ -1075,11 +1079,11 @@ zfsctl_snapshot_mount(struct path *path, int flags)
zsb = ITOZSB(ip);
ZFS_ENTER(zsb);
- full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
+ full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
error = zfsctl_snapshot_name(zsb, dname(dentry),
- MAXNAMELEN, full_name);
+ ZFS_MAX_DATASET_NAME_LEN, full_name);
if (error)
goto error;
@@ -1153,7 +1157,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
}
path_put(&spath);
error:
- kmem_free(full_name, MAXNAMELEN);
+ kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(full_path, MAXPATHLEN);
ZFS_EXIT(zsb);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 30338ac14..09f83a5cf 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 Martin Matuska
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
* Portions Copyright 2012 Pawel Jakub Dawidek <[email protected]>
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
@@ -603,7 +604,7 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
case ZFS_PROP_SNAPSHOT_LIMIT:
if (!INGLOBALZONE(curproc)) {
uint64_t zoned;
- char setpoint[MAXNAMELEN];
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN];
/*
* Unprivileged users are allowed to modify the
* limit on things *under* (ie. contained by)
@@ -845,7 +846,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
int
zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
{
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
int error;
if ((error = zfs_secpolicy_write_perms(from,
@@ -898,7 +899,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
if (error == 0) {
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_t *origin = NULL;
dsl_dir_t *dd;
dd = clone->ds_dir;
@@ -944,6 +945,13 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
ZFS_DELEG_PERM_CREATE, cr));
}
+/* ARGSUSED */
+static int
+zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+ return (zfs_secpolicy_recv(zc, innvl, cr));
+}
+
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
@@ -1068,7 +1076,7 @@ zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
int error;
char *origin;
@@ -1211,7 +1219,7 @@ zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
- char fsname[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
error = dmu_fsname(nvpair_name(pair), fsname);
if (error != 0)
return (error);
@@ -1232,7 +1240,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
pair = nvlist_next_nvpair(innvl, pair)) {
- char fsname[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
error = dmu_fsname(nvpair_name(pair), fsname);
if (error != 0)
return (error);
@@ -2252,7 +2260,8 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
* A dataset name of maximum length cannot have any snapshots,
* so exit immediately.
*/
- if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
+ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
+ ZFS_MAX_DATASET_NAME_LEN) {
dmu_objset_rele(os, FTAG);
return (SET_ERROR(ESRCH));
}
@@ -3040,7 +3049,7 @@ zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
boolean_t fuids_ok, sa_ok;
uint64_t zplver = ZPL_VERSION;
objset_t *os = NULL;
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
char *cp;
spa_t *spa;
uint64_t spa_vers;
@@ -3406,7 +3415,7 @@ zfs_destroy_unmount_origin(const char *fsname)
return;
ds = dmu_objset_ds(os);
if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
- char originname[MAXNAMELEN];
+ char originname[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds->ds_prev, originname);
dmu_objset_rele(os, FTAG);
(void) zfs_unmount_snap(originname);
@@ -3990,77 +3999,93 @@ next:
}
}
+/*
+ * Extract properties that cannot be set PRIOR to the receipt of a dataset.
+ * For example, refquota cannot be set until after the receipt of a dataset,
+ * because in replication streams, an older/earlier snapshot may exceed the
+ * refquota. We want to receive the older/earlier snapshot, but setting
+ * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
+ * the older/earlier snapshot from being received (with EDQUOT).
+ *
+ * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
+ *
+ * libzfs will need to be judicious in handling errors encountered for
+ * props extracted by this function.
+ */
+static nvlist_t *
+extract_delay_props(nvlist_t *props)
+{
+ nvlist_t *delayprops;
+ nvpair_t *nvp, *tmp;
+ static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
+ int i;
+
+ VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
+ nvp = nvlist_next_nvpair(props, nvp)) {
+ /*
+ * strcmp() is safe because zfs_prop_to_name() always returns
+ * a bounded string.
+ */
+ for (i = 0; delayable[i] != 0; i++) {
+ if (strcmp(zfs_prop_to_name(delayable[i]),
+ nvpair_name(nvp)) == 0) {
+ break;
+ }
+ }
+ if (delayable[i] != 0) {
+ tmp = nvlist_prev_nvpair(props, nvp);
+ VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
+ VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
+ nvp = tmp;
+ }
+ }
+
+ if (nvlist_empty(delayprops)) {
+ nvlist_free(delayprops);
+ delayprops = NULL;
+ }
+ return (delayprops);
+}
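Condensed from zfs_ioc_recv_impl() further down in this diff, the ordering this function enables is:

/* condensed receive-path ordering; not literal code */
delayprops = extract_delay_props(props);	/* peel off refquota et al. */
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, props, *errors);
error = dmu_recv_stream(&drc, ...);		/* ingest the stream */
if (delayprops != NULL && error == 0) {
	/* data is in place; enforcing the quota is now safe */
	(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
	    delayprops, *errors);
}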
+
#ifdef DEBUG
static boolean_t zfs_ioc_recv_inject_err;
#endif
/*
- * inputs:
- * zc_name name of containing filesystem
- * zc_nvlist_src{_size} nvlist of properties to apply
- * zc_value name of snapshot to create
- * zc_string name of clone origin (if DRR_FLAG_CLONE)
- * zc_cookie file descriptor to recv from
- * zc_begin_record the BEGIN record of the stream (not byteswapped)
- * zc_guid force flag
- * zc_cleanup_fd cleanup-on-exit file descriptor
- * zc_action_handle handle for this guid/ds mapping (or zero on first call)
- *
- * outputs:
- * zc_cookie number of bytes read
- * zc_nvlist_dst{_size} error for each unapplied received property
- * zc_obj zprop_errflags_t
- * zc_action_handle handle for this guid/ds mapping
+ * On failure the 'errors' nvlist may be allocated and will contain
+ * descriptions of the failures. It is the caller's responsibility to free it.
*/
static int
-zfs_ioc_recv(zfs_cmd_t *zc)
+zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin,
+ nvlist_t *props, boolean_t force, boolean_t resumable, int input_fd,
+ dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes,
+ uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors)
{
- file_t *fp;
dmu_recv_cookie_t drc;
- boolean_t force = (boolean_t)zc->zc_guid;
- int fd;
int error = 0;
int props_error = 0;
- nvlist_t *errors;
offset_t off;
- nvlist_t *props = NULL; /* sent properties */
+ nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
nvlist_t *origprops = NULL; /* existing properties */
- char *origin = NULL;
- char *tosnap;
- char tofs[ZFS_MAXNAMELEN];
boolean_t first_recvd_props = B_FALSE;
+ file_t *input_fp;
- if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
- strchr(zc->zc_value, '@') == NULL ||
- strchr(zc->zc_value, '%'))
- return (SET_ERROR(EINVAL));
-
- (void) strcpy(tofs, zc->zc_value);
- tosnap = strchr(tofs, '@');
- *tosnap++ = '\0';
-
- if (zc->zc_nvlist_src != 0 &&
- (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
- zc->zc_iflags, &props)) != 0)
- return (error);
-
- fd = zc->zc_cookie;
- fp = getf(fd);
- if (fp == NULL) {
- nvlist_free(props);
+ *errors = NULL;
+ input_fp = getf(input_fd);
+ if (input_fp == NULL)
return (SET_ERROR(EBADF));
- }
-
- VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- if (zc->zc_string[0])
- origin = zc->zc_string;
error = dmu_recv_begin(tofs, tosnap,
- &zc->zc_begin_record, force, origin, &drc);
+ begin_record, force, resumable, origin, &drc);
if (error != 0)
goto out;
+ *read_bytes = 0;
+ *errflags = 0;
+ *errors = fnvlist_alloc();
+
/*
* Set properties before we receive the stream so that they are applied
* to the new data. Note that we must call dmu_recv_stream() if
@@ -4090,14 +4115,14 @@ zfs_ioc_recv(zfs_cmd_t *zc)
if (!first_recvd_props)
props_reduce(props, origprops);
if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
- (void) nvlist_merge(errors, errlist, 0);
+ (void) nvlist_merge(*errors, errlist, 0);
nvlist_free(errlist);
if (clear_received_props(tofs, origprops,
first_recvd_props ? NULL : props) != 0)
- zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+ *errflags |= ZPROP_ERR_NOCLEAR;
} else {
- zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+ *errflags |= ZPROP_ERR_NOCLEAR;
}
}
@@ -4105,24 +4130,15 @@ zfs_ioc_recv(zfs_cmd_t *zc)
props_error = dsl_prop_set_hasrecvd(tofs);
if (props_error == 0) {
+ delayprops = extract_delay_props(props);
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
- props, errors);
+ props, *errors);
}
}
- if (zc->zc_nvlist_dst_size != 0 &&
- (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
- put_nvlist(zc, errors) != 0)) {
- /*
- * Caller made zc->zc_nvlist_dst less than the minimum expected
- * size or supplied an invalid address.
- */
- props_error = SET_ERROR(EINVAL);
- }
-
- off = fp->f_offset;
- error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
- &zc->zc_action_handle);
+ off = input_fp->f_offset;
+ error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
+ action_handle);
if (error == 0) {
zfs_sb_t *zsb = NULL;
@@ -4144,11 +4160,32 @@ zfs_ioc_recv(zfs_cmd_t *zc)
} else {
error = dmu_recv_end(&drc, NULL);
}
+
+ /* Set delayed properties now, after we're done receiving. */
+ if (delayprops != NULL && error == 0) {
+ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
+ delayprops, *errors);
+ }
}
- zc->zc_cookie = off - fp->f_offset;
- if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
- fp->f_offset = off;
+ if (delayprops != NULL) {
+ /*
+ * Merge delayed props back in with initial props, in case
+ * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
+ * we have to make sure clear_received_props() includes
+ * the delayed properties).
+ *
+ * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
+ * using ASSERT() will be just like a VERIFY.
+ */
+ ASSERT(nvlist_merge(props, delayprops, 0) == 0);
+ nvlist_free(delayprops);
+ }
+
+ *read_bytes = off - input_fp->f_offset;
+ if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
+ input_fp->f_offset = off;
#ifdef DEBUG
if (zfs_ioc_recv_inject_err) {
@@ -4167,14 +4204,14 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* Since we may have left a $recvd value on the
* system, we can't clear the $hasrecvd flag.
*/
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ *errflags |= ZPROP_ERR_NORESTORE;
} else if (first_recvd_props) {
dsl_prop_unset_hasrecvd(tofs);
}
if (origprops == NULL && !drc.drc_newfs) {
/* We failed to stash the original properties. */
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ *errflags |= ZPROP_ERR_NORESTORE;
}
/*
@@ -4191,14 +4228,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* We stashed the original properties but failed to
* restore them.
*/
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ *errflags |= ZPROP_ERR_NORESTORE;
}
}
out:
- nvlist_free(props);
+ releasef(input_fd);
nvlist_free(origprops);
- nvlist_free(errors);
- releasef(fd);
if (error == 0)
error = props_error;
@@ -4208,6 +4243,176 @@ out:
/*
* inputs:
+ * zc_name name of containing filesystem (unused)
+ * zc_nvlist_src{_size} nvlist of properties to apply
+ * zc_value name of snapshot to create
+ * zc_string name of clone origin (if DRR_FLAG_CLONE)
+ * zc_cookie file descriptor to recv from
+ * zc_begin_record the BEGIN record of the stream (not byteswapped)
+ * zc_guid force flag
+ * zc_cleanup_fd cleanup-on-exit file descriptor
+ * zc_action_handle handle for this guid/ds mapping (or zero on first call)
+ *
+ * outputs:
+ * zc_cookie number of bytes read
+ * zc_obj zprop_errflags_t
+ * zc_action_handle handle for this guid/ds mapping
+ * zc_nvlist_dst{_size} error for each unapplied received property
+ */
+static int
+zfs_ioc_recv(zfs_cmd_t *zc)
+{
+ dmu_replay_record_t begin_record;
+ nvlist_t *errors = NULL;
+ nvlist_t *props = NULL;
+ char *origin = NULL;
+ char *tosnap;
+ char tofs[ZFS_MAX_DATASET_NAME_LEN];
+ int error = 0;
+
+ if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
+ strchr(zc->zc_value, '@') == NULL ||
+ strchr(zc->zc_value, '%'))
+ return (SET_ERROR(EINVAL));
+
+ (void) strcpy(tofs, zc->zc_value);
+ tosnap = strchr(tofs, '@');
+ *tosnap++ = '\0';
+
+ if (zc->zc_nvlist_src != 0 &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &props)) != 0)
+ return (error);
+
+ if (zc->zc_string[0])
+ origin = zc->zc_string;
+
+ begin_record.drr_type = DRR_BEGIN;
+ begin_record.drr_payloadlen = 0;
+ begin_record.drr_u.drr_begin = zc->zc_begin_record;
+
+ error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, zc->zc_guid,
+ B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd,
+ &zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors);
+ nvlist_free(props);
+
+ /*
+ * Now that all props, initial and delayed, are set, report the prop
+ * errors to the caller.
+ */
+ if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
+ (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
+ put_nvlist(zc, errors) != 0)) {
+ /*
+ * Caller made zc->zc_nvlist_dst less than the minimum expected
+ * size or supplied an invalid address.
+ */
+ error = SET_ERROR(EINVAL);
+ }
+
+ nvlist_free(errors);
+
+ return (error);
+}
+
+/*
+ * innvl: {
+ * "snapname" -> full name of the snapshot to create
+ * (optional) "props" -> properties to set (nvlist)
+ * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
+ * "begin_record" -> non-byteswapped dmu_replay_record_t
+ * "input_fd" -> file descriptor to read stream from (int32)
+ * (optional) "force" -> force flag (value ignored)
+ * (optional) "resumable" -> resumable flag (value ignored)
+ * (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
+ * (optional) "action_handle" -> handle for this guid/ds mapping
+ * }
+ *
+ * outnvl: {
+ * "read_bytes" -> number of bytes read
+ * "error_flags" -> zprop_errflags_t
+ * "action_handle" -> handle for this guid/ds mapping
+ * "errors" -> error for each unapplied received property (nvlist)
+ * }
+ */
+static int
+zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ dmu_replay_record_t *begin_record;
+ uint_t begin_record_size;
+ nvlist_t *errors = NULL;
+ nvlist_t *props = NULL;
+ char *snapname = NULL;
+ char *origin = NULL;
+ char *tosnap;
+ char tofs[ZFS_MAX_DATASET_NAME_LEN];
+ boolean_t force;
+ boolean_t resumable;
+ uint64_t action_handle = 0;
+ uint64_t read_bytes = 0;
+ uint64_t errflags = 0;
+ int input_fd = -1;
+ int cleanup_fd = -1;
+ int error;
+
+ error = nvlist_lookup_string(innvl, "snapname", &snapname);
+ if (error != 0)
+ return (SET_ERROR(EINVAL));
+
+ if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
+ strchr(snapname, '@') == NULL ||
+ strchr(snapname, '%'))
+ return (SET_ERROR(EINVAL));
+
+ (void) strcpy(tofs, snapname);
+ tosnap = strchr(tofs, '@');
+ *tosnap++ = '\0';
+
+ error = nvlist_lookup_string(innvl, "origin", &origin);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = nvlist_lookup_byte_array(innvl, "begin_record",
+ (uchar_t **) &begin_record, &begin_record_size);
+ if (error != 0 || begin_record_size != sizeof (*begin_record))
+ return (SET_ERROR(EINVAL));
+
+ error = nvlist_lookup_int32(innvl, "input_fd", &input_fd);
+ if (error != 0)
+ return (SET_ERROR(EINVAL));
+
+ force = nvlist_exists(innvl, "force");
+ resumable = nvlist_exists(innvl, "resumable");
+
+ error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = nvlist_lookup_nvlist(innvl, "props", &props);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, force,
+ resumable, input_fd, begin_record, cleanup_fd, &read_bytes,
+ &errflags, &action_handle, &errors);
+
+ fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
+ fnvlist_add_uint64(outnvl, "error_flags", errflags);
+ fnvlist_add_uint64(outnvl, "action_handle", action_handle);
+ fnvlist_add_nvlist(outnvl, "errors", errors);
+
+ nvlist_free(errors);
+ nvlist_free(props);
+
+ return (error);
+}
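A sketch of the innvl a caller would hand to this handler; drr_begin, infd, and do_force are hypothetical locals, while the key names are exactly the contract documented above:

nvlist_t *innvl = fnvlist_alloc();

fnvlist_add_string(innvl, "snapname", "pool/fs@snap");
fnvlist_add_byte_array(innvl, "begin_record",
    (uint8_t *)&drr_begin, sizeof (drr_begin));	/* dmu_replay_record_t */
fnvlist_add_int32(innvl, "input_fd", infd);
fnvlist_add_boolean(innvl, "resumable");	/* presence alone means true */
if (do_force)
	fnvlist_add_boolean(innvl, "force");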
+
+/*
+ * inputs:
* zc_name name of snapshot to send
* zc_cookie file descriptor to send stream to
* zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
@@ -5182,6 +5387,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
* indicates that blocks > 128KB are permitted
* (optional) "embedok" -> (value ignored)
* presence indicates DRR_WRITE_EMBEDDED records are permitted
+ * (optional) "resume_object" and "resume_offset" -> (uint64)
+ * if present, resume send stream from specified object and offset.
* }
*
* outnvl is unused
@@ -5197,6 +5404,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
file_t *fp;
boolean_t largeblockok;
boolean_t embedok;
+ uint64_t resumeobj = 0;
+ uint64_t resumeoff = 0;
error = nvlist_lookup_int32(innvl, "fd", &fd);
if (error != 0)
@@ -5207,12 +5416,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
+ (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
+ (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
+
if ((fp = getf(fd)) == NULL)
return (SET_ERROR(EBADF));
off = fp->f_offset;
- error = dmu_send(snapname, fromname, embedok, largeblockok,
- fd, fp->f_vnode, &off);
+ error = dmu_send(snapname, fromname, embedok, largeblockok, fd,
+ resumeobj, resumeoff, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
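Resuming a send is then a matter of feeding the "object" and "offset" recovered from the receive_resume_token (see the decoder sketch earlier) back in; outfd, resume_obj, and resume_off are hypothetical locals:

nvlist_t *innvl = fnvlist_alloc();
fnvlist_add_int32(innvl, "fd", outfd);			/* stream output fd */
fnvlist_add_uint64(innvl, "resume_object", resume_obj);	/* from the token */
fnvlist_add_uint64(innvl, "resume_offset", resume_off);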
@@ -5470,6 +5682,10 @@ zfs_ioctl_init(void)
POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+ zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
+ zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index a72841c15..ef04b203d 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1020,7 +1020,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
statp->f_fsid.val[0] = (uint32_t)fsid;
statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);
statp->f_type = ZFS_SUPER_MAGIC;
- statp->f_namelen = ZFS_MAXNAMELEN;
+ statp->f_namelen = MAXNAMELEN - 1;
/*
* We have all of 40 characters to stuff a string here.
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 988ffec29..863ccb930 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -2080,7 +2080,7 @@ typedef struct zil_replay_arg {
static int
zil_replay_error(zilog_t *zilog, lr_t *lr, int error)
{
- char name[MAXNAMELEN];
+ char name[ZFS_MAX_DATASET_NAME_LEN];
zilog->zl_replaying_seq--; /* didn't actually replay this one */
diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
index 089e3a1bc..8c75698e5 100644
--- a/module/zfs/zpl_inode.c
+++ b/module/zfs/zpl_inode.c
@@ -50,7 +50,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
int zfs_flags = 0;
zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
- if (dlen(dentry) > ZFS_MAXNAMELEN)
+ if (dlen(dentry) > ZFS_MAX_DATASET_NAME_LEN)
return (ERR_PTR(-ENAMETOOLONG));
crhold(cr);
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 9c89493ed..73277901f 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -61,7 +61,7 @@ unsigned long zvol_max_discard_blocks = 16384;
static kmutex_t zvol_state_lock;
static list_t zvol_state_list;
-static char *zvol_tag = "zvol_tag";
+void *zvol_tag = "zvol_tag";
/*
* The in-core state of each volume.