author | Brian Behlendorf <[email protected]> | 2016-06-29 11:26:30 -0700
committer | Brian Behlendorf <[email protected]> | 2016-06-29 13:42:23 -0700
commit | 5c27b296055301f13103ca0aa98c2ded01dcd9a0 (patch)
tree | 4813f9cb944c53140de7a12710f0ca7b12e3ec27 /module
parent | 669cf0ab298dd66e512b37f6c4a42aee2d767b80 (diff)
parent | 0dab2e84fcecff2806287efacb7c6205f346f69d (diff)
Merge branch 'illumos-2605'
Adds support for resuming interrupted zfs send streams and includes
all related send/recv bug fixes from upstream OpenZFS.
Unlike the upstream implementation, this branch does not change
the existing ioctl interface. Instead, a new ZFS_IOC_RECV_NEW ioctl
was added to support resuming zfs send streams. This was done by
applying the original upstream patch and then reverting the ioctl
changes in a follow-up patch. For this reason there are a handful
of commits between the relevant patches on this branch which are
not interoperable. This was done to make it easier to extract
the new ZFS_IOC_RECV_NEW ioctl and submit it upstream.
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #4742
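The resume point travels inside the stream itself: when dmu_send_impl() (below) is asked to resume, it packs the object/offset pair into an nvlist and attaches it as the payload of the stream's BEGIN record; on the receive side, dmu_recv_stream() unpacks that payload and resume_check() compares it against the DS_FIELD_RESUME_* values the interrupted receive left in the dataset's ZAP. A minimal userland-flavored sketch of the sender half, using the same libnvpair fnvlist wrappers the kernel code in this diff uses (pack_resume_payload is a hypothetical helper, not part of this patch):

#include <libnvpair.h>

/*
 * Build the BEGIN-record payload carrying the resume point, mirroring
 * the resumeobj/resumeoff handling in dmu_send_impl() below.
 */
static void *
pack_resume_payload(uint64_t resumeobj, uint64_t resumeoff, size_t *lenp)
{
	nvlist_t *nvl = fnvlist_alloc();
	void *payload;

	/* Keys must match what resume_check() looks up on receive. */
	fnvlist_add_uint64(nvl, "resume_object", resumeobj);
	fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
	payload = fnvlist_pack(nvl, lenp);	/* becomes drr_payloadlen bytes */
	fnvlist_free(nvl);
	return (payload);
}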
Diffstat (limited to 'module')
-rw-r--r-- | module/zcommon/zfs_fletcher.c | 14
-rw-r--r-- | module/zcommon/zfs_namecheck.c | 46
-rw-r--r-- | module/zcommon/zfs_prop.c | 4
-rw-r--r-- | module/zfs/dmu_objset.c | 34
-rw-r--r-- | module/zfs/dmu_send.c | 739
-rw-r--r-- | module/zfs/dmu_traverse.c | 81
-rw-r--r-- | module/zfs/dsl_bookmark.c | 4
-rw-r--r-- | module/zfs/dsl_dataset.c | 220
-rw-r--r-- | module/zfs/dsl_deleg.c | 6
-rw-r--r-- | module/zfs/dsl_destroy.c | 12
-rw-r--r-- | module/zfs/dsl_dir.c | 27
-rw-r--r-- | module/zfs/dsl_prop.c | 4
-rw-r--r-- | module/zfs/dsl_scan.c | 6
-rw-r--r-- | module/zfs/dsl_userhold.c | 6
-rw-r--r-- | module/zfs/spa.c | 24
-rw-r--r-- | module/zfs/spa_history.c | 6
-rw-r--r-- | module/zfs/zfs_ctldir.c | 56
-rw-r--r-- | module/zfs/zfs_ioctl.c | 390
-rw-r--r-- | module/zfs/zfs_vfsops.c | 2
-rw-r--r-- | module/zfs/zil.c | 2
-rw-r--r-- | module/zfs/zpl_inode.c | 2
-rw-r--r-- | module/zfs/zvol.c | 2
22 files changed, 1301 insertions, 386 deletions
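For context, the kernel half shown in this module-only diffstat is driven from userland roughly as follows: dmu_send() gains resumeobj/resumeoff parameters (see the dmu_send.c hunks below), and in the matching libzfs_core change, which lies outside this diffstat, those values are decoded from the target's receive_resume_token property and passed down. A hedged sketch — lzc_send_resume() and its signature are taken from the upstream illumos-2605 work, not from this diff:

#include <libzfs_core.h>

/*
 * Resume an interrupted send: the object/offset decoded from the
 * receive_resume_token end up as dmu_send()'s resumeobj/resumeoff.
 * LZC_SEND_FLAG_* options omitted (0) for brevity.
 */
static int
resume_send(const char *tosnap, const char *fromsnap, int outfd,
    uint64_t resumeobj, uint64_t resumeoff)
{
	return (lzc_send_resume(tosnap, fromsnap, outfd, 0,
	    resumeobj, resumeoff));
}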
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index 2c2d01d5c..e76c5b8a5 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -334,7 +334,12 @@ fletcher_4_impl_get(void) void fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) { - const fletcher_4_ops_t *ops = fletcher_4_impl_get(); + const fletcher_4_ops_t *ops; + + if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t))) + ops = fletcher_4_impl_get(); + else + ops = &fletcher_4_scalar_ops; ops->init(zcp); ops->compute(buf, size, zcp); @@ -345,7 +350,12 @@ fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) void fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) { - const fletcher_4_ops_t *ops = fletcher_4_impl_get(); + const fletcher_4_ops_t *ops; + + if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t))) + ops = fletcher_4_impl_get(); + else + ops = &fletcher_4_scalar_ops; ops->init(zcp); ops->compute_byteswap(buf, size, zcp); diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index ff724be58..b58071bed 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -69,7 +69,7 @@ zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what) { const char *loc; - if (strlen(path) >= MAXNAMELEN) { + if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -140,27 +140,8 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what) /* * Make sure the name is not too long. - * - * ZFS_MAXNAMELEN is the maximum dataset length used in the userland - * which is the same as MAXNAMELEN used in the kernel. - * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all - * places using MAXNAMELEN. - * - * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name - * length is 20 bytes. This 20 bytes is broken down as follows to - * provide a maximum safe <pool>/<dataset>[@snapshot] length of only - * 18 bytes. To ensure bytes are left for <dataset>[@snapshot] the - * <pool> portition is futher limited to 9 bytes. For 2.6.27 and - * newer kernels this limit is set to MAXNAMELEN. - * - * <pool>/<dataset> + <partition> + <newline> - * (18) + (1) + (1) */ -#ifdef HAVE_KOBJ_NAME_LEN - if (strlen(path) > 18) { -#else - if (strlen(path) >= MAXNAMELEN) { -#endif /* HAVE_KOBJ_NAME_LEN */ + if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -289,7 +270,7 @@ mountpoint_namecheck(const char *path, namecheck_err_t *why) while (*end != '/' && *end != '\0') end++; - if (end - start >= MAXNAMELEN) { + if (end - start >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -314,27 +295,8 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) /* * Make sure the name is not too long. - * - * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland - * which is the same as MAXNAMELEN used in the kernel. - * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all - * places using MAXNAMELEN. - * - * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name - * length is 20 bytes. This 20 bytes is broken down as follows to - * provide a maximum safe <pool>/<dataset>[@snapshot] length of only - * 18 bytes. To ensure bytes are left for <dataset>[@snapshot] the - * <pool> portition is futher limited to 8 bytes. For 2.6.27 and - * newer kernels this limit is set to MAXNAMELEN. 
- * - * <pool>/<dataset> + <partition> + <newline> - * (18) + (1) + (1) */ -#ifdef HAVE_KOBJ_NAME_LEN - if (strlen(pool) > 8) { -#else - if (strlen(pool) >= MAXNAMELEN) { -#endif /* HAVE_KOBJ_NAME_LEN */ + if (strlen(pool) >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 1dbeab084..1d68ca29e 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -375,6 +375,10 @@ zfs_prop_init(void) zprop_register_string(ZFS_PROP_SELINUX_ROOTCONTEXT, "rootcontext", "none", PROP_DEFAULT, ZFS_TYPE_DATASET, "<selinux rootcontext>", "ROOTCONTEXT"); + zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN, + "receive_resume_token", + NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "<string token>", "RESUMETOK"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index cdc897726..22ca84d96 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -405,6 +405,17 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, * checksum/compression/copies. */ if (ds != NULL) { + boolean_t needlock = B_FALSE; + + /* + * Note: it's valid to open the objset if the dataset is + * long-held, in which case the pool_config lock will not + * be held. + */ + if (!dsl_pool_config_held(dmu_objset_pool(os))) { + needlock = B_TRUE; + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); + } err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); @@ -461,6 +472,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dnodesize_changed_cb, os); } } + if (needlock) + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); @@ -520,6 +533,13 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) { int err = 0; + /* + * We shouldn't be doing anything with dsl_dataset_t's unless the + * pool_config lock is held, or the dataset is long-held. 
+ */ + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) || + dsl_dataset_long_held(ds)); + mutex_enter(&ds->ds_opening_lock); if (ds->ds_objset == NULL) { objset_t *os; @@ -651,7 +671,7 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag) { dsl_pool_t *dp; dsl_dataset_t *ds, *newds; - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; ds = os->os_dsl_dataset; VERIFY3P(ds, !=, NULL); @@ -875,6 +895,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx) if (strchr(doca->doca_name, '@') != NULL) return (SET_ERROR(EINVAL)); + if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); + error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail); if (error != 0) return (error); @@ -961,6 +984,9 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) if (strchr(doca->doca_clone, '@') != NULL) return (SET_ERROR(EINVAL)); + if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); + error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail); if (error != 0) return (error); @@ -1000,7 +1026,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) const char *tail; dsl_dataset_t *origin, *ds; uint64_t obj; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail)); VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); @@ -2027,7 +2053,7 @@ dmu_objset_get_user(objset_t *os) /* * Determine name of filesystem, given name of snapshot. - * buf must be at least MAXNAMELEN bytes + * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ int dmu_fsname(const char *snapname, char *buf) @@ -2035,7 +2061,7 @@ dmu_fsname(const char *snapname, char *buf) char *atp = strchr(snapname, '@'); if (atp == NULL) return (SET_ERROR(EINVAL)); - if (atp - snapname >= MAXNAMELEN) + if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strlcpy(buf, snapname, atp - snapname + 1); return (0); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 901386a5a..80f7dc1aa 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -20,10 +20,11 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright 2014 HybridCluster. All rights reserved. + * Copyright 2016 RackTop Systems. * Copyright (c) 2016 Actifio, Inc. All rights reserved. 
*/ @@ -62,14 +63,18 @@ int zfs_send_corrupt_data = B_FALSE; int zfs_send_queue_length = 16 * 1024 * 1024; int zfs_recv_queue_length = 16 * 1024 * 1024; +/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */ +int zfs_send_set_freerecords_bit = B_TRUE; static char *dmu_recv_tag = "dmu_recv_tag"; -static const char *recv_clone_name = "%recv"; +const char *recv_clone_name = "%recv"; #define BP_SPAN(datablkszsec, indblkshift, level) \ (((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \ (level) * (indblkshift - SPA_BLKPTRSHIFT))) +static void byteswap_record(dmu_replay_record_t *drr); + struct send_thread_arg { bqueue_t q; dsl_dataset_t *ds; /* Dataset to traverse */ @@ -77,6 +82,7 @@ struct send_thread_arg { int flags; /* flags to pass to traverse_dataset */ int error_code; boolean_t cancel; + zbookmark_phys_t resume; }; struct send_block_record { @@ -99,8 +105,21 @@ dump_bytes_cb(void *arg) { dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg; dmu_sendarg_t *dsp = dbi->dbi_dsp; - dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; + dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os); ssize_t resid; /* have to get resid to get detailed errno */ + + /* + * The code does not rely on this (len being a multiple of 8). We keep + * this assertion because of the corresponding assertion in + * receive_read(). Keeping this assertion ensures that we do not + * inadvertently break backwards compatibility (causing the assertion + * in receive_read() to trigger on old software). + * + * Removing the assertions could be rolled into a new feature that uses + * data that isn't 8-byte aligned; if the assertions were removed, a + * feature flag would have to be added. + */ + ASSERT0(dbi->dbi_len % 8); dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, @@ -169,6 +188,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) return (0); } +/* + * Fill in the drr_free struct, or perform aggregation if the previous record is + * also a free record, and the two are adjacent. + * + * Note that we send free records even for a full send, because we want to be + * able to receive a full send as a clone, which requires a list of all the free + * and freeobject records that were generated on the source. + */ static int dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) @@ -180,7 +207,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, * that the receiving system doesn't have any dbufs in the range * being freed. This is always true because there is a one-record * constraint: we only send one WRITE record for any given - * object+offset. We know that the one-record constraint is + * object,offset. We know that the one-record constraint is * true because we always send data in increasing order by * object,offset. * @@ -192,15 +219,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, (object == dsp->dsa_last_data_object && offset > dsp->dsa_last_data_offset)); - /* - * If we are doing a non-incremental send, then there can't - * be any data in the dataset we're receiving into. Therefore - * a free record would simply be a no-op. Save space by not - * sending it to begin with. - */ - if (!dsp->dsa_incremental) - return (0); - if (length != -1ULL && offset + length < offset) length = -1ULL; @@ -378,10 +396,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); - /* See comment in dump_free(). 
*/ - if (!dsp->dsa_incremental) - return (0); - /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, * push it out, since free block aggregation can only be done for @@ -428,6 +442,19 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) { struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); + if (object < dsp->dsa_resume_object) { + /* + * Note: when resuming, we will visit all the dnodes in + * the block of dnodes that we are resuming from. In + * this case it's unnecessary to send the dnodes prior to + * the one we are resuming from. We should be at most one + * block's worth of dnodes behind the resume point. + */ + ASSERT3U(dsp->dsa_resume_object - object, <, + 1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)); + return (0); + } + if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) return (dump_freeobjects(dsp, object, 1)); @@ -509,6 +536,9 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, uint64_t record_size; int err = 0; + ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || + zb->zb_object >= sta->resume.zb_object); + if (sta->cancel) return (SET_ERROR(EINTR)); @@ -545,8 +575,10 @@ send_traverse_thread(void *arg) struct send_block_record *data; if (st_arg->ds != NULL) { - err = traverse_dataset(st_arg->ds, st_arg->fromtxg, - st_arg->flags, send_cb, arg); + err = traverse_dataset_resume(st_arg->ds, + st_arg->fromtxg, &st_arg->resume, + st_arg->flags, send_cb, st_arg); + if (err != EINTR) st_arg->error_code = err; } @@ -575,6 +607,9 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) ASSERT3U(zb->zb_level, >=, 0); + ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || + zb->zb_object >= dsa->dsa_resume_object); + if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { return (0); @@ -637,6 +672,10 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) uint64_t offset; ASSERT0(zb->zb_level); + ASSERT(zb->zb_object > dsa->dsa_resume_object || + (zb->zb_object == dsa->dsa_resume_object && + zb->zb_blkid * blksz >= dsa->dsa_resume_offset)); + if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { @@ -697,8 +736,10 @@ get_next_record(bqueue_t *bq, struct send_block_record *data) */ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, - zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok, - boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off) + zfs_bookmark_phys_t *ancestor_zb, + boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, int outfd, + uint64_t resumeobj, uint64_t resumeoff, + vnode_t *vp, offset_t *off) { objset_t *os; dmu_replay_record_t *drr; @@ -707,6 +748,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, uint64_t fromtxg = 0; uint64_t featureflags = 0; struct send_thread_arg to_arg; + void *payload = NULL; + size_t payload_len = 0; struct send_block_record *to_data; err = dmu_objset_from_ds(to_ds, &os); @@ -721,6 +764,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, DMU_SUBSTREAM); + bzero(&to_arg, sizeof (to_arg)); + #ifdef _KERNEL if (dmu_objset_type(os) == DMU_OST_ZFS) { uint64_t version; @@ -746,6 +791,10 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4; } + if (resumeobj != 0 || resumeoff != 0) { + featureflags |= DMU_BACKUP_FEATURE_RESUMING; + } + DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo, 
featureflags); @@ -757,6 +806,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid; if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; + if (zfs_send_set_freerecords_bit) + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = @@ -779,8 +830,9 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; dsp->dsa_pending_op = PENDING_NONE; - dsp->dsa_incremental = (ancestor_zb != NULL); dsp->dsa_featureflags = featureflags; + dsp->dsa_resume_object = resumeobj; + dsp->dsa_resume_offset = resumeoff; mutex_enter(&to_ds->ds_sendstream_lock); list_insert_head(&to_ds->ds_sendstreams, dsp); @@ -789,7 +841,26 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsl_dataset_long_hold(to_ds, FTAG); dsl_pool_rele(dp, tag); - if (dump_record(dsp, NULL, 0) != 0) { + if (resumeobj != 0 || resumeoff != 0) { + dmu_object_info_t to_doi; + nvlist_t *nvl; + err = dmu_object_info(os, resumeobj, &to_doi); + if (err != 0) + goto out; + SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0, + resumeoff / to_doi.doi_data_block_size); + + nvl = fnvlist_alloc(); + fnvlist_add_uint64(nvl, "resume_object", resumeobj); + fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + payload = fnvlist_pack(nvl, &payload_len); + drr->drr_payloadlen = payload_len; + fnvlist_free(nvl); + } + + err = dump_record(dsp, payload, payload_len); + fnvlist_pack_free(payload, payload_len); + if (err != 0) { err = dsp->dsa_err; goto out; } @@ -899,19 +970,19 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, outfd, 0, 0, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, outfd, 0, 0, vp, off); } dsl_dataset_rele(ds, FTAG); return (err); } int -dmu_send(const char *tosnap, const char *fromsnap, - boolean_t embedok, boolean_t large_block_ok, - int outfd, vnode_t *vp, offset_t *off) +dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff, + vnode_t *vp, offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; @@ -978,10 +1049,12 @@ dmu_send(const char *tosnap, const char *fromsnap, return (err); } err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, + outfd, resumeobj, resumeoff, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, + outfd, resumeobj, resumeoff, vp, off); } if (owned) dsl_dataset_disown(ds, FTAG); @@ -1221,6 +1294,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* already checked */ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + ASSERT(!(featureflags & DMU_BACKUP_FEATURE_RESUMING)); if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM || @@ -1233,6 +1307,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) spa_version(dp->dp_spa) < SPA_VERSION_SA) return (SET_ERROR(ENOTSUP)); + if (drba->drba_cookie->drc_resumable && + !spa_feature_is_enabled(dp->dp_spa, 
SPA_FEATURE_EXTENSIBLE_DATASET)) + return (SET_ERROR(ENOTSUP)); + /* * The receiving code doesn't know how to translate a WRITE_EMBEDDED * record to a plan WRITE record, so the pool must have the @@ -1269,7 +1347,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ - if (flags & DRR_FLAG_CLONE) { + if (flags & DRR_FLAG_CLONE || drba->drba_origin) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } @@ -1278,7 +1356,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); } else if (error == ENOENT) { /* target fs does not exist; must be a full backup or clone */ - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; /* * If it's a non-clone incremental, we are missing the @@ -1288,8 +1366,17 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) drba->drba_origin)) return (SET_ERROR(ENOENT)); + /* + * If we're receiving a full send as a clone, and it doesn't + * contain all the necessary free records and freeobject + * records, reject it. + */ + if (fromguid == 0 && drba->drba_origin && + !(flags & DRR_FLAG_FREERECORDS)) + return (SET_ERROR(EINVAL)); + /* Open the parent of tofs */ - ASSERT3U(strlen(tofs), <, MAXNAMELEN); + ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); error = dsl_dataset_hold(dp, buf, FTAG, &ds); if (error != 0) @@ -1327,7 +1414,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } - if (dsl_dataset_phys(origin)->ds_guid != fromguid) { + if (dsl_dataset_phys(origin)->ds_guid != fromguid && + fromguid != 0) { dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); @@ -1345,15 +1433,16 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) { dmu_recv_begin_arg_t *drba = arg; dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; struct drr_begin *drrb = drba->drba_cookie->drc_drrb; const char *tofs = drba->drba_cookie->drc_tofs; dsl_dataset_t *ds, *newds; uint64_t dsobj; int error; - uint64_t crflags; + uint64_t crflags = 0; - crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 
- DS_FLAG_CI_DATASET : 0; + if (drrb->drr_flags & DRR_FLAG_CI_DATA) + crflags |= DS_FLAG_CI_DATASET; error = dsl_dataset_hold(dp, tofs, FTAG, &ds); if (error == 0) { @@ -1391,6 +1480,32 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) } VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + if (drba->drba_cookie->drc_resumable) { + uint64_t one = 1; + uint64_t zero = 0; + + dsl_dataset_zapify(newds, tx); + if (drrb->drr_fromguid != 0) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_FROMGUID, + 8, 1, &drrb->drr_fromguid, tx)); + } + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TOGUID, + 8, 1, &drrb->drr_toguid, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TONAME, + 1, strlen(drrb->drr_toname) + 1, drrb->drr_toname, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OBJECT, + 8, 1, &one, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OFFSET, + 8, 1, &zero, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES, + 8, 1, &zero, tx)); + if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_EMBED_DATA) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK, + 8, 1, &one, tx)); + } + } + dmu_buf_will_dirty(newds->ds_dbuf, tx); dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; @@ -1408,56 +1523,194 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) spa_history_log_internal_ds(newds, "receive", tx, ""); } +static int +dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) +{ + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + int error; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + dsl_dataset_t *ds; + const char *tofs = drba->drba_cookie->drc_tofs; + uint64_t val; + + /* 6 extra bytes for /%recv */ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + + /* already checked */ + ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING); + + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM || + drrb->drr_type >= DMU_OST_NUMTYPES) + return (SET_ERROR(EINVAL)); + + /* Verify pool version supports SA if SA_SPILL feature set */ + if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && + spa_version(dp->dp_spa) < SPA_VERSION_SA) + return (SET_ERROR(ENOTSUP)); + + /* + * The receiving code doesn't know how to translate a WRITE_EMBEDDED + * record to a plain WRITE record, so the pool must have the + * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED + * records. Same with WRITE_EMBEDDED records that use LZ4 compression. 
+ */ + if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) + return (SET_ERROR(ENOTSUP)); + if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) + return (SET_ERROR(ENOTSUP)); + + (void) snprintf(recvname, sizeof (recvname), "%s/%s", + tofs, recv_clone_name); + + if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + /* %recv does not exist; continue in tofs */ + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (error != 0) + return (error); + } + + /* check that ds is marked inconsistent */ + if (!DS_IS_INCONSISTENT(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + /* check that there is resuming data, and that the toguid matches */ + if (!dsl_dataset_is_zapified(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + error = zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val); + if (error != 0 || drrb->drr_toguid != val) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + /* + * Check if the receive is still running. If so, it will be owned. + * Note that nothing else can own the dataset (e.g. after the receive + * fails) because it will be marked inconsistent. + */ + if (dsl_dataset_has_owner(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EBUSY)); + } + + /* There should not be any snapshots of this fs yet. */ + if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + /* + * Note: resume point will be checked when we process the first WRITE + * record. + */ + + /* check that the origin matches */ + val = 0; + (void) zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val); + if (drrb->drr_fromguid != val) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + +static void +dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) +{ + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + const char *tofs = drba->drba_cookie->drc_tofs; + dsl_dataset_t *ds; + uint64_t dsobj; + /* 6 extra bytes for /%recv */ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + + (void) snprintf(recvname, sizeof (recvname), "%s/%s", + tofs, recv_clone_name); + + if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + /* %recv does not exist; continue in tofs */ + VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds)); + drba->drba_cookie->drc_newfs = B_TRUE; + } + + /* clear the inconsistent flag so that we can own it */ + ASSERT(DS_IS_INCONSISTENT(ds)); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; + dsobj = ds->ds_object; + dsl_dataset_rele(ds, FTAG); + + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds)); + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; + + ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds))); + + drba->drba_cookie->drc_ds = ds; + + spa_history_log_internal_ds(ds, "resume receive", tx, ""); +} + /* * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() * succeeds; otherwise we will leak the holds on the datasets. 
*/ int -dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, - boolean_t force, char *origin, dmu_recv_cookie_t *drc) +dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, + boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc) { dmu_recv_begin_arg_t drba = { 0 }; - dmu_replay_record_t *drr; bzero(drc, sizeof (dmu_recv_cookie_t)); - drc->drc_drrb = drrb; + drc->drc_drr_begin = drr_begin; + drc->drc_drrb = &drr_begin->drr_u.drr_begin; drc->drc_tosnap = tosnap; drc->drc_tofs = tofs; drc->drc_force = force; + drc->drc_resumable = resumable; drc->drc_cred = CRED(); - if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { drc->drc_byteswap = B_TRUE; - else if (drrb->drr_magic != DMU_BACKUP_MAGIC) - return (SET_ERROR(EINVAL)); - - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); - drr->drr_type = DRR_BEGIN; - drr->drr_u.drr_begin = *drc->drc_drrb; - if (drc->drc_byteswap) { - fletcher_4_incremental_byteswap(drr, + fletcher_4_incremental_byteswap(drr_begin, sizeof (dmu_replay_record_t), &drc->drc_cksum); - } else { - fletcher_4_incremental_native(drr, + byteswap_record(drr_begin); + } else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) { + fletcher_4_incremental_native(drr_begin, sizeof (dmu_replay_record_t), &drc->drc_cksum); - } - kmem_free(drr, sizeof (dmu_replay_record_t)); - - if (drc->drc_byteswap) { - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); + } else { + return (SET_ERROR(EINVAL)); } drba.drba_origin = origin; drba.drba_cookie = drc; drba.drba_cred = CRED(); - return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, - &drba, 5, ZFS_SPACE_CHECK_NORMAL)); + if (DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RESUMING) { + return (dsl_sync_task(tofs, + dmu_recv_resume_begin_check, dmu_recv_resume_begin_sync, + &drba, 5, ZFS_SPACE_CHECK_NORMAL)); + } else { + return (dsl_sync_task(tofs, + dmu_recv_begin_check, dmu_recv_begin_sync, + &drba, 5, ZFS_SPACE_CHECK_NORMAL)); + } } struct receive_record_arg { @@ -1469,6 +1722,7 @@ struct receive_record_arg { */ arc_buf_t *write_buf; int payload_size; + uint64_t bytes_read; /* bytes read from stream when record created */ boolean_t eos_marker; /* Marks the end of the stream */ bqueue_node_t node; }; @@ -1477,6 +1731,7 @@ struct receive_writer_arg { objset_t *os; boolean_t byteswap; bqueue_t q; + /* * These three args are used to signal to the main thread that we're * done. @@ -1484,15 +1739,34 @@ struct receive_writer_arg { kmutex_t mutex; kcondvar_t cv; boolean_t done; + int err; /* A map from guid to dataset to help handle dedup'd streams. */ avl_tree_t *guid_to_ds_map; + boolean_t resumable; + uint64_t last_object, last_offset; + uint64_t bytes_read; /* bytes read when current record created */ +}; + +struct objlist { + list_t list; /* List of struct receive_objnode. */ + /* + * Last object looked up. Used to assert that objects are being looked + * up in ascending order. 
+ */ + uint64_t last_lookup; +}; + +struct receive_objnode { + list_node_t node; + uint64_t object; }; struct receive_arg { objset_t *os; vnode_t *vp; /* The vnode to read the stream from */ uint64_t voff; /* The current offset in the stream */ + uint64_t bytes_read; /* * A record that has had its payload read in, but hasn't yet been handed * off to the worker thread. @@ -1505,12 +1779,7 @@ struct receive_arg { int err; boolean_t byteswap; /* Sorted list of objects not to issue prefetches for. */ - list_t ignore_obj_list; -}; - -struct receive_ign_obj_node { - list_node_t node; - uint64_t object; + struct objlist ignore_objlist; }; typedef struct guid_map_entry { @@ -1553,7 +1822,10 @@ receive_read(struct receive_arg *ra, int len, void *buf) { int done = 0; - /* some things will require 8-byte alignment, so everything must */ + /* + * The code doesn't rely on this (lengths being multiples of 8). See + * comment in dump_bytes. + */ ASSERT0(len % 8); while (done < len) { @@ -1564,14 +1836,21 @@ receive_read(struct receive_arg *ra, int len, void *buf) ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); - if (resid == len - done) - ra->err = SET_ERROR(EINVAL); + if (resid == len - done) { + /* + * Note: ECKSUM indicates that the receive + * was interrupted and can potentially be resumed. + */ + ra->err = SET_ERROR(ECKSUM); + } ra->voff += len - done - resid; done = len - resid; if (ra->err != 0) return (ra->err); } + ra->bytes_read += len; + ASSERT3U(done, ==, len); return (0); } @@ -1675,6 +1954,43 @@ deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size) } } +static void +save_resume_state(struct receive_writer_arg *rwa, + uint64_t object, uint64_t offset, dmu_tx_t *tx) +{ + int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; + + if (!rwa->resumable) + return; + + /* + * We use ds_resume_bytes[] != 0 to indicate that we need to + * update this on disk, so it must not be 0. + */ + ASSERT(rwa->bytes_read != 0); + + /* + * We only resume from write records, which have a valid + * (non-meta-dnode) object number. + */ + ASSERT(object != 0); + + /* + * For resuming to work correctly, we must receive records in order, + * sorted by object,offset. This is checked by the callers, but + * assert it here for good measure. + */ + ASSERT3U(object, >=, rwa->os->os_dsl_dataset->ds_resume_object[txgoff]); + ASSERT(object != rwa->os->os_dsl_dataset->ds_resume_object[txgoff] || + offset >= rwa->os->os_dsl_dataset->ds_resume_offset[txgoff]); + ASSERT3U(rwa->bytes_read, >=, + rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff]); + + rwa->os->os_dsl_dataset->ds_resume_object[txgoff] = object; + rwa->os->os_dsl_dataset->ds_resume_offset[txgoff] = offset; + rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read; +} + noinline static int receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, void *data) @@ -1773,6 +2089,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, dmu_buf_rele(db, FTAG); } dmu_tx_commit(tx); + return (0); } @@ -1782,13 +2099,14 @@ receive_freeobjects(struct receive_writer_arg *rwa, struct drr_freeobjects *drrfo) { uint64_t obj; + int next_err = 0; if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) return (SET_ERROR(EINVAL)); for (obj = drrfo->drr_firstobj == 0 ? 
1 : drrfo->drr_firstobj; - obj < drrfo->drr_firstobj + drrfo->drr_numobjs; - (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) { + obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0; + next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) { dmu_object_info_t doi; int err; @@ -1804,7 +2122,8 @@ receive_freeobjects(struct receive_writer_arg *rwa, if (err != 0) return (err); } - + if (next_err != ESRCH) + return (next_err); return (0); } @@ -1820,6 +2139,18 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); + /* + * For resuming to work, records must be in increasing order + * by (object, offset). + */ + if (drrw->drr_object < rwa->last_object || + (drrw->drr_object == rwa->last_object && + drrw->drr_offset < rwa->last_offset)) { + return (SET_ERROR(EINVAL)); + } + rwa->last_object = drrw->drr_object; + rwa->last_offset = drrw->drr_offset; + if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); @@ -1842,8 +2173,17 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0) return (SET_ERROR(EINVAL)); dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); + + /* + * Note: If the receive fails, we want the resume stream to start + * with the same record that we last successfully received (as opposed + * to the next record), so that we can verify that we are + * resuming from the correct location. + */ + save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx); dmu_tx_commit(tx); dmu_buf_rele(bonus, FTAG); + return (0); } @@ -1902,43 +2242,48 @@ receive_write_byref(struct receive_writer_arg *rwa, dmu_write(rwa->os, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); dmu_buf_rele(dbp, FTAG); + + /* See comment in restore_write. */ + save_resume_state(rwa, drrwbr->drr_object, drrwbr->drr_offset, tx); dmu_tx_commit(tx); return (0); } static int receive_write_embedded(struct receive_writer_arg *rwa, - struct drr_write_embedded *drrwnp, void *data) + struct drr_write_embedded *drrwe, void *data) { dmu_tx_t *tx; int err; - if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) + if (drrwe->drr_offset + drrwe->drr_length < drrwe->drr_offset) return (EINVAL); - if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE) + if (drrwe->drr_psize > BPE_PAYLOAD_SIZE) return (EINVAL); - if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES) + if (drrwe->drr_etype >= NUM_BP_EMBEDDED_TYPES) return (EINVAL); - if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) + if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); tx = dmu_tx_create(rwa->os); - dmu_tx_hold_write(tx, drrwnp->drr_object, - drrwnp->drr_offset, drrwnp->drr_length); + dmu_tx_hold_write(tx, drrwe->drr_object, + drrwe->drr_offset, drrwe->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { dmu_tx_abort(tx); return (err); } - dmu_write_embedded(rwa->os, drrwnp->drr_object, - drrwnp->drr_offset, data, drrwnp->drr_etype, - drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, + dmu_write_embedded(rwa->os, drrwe->drr_object, + drrwe->drr_offset, data, drrwe->drr_etype, + drrwe->drr_compression, drrwe->drr_lsize, drrwe->drr_psize, rwa->byteswap ^ ZFS_HOST_BYTEORDER, tx); + /* See comment in restore_write. 
*/ + save_resume_state(rwa, drrwe->drr_object, drrwe->drr_offset, tx); dmu_tx_commit(tx); return (0); } @@ -2012,10 +2357,16 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) static void dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) { - char name[MAXNAMELEN]; - dsl_dataset_name(drc->drc_ds, name); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); - (void) dsl_destroy_head(name); + if (drc->drc_resumable) { + /* wait for our resume state to be written to disk */ + txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0); + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + } else { + char name[ZFS_MAX_DATASET_NAME_LEN]; + dsl_dataset_name(drc->drc_ds, name); + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + (void) dsl_destroy_head(name); + } } static void @@ -2044,12 +2395,17 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) if (len != 0) { ASSERT3U(len, <=, SPA_MAXBLOCKSIZE); - ra->rrd->payload = buf; - ra->rrd->payload_size = len; - err = receive_read(ra, len, ra->rrd->payload); + err = receive_read(ra, len, buf); if (err != 0) return (err); - receive_cksum(ra, len, ra->rrd->payload); + receive_cksum(ra, len, buf); + + /* note: rrd is NULL when reading the begin record's payload */ + if (ra->rrd != NULL) { + ra->rrd->payload = buf; + ra->rrd->payload_size = len; + ra->rrd->bytes_read = ra->bytes_read; + } } ra->prev_cksum = ra->cksum; @@ -2057,6 +2413,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP); err = receive_read(ra, sizeof (ra->next_rrd->header), &ra->next_rrd->header); + ra->next_rrd->bytes_read = ra->bytes_read; if (err != 0) { kmem_free(ra->next_rrd, sizeof (*ra->next_rrd)); ra->next_rrd = NULL; @@ -2096,6 +2453,70 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) return (0); } +static void +objlist_create(struct objlist *list) +{ + list_create(&list->list, sizeof (struct receive_objnode), + offsetof(struct receive_objnode, node)); + list->last_lookup = 0; +} + +static void +objlist_destroy(struct objlist *list) +{ + struct receive_objnode *n; + + for (n = list_remove_head(&list->list); + n != NULL; n = list_remove_head(&list->list)) { + kmem_free(n, sizeof (*n)); + } + list_destroy(&list->list); +} + +/* + * This function looks through the objlist to see if the specified object number + * is contained in the objlist. In the process, it will remove all object + * numbers in the list that are smaller than the specified object number. Thus, + * any lookup of an object number smaller than a previously looked up object + * number will always return false; therefore, all lookups should be done in + * ascending order. + */ +static boolean_t +objlist_exists(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = list_head(&list->list); + ASSERT3U(object, >=, list->last_lookup); + list->last_lookup = object; + while (node != NULL && node->object < object) { + VERIFY3P(node, ==, list_remove_head(&list->list)); + kmem_free(node, sizeof (*node)); + node = list_head(&list->list); + } + return (node != NULL && node->object == object); +} + +/* + * The objlist is a list of object numbers stored in ascending order. However, + * the insertion of new object numbers does not seek out the correct location to + * store a new object number; instead, it appends it to the list for simplicity. + * Thus, any users must take care to only insert new object numbers in ascending + * order. 
+ */ +static void +objlist_insert(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP); + node->object = object; +#ifdef ZFS_DEBUG + { + struct receive_objnode *last_object = list_tail(&list->list); + uint64_t last_objnum = (last_object != NULL ? last_object->object : 0); + ASSERT3U(node->object, >, last_objnum); + } +#endif + list_insert_tail(&list->list, node); +} + /* * Issue the prefetch reads for any necessary indirect blocks. * @@ -2118,13 +2539,7 @@ static void receive_read_prefetch(struct receive_arg *ra, uint64_t object, uint64_t offset, uint64_t length) { - struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list); - while (node != NULL && node->object < object) { - VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list)); - kmem_free(node, sizeof (*node)); - node = list_head(&ra->ignore_obj_list); - } - if (node == NULL || node->object > object) { + if (!objlist_exists(&ra->ignore_objlist, object)) { dmu_prefetch(ra->os, object, 1, offset, length, ZIO_PRIORITY_SYNC_READ); } @@ -2157,20 +2572,7 @@ receive_read_record(struct receive_arg *ra) */ if (err == ENOENT || (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { - struct receive_ign_obj_node *node = - kmem_zalloc(sizeof (*node), - KM_SLEEP); - node->object = drro->drr_object; -#ifdef ZFS_DEBUG - { - struct receive_ign_obj_node *last_object = - list_tail(&ra->ignore_obj_list); - uint64_t last_objnum = (last_object != NULL ? - last_object->object : 0); - ASSERT3U(node->object, >, last_objnum); - } -#endif - list_insert_tail(&ra->ignore_obj_list, node); + objlist_insert(&ra->ignore_objlist, drro->drr_object); err = 0; } return (err); @@ -2236,7 +2638,7 @@ receive_read_record(struct receive_arg *ra) { struct drr_end *drre = &ra->rrd->header.drr_u.drr_end; if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum)) - return (SET_ERROR(EINVAL)); + return (SET_ERROR(ECKSUM)); return (0); } case DRR_SPILL: @@ -2263,6 +2665,10 @@ receive_process_record(struct receive_writer_arg *rwa, { int err; + /* Processing in order, therefore bytes_read should be increasing. */ + ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read); + rwa->bytes_read = rrd->bytes_read; + switch (rrd->header.drr_type) { case DRR_OBJECT: { @@ -2357,6 +2763,32 @@ receive_writer_thread(void *arg) mutex_exit(&rwa->mutex); } +static int +resume_check(struct receive_arg *ra, nvlist_t *begin_nvl) +{ + uint64_t val; + objset_t *mos = dmu_objset_pool(ra->os)->dp_meta_objset; + uint64_t dsobj = dmu_objset_id(ra->os); + uint64_t resume_obj, resume_off; + + if (nvlist_lookup_uint64(begin_nvl, + "resume_object", &resume_obj) != 0 || + nvlist_lookup_uint64(begin_nvl, + "resume_offset", &resume_off) != 0) { + return (SET_ERROR(EINVAL)); + } + VERIFY0(zap_lookup(mos, dsobj, + DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val)); + if (resume_obj != val) + return (SET_ERROR(EINVAL)); + VERIFY0(zap_lookup(mos, dsobj, + DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val)); + if (resume_off != val) + return (SET_ERROR(EINVAL)); + + return (0); +} + /* * Read in the stream's records, one by one, and apply them to the pool. 
There * are two threads involved; the thread that calls this function will spin up a @@ -2377,7 +2809,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, struct receive_arg *ra; struct receive_writer_arg *rwa; int featureflags; - struct receive_ign_obj_node *n; + uint32_t payloadlen; + void *payload; + nvlist_t *begin_nvl = NULL; ra = kmem_zalloc(sizeof (*ra), KM_SLEEP); rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP); @@ -2386,8 +2820,14 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ra->cksum = drc->drc_cksum; ra->vp = vp; ra->voff = *voffp; - list_create(&ra->ignore_obj_list, sizeof (struct receive_ign_obj_node), - offsetof(struct receive_ign_obj_node, node)); + + if (dsl_dataset_is_zapified(drc->drc_ds)) { + (void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset, + drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES, + sizeof (ra->bytes_read), 1, &ra->bytes_read); + } + + objlist_create(&ra->ignore_objlist); /* these were verified in dmu_recv_begin */ ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, @@ -2438,9 +2878,29 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, drc->drc_guid_to_ds_map = rwa->guid_to_ds_map; } - err = receive_read_payload_and_next_header(ra, 0, NULL); - if (err) + payloadlen = drc->drc_drr_begin->drr_payloadlen; + payload = NULL; + if (payloadlen != 0) + payload = kmem_alloc(payloadlen, KM_SLEEP); + + err = receive_read_payload_and_next_header(ra, payloadlen, payload); + if (err != 0) { + if (payloadlen != 0) + kmem_free(payload, payloadlen); goto out; + } + if (payloadlen != 0) { + err = nvlist_unpack(payload, payloadlen, &begin_nvl, KM_SLEEP); + kmem_free(payload, payloadlen); + if (err != 0) + goto out; + } + + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { + err = resume_check(ra, begin_nvl); + if (err != 0) + goto out; + } (void) bqueue_init(&rwa->q, zfs_recv_queue_length, offsetof(struct receive_record_arg, node)); @@ -2448,6 +2908,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL); rwa->os = ra->os; rwa->byteswap = drc->drc_byteswap; + rwa->resumable = drc->drc_resumable; (void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc, TS_RUN, minclsyspri); @@ -2461,7 +2922,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, * We can leave this loop in 3 ways: First, if rwa->err is * non-zero. In that case, the writer thread will free the rrd we just * pushed. Second, if we're interrupted; in that case, either it's the - * first loop and ra->rrd was never allocated, or it's later, and ra.rrd + * first loop and ra->rrd was never allocated, or it's later and ra->rrd * has been handed off to the writer thread who will free it. Finally, * if receive_read_record fails or we're at the end of the stream, then * we free ra->rrd and exit. @@ -2506,24 +2967,21 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, err = rwa->err; out: + nvlist_free(begin_nvl); if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) zfs_onexit_fd_rele(cleanup_fd); if (err != 0) { /* - * destroy what we created, so we don't leave it in the - * inconsistent restoring state. + * Clean up references. If receive is not resumable, + * destroy what we created, so we don't leave it in + * the inconsistent state. 
*/ dmu_recv_cleanup_ds(drc); } *voffp = ra->voff; - - for (n = list_remove_head(&ra->ignore_obj_list); n != NULL; - n = list_remove_head(&ra->ignore_obj_list)) { - kmem_free(n, sizeof (*n)); - } - list_destroy(&ra->ignore_obj_list); + objlist_destroy(&ra->ignore_objlist); kmem_free(ra, sizeof (*ra)); kmem_free(rwa, sizeof (*rwa)); return (err); @@ -2674,6 +3132,20 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; + if (dsl_dataset_has_resume_receive_state(ds)) { + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_FROMGUID, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OBJECT, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OFFSET, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_BYTES, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TOGUID, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TONAME, tx); + } } drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE); @@ -2722,16 +3194,13 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc) int error; #ifdef _KERNEL - char *name; - /* * We will be destroying the ds; make sure its origin is unmounted if * necessary. */ - name = kmem_alloc(MAXNAMELEN, KM_SLEEP); + char name[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(drc->drc_ds, name); zfs_destroy_unmount_origin(name); - kmem_free(name, MAXNAMELEN); #endif error = dsl_sync_task(drc->drc_tofs, diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 44ba74181..0df12fac8 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -47,6 +47,7 @@ typedef struct prefetch_data { int pd_flags; boolean_t pd_cancel; boolean_t pd_exited; + zbookmark_phys_t pd_resume; } prefetch_data_t; typedef struct traverse_data { @@ -323,30 +324,29 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, uint32_t flags = ARC_FLAG_WAIT; int32_t i; int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; - dnode_phys_t *cdnp; + dnode_phys_t *child_dnp; err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err != 0) goto post; - cdnp = buf->b_data; + child_dnp = buf->b_data; - for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) { - prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset, - zb->zb_blkid * epb + i); + for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) { + prefetch_dnode_metadata(td, &child_dnp[i], + zb->zb_objset, zb->zb_blkid * epb + i); } /* recursively visitbp() blocks below this */ - for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) { - err = traverse_dnode(td, &cdnp[i], zb->zb_objset, - zb->zb_blkid * epb + i); + for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) { + err = traverse_dnode(td, &child_dnp[i], + zb->zb_objset, zb->zb_blkid * epb + i); if (err != 0) break; } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; - dnode_phys_t *mdnp, *gdnp, *udnp; err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); @@ -354,11 +354,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, goto post; osp = buf->b_data; - mdnp = &osp->os_meta_dnode; - gdnp = &osp->os_groupused_dnode; - udnp = &osp->os_userused_dnode; - - prefetch_dnode_metadata(td, mdnp, 
zb->zb_objset, + prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); /* * See the block comment above for the goal of this variable. @@ -370,21 +366,21 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, td->td_realloc_possible = B_FALSE; if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { - prefetch_dnode_metadata(td, gdnp, zb->zb_objset, - DMU_GROUPUSED_OBJECT); - prefetch_dnode_metadata(td, udnp, zb->zb_objset, - DMU_USERUSED_OBJECT); + prefetch_dnode_metadata(td, &osp->os_groupused_dnode, + zb->zb_objset, DMU_GROUPUSED_OBJECT); + prefetch_dnode_metadata(td, &osp->os_userused_dnode, + zb->zb_objset, DMU_USERUSED_OBJECT); } - err = traverse_dnode(td, mdnp, zb->zb_objset, + err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { - err = traverse_dnode(td, gdnp, zb->zb_objset, - DMU_GROUPUSED_OBJECT); + err = traverse_dnode(td, &osp->os_groupused_dnode, + zb->zb_objset, DMU_GROUPUSED_OBJECT); } if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { - err = traverse_dnode(td, udnp, zb->zb_objset, - DMU_USERUSED_OBJECT); + err = traverse_dnode(td, &osp->os_userused_dnode, + zb->zb_objset, DMU_USERUSED_OBJECT); } } @@ -416,9 +412,15 @@ post: * Set the bookmark to the first level-0 block that we need * to visit. This way, the resuming code does not need to * deal with resuming from indirect blocks. + * + * Note, if zb_level <= 0, dnp may be NULL, so we don't want + * to dereference it. */ - td->td_resume->zb_blkid = zb->zb_blkid << - (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); + td->td_resume->zb_blkid = zb->zb_blkid; + if (zb->zb_level > 0) { + td->td_resume->zb_blkid <<= zb->zb_level * + (dnp->dn_indblkshift - SPA_BLKPTRSHIFT); + } td->td_paused = B_TRUE; } @@ -450,6 +452,10 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, int j, err = 0; zbookmark_phys_t czb; + if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL && + object < td->td_resume->zb_object) + return (0); + if (td->td_flags & TRAVERSE_PRE) { SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL, ZB_DNODE_BLKID); @@ -527,6 +533,7 @@ traverse_prefetch_thread(void *arg) td.td_func = traverse_prefetcher; td.td_arg = td_main->td_pfd; td.td_pfd = NULL; + td.td_resume = &td_main->td_pfd->pd_resume; SET_BOOKMARK(&czb, td.td_objset, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); @@ -556,12 +563,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, ASSERT(ds == NULL || objset == ds->ds_object); ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST)); - /* - * The data prefetching mechanism (the prefetch thread) is incompatible - * with resuming from a bookmark. - */ - ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA)); - td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP); pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP); czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP); @@ -586,6 +587,8 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, } pd->pd_flags = flags; + if (resume != NULL) + pd->pd_resume = *resume; mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL); @@ -638,11 +641,19 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, * in syncing context). 
*/ int -traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, - blkptr_cb_t func, void *arg) +traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start, + zbookmark_phys_t *resume, + int flags, blkptr_cb_t func, void *arg) { return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object, - &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg)); + &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg)); +} + +int +traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, + int flags, blkptr_cb_t func, void *arg) +{ + return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg)); } int @@ -675,7 +686,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, /* visit each dataset */ for (obj = 1; err == 0; - err = dmu_object_next(mos, &obj, FALSE, txg_start)) { + err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) { dmu_object_info_t doi; err = dmu_object_info(mos, obj, &doi); diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index 447a3a2dc..5a7f034ce 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -34,10 +34,10 @@ static int dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, dsl_dataset_t **dsp, void *tag, char **shortnamep) { - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; char *hashp; - if (strlen(fullname) >= MAXNAMELEN) + if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); hashp = strchr(fullname, '#'); if (hashp == NULL) diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 9c275b234..5b7de74dc 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -25,6 +25,7 @@ * Copyright (c) 2014 RackTop Systems. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. + * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved. */ #include <sys/dmu_objset.h> @@ -52,6 +53,9 @@ #include <sys/dsl_userhold.h> #include <sys/dsl_bookmark.h> #include <sys/policy.h> +#include <sys/dmu_send.h> +#include <sys/zio_compress.h> +#include <zfs_fletcher.h> /* * The SPA supports block sizes up to 16MB. However, very large blocks @@ -75,6 +79,8 @@ int zfs_max_recordsize = 1 * 1024 * 1024; extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); +extern int spa_asize_inflation; + /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been @@ -664,22 +670,38 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name) dsl_dir_name(ds->ds_dir, name); VERIFY0(dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { - (void) strcat(name, "@"); + VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN), + <, ZFS_MAX_DATASET_NAME_LEN); /* * We use a "recursive" mutex so that we * can call dprintf_ds() with ds_lock held. 
*/ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); - (void) strcat(name, ds->ds_snapname); + VERIFY3U(strlcat(name, ds->ds_snapname, + ZFS_MAX_DATASET_NAME_LEN), <, + ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&ds->ds_lock); } else { - (void) strcat(name, ds->ds_snapname); + VERIFY3U(strlcat(name, ds->ds_snapname, + ZFS_MAX_DATASET_NAME_LEN), <, + ZFS_MAX_DATASET_NAME_LEN); } } } } +int +dsl_dataset_namelen(dsl_dataset_t *ds) +{ + int len; + VERIFY0(dsl_dataset_get_snapname(ds)); + mutex_enter(&ds->ds_lock); + len = dsl_dir_namelen(ds->ds_dir) + 1 + strlen(ds->ds_snapname); + mutex_exit(&ds->ds_lock); + return (len); +} + void dsl_dataset_rele(dsl_dataset_t *ds, void *tag) { @@ -704,6 +726,7 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) { boolean_t gotit = FALSE; + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); mutex_enter(&ds->ds_lock); if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { ds->ds_owner = tag; @@ -714,6 +737,16 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) return (gotit); } +boolean_t +dsl_dataset_has_owner(dsl_dataset_t *ds) +{ + boolean_t rv; + mutex_enter(&ds->ds_lock); + rv = (ds->ds_owner != NULL); + mutex_exit(&ds->ds_lock); + return (rv); +} + static void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) { @@ -1238,10 +1271,10 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) int error = 0; dsl_dataset_t *ds; char *name, *atp; - char dsname[MAXNAMELEN]; + char dsname[ZFS_MAX_DATASET_NAME_LEN]; name = nvpair_name(pair); - if (strlen(name) >= MAXNAMELEN) + if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN) error = SET_ERROR(ENAMETOOLONG); if (error == 0) { atp = strchr(name, '@'); @@ -1414,7 +1447,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { dsl_dataset_t *ds; char *name, *atp; - char dsname[MAXNAMELEN]; + char dsname[ZFS_MAX_DATASET_NAME_LEN]; name = nvpair_name(pair); atp = strchr(name, '@'); @@ -1461,7 +1494,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) suspended = fnvlist_alloc(); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { - char fsname[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *snapname = nvpair_name(pair); char *atp; void *cookie; @@ -1615,6 +1648,21 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; + if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) { + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1, + &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1, + &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1, + &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx)); + ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0; + ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0; + ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0; + } + dmu_objset_sync(ds->ds_objset, zio, tx); for (f = 0; f < SPA_FEATURES; f++) { @@ -1655,7 +1703,7 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; - char buf[ZFS_MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, za.za_first_integer, FTAG, 
&clone)); dsl_dir_name(clone->ds_dir, buf); @@ -1670,6 +1718,78 @@ fail: nvlist_free(propval); } +static void +get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (dsl_dataset_has_resume_receive_state(ds)) { + char *str; + void *packed; + uint8_t *compressed; + uint64_t val; + nvlist_t *token_nv = fnvlist_alloc(); + size_t packed_size, compressed_size; + zio_cksum_t cksum; + char *propval; + char buf[MAXNAMELEN]; + int i; + + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "fromguid", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "object", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "offset", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "bytes", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "toguid", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) { + fnvlist_add_string(token_nv, "toname", buf); + } + if (zap_contains(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_EMBEDOK) == 0) { + fnvlist_add_boolean(token_nv, "embedok"); + } + packed = fnvlist_pack(token_nv, &packed_size); + fnvlist_free(token_nv); + compressed = kmem_alloc(packed_size, KM_SLEEP); + + compressed_size = gzip_compress(packed, compressed, + packed_size, packed_size, 6); + + fletcher_4_native(compressed, compressed_size, &cksum); + + str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP); + for (i = 0; i < compressed_size; i++) { + (void) sprintf(str + i * 2, "%02x", compressed[i]); + } + str[compressed_size * 2] = '\0'; + propval = kmem_asprintf("%u-%llx-%llx-%s", + ZFS_SEND_RESUME_TOKEN_VERSION, + (longlong_t)cksum.zc_word[0], + (longlong_t)packed_size, str); + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_RECEIVE_RESUME_TOKEN, propval); + kmem_free(packed, packed_size); + kmem_free(str, compressed_size * 2 + 1); + kmem_free(compressed, packed_size); + strfree(propval); + } +} + void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { @@ -1693,7 +1813,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) get_clones_stat(ds, nv); } else { if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds->ds_prev, buf); dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); } @@ -1743,6 +1863,32 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } + if (!dsl_dataset_is_snapshot(ds)) { + /* 6 extra bytes for /%recv */ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + dsl_dataset_t *recv_ds; + + /* + * A failed "newfs" (e.g. full) resumable receive leaves + * the stats set on this dataset. Check here for the prop. + */ + get_receive_resume_stats(ds, nv); + + /* + * A failed incremental resumable receive leaves the + * stats set on our child named "%recv". Check the child + * for the prop. 
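get_receive_resume_stats() above emits the token as <version>-<fletcher-4 word 0>-<packed nvlist size>-<hex of gzip'd payload>, per the kmem_asprintf("%u-%llx-%llx-%s", ...) call. A userland-flavored sketch of reading just that envelope; resume_token_envelope() is a hypothetical name, and the real decoding (hex, gzip, nvlist unpack) lives in libzfs:

#include <stdio.h>

/*
 * Read the three header fields of a receive_resume_token; the
 * hex-encoded payload follows the third '-'. Sketch only.
 */
static int
resume_token_envelope(const char *token, unsigned *version,
    unsigned long long *cksum_word0, unsigned long long *packed_size)
{
	if (sscanf(token, "%u-%llx-%llx", version, cksum_word0,
	    packed_size) != 3)
		return (-1);
	return (0);
}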
+ */ + dsl_dataset_name(ds, recvname); + if (strlcat(recvname, "/", sizeof (recvname)) < + sizeof (recvname) && + strlcat(recvname, recv_clone_name, sizeof (recvname)) < + sizeof (recvname) && + dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) { + get_receive_resume_stats(recv_ds, nv); + dsl_dataset_rele(recv_ds, FTAG); + } + } } void @@ -1863,7 +2009,7 @@ dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, /* dataset name + 1 for the "@" + the new snapshot name must fit */ if (dsl_dir_namelen(hds->ds_dir) + 1 + - strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) + strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN) error = SET_ERROR(ENAMETOOLONG); return (error); @@ -1970,7 +2116,8 @@ dsl_dataset_rename_snapshot(const char *fsname, * only one long hold on the dataset. We're not allowed to change anything here * so we don't permanently release the long hold or regular hold here. We want * to do this only when syncing to avoid the dataset unexpectedly going away - * when we release the long hold. + * when we release the long hold. Allow a long hold to exist for volumes; this + * may occur when asynchronously registering the minor with the kernel. */ static int dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) @@ -1985,7 +2132,7 @@ dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) dsl_dataset_long_rele(ds, owner); } - held = dsl_dataset_long_held(ds); + held = (dsl_dataset_long_held(ds) && (ds->ds_owner != zvol_tag)); if (owner != NULL) dsl_dataset_long_hold(ds, owner); @@ -2095,7 +2242,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds, *clone; uint64_t cloneobj; - char namebuf[ZFS_MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); @@ -2648,7 +2795,7 @@ promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) * Promote a clone. * * If it fails due to a conflicting snapshot name, "conflsnap" will be filled - * in with the name. (It must be at least MAXNAMELEN bytes long.) + * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.) */ int dsl_dataset_promote(const char *name, char *conflsnap) @@ -2685,6 +2832,11 @@ int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) { + /* + * "slack" factor for received datasets with refquota set on them. + * See the bottom of this function for details on its use. + */ + uint64_t refquota_slack = DMU_MAX_ACCESS * spa_asize_inflation; int64_t unused_refres_delta; /* they should both be heads */ @@ -2727,10 +2879,22 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) return (SET_ERROR(ENOSPC)); - /* clone can't be over the head's refquota */ + /* + * The clone can't be too much over the head's refquota. + * + * To ensure that the entire refquota can be used, we allow one + * transaction to exceed the refquota. Therefore, this check + * needs to also allow for the space referenced to be more than the + * refquota. The maximum amount of space that one transaction can use + * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this + * overage ensures that we are able to receive a filesystem that + * exceeds the refquota on the source system. + * + * So that overage is the refquota_slack we use below.
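The check that follows can be restated as a standalone predicate; here max_tx_space stands in for DMU_MAX_ACCESS * spa_asize_inflation, whose values come from the running kernel (a sketch, not the patch's code):

#include <stdint.h>

/* nonzero when the clone swap must be rejected with EDQUOT */
static int
clone_over_refquota(uint64_t referenced, uint64_t quota,
    uint64_t max_tx_space)
{
	if (quota == 0)
		return (0);	/* no refquota configured */
	return (referenced > quota + max_tx_space);
}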
+ */ if (origin_head->ds_quota != 0 && dsl_dataset_phys(clone)->ds_referenced_bytes > - origin_head->ds_quota) + origin_head->ds_quota + refquota_slack) return (SET_ERROR(EDQUOT)); return (0); @@ -2745,8 +2909,13 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, int64_t unused_refres_delta; ASSERT(clone->ds_reserved == 0); + /* + * NOTE: On DEBUG kernels there could be a race between this and + * the check function if spa_asize_inflation is adjusted... + */ ASSERT(origin_head->ds_quota == 0 || - dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota); + dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota + + DMU_MAX_ACCESS * spa_asize_inflation); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); /* @@ -3391,6 +3560,23 @@ dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); } +boolean_t +dsl_dataset_is_zapified(dsl_dataset_t *ds) +{ + dmu_object_info_t doi; + + dmu_object_info_from_db(ds->ds_dbuf, &doi); + return (doi.doi_type == DMU_OTN_ZAP_METADATA); +} + +boolean_t +dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds) +{ + return (dsl_dataset_is_zapified(ds) && + zap_contains(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0); +} + #if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_LP64) module_param(zfs_max_recordsize, int, 0644); diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 952422be2..eb39cff57 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ /* @@ -330,7 +330,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); baseza = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); - source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP); + source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (dd = startdd; dd != NULL; dd = dd->dd_parent) { @@ -370,7 +370,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) nvlist_free(sp_nvp); } - kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); + kmem_free(source, ZFS_MAX_DATASET_NAME_LEN); kmem_free(baseza, sizeof (zap_attribute_t)); kmem_free(basezc, sizeof (zap_cursor_t)); kmem_free(za, sizeof (zap_attribute_t)); diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index d7c34c9a4..716081ba3 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -978,9 +978,17 @@ dsl_destroy_inconsistent(const char *dsname, void *arg) objset_t *os; if (dmu_objset_hold(dsname, FTAG, &os) == 0) { - boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os)); + boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os)); + + /* + * If the dataset is inconsistent because a resumable receive + * has failed, then do not destroy it. 
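The effect of the check that follows, reduced to a one-line predicate (a sketch of the same logic, not the patch's code):

#include <stdbool.h>

/* auto-destroy only when inconsistent and not a paused resumable receive */
static bool
should_auto_destroy(bool inconsistent, bool has_resume_state)
{
	return (inconsistent && !has_resume_state);
}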
+ */ + if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os))) + need_destroy = B_FALSE; + dmu_objset_rele(os, FTAG); - if (inconsistent) + if (need_destroy) (void) dsl_destroy_head(dsname); } return (0); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 8983e0793..ae67b362e 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -299,13 +299,14 @@ dsl_dir_async_rele(dsl_dir_t *dd, void *tag) dmu_buf_rele(dd->dd_dbuf, tag); } -/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ +/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ void dsl_dir_name(dsl_dir_t *dd, char *buf) { if (dd->dd_parent) { dsl_dir_name(dd->dd_parent, buf); - (void) strcat(buf, "/"); + VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <, + ZFS_MAX_DATASET_NAME_LEN); } else { buf[0] = '\0'; } @@ -315,10 +316,12 @@ dsl_dir_name(dsl_dir_t *dd, char *buf) * dprintf_dd() with dd_lock held */ mutex_enter(&dd->dd_lock); - (void) strcat(buf, dd->dd_myname); + VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), + <, ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&dd->dd_lock); } else { - (void) strcat(buf, dd->dd_myname); + VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), + <, ZFS_MAX_DATASET_NAME_LEN); } } @@ -367,12 +370,12 @@ getcomponent(const char *path, char *component, const char **nextp) if (p != NULL && (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) return (SET_ERROR(EINVAL)); - if (strlen(path) >= MAXNAMELEN) + if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strcpy(component, path); p = NULL; } else if (p[0] == '/') { - if (p - path >= MAXNAMELEN) + if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); component[p - path] = '\0'; @@ -384,7 +387,7 @@ getcomponent(const char *path, char *component, const char **nextp) */ if (strchr(path, '/')) return (SET_ERROR(EINVAL)); - if (p - path >= MAXNAMELEN) + if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); component[p - path] = '\0'; @@ -412,7 +415,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dir_t *dd; uint64_t ddobj; - buf = kmem_alloc(MAXNAMELEN, KM_SLEEP); + buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); err = getcomponent(name, buf, &next); if (err != 0) goto error; @@ -479,7 +482,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, *tailp = next; *ddp = dd; error: - kmem_free(buf, MAXNAMELEN); + kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN); return (err); } @@ -974,7 +977,7 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); @@ -1691,11 +1694,11 @@ static int dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { int *deltap = arg; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds, namebuf); - if (strlen(namebuf) + *deltap >= MAXNAMELEN) + if (strlen(namebuf) + *deltap >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); return (0); } diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 361473275..66e899a57 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. 
All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright 2015, Joyent, Inc. */ @@ -1095,7 +1095,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err = 0; - char setpoint[MAXNAMELEN]; + char setpoint[ZFS_MAX_DATASET_NAME_LEN]; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 72163521e..6c5f1f0b5 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1115,7 +1115,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) * rootbp's birth time is < cur_min_txg. Then we will * add the next snapshots/clones to the work queue. */ - char *dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanning dataset %llu (%s) is unnecessary because " "cur_min_txg (%llu) >= max_txg (%llu)", @@ -1146,7 +1146,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); - dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP); + dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " "pausing=%u", @@ -1154,7 +1154,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg, (int)scn->scn_pausing); - kmem_free(dsname, ZFS_MAXNAMELEN); + kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); if (scn->scn_pausing) goto out; diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index 1b234ed48..a6d1aa937 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. 
*/ @@ -181,7 +181,7 @@ dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds, } typedef struct zfs_hold_cleanup_arg { - char zhca_spaname[MAXNAMELEN]; + char zhca_spaname[ZFS_MAX_DATASET_NAME_LEN]; uint64_t zhca_spa_load_guid; nvlist_t *zhca_holds; } zfs_hold_cleanup_arg_t; @@ -580,7 +580,7 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist, error = dsl_dataset_hold_obj_string(tmpdp, nvpair_name(pair), FTAG, &ds); if (error == 0) { - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds, name); dsl_pool_config_exit(tmpdp, FTAG); dsl_dataset_rele(ds, FTAG); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index d1aefe585..26181af84 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -361,8 +361,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) break; } - strval = kmem_alloc( - MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, + strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); @@ -375,8 +374,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, prop, strval, intval, src); if (strval != NULL) - kmem_free(strval, - MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); + kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); break; @@ -2018,6 +2016,16 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (0); } +/* ARGSUSED */ +int +verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) +{ + if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); + + return (0); +} + static int spa_load_verify(spa_t *spa) { @@ -2032,6 +2040,14 @@ spa_load_verify(spa_t *spa) if (policy.zrp_request & ZPOOL_NEVER_REWIND) return (0); + dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); + error = dmu_objset_find_dp(spa->spa_dsl_pool, + spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, + DS_FIND_CHILDREN); + dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); + if (error != 0) + return (error); + rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 01aa4641e..cf6fc224a 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ #include <sys/spa.h> @@ -493,7 +493,7 @@ spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { va_list adx; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *nvl = fnvlist_alloc(); ASSERT(tx != NULL); @@ -512,7 +512,7 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, dmu_tx_t *tx, const char *fmt, ...) 
{ va_list adx; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *nvl = fnvlist_alloc(); ASSERT(tx != NULL); diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index e47cfc878..d279d1828 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -749,12 +749,13 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid, return (ENOENT); cookie = spl_fstrans_mark(); - snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); while (error == 0) { dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - error = dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN, - snapname, &id, &pos, &case_conflict); + error = dmu_snapshot_list_next(zsb->z_os, + ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, + &case_conflict); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto out; @@ -767,7 +768,7 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid, snprintf(full_path, path_len - 1, "%s/.zfs/snapshot/%s", zsb->z_mntopts->z_mntpoint, snapname); out: - kmem_free(snapname, MAXNAMELEN); + kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); spl_fstrans_unmark(cookie); return (error); @@ -854,14 +855,14 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm, ZFS_ENTER(zsb); - to = kmem_alloc(MAXNAMELEN, KM_SLEEP); - from = kmem_alloc(MAXNAMELEN, KM_SLEEP); - real = kmem_alloc(MAXNAMELEN, KM_SLEEP); - fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); if (zsb->z_case == ZFS_CASE_INSENSITIVE) { error = dmu_snapshot_realname(zsb->z_os, snm, real, - MAXNAMELEN, NULL); + ZFS_MAX_DATASET_NAME_LEN, NULL); if (error == 0) { snm = real; } else if (error != ENOTSUP) { @@ -871,9 +872,11 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm, dmu_objset_name(zsb->z_os, fsname); - error = zfsctl_snapshot_name(ITOZSB(sdip), snm, MAXNAMELEN, from); + error = zfsctl_snapshot_name(ITOZSB(sdip), snm, + ZFS_MAX_DATASET_NAME_LEN, from); if (error == 0) - error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, MAXNAMELEN, to); + error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, + ZFS_MAX_DATASET_NAME_LEN, to); if (error == 0) error = zfs_secpolicy_rename_perms(from, to, cr); if (error != 0) @@ -903,10 +906,10 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm, rw_exit(&zfs_snapshot_lock); out: - kmem_free(from, MAXNAMELEN); - kmem_free(to, MAXNAMELEN); - kmem_free(real, MAXNAMELEN); - kmem_free(fsname, MAXNAMELEN); + kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); ZFS_EXIT(zsb); @@ -929,12 +932,12 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) ZFS_ENTER(zsb); - snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); - real = kmem_alloc(MAXNAMELEN, KM_SLEEP); + snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); if (zsb->z_case == ZFS_CASE_INSENSITIVE) { error = dmu_snapshot_realname(zsb->z_os, name, real, - MAXNAMELEN, NULL); + ZFS_MAX_DATASET_NAME_LEN, NULL); if (error == 0) { name = real; } else if (error != ENOTSUP) { @@ -942,7 +945,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) } } - error = zfsctl_snapshot_name(ITOZSB(dip), name, MAXNAMELEN, snapname); + error = 
zfsctl_snapshot_name(ITOZSB(dip), name, + ZFS_MAX_DATASET_NAME_LEN, snapname); if (error == 0) error = zfs_secpolicy_destroy_perms(snapname, cr); if (error != 0) @@ -952,8 +956,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) if ((error == 0) || (error == ENOENT)) error = dsl_destroy_snapshot(snapname, B_FALSE); out: - kmem_free(snapname, MAXNAMELEN); - kmem_free(real, MAXNAMELEN); + kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); ZFS_EXIT(zsb); @@ -975,7 +979,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, if (!zfs_admin_snapshot) return (EACCES); - dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { error = SET_ERROR(EILSEQ); @@ -997,7 +1001,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, 0, cr, NULL, NULL); } out: - kmem_free(dsname, MAXNAMELEN); + kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); return (error); } @@ -1075,11 +1079,11 @@ zfsctl_snapshot_mount(struct path *path, int flags) zsb = ITOZSB(ip); ZFS_ENTER(zsb); - full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); + full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); error = zfsctl_snapshot_name(zsb, dname(dentry), - MAXNAMELEN, full_name); + ZFS_MAX_DATASET_NAME_LEN, full_name); if (error) goto error; @@ -1153,7 +1157,7 @@ zfsctl_snapshot_mount(struct path *path, int flags) } path_put(&spath); error: - kmem_free(full_name, MAXNAMELEN); + kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); kmem_free(full_path, MAXPATHLEN); ZFS_EXIT(zsb); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 30338ac14..09f83a5cf 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska + * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Portions Copyright 2012 Pawel Jakub Dawidek <[email protected]> * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. @@ -603,7 +604,7 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, case ZFS_PROP_SNAPSHOT_LIMIT: if (!INGLOBALZONE(curproc)) { uint64_t zoned; - char setpoint[MAXNAMELEN]; + char setpoint[ZFS_MAX_DATASET_NAME_LEN]; /* * Unprivileged users are allowed to modify the * limit on things *under* (ie. 
contained by) @@ -845,7 +846,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; int error; if ((error = zfs_secpolicy_write_perms(from, @@ -898,7 +899,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone); if (error == 0) { - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_t *origin = NULL; dsl_dir_t *dd; dd = clone->ds_dir; @@ -944,6 +945,13 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) ZFS_DELEG_PERM_CREATE, cr)); } +/* ARGSUSED */ +static int +zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_recv(zc, innvl, cr)); +} + int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { @@ -1068,7 +1076,7 @@ zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) static int zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; int error; char *origin; @@ -1211,7 +1219,7 @@ zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { - char fsname[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; error = dmu_fsname(nvpair_name(pair), fsname); if (error != 0) return (error); @@ -1232,7 +1240,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { - char fsname[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; error = dmu_fsname(nvpair_name(pair), fsname); if (error != 0) return (error); @@ -2252,7 +2260,8 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) * A dataset name of maximum length cannot have any snapshots, * so exit immediately. */ - if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { + if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= + ZFS_MAX_DATASET_NAME_LEN) { dmu_objset_rele(os, FTAG); return (SET_ERROR(ESRCH)); } @@ -3040,7 +3049,7 @@ zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, boolean_t fuids_ok, sa_ok; uint64_t zplver = ZPL_VERSION; objset_t *os = NULL; - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; char *cp; spa_t *spa; uint64_t spa_vers; @@ -3406,7 +3415,7 @@ zfs_destroy_unmount_origin(const char *fsname) return; ds = dmu_objset_ds(os); if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) { - char originname[MAXNAMELEN]; + char originname[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds->ds_prev, originname); dmu_objset_rele(os, FTAG); (void) zfs_unmount_snap(originname); @@ -3990,77 +3999,93 @@ next: } } +/* + * Extract properties that cannot be set PRIOR to the receipt of a dataset. + * For example, refquota cannot be set until after the receipt of a dataset, + * because in replication streams, an older/earlier snapshot may exceed the + * refquota. We want to receive the older/earlier snapshot, but setting + * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent + * the older/earlier snapshot from being received (with EDQUOT). + * + * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario. 
+ * + * libzfs will need to be judicious in handling errors encountered by props + * extracted by this function. + */ +static nvlist_t * +extract_delay_props(nvlist_t *props) +{ + nvlist_t *delayprops; + nvpair_t *nvp, *tmp; + static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 }; + int i; + + VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(props, nvp)) { + /* + * strcmp() is safe because zfs_prop_to_name() always returns + * a bounded string. + */ + for (i = 0; delayable[i] != 0; i++) { + if (strcmp(zfs_prop_to_name(delayable[i]), + nvpair_name(nvp)) == 0) { + break; + } + } + if (delayable[i] != 0) { + tmp = nvlist_prev_nvpair(props, nvp); + VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0); + VERIFY(nvlist_remove_nvpair(props, nvp) == 0); + nvp = tmp; + } + } + + if (nvlist_empty(delayprops)) { + nvlist_free(delayprops); + delayprops = NULL; + } + return (delayprops); +} + #ifdef DEBUG static boolean_t zfs_ioc_recv_inject_err; #endif /* - * inputs: - * zc_name name of containing filesystem - * zc_nvlist_src{_size} nvlist of properties to apply - * zc_value name of snapshot to create - * zc_string name of clone origin (if DRR_FLAG_CLONE) - * zc_cookie file descriptor to recv from - * zc_begin_record the BEGIN record of the stream (not byteswapped) - * zc_guid force flag - * zc_cleanup_fd cleanup-on-exit file descriptor - * zc_action_handle handle for this guid/ds mapping (or zero on first call) - * - * outputs: - * zc_cookie number of bytes read - * zc_nvlist_dst{_size} error for each unapplied received property - * zc_obj zprop_errflags_t - * zc_action_handle handle for this guid/ds mapping + * On failure the 'errors' nvlist may be allocated and will contain + * descriptions of the failures. It is the caller's responsibility to free it.
*/ static int -zfs_ioc_recv(zfs_cmd_t *zc) +zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, + nvlist_t *props, boolean_t force, boolean_t resumable, int input_fd, + dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes, + uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors) { - file_t *fp; dmu_recv_cookie_t drc; - boolean_t force = (boolean_t)zc->zc_guid; - int fd; int error = 0; int props_error = 0; - nvlist_t *errors; offset_t off; - nvlist_t *props = NULL; /* sent properties */ + nvlist_t *delayprops = NULL; /* sent properties applied post-receive */ nvlist_t *origprops = NULL; /* existing properties */ - char *origin = NULL; - char *tosnap; - char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; + file_t *input_fp; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || - strchr(zc->zc_value, '@') == NULL || - strchr(zc->zc_value, '%')) - return (SET_ERROR(EINVAL)); - - (void) strcpy(tofs, zc->zc_value); - tosnap = strchr(tofs, '@'); - *tosnap++ = '\0'; - - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &props)) != 0) - return (error); - - fd = zc->zc_cookie; - fp = getf(fd); - if (fp == NULL) { - nvlist_free(props); + *errors = NULL; + input_fp = getf(input_fd); + if (input_fp == NULL) return (SET_ERROR(EBADF)); - } - - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - if (zc->zc_string[0]) - origin = zc->zc_string; error = dmu_recv_begin(tofs, tosnap, - &zc->zc_begin_record, force, origin, &drc); + begin_record, force, resumable, origin, &drc); if (error != 0) goto out; + *read_bytes = 0; + *errflags = 0; + *errors = fnvlist_alloc(); + /* * Set properties before we receive the stream so that they are applied * to the new data. Note that we must call dmu_recv_stream() if @@ -4090,14 +4115,14 @@ zfs_ioc_recv(zfs_cmd_t *zc) if (!first_recvd_props) props_reduce(props, origprops); if (zfs_check_clearable(tofs, origprops, &errlist) != 0) - (void) nvlist_merge(errors, errlist, 0); + (void) nvlist_merge(*errors, errlist, 0); nvlist_free(errlist); if (clear_received_props(tofs, origprops, first_recvd_props ? NULL : props) != 0) - zc->zc_obj |= ZPROP_ERR_NOCLEAR; + *errflags |= ZPROP_ERR_NOCLEAR; } else { - zc->zc_obj |= ZPROP_ERR_NOCLEAR; + *errflags |= ZPROP_ERR_NOCLEAR; } } @@ -4105,24 +4130,15 @@ zfs_ioc_recv(zfs_cmd_t *zc) props_error = dsl_prop_set_hasrecvd(tofs); if (props_error == 0) { + delayprops = extract_delay_props(props); (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, errors); + props, *errors); } } - if (zc->zc_nvlist_dst_size != 0 && - (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || - put_nvlist(zc, errors) != 0)) { - /* - * Caller made zc->zc_nvlist_dst less than the minimum expected - * size or supplied an invalid address. - */ - props_error = SET_ERROR(EINVAL); - } - - off = fp->f_offset; - error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, - &zc->zc_action_handle); + off = input_fp->f_offset; + error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd, + action_handle); if (error == 0) { zfs_sb_t *zsb = NULL; @@ -4144,11 +4160,32 @@ zfs_ioc_recv(zfs_cmd_t *zc) } else { error = dmu_recv_end(&drc, NULL); } + + /* Set delayed properties now, after we're done receiving. 
*/ + if (delayprops != NULL && error == 0) { + (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, + delayprops, *errors); + } } - zc->zc_cookie = off - fp->f_offset; - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; + if (delayprops != NULL) { + /* + * Merge delayed props back in with initial props, in case + * we're DEBUG and zfs_ioc_recv_inject_err is set (which means + * we have to make sure clear_received_props() includes + * the delayed properties). + * + * Since zfs_ioc_recv_inject_err is only in DEBUG kernels, + * using ASSERT() will be just like a VERIFY. + */ + ASSERT(nvlist_merge(props, delayprops, 0) == 0); + nvlist_free(delayprops); + } + + + *read_bytes = off - input_fp->f_offset; + if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0) + input_fp->f_offset = off; #ifdef DEBUG if (zfs_ioc_recv_inject_err) { @@ -4167,14 +4204,14 @@ zfs_ioc_recv(zfs_cmd_t *zc) * Since we may have left a $recvd value on the * system, we can't clear the $hasrecvd flag. */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; + *errflags |= ZPROP_ERR_NORESTORE; } else if (first_recvd_props) { dsl_prop_unset_hasrecvd(tofs); } if (origprops == NULL && !drc.drc_newfs) { /* We failed to stash the original properties. */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; + *errflags |= ZPROP_ERR_NORESTORE; } /* @@ -4191,14 +4228,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) * We stashed the original properties but failed to * restore them. */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; + *errflags |= ZPROP_ERR_NORESTORE; } } out: - nvlist_free(props); + releasef(input_fd); nvlist_free(origprops); - nvlist_free(errors); - releasef(fd); if (error == 0) error = props_error; @@ -4208,6 +4243,176 @@ out: /* * inputs: + * zc_name name of containing filesystem (unused) + * zc_nvlist_src{_size} nvlist of properties to apply + * zc_value name of snapshot to create + * zc_string name of clone origin (if DRR_FLAG_CLONE) + * zc_cookie file descriptor to recv from + * zc_begin_record the BEGIN record of the stream (not byteswapped) + * zc_guid force flag + * zc_cleanup_fd cleanup-on-exit file descriptor + * zc_action_handle handle for this guid/ds mapping (or zero on first call) + * + * outputs: + * zc_cookie number of bytes read + * zc_obj zprop_errflags_t + * zc_action_handle handle for this guid/ds mapping + * zc_nvlist_dst{_size} error for each unapplied received property + */ +static int +zfs_ioc_recv(zfs_cmd_t *zc) +{ + dmu_replay_record_t begin_record; + nvlist_t *errors = NULL; + nvlist_t *props = NULL; + char *origin = NULL; + char *tosnap; + char tofs[ZFS_MAX_DATASET_NAME_LEN]; + int error = 0; + + if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || + strchr(zc->zc_value, '@') == NULL || + strchr(zc->zc_value, '%')) + return (SET_ERROR(EINVAL)); + + (void) strcpy(tofs, zc->zc_value); + tosnap = strchr(tofs, '@'); + *tosnap++ = '\0'; + + if (zc->zc_nvlist_src != 0 && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props)) != 0) + return (error); + + if (zc->zc_string[0]) + origin = zc->zc_string; + + begin_record.drr_type = DRR_BEGIN; + begin_record.drr_payloadlen = 0; + begin_record.drr_u.drr_begin = zc->zc_begin_record; + + error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, zc->zc_guid, + B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd, + &zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors); + nvlist_free(props); + + /* + * Now that all props, initial and delayed, are set, report the prop + * errors to the caller. 
+ */ + if (zc->zc_nvlist_dst_size != 0 && errors != NULL && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { + /* + * Caller made zc->zc_nvlist_dst less than the minimum expected + * size or supplied an invalid address. + */ + error = SET_ERROR(EINVAL); + } + + nvlist_free(errors); + + return (error); +} + +/* + * innvl: { + * "snapname" -> full name of the snapshot to create + * (optional) "props" -> properties to set (nvlist) + * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE) + * "begin_record" -> non-byteswapped dmu_replay_record_t + * "input_fd" -> file descriptor to read stream from (int32) + * (optional) "force" -> force flag (value ignored) + * (optional) "resumable" -> resumable flag (value ignored) + * (optional) "cleanup_fd" -> cleanup-on-exit file descriptor + * (optional) "action_handle" -> handle for this guid/ds mapping + * } + * + * outnvl: { + * "read_bytes" -> number of bytes read + * "error_flags" -> zprop_errflags_t + * "action_handle" -> handle for this guid/ds mapping + * "errors" -> error for each unapplied received property (nvlist) + * } + */ +static int +zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + dmu_replay_record_t *begin_record; + uint_t begin_record_size; + nvlist_t *errors = NULL; + nvlist_t *props = NULL; + char *snapname = NULL; + char *origin = NULL; + char *tosnap; + char tofs[ZFS_MAX_DATASET_NAME_LEN]; + boolean_t force; + boolean_t resumable; + uint64_t action_handle = 0; + uint64_t read_bytes = 0; + uint64_t errflags = 0; + int input_fd = -1; + int cleanup_fd = -1; + int error; + + error = nvlist_lookup_string(innvl, "snapname", &snapname); + if (error != 0) + return (SET_ERROR(EINVAL)); + + if (dataset_namecheck(snapname, NULL, NULL) != 0 || + strchr(snapname, '@') == NULL || + strchr(snapname, '%')) + return (SET_ERROR(EINVAL)); + + (void) strcpy(tofs, snapname); + tosnap = strchr(tofs, '@'); + *tosnap++ = '\0'; + + error = nvlist_lookup_string(innvl, "origin", &origin); + if (error && error != ENOENT) + return (error); + + error = nvlist_lookup_byte_array(innvl, "begin_record", + (uchar_t **) &begin_record, &begin_record_size); + if (error != 0 || begin_record_size != sizeof (*begin_record)) + return (SET_ERROR(EINVAL)); + + error = nvlist_lookup_int32(innvl, "input_fd", &input_fd); + if (error != 0) + return (SET_ERROR(EINVAL)); + + force = nvlist_exists(innvl, "force"); + resumable = nvlist_exists(innvl, "resumable"); + + error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd); + if (error && error != ENOENT) + return (error); + + error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle); + if (error && error != ENOENT) + return (error); + + error = nvlist_lookup_nvlist(innvl, "props", &props); + if (error && error != ENOENT) + return (error); + + error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, force, + resumable, input_fd, begin_record, cleanup_fd, &read_bytes, + &errflags, &action_handle, &errors); + + fnvlist_add_uint64(outnvl, "read_bytes", read_bytes); + fnvlist_add_uint64(outnvl, "error_flags", errflags); + fnvlist_add_uint64(outnvl, "action_handle", action_handle); + fnvlist_add_nvlist(outnvl, "errors", errors); + + nvlist_free(errors); + nvlist_free(props); + + return (error); +} + +/* + * inputs: * zc_name name of snapshot to send * zc_cookie file descriptor to send stream to * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) @@ -5182,6 +5387,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t 
*outnvl) * indicates that blocks > 128KB are permitted * (optional) "embedok" -> (value ignored) * presence indicates DRR_WRITE_EMBEDDED records are permitted + * (optional) "resume_object" and "resume_offset" -> (uint64) + * if present, resume send stream from specified object and offset. * } * * outnvl is unused @@ -5197,6 +5404,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) file_t *fp; boolean_t largeblockok; boolean_t embedok; + uint64_t resumeobj = 0; + uint64_t resumeoff = 0; error = nvlist_lookup_int32(innvl, "fd", &fd); if (error != 0) @@ -5207,12 +5416,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); + (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); + (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); + if ((fp = getf(fd)) == NULL) return (SET_ERROR(EBADF)); off = fp->f_offset; - error = dmu_send(snapname, fromname, embedok, largeblockok, - fd, fp->f_vnode, &off); + error = dmu_send(snapname, fromname, embedok, largeblockok, fd, + resumeobj, resumeoff, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5470,6 +5682,10 @@ zfs_ioctl_init(void) POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW, + zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index a72841c15..ef04b203d 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1020,7 +1020,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) statp->f_fsid.val[0] = (uint32_t)fsid; statp->f_fsid.val[1] = (uint32_t)(fsid >> 32); statp->f_type = ZFS_SUPER_MAGIC; - statp->f_namelen = ZFS_MAXNAMELEN; + statp->f_namelen = MAXNAMELEN - 1; /* * We have all of 40 characters to stuff a string here. diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 988ffec29..863ccb930 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -2080,7 +2080,7 @@ typedef struct zil_replay_arg { static int zil_replay_error(zilog_t *zilog, lr_t *lr, int error) { - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; zilog->zl_replaying_seq--; /* didn't actually replay this one */ diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 089e3a1bc..8c75698e5 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -50,7 +50,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) int zfs_flags = 0; zfs_sb_t *zsb = dentry->d_sb->s_fs_info; - if (dlen(dentry) > ZFS_MAXNAMELEN) + if (dlen(dentry) > ZFS_MAX_DATASET_NAME_LEN) return (ERR_PTR(-ENAMETOOLONG)); crhold(cr); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 9c89493ed..73277901f 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -61,7 +61,7 @@ unsigned long zvol_max_discard_blocks = 16384; static kmutex_t zvol_state_lock; static list_t zvol_state_list; -static char *zvol_tag = "zvol_tag"; +void *zvol_tag = "zvol_tag"; /* * The in-core state of each volume. |
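For reference, the input nvlist documented for zfs_ioc_recv_new() above could be assembled as below. This is a sketch using the libnvpair fnvlist helpers with only the required keys plus the resumable flag; recv_new_innvl() is a hypothetical name, and the includes assume a ZFS build environment:

#include <libnvpair.h>
#include <sys/zfs_ioctl.h>	/* dmu_replay_record_t */

static nvlist_t *
recv_new_innvl(const char *snapname, dmu_replay_record_t *drr,
    int input_fd, boolean_t resumable)
{
	nvlist_t *innvl = fnvlist_alloc();

	fnvlist_add_string(innvl, "snapname", snapname);
	fnvlist_add_byte_array(innvl, "begin_record",
	    (uchar_t *)drr, sizeof (*drr));
	fnvlist_add_int32(innvl, "input_fd", input_fd);
	if (resumable)
		fnvlist_add_boolean(innvl, "resumable");
	return (innvl);
}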