Update core ZFS code from build 121 to build 141.

author: Brian Behlendorf <[email protected]> 2010-05-28 13:45:14 -0700
committer: Brian Behlendorf <[email protected]> 2010-05-28 13:45:14 -0700
commit: 428870ff734fdaccc342b33fc53cf94724409a46 (patch)
tree: 164e83c0ceda52a843795ed7cd9e95637d02c177 /lib/libzfs
parent: 6119cb885a976e175a6e827894accf657ff1984f (diff)
12 files changed, 3482 insertions, 1425 deletions
diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h
index f19e398f6..6f7fed62c 100644
--- a/lib/libzfs/include/libzfs.h
+++ b/lib/libzfs/include/libzfs.h
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_LIBZFS_H
@@ -66,7 +65,6 @@ enum {
 	EZFS_BADSTREAM,		/* bad backup stream */
 	EZFS_DSREADONLY,	/* dataset is readonly */
 	EZFS_VOLTOOBIG,		/* volume is too large for 32-bit system */
-	EZFS_VOLHASDATA,	/* volume already contains data */
 	EZFS_INVALIDNAME,	/* invalid dataset name */
 	EZFS_BADRESTORE,	/* unable to restore to destination */
 	EZFS_BADBACKUP,		/* backup failed */
@@ -85,17 +83,15 @@ enum {
 	EZFS_UMOUNTFAILED,	/* failed to unmount dataset */
 	EZFS_UNSHARENFSFAILED,	/* unshare(1M) failed */
 	EZFS_SHARENFSFAILED,	/* share(1M) failed */
-	EZFS_DEVLINKS,		/* failed to create zvol links */
 	EZFS_PERM,		/* permission denied */
 	EZFS_NOSPC,		/* out of space */
+	EZFS_FAULT,		/* bad address */
 	EZFS_IO,		/* I/O error */
 	EZFS_INTR,		/* signal received */
 	EZFS_ISSPARE,		/* device is a hot spare */
 	EZFS_INVALCONFIG,	/* invalid vdev configuration */
 	EZFS_RECURSIVE,		/* recursive dependency */
 	EZFS_NOHISTORY,		/* no history object */
-	EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
-	EZFS_SHAREISCSIFAILED,	/* iscsitgtd failed request to share */
 	EZFS_POOLPROPS,		/* couldn't retrieve pool props */
 	EZFS_POOL_NOTSUP,	/* ops not supported for this type of pool */
 	EZFS_POOL_INVALARG,	/* invalid argument for this pool operation */
@@ -103,7 +99,6 @@ enum {
 	EZFS_OPENFAILED,	/* open of device failed */
 	EZFS_NOCAP,		/* couldn't get capacity */
 	EZFS_LABELFAILED,	/* write of label failed */
-	EZFS_ISCSISVCUNAVAIL,	/* iscsi service unavailable */
 	EZFS_BADWHO,		/* invalid permission who */
 	EZFS_BADPERM,		/* invalid permission */
 	EZFS_BADPERMSET,	/* invalid permission set name */
@@ -119,6 +114,12 @@ enum {
 	EZFS_UNPLAYED_LOGS,	/* log device has unplayed logs */
 	EZFS_REFTAG_RELE,	/* snapshot release: tag not found */
 	EZFS_REFTAG_HOLD,	/* snapshot hold: tag already exists */
+	EZFS_TAGTOOLONG,	/* snapshot hold/rele: tag too long */
+	EZFS_PIPEFAILED,	/* pipe create failed */
+	EZFS_THREADCREATEFAILED, /* thread create failed */
+	EZFS_POSTSPLIT_ONLINE,	/* onlining a disk after splitting it */
+	EZFS_SCRUBBING,		/* currently scrubbing */
+	EZFS_NO_SCRUB,		/* no active scrub */
 	EZFS_UNKNOWN
 };
 
@@ -213,11 +214,19 @@ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
 extern int zpool_destroy(zpool_handle_t *);
 extern int zpool_add(zpool_handle_t *, nvlist_t *);
 
+typedef struct splitflags {
+	/* do not split, but return the config that would be split off */
+	int dryrun : 1;
+
+	/* after splitting, import the pool */
+	int import : 1;
+} splitflags_t;
+
 /*
  * Functions to manipulate pool and vdev state
  */
-extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
-extern int zpool_clear(zpool_handle_t *, const char *);
+extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
+extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
 
 extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
     vdev_state_t *);
@@ -226,9 +235,11 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
     const char *, nvlist_t *, int);
 extern int zpool_vdev_detach(zpool_handle_t *, const char *);
 extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
+    splitflags_t);
 
-extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
-extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
+extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
 extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
 
 extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
@@ -298,6 +309,7 @@ typedef enum {
 
 extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
 extern zpool_status_t zpool_import_status(nvlist_t *, char **);
+extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
 
 /*
  * Statistics and configuration functions.
@@ -319,23 +331,38 @@ extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
 /*
  * Search for pools to import
  */
+
+typedef struct importargs {
+	char **path;		/* a list of paths to search		*/
+	int paths;		/* number of paths to search		*/
+	char *poolname;		/* name of a pool to find		*/
+	uint64_t guid;		/* guid of a pool to find		*/
+	char *cachefile;	/* cachefile to use for import		*/
+	int can_be_active : 1;	/* can the pool be active?		*/
+	int unique : 1;		/* does 'poolname' already exist?	*/
+	int exists : 1;		/* set on return if pool already exists	*/
+} importargs_t;
+
+extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
+
+/* legacy pool search routines */
 extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
 extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
     char *, uint64_t);
-extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
-    char *);
-extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
-    uint64_t);
-extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);
 
 /*
  * Miscellaneous pool functions
  */
 struct zfs_cmd;
 
-extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
+extern const char *zfs_history_event_names[LOG_END];
+
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
+    boolean_t verbose);
 extern int zpool_upgrade(zpool_handle_t *, uint64_t);
 extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
+extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
+    nvlist_t ***, uint_t *);
 extern void zpool_set_history_str(const char *subcommand, int argc,
     char **argv, char *history_str);
 extern int zpool_stage_history(libzfs_handle_t *, const char *);
@@ -343,6 +370,8 @@ extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
     size_t len);
 extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
 extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
+extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
+    nvlist_t *);
 
 /*
  * Basic handle manipulations.  These functions do not create or destroy the
@@ -374,6 +403,8 @@ extern const char *zfs_prop_to_name(zfs_prop_t);
 extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
 extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
     zprop_source_t *, char *, size_t, boolean_t);
+extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t,
+    boolean_t);
 extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
     zprop_source_t *, char *, size_t);
 extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
@@ -381,10 +412,11 @@ extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
 extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
     char *propbuf, int proplen, boolean_t literal);
 extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
-extern int zfs_prop_inherit(zfs_handle_t *, const char *);
+extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
 extern const char *zfs_prop_values(zfs_prop_t);
 extern int zfs_prop_is_string(zfs_prop_t prop);
 extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
+extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
 
 typedef struct zprop_list {
 	int		pl_prop;
@@ -392,10 +424,11 @@ typedef struct zprop_list {
 	struct zprop_list *pl_next;
 	boolean_t	pl_all;
 	size_t		pl_width;
+	size_t		pl_recvd_width;
 	boolean_t	pl_fixed;
 } zprop_list_t;
 
-extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
+extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t);
 extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
 
 #define	ZFS_MOUNTPOINT_NONE	"none"
@@ -419,13 +452,24 @@ extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
     zfs_type_t);
 extern void zprop_free_list(zprop_list_t *);
 
+#define	ZFS_GET_NCOLS	5
+
+typedef enum {
+	GET_COL_NONE,
+	GET_COL_NAME,
+	GET_COL_PROPERTY,
+	GET_COL_VALUE,
+	GET_COL_RECVD,
+	GET_COL_SOURCE
+} zfs_get_column_t;
+
 /*
  * Functions for printing zfs or zpool properties
  */
 typedef struct zprop_get_cbdata {
 	int cb_sources;
-	int cb_columns[4];
-	int cb_colwidths[5];
+	zfs_get_column_t cb_columns[ZFS_GET_NCOLS];
+	int cb_colwidths[ZFS_GET_NCOLS + 1];
 	boolean_t cb_scripted;
 	boolean_t cb_literal;
 	boolean_t cb_first;
@@ -434,12 +478,8 @@ typedef struct zprop_get_cbdata {
 } zprop_get_cbdata_t;
 
 void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
-    const char *, const char *, zprop_source_t, const char *);
-
-#define	GET_COL_NAME		1
-#define	GET_COL_PROPERTY	2
-#define	GET_COL_VALUE		3
-#define	GET_COL_SOURCE		4
+    const char *, const char *, zprop_source_t, const char *,
+    const char *);
 
 /*
  * Iterator functions.
@@ -450,6 +490,7 @@ extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
 extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
 extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
 extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *);
 
 /*
  * Functions to create and destroy datasets.
@@ -463,11 +504,42 @@ extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
 extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
 extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
-extern int zfs_send(zfs_handle_t *, const char *, const char *,
-    boolean_t, boolean_t, boolean_t, boolean_t, int);
+
+typedef struct sendflags {
+	/* print informational messages (ie, -v was specified) */
+	int verbose : 1;
+
+	/* recursive send  (ie, -R) */
+	int replicate : 1;
+
+	/* for incrementals, do all intermediate snapshots */
+	int doall : 1; /* (ie, -I) */
+
+	/* if dataset is a clone, do incremental from its origin */
+	int fromorigin : 1;
+
+	/* do deduplication */
+	int dedup : 1;
+
+	/* send properties (ie, -p) */
+	int props : 1;
+} sendflags_t;
+
+typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
+
+extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
+    void *cb_arg, nvlist_t **debugnvp);
+
 extern int zfs_promote(zfs_handle_t *);
-extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t);
+extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
+    boolean_t, boolean_t);
+extern int zfs_hold_range(zfs_handle_t *, const char *, const char *,
+    const char *, boolean_t, boolean_t, snapfilter_cb_t, void *);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
+extern int zfs_release_range(zfs_handle_t *, const char *, const char *,
+    const char *, boolean_t);
+extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
 
 typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
     uid_t rid, uint64_t space);
@@ -482,6 +554,12 @@ typedef struct recvflags {
 	/* the destination is a prefix, not the exact fs (ie, -d) */
 	int isprefix : 1;
 
+	/*
+	 * Only the tail of the sent snapshot path is appended to the
+	 * destination to determine the received snapshot name (ie, -e).
+	 */
+	int istail : 1;
+
 	/* do not actually do the recv, just check if it would work (ie, -n) */
 	int dryrun : 1;
 
@@ -542,10 +620,6 @@ extern int zfs_unshareall_nfs(zfs_handle_t *);
 extern int zfs_unshareall_smb(zfs_handle_t *);
 extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
 extern int zfs_unshareall(zfs_handle_t *);
-extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
-extern int zfs_share_iscsi(zfs_handle_t *);
-extern int zfs_unshare_iscsi(zfs_handle_t *);
-extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
 extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
     void *, void *, int, zfs_share_op_t);
 
@@ -571,15 +645,10 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
     boolean_t *);
 
 /*
- * ftyp special.  Read the label from a given device.
+ * Label manipulation.
  */
 extern int zpool_read_label(int, nvlist_t **);
-
-/*
- * Create and remove zvol /dev links.
- */
-extern int zpool_create_zvol_links(zpool_handle_t *);
-extern int zpool_remove_zvol_links(zpool_handle_t *);
+extern int zpool_clear_label(int);
 
 /* is this zvol valid for use as a dump device? */
 extern int zvol_check_dump_config(char *);
@@ -600,6 +669,17 @@ int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
 extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
 extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
 
+/*
+ * Mappings between vdev and FRU.
+ */
+extern void libzfs_fru_refresh(libzfs_handle_t *);
+extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *);
+extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *);
+extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *,
+    const char *);
+extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *);
+extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h
index 70a1d1cbf..89c48c1c0 100644
--- a/lib/libzfs/include/libzfs_impl.h
+++ b/lib/libzfs/include/libzfs_impl.h
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -30,7 +30,6 @@
 #include <sys/dmu.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ioctl.h>
-#include <sys/zfs_acl.h>
 #include <sys/spa.h>
 #include <sys/nvpair.h>
 
@@ -38,6 +37,8 @@
 #include <libzfs.h>
 #include <libshare.h>
 
+#include <fm/libtopo.h>
+
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -47,6 +48,13 @@ extern "C" {
 #endif
 #define	VERIFY	verify
 
+typedef struct libzfs_fru {
+	char *zf_device;
+	char *zf_fru;
+	struct libzfs_fru *zf_chain;
+	struct libzfs_fru *zf_next;
+} libzfs_fru_t;
+
 struct libzfs_handle {
 	int libzfs_error;
 	int libzfs_fd;
@@ -65,7 +73,13 @@ struct libzfs_handle {
 	uint_t libzfs_shareflags;
 	boolean_t libzfs_mnttab_enable;
 	avl_tree_t libzfs_mnttab_cache;
+	int libzfs_pool_iter;
+	topo_hdl_t *libzfs_topo_hdl;
+	libzfs_fru_t **libzfs_fru_hash;
+	libzfs_fru_t *libzfs_fru_list;
+	char libzfs_chassis_id[256];
 };
+
 #define	ZFSSHARE_MISS	0x01	/* Didn't find entry in cache */
 
 struct zfs_handle {
@@ -77,6 +91,7 @@ struct zfs_handle {
 	dmu_objset_stats_t zfs_dmustats;
 	nvlist_t *zfs_props;
 	nvlist_t *zfs_user_props;
+	nvlist_t *zfs_recvd_props;
 	boolean_t zfs_mntcheck;
 	char *zfs_mntopts;
 	uint8_t *zfs_props_table;
@@ -112,7 +127,6 @@ typedef  enum {
  */
 typedef enum {
 	SHARED_NOT_SHARED = 0x0,
-	SHARED_ISCSI = 0x1,
 	SHARED_NFS = 0x2,
 	SHARED_SMB = 0x4
 } zfs_share_type_t;
@@ -172,9 +186,6 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
 
 int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
 
-int zvol_create_link(libzfs_handle_t *, const char *);
-int zvol_remove_link(libzfs_handle_t *, const char *);
-int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
 boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
 
 void namespace_clear(libzfs_handle_t *);
@@ -189,6 +200,9 @@ extern int zfs_parse_options(char *, zfs_share_proto_t);
 
 extern int zfs_unshare_proto(zfs_handle_t *,
     const char *, zfs_share_proto_t *);
+
+extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
index ff438b3d7..4328d38a2 100644
--- a/lib/libzfs/libzfs_changelist.c
+++ b/lib/libzfs/libzfs_changelist.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
  * Portions Copyright 2007 Ramprakash Jelari
@@ -116,32 +116,7 @@ changelist_prefix(prop_changelist_t *clp)
 		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
 			continue;
 
-		if (ZFS_IS_VOLUME(cn->cn_handle)) {
-			switch (clp->cl_realprop) {
-			case ZFS_PROP_NAME:
-				/*
-				 * If this was a rename, unshare the zvol, and
-				 * remove the /dev/zvol links.
-				 */
-				(void) zfs_unshare_iscsi(cn->cn_handle);
-
-				if (zvol_remove_link(cn->cn_handle->zfs_hdl,
-				    cn->cn_handle->zfs_name) != 0) {
-					ret = -1;
-					cn->cn_needpost = B_FALSE;
-					(void) zfs_share_iscsi(cn->cn_handle);
-				}
-				break;
-
-			case ZFS_PROP_VOLSIZE:
-				/*
-				 * If this was a change to the volume size, we
-				 * need to unshare and reshare the volume.
-				 */
-				(void) zfs_unshare_iscsi(cn->cn_handle);
-				break;
-			}
-		} else {
+		if (!ZFS_IS_VOLUME(cn->cn_handle)) {
 			/*
 			 * Do the property specific processing.
 			 */
@@ -234,32 +209,8 @@ changelist_postfix(prop_changelist_t *clp)
 
 		zfs_refresh_properties(cn->cn_handle);
 
-		if (ZFS_IS_VOLUME(cn->cn_handle)) {
-			/*
-			 * If we're doing a rename, recreate the /dev/zvol
-			 * links.
-			 */
-			if (clp->cl_realprop == ZFS_PROP_NAME &&
-			    zvol_create_link(cn->cn_handle->zfs_hdl,
-			    cn->cn_handle->zfs_name) != 0) {
-				errors++;
-			} else if (cn->cn_shared ||
-			    clp->cl_prop == ZFS_PROP_SHAREISCSI) {
-				if (zfs_prop_get(cn->cn_handle,
-				    ZFS_PROP_SHAREISCSI, shareopts,
-				    sizeof (shareopts), NULL, NULL, 0,
-				    B_FALSE) == 0 &&
-				    strcmp(shareopts, "off") == 0) {
-					errors +=
-					    zfs_unshare_iscsi(cn->cn_handle);
-				} else {
-					errors +=
-					    zfs_share_iscsi(cn->cn_handle);
-				}
-			}
-
+		if (ZFS_IS_VOLUME(cn->cn_handle))
 			continue;
-		}
 
 		/*
 		 * Remount if previously mounted or mountpoint was legacy,
@@ -658,8 +609,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
 
 	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
 	    clp->cl_prop != ZFS_PROP_SHARENFS &&
-	    clp->cl_prop != ZFS_PROP_SHARESMB &&
-	    clp->cl_prop != ZFS_PROP_SHAREISCSI)
+	    clp->cl_prop != ZFS_PROP_SHARESMB)
 		return (clp);
 
 	/*
diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c
index 94640d1b1..dc27238c9 100644
--- a/lib/libzfs/libzfs_config.c
+++ b/lib/libzfs/libzfs_config.c
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * The pool configuration repository is stored in /etc/zfs/zpool.cache as a
  * single packed nvlist.  While it would be nice to just read in this
@@ -313,21 +311,33 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
 	zpool_handle_t *zhp;
 	int ret;
 
-	if (namespace_reload(hdl) != 0)
+	/*
+	 * If someone makes a recursive call to zpool_iter(), we want to avoid
+	 * refreshing the namespace because that will invalidate the parent
+	 * context.  We allow recursive calls, but simply re-use the same
+	 * namespace AVL tree.
+	 */
+	if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0)
 		return (-1);
 
+	hdl->libzfs_pool_iter++;
 	for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
 	    cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
 
-		if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0)
+		if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) {
+			hdl->libzfs_pool_iter--;
 			return (-1);
+		}
 
 		if (zhp == NULL)
 			continue;
 
-		if ((ret = func(zhp, data)) != 0)
+		if ((ret = func(zhp, data)) != 0) {
+			hdl->libzfs_pool_iter--;
 			return (ret);
+		}
 	}
+	hdl->libzfs_pool_iter--;
 
 	return (0);
 }
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index ab9ba6b80..a3f5a7d0f 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -20,14 +20,12 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
-#include <libdevinfo.h>
 #include <libintl.h>
 #include <math.h>
 #include <stdio.h>
@@ -39,7 +37,6 @@
 #include <fcntl.h>
 #include <sys/mntent.h>
 #include <sys/mount.h>
-#include <sys/avl.h>
 #include <priv.h>
 #include <pwd.h>
 #include <grp.h>
@@ -49,6 +46,7 @@
 #include <aclutils.h>
 #include <directory.h>
 
+#include <sys/dnode.h>
 #include <sys/spa.h>
 #include <sys/zap.h>
 #include <libzfs.h>
@@ -58,7 +56,6 @@
 #include "libzfs_impl.h"
 #include "zfs_deleg.h"
 
-static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
 static int userquota_propname_decode(const char *propname, boolean_t zoned,
     zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
 
@@ -340,6 +337,44 @@ get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc)
 	return (0);
 }
 
+/*
+ * Utility function to get the received properties of the given object.
+ */
+static int
+get_recvd_props_ioctl(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *recvdprops;
+	zfs_cmd_t zc = { 0 };
+	int err;
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) {
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				return (-1);
+			}
+		} else {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops);
+	zcmd_free_nvlists(&zc);
+	if (err != 0)
+		return (-1);
+
+	nvlist_free(zhp->zfs_recvd_props);
+	zhp->zfs_recvd_props = recvdprops;
+
+	return (0);
+}
+
 static int
 put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc)
 {
@@ -401,70 +436,8 @@ zfs_refresh_properties(zfs_handle_t *zhp)
 static int
 make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc)
 {
-	char *logstr;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	/*
-	 * Preserve history log string.
-	 * any changes performed here will be
-	 * logged as an internal event.
-	 */
-	logstr = zhp->zfs_hdl->libzfs_log_str;
-	zhp->zfs_hdl->libzfs_log_str = NULL;
-
-top:
-	if (put_stats_zhdl(zhp, zc) != 0) {
-		zhp->zfs_hdl->libzfs_log_str = logstr;
+	if (put_stats_zhdl(zhp, zc) != 0)
 		return (-1);
-	}
-
-
-	if (zhp->zfs_dmustats.dds_inconsistent) {
-		zfs_cmd_t zc2 = { 0 };
-
-		/*
-		 * If it is dds_inconsistent, then we've caught it in
-		 * the middle of a 'zfs receive' or 'zfs destroy', and
-		 * it is inconsistent from the ZPL's point of view, so
-		 * can't be mounted.  However, it could also be that we
-		 * have crashed in the middle of one of those
-		 * operations, in which case we need to get rid of the
-		 * inconsistent state.  We do that by either rolling
-		 * back to the previous snapshot (which will fail if
-		 * there is none), or destroying the filesystem.  Note
-		 * that if we are still in the middle of an active
-		 * 'receive' or 'destroy', then the rollback and destroy
-		 * will fail with EBUSY and we will drive on as usual.
-		 */
-
-		(void) strlcpy(zc2.zc_name, zhp->zfs_name,
-		    sizeof (zc2.zc_name));
-
-		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
-			(void) zvol_remove_link(hdl, zhp->zfs_name);
-			zc2.zc_objset_type = DMU_OST_ZVOL;
-		} else {
-			zc2.zc_objset_type = DMU_OST_ZFS;
-		}
-
-		/*
-		 * If we can successfully destroy it, pretend that it
-		 * never existed.
-		 */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) {
-			zhp->zfs_hdl->libzfs_log_str = logstr;
-			errno = ENOENT;
-			return (-1);
-		}
-		/* If we can successfully roll it back, reset the stats */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) {
-			if (get_stats_ioctl(zhp, zc) != 0) {
-				zhp->zfs_hdl->libzfs_log_str = logstr;
-				return (-1);
-			}
-			goto top;
-		}
-	}
 
 	/*
 	 * We've managed to open the dataset and gather statistics.  Determine
@@ -486,8 +459,9 @@ top:
 	else
 		abort();	/* we should never see any other types */
 
-	zhp->zfs_hdl->libzfs_log_str = logstr;
-	zhp->zpool_hdl = zpool_handle(zhp);
+	if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL)
+		return (-1);
+
 	return (0);
 }
 
@@ -589,6 +563,7 @@ zfs_close(zfs_handle_t *zhp)
 		free(zhp->zfs_mntopts);
 	nvlist_free(zhp->zfs_props);
 	nvlist_free(zhp->zfs_user_props);
+	nvlist_free(zhp->zfs_recvd_props);
 	free(zhp);
 }
 
@@ -882,9 +857,14 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
 				goto error;
 			}
 
+			/*
+			 * Encode the prop name as
+			 * userquota@<hex-rid>-domain, to make it easy
+			 * for the kernel to decode.
+			 */
 			(void) snprintf(newpropname, sizeof (newpropname),
-			    "%s%s", zfs_userquota_prop_prefixes[uqtype],
-			    domain);
+			    "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype],
+			    (longlong_t)rid, domain);
 			valary[0] = uqtype;
 			valary[1] = rid;
 			valary[2] = intval;
@@ -960,19 +940,60 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
 			}
 			break;
 
-		case ZFS_PROP_SHAREISCSI:
-			if (strcmp(strval, "off") != 0 &&
-			    strcmp(strval, "on") != 0 &&
-			    strcmp(strval, "type=disk") != 0) {
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "'%s' must be 'on', 'off', or 'type=disk'"),
-				    propname);
-				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
-				goto error;
+		case ZFS_PROP_MLSLABEL:
+		{
+			/*
+			 * Verify the mlslabel string and convert to
+			 * internal hex label string.
+			 */
+
+			m_label_t *new_sl;
+			char *hex = NULL;	/* internal label string */
+
+			/* Default value is already OK. */
+			if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
+				break;
+
+			/* Verify the label can be converted to binary form */
+			if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) ||
+			    (str_to_label(strval, &new_sl, MAC_LABEL,
+			    L_NO_CORRECTION, NULL) == -1)) {
+				goto badlabel;
 			}
 
+			/* Now translate to hex internal label string */
+			if (label_to_str(new_sl, &hex, M_INTERNAL,
+			    DEF_NAMES) != 0) {
+				if (hex)
+					free(hex);
+				goto badlabel;
+			}
+			m_label_free(new_sl);
+
+			/* If string is already in internal form, we're done. */
+			if (strcmp(strval, hex) == 0) {
+				free(hex);
+				break;
+			}
+
+			/* Replace the label string with the internal form. */
+			(void) nvlist_remove(ret, zfs_prop_to_name(prop),
+			    DATA_TYPE_STRING);
+			verify(nvlist_add_string(ret, zfs_prop_to_name(prop),
+			    hex) == 0);
+			free(hex);
+
 			break;
 
+badlabel:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid mlslabel '%s'"), strval);
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+			m_label_free(new_sl);	/* OK if null */
+			goto error;
+
+		}
+
 		case ZFS_PROP_MOUNTPOINT:
 		{
 			namecheck_err_t why;
@@ -1226,6 +1247,90 @@ error:
 	return (NULL);
 }
 
+void
+zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
+    char *errbuf)
+{
+	switch (err) {
+
+	case ENOSPC:
+		/*
+		 * For quotas and reservations, ENOSPC indicates
+		 * something different; setting a quota or reservation
+		 * doesn't use any disk space.
+		 */
+		switch (prop) {
+		case ZFS_PROP_QUOTA:
+		case ZFS_PROP_REFQUOTA:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "size is less than current used or "
+			    "reserved space"));
+			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+			break;
+
+		case ZFS_PROP_RESERVATION:
+		case ZFS_PROP_REFRESERVATION:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "size is greater than available space"));
+			(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+			break;
+
+		default:
+			(void) zfs_standard_error(hdl, err, errbuf);
+			break;
+		}
+		break;
+
+	case EBUSY:
+		(void) zfs_standard_error(hdl, EBUSY, errbuf);
+		break;
+
+	case EROFS:
+		(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
+		break;
+
+	case ENOTSUP:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool and or dataset must be upgraded to set this "
+		    "property or value"));
+		(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+		break;
+
+	case ERANGE:
+		if (prop == ZFS_PROP_COMPRESSION) {
+			(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "property setting is not allowed on "
+			    "bootable datasets"));
+			(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+		} else {
+			(void) zfs_standard_error(hdl, err, errbuf);
+		}
+		break;
+
+	case EINVAL:
+		if (prop == ZPROP_INVAL) {
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		} else {
+			(void) zfs_standard_error(hdl, err, errbuf);
+		}
+		break;
+
+	case EOVERFLOW:
+		/*
+		 * This platform can't address a volume this big.
+		 */
+#ifdef _ILP32
+		if (prop == ZFS_PROP_VOLSIZE) {
+			(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
+			break;
+		}
+#endif
+		/* FALLTHROUGH */
+	default:
+		(void) zfs_standard_error(hdl, err, errbuf);
+	}
+}
+
 /*
  * Given a property name and value, set the property for the given dataset.
  */
@@ -1294,79 +1399,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
 	ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
 
 	if (ret != 0) {
-		switch (errno) {
-
-		case ENOSPC:
-			/*
-			 * For quotas and reservations, ENOSPC indicates
-			 * something different; setting a quota or reservation
-			 * doesn't use any disk space.
-			 */
-			switch (prop) {
-			case ZFS_PROP_QUOTA:
-			case ZFS_PROP_REFQUOTA:
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "size is less than current used or "
-				    "reserved space"));
-				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
-				break;
-
-			case ZFS_PROP_RESERVATION:
-			case ZFS_PROP_REFRESERVATION:
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "size is greater than available space"));
-				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
-				break;
-
-			default:
-				(void) zfs_standard_error(hdl, errno, errbuf);
-				break;
-			}
-			break;
-
-		case EBUSY:
-			if (prop == ZFS_PROP_VOLBLOCKSIZE)
-				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
-			else
-				(void) zfs_standard_error(hdl, EBUSY, errbuf);
-			break;
-
-		case EROFS:
-			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
-			break;
-
-		case ENOTSUP:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "pool and or dataset must be upgraded to set this "
-			    "property or value"));
-			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
-			break;
-
-		case ERANGE:
-			if (prop == ZFS_PROP_COMPRESSION) {
-				(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "property setting is not allowed on "
-				    "bootable datasets"));
-				(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
-			} else {
-				(void) zfs_standard_error(hdl, errno, errbuf);
-			}
-			break;
-
-		case EOVERFLOW:
-			/*
-			 * This platform can't address a volume this big.
-			 */
-#ifdef _ILP32
-			if (prop == ZFS_PROP_VOLSIZE) {
-				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
-				break;
-			}
-#endif
-			/* FALLTHROUGH */
-		default:
-			(void) zfs_standard_error(hdl, errno, errbuf);
-		}
+		zfs_setprop_error(hdl, prop, errno, errbuf);
 	} else {
 		if (do_prefix)
 			ret = changelist_postfix(cl);
@@ -1388,10 +1421,11 @@ error:
 }
 
 /*
- * Given a property, inherit the value from the parent dataset.
+ * Given a property, inherit the value from the parent dataset, or if received
+ * is TRUE, revert to the received value, if any.
  */
 int
-zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
+zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received)
 {
 	zfs_cmd_t zc = { 0 };
 	int ret;
@@ -1403,6 +1437,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
 
+	zc.zc_cookie = received;
 	if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
 		/*
 		 * For user properties, the amount of work we have to do is very
@@ -1429,7 +1464,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
 	if (zfs_prop_readonly(prop))
 		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
 
-	if (!zfs_prop_inheritable(prop))
+	if (!zfs_prop_inheritable(prop) && !received)
 		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
 
 	/*
@@ -1534,6 +1569,26 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
 	return (value);
 }
 
+static boolean_t
+zfs_is_recvd_props_mode(zfs_handle_t *zhp)
+{
+	return (zhp->zfs_props == zhp->zfs_recvd_props);
+}
+
+static void
+zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie)
+{
+	*cookie = (uint64_t)(uintptr_t)zhp->zfs_props;
+	zhp->zfs_props = zhp->zfs_recvd_props;
+}
+
+static void
+zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie)
+{
+	zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie;
+	*cookie = 0;
+}
+
 /*
  * Internal function for getting a numeric property.  Both zfs_prop_get() and
  * zfs_prop_get_int() are built using this interface.
@@ -1552,6 +1607,7 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
 	struct mnttab mnt;
 	char *mntopt_on = NULL;
 	char *mntopt_off = NULL;
+	boolean_t received = zfs_is_recvd_props_mode(zhp);
 
 	*source = NULL;
 
@@ -1627,6 +1683,9 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
 	case ZFS_PROP_NBMAND:
 		*val = getprop_uint64(zhp, prop, source);
 
+		if (received)
+			break;
+
 		if (hasmntopt(&mnt, mntopt_on) && !*val) {
 			*val = B_TRUE;
 			if (src)
@@ -1639,22 +1698,17 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
 		break;
 
 	case ZFS_PROP_CANMOUNT:
-		*val = getprop_uint64(zhp, prop, source);
-		if (*val != ZFS_CANMOUNT_ON)
-			*source = zhp->zfs_name;
-		else
-			*source = "";	/* default */
-		break;
-
+	case ZFS_PROP_VOLSIZE:
 	case ZFS_PROP_QUOTA:
 	case ZFS_PROP_REFQUOTA:
 	case ZFS_PROP_RESERVATION:
 	case ZFS_PROP_REFRESERVATION:
 		*val = getprop_uint64(zhp, prop, source);
-		if (*val == 0)
-			*source = "";	/* default */
-		else
+
+		if (*source == NULL) {
+			/* not default, must be local */
 			*source = zhp->zfs_name;
+		}
 		break;
 
 	case ZFS_PROP_MOUNTED:
@@ -1696,11 +1750,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
 			/*
 			 * If we tried to use a default value for a
 			 * readonly property, it means that it was not
-			 * present; return an error.
+			 * present.
 			 */
 			if (zfs_prop_readonly(prop) &&
-			    *source && (*source)[0] == '\0') {
-				return (-1);
+			    *source != NULL && (*source)[0] == '\0') {
+				*source = NULL;
 			}
 			break;
 
@@ -1730,6 +1784,8 @@ get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
 		*srctype = ZPROP_SRC_NONE;
 	} else if (source[0] == '\0') {
 		*srctype = ZPROP_SRC_DEFAULT;
+	} else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) {
+		*srctype = ZPROP_SRC_RECEIVED;
 	} else {
 		if (strcmp(source, zhp->zfs_name) == 0) {
 			*srctype = ZPROP_SRC_LOCAL;
@@ -1741,6 +1797,43 @@ get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
 
 }
 
+int
+zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf,
+    size_t proplen, boolean_t literal)
+{
+	zfs_prop_t prop;
+	int err = 0;
+
+	if (zhp->zfs_recvd_props == NULL)
+		if (get_recvd_props_ioctl(zhp) != 0)
+			return (-1);
+
+	prop = zfs_name_to_prop(propname);
+
+	if (prop != ZPROP_INVAL) {
+		uint64_t cookie;
+		if (!nvlist_exists(zhp->zfs_recvd_props, propname))
+			return (-1);
+		zfs_set_recvd_props_mode(zhp, &cookie);
+		err = zfs_prop_get(zhp, prop, propbuf, proplen,
+		    NULL, NULL, 0, literal);
+		zfs_unset_recvd_props_mode(zhp, &cookie);
+	} else if (zfs_prop_userquota(propname)) {
+		return (-1);
+	} else {
+		nvlist_t *propval;
+		char *recvdval;
+		if (nvlist_lookup_nvlist(zhp->zfs_recvd_props,
+		    propname, &propval) != 0)
+			return (-1);
+		verify(nvlist_lookup_string(propval, ZPROP_VALUE,
+		    &recvdval) == 0);
+		(void) strlcpy(propbuf, recvdval, proplen);
+	}
+
+	return (err == 0 ? 0 : -1);
+}
+
 /*
  * Retrieve a property from the given object.  If 'literal' is specified, then
  * numbers are left as exact values.  Otherwise, numbers are converted to a
@@ -1756,6 +1849,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 	uint64_t val;
 	char *str;
 	const char *strval;
+	boolean_t received = zfs_is_recvd_props_mode(zhp);
 
 	/*
 	 * Check to see if this property applies to our object
@@ -1763,6 +1857,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
 		return (-1);
 
+	if (received && zfs_prop_readonly(prop))
+		return (-1);
+
 	if (src)
 		*src = ZPROP_SRC_NONE;
 
@@ -1802,10 +1899,22 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		if (str[0] == '/') {
 			char buf[MAXPATHLEN];
 			char *root = buf;
-			const char *relpath = zhp->zfs_name + strlen(source);
+			const char *relpath;
 
-			if (relpath[0] == '/')
-				relpath++;
+			/*
+			 * If we inherit the mountpoint, even from a dataset
+			 * with a received value, the source will be the path of
+			 * the dataset we inherit from. If source is
+			 * ZPROP_SOURCE_VAL_RECVD, the received value is not
+			 * inherited.
+			 */
+			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) {
+				relpath = "";
+			} else {
+				relpath = zhp->zfs_name + strlen(source);
+				if (relpath[0] == '/')
+					relpath++;
+			}
 
 			if ((zpool_get_prop(zhp->zpool_hdl,
 			    ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) ||
@@ -1884,8 +1993,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 	case ZFS_PROP_COMPRESSRATIO:
 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
 			return (-1);
-		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
-		    val / 100, (longlong_t)val % 100);
+		(void) snprintf(propbuf, proplen, "%llu.%02llux",
+		    (u_longlong_t)(val / 100),
+		    (u_longlong_t)(val % 100));
 		break;
 
 	case ZFS_PROP_TYPE:
@@ -1930,6 +2040,44 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
 		break;
 
+	case ZFS_PROP_MLSLABEL:
+		{
+			m_label_t *new_sl = NULL;
+			char *ascii = NULL;	/* human readable label */
+
+			(void) strlcpy(propbuf,
+			    getprop_string(zhp, prop, &source), proplen);
+
+			if (literal || (strcasecmp(propbuf,
+			    ZFS_MLSLABEL_DEFAULT) == 0))
+				break;
+
+			/*
+			 * Try to translate the internal hex string to
+			 * human-readable output.  If there are any
+			 * problems just use the hex string.
+			 */
+
+			if (str_to_label(propbuf, &new_sl, MAC_LABEL,
+			    L_NO_CORRECTION, NULL) == -1) {
+				m_label_free(new_sl);
+				break;
+			}
+
+			if (label_to_str(new_sl, &ascii, M_LABEL,
+			    DEF_NAMES) != 0) {
+				if (ascii)
+					free(ascii);
+				m_label_free(new_sl);
+				break;
+			}
+			m_label_free(new_sl);
+
+			(void) strlcpy(propbuf, ascii, proplen);
+			free(ascii);
+		}
+		break;
+
 	default:
 		switch (zfs_prop_get_type(prop)) {
 		case PROP_TYPE_NUMBER:
@@ -2371,6 +2519,27 @@ zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 }
 
 /*
+ * Is one dataset name a child dataset of another?
+ *
+ * Needs to handle these cases:
+ * Dataset 1	"a/foo"		"a/foo"		"a/foo"		"a/foo"
+ * Dataset 2	"a/fo"		"a/foobar"	"a/bar/baz"	"a/foo/bar"
+ * Descendant?	No.		No.		No.		Yes.
+ */
+static boolean_t
+is_descendant(const char *ds1, const char *ds2)
+{
+	size_t d1len = strlen(ds1);
+
+	/* ds2 can't be a descendant if it's smaller */
+	if (strlen(ds2) < d1len)
+		return (B_FALSE);
+
+	/* otherwise, compare strings and verify that there's a '/' char */
+	return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0));
+}
+
+/*
  * Given a complete name, return just the portion that refers to the parent.
  * Can return NULL if this is a pool.
  */
@@ -2405,6 +2574,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
 	char *slash;
 	zfs_handle_t *zhp;
 	char errbuf[1024];
+	uint64_t is_zoned;
 
 	(void) snprintf(errbuf, sizeof (errbuf),
 	    dgettext(TEXT_DOMAIN, "cannot create '%s'"), path);
@@ -2447,9 +2617,12 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
 			return (zfs_standard_error(hdl, errno, errbuf));
 	}
 
-	*zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	if (zoned != NULL)
+		*zoned = is_zoned;
+
 	/* we are in a non-global zone, but parent is in the global zone */
-	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
+	if (getzoneid() != GLOBAL_ZONEID && !is_zoned) {
 		(void) zfs_standard_error(hdl, EPERM, errbuf);
 		zfs_close(zhp);
 		return (-1);
@@ -2581,11 +2754,10 @@ int
 zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
 {
 	int prefix;
-	uint64_t zoned;
 	char *path_copy;
 	int rc;
 
-	if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0)
+	if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0)
 		return (-1);
 
 	if ((path_copy = strdup(path)) != NULL) {
@@ -2699,18 +2871,6 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
 	/* create the dataset */
 	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
 
-	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(hdl, path);
-		if (ret) {
-			(void) zfs_standard_error(hdl, errno,
-			    dgettext(TEXT_DOMAIN,
-			    "Volume successfully created, but device links "
-			    "were not created"));
-			zcmd_free_nvlists(&zc);
-			return (-1);
-		}
-	}
-
 	zcmd_free_nvlists(&zc);
 
 	/* check for failure */
@@ -2773,18 +2933,6 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer)
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
 	if (ZFS_IS_VOLUME(zhp)) {
-		/*
-		 * If user doesn't have permissions to unshare volume, then
-		 * abort the request.  This would only happen for a
-		 * non-privileged user.
-		 */
-		if (zfs_unshare_iscsi(zhp) != 0) {
-			return (-1);
-		}
-
-		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
-			return (-1);
-
 		zc.zc_objset_type = DMU_OST_ZVOL;
 	} else {
 		zc.zc_objset_type = DMU_OST_ZFS;
@@ -2809,13 +2957,13 @@ struct destroydata {
 };
 
 static int
-zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
+zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
 {
 	struct destroydata *dd = arg;
 	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	boolean_t closezhp = dd->closezhp;
-	int rv;
+	int rv = 0;
 
 	(void) strlcpy(name, zhp->zfs_name, sizeof (name));
 	(void) strlcat(name, "@", sizeof (name));
@@ -2827,17 +2975,9 @@ zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
 		zfs_close(szhp);
 	}
 
-	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		(void) zvol_remove_link(zhp->zfs_hdl, name);
-		/*
-		 * NB: this is simply a best-effort.  We don't want to
-		 * return an error, because then we wouldn't visit all
-		 * the volumes.
-		 */
-	}
-
 	dd->closezhp = B_TRUE;
-	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
+	if (!dd->gotone)
+		rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg);
 	if (closezhp)
 		zfs_close(zhp);
 	return (rv);
@@ -2854,7 +2994,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
 	struct destroydata dd = { 0 };
 
 	dd.snapname = snapname;
-	(void) zfs_remove_link_cb(zhp, &dd);
+	(void) zfs_check_snap_cb(zhp, &dd);
 
 	if (!dd.gotone) {
 		return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
@@ -2972,70 +3112,11 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
 			return (zfs_standard_error(zhp->zfs_hdl, errno,
 			    errbuf));
 		}
-	} else if (ZFS_IS_VOLUME(zhp)) {
-		ret = zvol_create_link(zhp->zfs_hdl, target);
 	}
 
 	return (ret);
 }
 
-typedef struct promote_data {
-	char cb_mountpoint[MAXPATHLEN];
-	const char *cb_target;
-	const char *cb_errbuf;
-	uint64_t cb_pivot_txg;
-} promote_data_t;
-
-static int
-promote_snap_cb(zfs_handle_t *zhp, void *data)
-{
-	promote_data_t *pd = data;
-	zfs_handle_t *szhp;
-	char snapname[MAXPATHLEN];
-	int rv = 0;
-
-	/* We don't care about snapshots after the pivot point */
-	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	/* Remove the device link if it's a zvol. */
-	if (ZFS_IS_VOLUME(zhp))
-		(void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
-
-	/* Check for conflicting names */
-	(void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
-	(void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
-	szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
-	if (szhp != NULL) {
-		zfs_close(szhp);
-		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
-		    "snapshot name '%s' from origin \n"
-		    "conflicts with '%s' from target"),
-		    zhp->zfs_name, snapname);
-		rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
-	}
-	zfs_close(zhp);
-	return (rv);
-}
-
-static int
-promote_snap_done_cb(zfs_handle_t *zhp, void *data)
-{
-	promote_data_t *pd = data;
-
-	/* We don't care about snapshots after the pivot point */
-	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
-		/* Create the device link if it's a zvol. */
-		if (ZFS_IS_VOLUME(zhp))
-			(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
-	}
-
-	zfs_close(zhp);
-	return (0);
-}
-
 /*
  * Promotes the given clone fs to be the clone parent.
  */
@@ -3045,10 +3126,7 @@ zfs_promote(zfs_handle_t *zhp)
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	zfs_cmd_t zc = { 0 };
 	char parent[MAXPATHLEN];
-	char *cp;
 	int ret;
-	zfs_handle_t *pzhp;
-	promote_data_t pd;
 	char errbuf[1024];
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -3066,29 +3144,7 @@ zfs_promote(zfs_handle_t *zhp)
 		    "not a cloned filesystem"));
 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 	}
-	cp = strchr(parent, '@');
-	*cp = '\0';
-
-	/* Walk the snapshots we will be moving */
-	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
-	if (pzhp == NULL)
-		return (-1);
-	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
-	zfs_close(pzhp);
-	pd.cb_target = zhp->zfs_name;
-	pd.cb_errbuf = errbuf;
-	pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
-	if (pzhp == NULL)
-		return (-1);
-	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
-	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
-	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
-	if (ret != 0) {
-		zfs_close(pzhp);
-		return (-1);
-	}
 
-	/* issue the ioctl */
 	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
 	    sizeof (zc.zc_value));
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -3097,62 +3153,18 @@ zfs_promote(zfs_handle_t *zhp)
 	if (ret != 0) {
 		int save_errno = errno;
 
-		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
-		zfs_close(pzhp);
-
 		switch (save_errno) {
 		case EEXIST:
-			/*
-			 * There is a conflicting snapshot name.  We
-			 * should have caught this above, but they could
-			 * have renamed something in the mean time.
-			 */
+			/* There is a conflicting snapshot name. */
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "conflicting snapshot name from parent '%s'"),
-			    parent);
+			    "conflicting snapshot '%s' from parent '%s'"),
+			    zc.zc_string, parent);
 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
 
 		default:
 			return (zfs_standard_error(hdl, save_errno, errbuf));
 		}
-	} else {
-		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
-	}
-
-	zfs_close(pzhp);
-	return (ret);
-}
-
-struct createdata {
-	const char *cd_snapname;
-	int cd_ifexists;
-};
-
-static int
-zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
-{
-	struct createdata *cd = arg;
-	int ret;
-
-	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		char name[MAXPATHLEN];
-
-		(void) strlcpy(name, zhp->zfs_name, sizeof (name));
-		(void) strlcat(name, "@", sizeof (name));
-		(void) strlcat(name, cd->cd_snapname, sizeof (name));
-		(void) zvol_create_link_common(zhp->zfs_hdl, name,
-		    cd->cd_ifexists);
-		/*
-		 * NB: this is simply a best-effort.  We don't want to
-		 * return an error, because then we wouldn't visit all
-		 * the volumes.
-		 */
 	}
-
-	ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
-
-	zfs_close(zhp);
-
 	return (ret);
 }
 
@@ -3216,31 +3228,11 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
 	 * if it was recursive, the one that actually failed will be in
 	 * zc.zc_name.
 	 */
-	if (ret != 0)
+	if (ret != 0) {
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
-
-	if (ret == 0 && recursive) {
-		struct createdata cd;
-
-		cd.cd_snapname = delim + 1;
-		cd.cd_ifexists = B_FALSE;
-		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
-	}
-	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(zhp->zfs_hdl, path);
-		if (ret != 0) {
-			(void) zfs_standard_error(hdl, errno,
-			    dgettext(TEXT_DOMAIN,
-			    "Volume successfully snapshotted, but device links "
-			    "were not created"));
-			zfs_close(zhp);
-			return (-1);
-		}
-	}
-
-	if (ret != 0)
 		(void) zfs_standard_error(hdl, errno, errbuf);
+	}
 
 	zfs_close(zhp);
 
@@ -3343,8 +3335,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
 	 */
 
 	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
-			return (-1);
 		if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
 			return (-1);
 		old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
@@ -3382,10 +3372,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
 	 */
 	if ((zhp->zfs_type == ZFS_TYPE_VOLUME) &&
 	    (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) {
-		if (err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name)) {
-			zfs_close(zhp);
-			return (err);
-		}
 		if (restore_resv) {
 			new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
 			if (old_volsize != new_volsize)
@@ -3500,14 +3486,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 
 		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-		uint64_t unused;
 
 		/* validate parents */
-		if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0)
+		if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0)
 			return (-1);
 
-		(void) parent_name(target, parent, sizeof (parent));
-
 		/* make sure we're in the same pool */
 		verify((delim = strchr(target, '/')) != NULL);
 		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
@@ -3518,10 +3501,9 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 		}
 
 		/* new name cannot be a child of the current dataset name */
-		if (strncmp(parent, zhp->zfs_name,
-		    strlen(zhp->zfs_name)) == 0) {
+		if (is_descendant(zhp->zfs_name, target)) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "New dataset name cannot be a descendent of "
+			    "New dataset name cannot be a descendant of "
 			    "current dataset name"));
 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 		}
@@ -3538,7 +3520,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 	}
 
 	if (recursive) {
-		struct destroydata dd;
 
 		parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
 		if (parentname == NULL) {
@@ -3553,15 +3534,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 			goto error;
 		}
 
-		dd.snapname = delim + 1;
-		dd.gotone = B_FALSE;
-		dd.closezhp = B_TRUE;
-
-		/* We remove any zvol links prior to renaming them */
-		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
-		if (ret) {
-			goto error;
-		}
 	} else {
 		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL)
 			return (-1);
@@ -3609,27 +3581,10 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 		 * On failure, we still want to remount any filesystems that
 		 * were previously mounted, so we don't alter the system state.
 		 */
-		if (recursive) {
-			struct createdata cd;
-
-			/* only create links for datasets that had existed */
-			cd.cd_snapname = delim + 1;
-			cd.cd_ifexists = B_TRUE;
-			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
-			    &cd);
-		} else {
+		if (!recursive)
 			(void) changelist_postfix(cl);
-		}
 	} else {
-		if (recursive) {
-			struct createdata cd;
-
-			/* only create links for datasets that had existed */
-			cd.cd_snapname = strchr(target, '@') + 1;
-			cd.cd_ifexists = B_TRUE;
-			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
-			    &cd);
-		} else {
+		if (!recursive) {
 			changelist_rename(cl, zfs_get_name(zhp), target);
 			ret = changelist_postfix(cl);
 		}
@@ -3648,143 +3603,19 @@ error:
 	return (ret);
 }
 
-/*
- * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
- * poke devfsadm to create the /dev link, and then wait for the link to appear.
- */
-int
-zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
-{
-	return (zvol_create_link_common(hdl, dataset, B_FALSE));
-}
-
-static int
-zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
-{
-	zfs_cmd_t zc = { 0 };
-	di_devlink_handle_t dhdl;
-	priv_set_t *priv_effective;
-	int privileged;
-
-	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
-	/*
-	 * Issue the appropriate ioctl.
-	 */
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
-		switch (errno) {
-		case EEXIST:
-			/*
-			 * Silently ignore the case where the link already
-			 * exists.  This allows 'zfs volinit' to be run multiple
-			 * times without errors.
-			 */
-			return (0);
-
-		case ENOENT:
-			/*
-			 * Dataset does not exist in the kernel.  If we
-			 * don't care (see zfs_rename), then ignore the
-			 * error quietly.
-			 */
-			if (ifexists) {
-				return (0);
-			}
-
-			/* FALLTHROUGH */
-
-		default:
-			return (zfs_standard_error_fmt(hdl, errno,
-			    dgettext(TEXT_DOMAIN, "cannot create device links "
-			    "for '%s'"), dataset));
-		}
-	}
-
-	/*
-	 * If privileged call devfsadm and wait for the links to
-	 * magically appear.
-	 * Otherwise, print out an informational message.
-	 */
-
-	priv_effective = priv_allocset();
-	(void) getppriv(PRIV_EFFECTIVE, priv_effective);
-	privileged = (priv_isfullset(priv_effective) == B_TRUE);
-	priv_freeset(priv_effective);
-
-	if (privileged) {
-		if ((dhdl = di_devlink_init(ZFS_DRIVER,
-		    DI_MAKE_LINK)) == NULL) {
-			zfs_error_aux(hdl, strerror(errno));
-			(void) zfs_error_fmt(hdl, errno,
-			    dgettext(TEXT_DOMAIN, "cannot create device links "
-			    "for '%s'"), dataset);
-			(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
-			return (-1);
-		} else {
-			(void) di_devlink_fini(&dhdl);
-		}
-	} else {
-		char pathname[MAXPATHLEN];
-		struct stat64 statbuf;
-		int i;
-
-#define	MAX_WAIT	10
-
-		/*
-		 * This is the poor mans way of waiting for the link
-		 * to show up.  If after 10 seconds we still don't
-		 * have it, then print out a message.
-		 */
-		(void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
-		    dataset);
-
-		for (i = 0; i != MAX_WAIT; i++) {
-			if (stat64(pathname, &statbuf) == 0)
-				break;
-			(void) sleep(1);
-		}
-		if (i == MAX_WAIT)
-			(void) printf(gettext("%s may not be immediately "
-			    "available\n"), pathname);
-	}
-
-	return (0);
-}
-
-/*
- * Remove a minor node for the given zvol and the associated /dev links.
- */
-int
-zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+nvlist_t *
+zfs_get_user_props(zfs_handle_t *zhp)
 {
-	zfs_cmd_t zc = { 0 };
-
-	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
-		switch (errno) {
-		case ENXIO:
-			/*
-			 * Silently ignore the case where the link no longer
-			 * exists, so that 'zfs volfini' can be run multiple
-			 * times without errors.
-			 */
-			return (0);
-
-		default:
-			return (zfs_standard_error_fmt(hdl, errno,
-			    dgettext(TEXT_DOMAIN, "cannot remove device "
-			    "links for '%s'"), dataset));
-		}
-	}
-
-	return (0);
+	return (zhp->zfs_user_props);
 }
 
 nvlist_t *
-zfs_get_user_props(zfs_handle_t *zhp)
+zfs_get_recvd_props(zfs_handle_t *zhp)
 {
-	return (zhp->zfs_user_props);
+	if (zhp->zfs_recvd_props == NULL)
+		if (get_recvd_props_ioctl(zhp) != 0)
+			return (NULL);
+	return (zhp->zfs_recvd_props);
 }
 
 /*
@@ -3796,10 +3627,12 @@ zfs_get_user_props(zfs_handle_t *zhp)
  *        for new unique user properties and add them to the list.
  *
  *      - For non fixed-width properties, keep track of the maximum width seen
- *        so that we can size the column appropriately.
+ *        so that we can size the column appropriately. If the user has
+ *        requested received property values, we also need to compute the width
+ *        of the RECEIVED column.
  */
 int
-zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
+zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received)
 {
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	zprop_list_t *entry;
@@ -3870,12 +3703,24 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
 				if (strlen(buf) > entry->pl_width)
 					entry->pl_width = strlen(buf);
 			}
-		} else if (nvlist_lookup_nvlist(userprops,
-		    entry->pl_user_prop, &propval)  == 0) {
-			verify(nvlist_lookup_string(propval,
-			    ZPROP_VALUE, &strval) == 0);
-			if (strlen(strval) > entry->pl_width)
-				entry->pl_width = strlen(strval);
+			if (received && zfs_prop_get_recvd(zhp,
+			    zfs_prop_to_name(entry->pl_prop),
+			    buf, sizeof (buf), B_FALSE) == 0)
+				if (strlen(buf) > entry->pl_recvd_width)
+					entry->pl_recvd_width = strlen(buf);
+		} else {
+			if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop,
+			    &propval) == 0) {
+				verify(nvlist_lookup_string(propval,
+				    ZPROP_VALUE, &strval) == 0);
+				if (strlen(strval) > entry->pl_width)
+					entry->pl_width = strlen(strval);
+			}
+			if (received && zfs_prop_get_recvd(zhp,
+			    entry->pl_user_prop,
+			    buf, sizeof (buf), B_FALSE) == 0)
+				if (strlen(buf) > entry->pl_recvd_width)
+					entry->pl_recvd_width = strlen(buf);
 		}
 	}
 
@@ -3883,52 +3728,6 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
 }
 
 int
-zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
-{
-	zfs_cmd_t zc = { 0 };
-	nvlist_t *nvp;
-	gid_t gid;
-	uid_t uid;
-	const gid_t *groups;
-	int group_cnt;
-	int error;
-
-	if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
-		return (no_memory(hdl));
-
-	uid = ucred_geteuid(cred);
-	gid = ucred_getegid(cred);
-	group_cnt = ucred_getgroups(cred, &groups);
-
-	if (uid == (uid_t)-1 || gid == (uid_t)-1 || group_cnt == (uid_t)-1)
-		return (1);
-
-	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) {
-		nvlist_free(nvp);
-		return (1);
-	}
-
-	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) {
-		nvlist_free(nvp);
-		return (1);
-	}
-
-	if (nvlist_add_uint32_array(nvp,
-	    ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) {
-		nvlist_free(nvp);
-		return (1);
-	}
-	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
-	if (zcmd_write_src_nvlist(hdl, &zc, nvp))
-		return (-1);
-
-	error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc);
-	nvlist_free(nvp);
-	return (error);
-}
-
-int
 zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
     char *resource, void *export, void *sharetab,
     int sharemax, zfs_share_op_t operation)
@@ -3966,9 +3765,11 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
 		nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr);
 
 		/*
-		 * We leave user:props in the nvlist, so there will be
-		 * some ZPROP_INVAL.  To be extra safe, don't prune
-		 * those.
+		 * User properties will result in ZPROP_INVAL, and since we
+		 * only know how to prune standard ZFS properties, we always
+		 * leave these in the list.  This can also happen if we
+		 * encounter an unknown DSL property (when running older
+		 * software, for example).
 		 */
 		if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE)
 			(void) nvlist_remove(zhp->zfs_props,
@@ -4097,15 +3898,18 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
 
 int
 zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
-    boolean_t recursive)
+    boolean_t recursive, boolean_t temphold, boolean_t enoent_ok)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
-	(void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string));
+	if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
+	    >= sizeof (zc.zc_string))
+		return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
 	zc.zc_cookie = recursive;
+	zc.zc_temphold = temphold;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
 		char errbuf[ZFS_MAXNAMELEN+32];
@@ -4117,6 +3921,14 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "cannot hold '%s@%s'"), zc.zc_name, snapname);
 		switch (errno) {
+		case E2BIG:
+			/*
+			 * Temporary tags wind up having the ds object id
+			 * prepended. So even if we passed the length check
+			 * above, it's still possible for the tag to wind
+			 * up being slightly too long.
+			 */
+			return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf));
 		case ENOTSUP:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "pool must be upgraded"));
@@ -4125,6 +3937,10 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 		case EEXIST:
 			return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
+		case ENOENT:
+			if (enoent_ok)
+				return (0);
+			/* FALLTHROUGH */
 		default:
 			return (zfs_standard_error_fmt(hdl, errno, errbuf));
 		}
@@ -4133,6 +3949,102 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
 	return (0);
 }
 
+struct hold_range_arg {
+	zfs_handle_t	*origin;
+	const char	*fromsnap;
+	const char	*tosnap;
+	char		lastsnapheld[ZFS_MAXNAMELEN];
+	const char	*tag;
+	boolean_t	temphold;
+	boolean_t	seento;
+	boolean_t	seenfrom;
+	boolean_t	holding;
+	boolean_t	recursive;
+	snapfilter_cb_t	*filter_cb;
+	void		*filter_cb_arg;
+};
+
+static int
+zfs_hold_range_one(zfs_handle_t *zhp, void *arg)
+{
+	struct hold_range_arg *hra = arg;
+	const char *thissnap;
+	int error;
+
+	thissnap = strchr(zfs_get_name(zhp), '@') + 1;
+
+	if (hra->fromsnap && !hra->seenfrom &&
+	    strcmp(hra->fromsnap, thissnap) == 0)
+		hra->seenfrom = B_TRUE;
+
+	/* snap is older or newer than the desired range, ignore it */
+	if (hra->seento || !hra->seenfrom) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (!hra->seento && strcmp(hra->tosnap, thissnap) == 0)
+		hra->seento = B_TRUE;
+
+	if (hra->filter_cb != NULL &&
+	    hra->filter_cb(zhp, hra->filter_cb_arg) == B_FALSE) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (hra->holding) {
+		/* We could be racing with destroy, so ignore ENOENT. */
+		error = zfs_hold(hra->origin, thissnap, hra->tag,
+		    hra->recursive, hra->temphold, B_TRUE);
+		if (error == 0) {
+			(void) strlcpy(hra->lastsnapheld, zfs_get_name(zhp),
+			    sizeof (hra->lastsnapheld));
+		}
+	} else {
+		error = zfs_release(hra->origin, thissnap, hra->tag,
+		    hra->recursive);
+	}
+
+	zfs_close(zhp);
+	return (error);
+}
+
+/*
+ * Add a user hold on the set of snapshots starting with fromsnap up to
+ * and including tosnap. If we're unable to to acquire a particular hold,
+ * undo any holds up to that point.
+ */
+int
+zfs_hold_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    const char *tag, boolean_t recursive, boolean_t temphold,
+    snapfilter_cb_t filter_cb, void *cbarg)
+{
+	struct hold_range_arg arg = { 0 };
+	int error;
+
+	arg.origin = zhp;
+	arg.fromsnap = fromsnap;
+	arg.tosnap = tosnap;
+	arg.tag = tag;
+	arg.temphold = temphold;
+	arg.holding = B_TRUE;
+	arg.recursive = recursive;
+	arg.seenfrom = (fromsnap == NULL);
+	arg.filter_cb = filter_cb;
+	arg.filter_cb_arg = cbarg;
+
+	error = zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg);
+
+	/*
+	 * Make sure we either hold the entire range or none.
+	 */
+	if (error && arg.lastsnapheld[0] != '\0') {
+		(void) zfs_release_range(zhp, fromsnap,
+		    (const char *)arg.lastsnapheld, tag, recursive);
+	}
+	return (error);
+}
+
 int
 zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
     boolean_t recursive)
@@ -4142,7 +4054,9 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
-	(void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string));
+	if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
+	    >= sizeof (zc.zc_string))
+		return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
 	zc.zc_cookie = recursive;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
@@ -4153,7 +4067,8 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
 		 * zc.zc_name.
 		 */
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot release '%s@%s'"), zc.zc_name, snapname);
+		    "cannot release '%s' from '%s@%s'"), tag, zc.zc_name,
+		    snapname);
 		switch (errno) {
 		case ESRCH:
 			return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
@@ -4170,3 +4085,61 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
 
 	return (0);
 }
+
+/*
+ * Release a user hold from the set of snapshots starting with fromsnap
+ * up to and including tosnap.
+ */
+int
+zfs_release_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    const char *tag, boolean_t recursive)
+{
+	struct hold_range_arg arg = { 0 };
+
+	arg.origin = zhp;
+	arg.fromsnap = fromsnap;
+	arg.tosnap = tosnap;
+	arg.tag = tag;
+	arg.recursive = recursive;
+	arg.seenfrom = (fromsnap == NULL);
+
+	return (zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg));
+}
+
+uint64_t
+zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props)
+{
+	uint64_t numdb;
+	uint64_t nblocks, volblocksize;
+	int ncopies;
+	char *strval;
+
+	if (nvlist_lookup_string(props,
+	    zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0)
+		ncopies = atoi(strval);
+	else
+		ncopies = 1;
+	if (nvlist_lookup_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+	    &volblocksize) != 0)
+		volblocksize = ZVOL_DEFAULT_BLOCKSIZE;
+	nblocks = volsize/volblocksize;
+	/* start with metadnode L0-L6 */
+	numdb = 7;
+	/* calculate number of indirects */
+	while (nblocks > 1) {
+		nblocks += DNODES_PER_LEVEL - 1;
+		nblocks /= DNODES_PER_LEVEL;
+		numdb += nblocks;
+	}
+	numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1);
+	volsize *= ncopies;
+	/*
+	 * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't
+	 * compressed, but in practice they compress down to about
+	 * 1100 bytes
+	 */
+	numdb *= 1ULL << DN_MAX_INDBLKSHIFT;
+	volsize += numdb;
+	return (volsize);
+}
diff --git a/lib/libzfs/libzfs_fru.c b/lib/libzfs/libzfs_fru.c
new file mode 100644
index 000000000..788fa2cfb
--- /dev/null
+++ b/lib/libzfs/libzfs_fru.c
@@ -0,0 +1,452 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <libintl.h>
+#include <link.h>
+#include <pthread.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include <fm/libtopo.h>
+#include <sys/fm/protocol.h>
+#include <sys/systeminfo.h>
+
+#include "libzfs_impl.h"
+
+/*
+ * This file is responsible for determining the relationship between I/O
+ * devices paths and physical locations.  In the world of MPxIO and external
+ * enclosures, the device path is not synonymous with the physical location.
+ * If you remove a drive and insert it into a different slot, it will end up
+ * with the same path under MPxIO.  If you recable storage enclosures, the
+ * device paths may change.  All of this makes it difficult to implement the
+ * 'autoreplace' property, which is supposed to automatically manage disk
+ * replacement based on physical slot.
+ *
+ * In order to work around these limitations, we have a per-vdev FRU property
+ * that is the libtopo path (minus disk-specific authority information) to the
+ * physical location of the device on the system.  This is an optional
+ * property, and is only needed when using the 'autoreplace' property or when
+ * generating FMA faults against vdevs.
+ */
+
+/*
+ * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
+ * it is not present.  We only need this once per library instance, so it is
+ * not part of the libzfs handle.
+ */
+static void *_topo_dlhandle;
+static topo_hdl_t *(*_topo_open)(int, const char *, int *);
+static void (*_topo_close)(topo_hdl_t *);
+static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
+static void (*_topo_snap_release)(topo_hdl_t *);
+static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
+    topo_walk_cb_t, void *, int *);
+static int (*_topo_walk_step)(topo_walk_t *, int);
+static void (*_topo_walk_fini)(topo_walk_t *);
+static void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
+static char *(*_topo_node_name)(tnode_t *);
+static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
+    char **, int *);
+static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
+static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
+static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
+    const char *);
+
+#define	ZFS_FRU_HASH_SIZE	257
+
+static size_t
+fru_strhash(const char *key)
+{
+	ulong_t g, h = 0;
+	const char *p;
+
+	for (p = key; *p != '\0'; p++) {
+		h = (h << 4) + *p;
+
+		if ((g = (h & 0xf0000000)) != 0) {
+			h ^= (g >> 24);
+			h ^= g;
+		}
+	}
+
+	return (h % ZFS_FRU_HASH_SIZE);
+}
+
+static int
+libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
+{
+	libzfs_handle_t *hdl = arg;
+	nvlist_t *fru;
+	char *devpath, *frustr;
+	int err;
+	libzfs_fru_t *frup;
+	size_t idx;
+
+	/*
+	 * If this is the chassis node, and we don't yet have the system
+	 * chassis ID, then fill in this value now.
+	 */
+	if (hdl->libzfs_chassis_id[0] == '\0' &&
+	    strcmp(_topo_node_name(tn), "chassis") == 0) {
+		if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
+		    FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0)
+			(void) strlcpy(hdl->libzfs_chassis_id, devpath,
+			    sizeof (hdl->libzfs_chassis_id));
+	}
+
+	/*
+	 * Skip non-disk nodes.
+	 */
+	if (strcmp(_topo_node_name(tn), "disk") != 0)
+		return (TOPO_WALK_NEXT);
+
+	/*
+	 * Get the devfs path and FRU.
+	 */
+	if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
+		return (TOPO_WALK_NEXT);
+
+	if (libzfs_fru_lookup(hdl, devpath) != NULL) {
+		_topo_hdl_strfree(thp, devpath);
+		return (TOPO_WALK_NEXT);
+	}
+
+	if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
+		_topo_hdl_strfree(thp, devpath);
+		return (TOPO_WALK_NEXT);
+	}
+
+	/*
+	 * Convert the FRU into a string.
+	 */
+	if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
+		nvlist_free(fru);
+		_topo_hdl_strfree(thp, devpath);
+		return (TOPO_WALK_NEXT);
+	}
+
+	nvlist_free(fru);
+
+	/*
+	 * Finally, we have a FRU string and device path.  Add it to the hash.
+	 */
+	if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) {
+		_topo_hdl_strfree(thp, devpath);
+		_topo_hdl_strfree(thp, frustr);
+		return (TOPO_WALK_NEXT);
+	}
+
+	if ((frup->zf_device = strdup(devpath)) == NULL ||
+	    (frup->zf_fru = strdup(frustr)) == NULL) {
+		free(frup->zf_device);
+		free(frup);
+		_topo_hdl_strfree(thp, devpath);
+		_topo_hdl_strfree(thp, frustr);
+		return (TOPO_WALK_NEXT);
+	}
+
+	_topo_hdl_strfree(thp, devpath);
+	_topo_hdl_strfree(thp, frustr);
+
+	idx = fru_strhash(frup->zf_device);
+	frup->zf_chain = hdl->libzfs_fru_hash[idx];
+	hdl->libzfs_fru_hash[idx] = frup;
+	frup->zf_next = hdl->libzfs_fru_list;
+	hdl->libzfs_fru_list = frup;
+
+	return (TOPO_WALK_NEXT);
+}
+
+/*
+ * Called during initialization to setup the dynamic libtopo connection.
+ */
+#pragma init(libzfs_init_fru)
+static void
+libzfs_init_fru(void)
+{
+	char path[MAXPATHLEN];
+	char isa[257];
+
+#if defined(_LP64)
+	if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
+		isa[0] = '\0';
+#else
+	isa[0] = '\0';
+#endif
+	(void) snprintf(path, sizeof (path),
+	    "/usr/lib/fm/%s/libtopo.so", isa);
+
+	if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
+		return;
+
+	_topo_open = (topo_hdl_t *(*)())
+	    dlsym(_topo_dlhandle, "topo_open");
+	_topo_close = (void (*)())
+	    dlsym(_topo_dlhandle, "topo_close");
+	_topo_snap_hold = (char *(*)())
+	    dlsym(_topo_dlhandle, "topo_snap_hold");
+	_topo_snap_release = (void (*)())
+	    dlsym(_topo_dlhandle, "topo_snap_release");
+	_topo_walk_init = (topo_walk_t *(*)())
+	    dlsym(_topo_dlhandle, "topo_walk_init");
+	_topo_walk_step = (int (*)())
+	    dlsym(_topo_dlhandle, "topo_walk_step");
+	_topo_walk_fini = (void (*)())
+	    dlsym(_topo_dlhandle, "topo_walk_fini");
+	_topo_hdl_strfree = (void (*)())
+	    dlsym(_topo_dlhandle, "topo_hdl_strfree");
+	_topo_node_name = (char *(*)())
+	    dlsym(_topo_dlhandle, "topo_node_name");
+	_topo_prop_get_string = (int (*)())
+	    dlsym(_topo_dlhandle, "topo_prop_get_string");
+	_topo_node_fru = (int (*)())
+	    dlsym(_topo_dlhandle, "topo_node_fru");
+	_topo_fmri_nvl2str = (int (*)())
+	    dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
+	_topo_fmri_strcmp_noauth = (int (*)())
+	    dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
+
+	if (_topo_open == NULL || _topo_close == NULL ||
+	    _topo_snap_hold == NULL || _topo_snap_release == NULL ||
+	    _topo_walk_init == NULL || _topo_walk_step == NULL ||
+	    _topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
+	    _topo_node_name == NULL || _topo_prop_get_string == NULL ||
+	    _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
+	    _topo_fmri_strcmp_noauth == NULL) {
+		(void) dlclose(_topo_dlhandle);
+		_topo_dlhandle = NULL;
+	}
+}
+
+/*
+ * Refresh the mappings from device path -> FMRI.  We do this by walking the
+ * hc topology looking for disk nodes, and recording the io/devfs-path and FRU.
+ * Note that we strip out the disk-specific authority information (serial,
+ * part, revision, etc) so that we are left with only the identifying
+ * characteristics of the slot (hc path and chassis-id).
+ */
+void
+libzfs_fru_refresh(libzfs_handle_t *hdl)
+{
+	int err;
+	char *uuid;
+	topo_hdl_t *thp;
+	topo_walk_t *twp;
+
+	if (_topo_dlhandle == NULL)
+		return;
+
+	/*
+	 * Clear the FRU hash and initialize our basic structures.
+	 */
+	libzfs_fru_clear(hdl, B_FALSE);
+
+	if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION,
+	    NULL, &err)) == NULL)
+		return;
+
+	thp = hdl->libzfs_topo_hdl;
+
+	if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL)
+		return;
+
+	_topo_hdl_strfree(thp, uuid);
+
+	if (hdl->libzfs_fru_hash == NULL &&
+	    (hdl->libzfs_fru_hash =
+	    calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL)
+		return;
+
+	/*
+	 * We now have a topo snapshot, so iterate over the hc topology looking
+	 * for disks to add to the hash.
+	 */
+	twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC,
+	    libzfs_fru_gather, hdl, &err);
+	if (twp != NULL) {
+		(void) _topo_walk_step(twp, TOPO_WALK_CHILD);
+		_topo_walk_fini(twp);
+	}
+}
+
+/*
+ * Given a devfs path, return the FRU for the device, if known.  This will
+ * automatically call libzfs_fru_refresh() if it hasn't already been called by
+ * the consumer.  The string returned is valid until the next call to
+ * libzfs_fru_refresh().
+ */
+const char *
+libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
+{
+	size_t idx = fru_strhash(devpath);
+	libzfs_fru_t *frup;
+
+	if (hdl->libzfs_fru_hash == NULL)
+		libzfs_fru_refresh(hdl);
+
+	if (hdl->libzfs_fru_hash == NULL)
+		return (NULL);
+
+	for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
+	    frup = frup->zf_chain) {
+		if (strcmp(devpath, frup->zf_device) == 0)
+			return (frup->zf_fru);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Given a fru path, return the device path.  This will automatically call
+ * libzfs_fru_refresh() if it hasn't already been called by the consumer.  The
+ * string returned is valid until the next call to libzfs_fru_refresh().
+ */
+const char *
+libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
+{
+	libzfs_fru_t *frup;
+	size_t idx;
+
+	if (hdl->libzfs_fru_hash == NULL)
+		libzfs_fru_refresh(hdl);
+
+	if (hdl->libzfs_fru_hash == NULL)
+		return (NULL);
+
+	for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
+		for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
+		    frup = frup->zf_next) {
+			if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
+			    fru, frup->zf_fru))
+				return (frup->zf_device);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Change the stored FRU for the given vdev.
+ */
+int
+zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value));
+	zc.zc_guid = vdev_guid;
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
+		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot set FRU")));
+
+	return (0);
+}
+
+/*
+ * Compare to two FRUs, ignoring any authority information.
+ */
+boolean_t
+libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
+{
+	if (hdl->libzfs_fru_hash == NULL)
+		libzfs_fru_refresh(hdl);
+
+	if (hdl->libzfs_fru_hash == NULL)
+		return (strcmp(a, b) == 0);
+
+	return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
+}
+
+/*
+ * This special function checks to see whether the FRU indicates it's supposed
+ * to be in the system chassis, but the chassis-id doesn't match.  This can
+ * happen in a clustered case, where both head nodes have the same logical
+ * disk, but opening the device on the other head node is meaningless.
+ */
+boolean_t
+libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
+{
+	const char *chassisid;
+	size_t len;
+
+	if (hdl->libzfs_fru_hash == NULL)
+		libzfs_fru_refresh(hdl);
+
+	if (hdl->libzfs_chassis_id[0] == '\0')
+		return (B_FALSE);
+
+	if (strstr(fru, "/chassis=0/") == NULL)
+		return (B_FALSE);
+
+	if ((chassisid = strstr(fru, ":chassis-id=")) == NULL)
+		return (B_FALSE);
+
+	chassisid += 12;
+	len = strlen(hdl->libzfs_chassis_id);
+	if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 &&
+	    (chassisid[len] == '/' || chassisid[len] == ':'))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Clear memory associated with the FRU hash.
+ */
+void
+libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
+{
+	libzfs_fru_t *frup;
+
+	while ((frup = hdl->libzfs_fru_list) != NULL) {
+		hdl->libzfs_fru_list = frup->zf_next;
+		free(frup->zf_device);
+		free(frup->zf_fru);
+		free(frup);
+	}
+
+	hdl->libzfs_fru_list = NULL;
+
+	if (hdl->libzfs_topo_hdl != NULL) {
+		_topo_snap_release(hdl->libzfs_topo_hdl);
+		_topo_close(hdl->libzfs_topo_hdl);
+		hdl->libzfs_topo_hdl = NULL;
+	}
+
+	if (final) {
+		free(hdl->libzfs_fru_hash);
+	} else if (hdl->libzfs_fru_hash != NULL) {
+		bzero(hdl->libzfs_fru_hash,
+		    ZFS_FRU_HASH_SIZE * sizeof (void *));
+	}
+}
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index d67776889..fd3044b1d 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Pool import support functions.
  *
@@ -41,15 +39,21 @@
  * using our derived config, and record the results.
  */
 
+#include <ctype.h>
 #include <devid.h>
 #include <dirent.h>
 #include <errno.h>
 #include <libintl.h>
+#include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <sys/vtoc.h>
+#include <sys/dktp/fdisk.h>
+#include <sys/efi_partition.h>
+#include <thread_pool.h>
 
 #include <sys/vdev_impl.h>
 
@@ -388,8 +392,6 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
 	}
 
 	if (err) {
-		(void) zpool_standard_error(hdl, errno,
-		    dgettext(TEXT_DOMAIN, "cannot discover pools"));
 		zcmd_free_nvlists(&zc);
 		return (NULL);
 	}
@@ -404,6 +406,21 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
 }
 
 /*
+ * Determine if the vdev id is a hole in the namespace.
+ */
+boolean_t
+vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
+{
+	for (int c = 0; c < holes; c++) {
+
+		/* Top-level is a hole */
+		if (hole_array[c] == id)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
  * Convert our list of pools into the definitive set of configurations.  We
  * start by picking the best config for each toplevel vdev.  Once that's done,
  * we assemble the toplevel vdevs into a full config for the pool.  We make a
@@ -425,17 +442,20 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 	uint64_t version, guid;
 	uint_t children = 0;
 	nvlist_t **child = NULL;
+	uint_t holes;
+	uint64_t *hole_array, max_id;
 	uint_t c;
 	boolean_t isactive;
 	uint64_t hostid;
 	nvlist_t *nvl;
 	boolean_t found_one = B_FALSE;
+	boolean_t valid_top_config = B_FALSE;
 
 	if (nvlist_alloc(&ret, 0, 0) != 0)
 		goto nomem;
 
 	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
-		uint64_t id;
+		uint64_t id, max_txg = 0;
 
 		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
 			goto nomem;
@@ -463,6 +483,42 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 				}
 			}
 
+			/*
+			 * We rely on the fact that the max txg for the
+			 * pool will contain the most up-to-date information
+			 * about the valid top-levels in the vdev namespace.
+			 */
+			if (best_txg > max_txg) {
+				(void) nvlist_remove(config,
+				    ZPOOL_CONFIG_VDEV_CHILDREN,
+				    DATA_TYPE_UINT64);
+				(void) nvlist_remove(config,
+				    ZPOOL_CONFIG_HOLE_ARRAY,
+				    DATA_TYPE_UINT64_ARRAY);
+
+				max_txg = best_txg;
+				hole_array = NULL;
+				holes = 0;
+				max_id = 0;
+				valid_top_config = B_FALSE;
+
+				if (nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
+					verify(nvlist_add_uint64(config,
+					    ZPOOL_CONFIG_VDEV_CHILDREN,
+					    max_id) == 0);
+					valid_top_config = B_TRUE;
+				}
+
+				if (nvlist_lookup_uint64_array(tmp,
+				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
+				    &holes) == 0) {
+					verify(nvlist_add_uint64_array(config,
+					    ZPOOL_CONFIG_HOLE_ARRAY,
+					    hole_array, holes) == 0);
+				}
+			}
+
 			if (!config_seen) {
 				/*
 				 * Copy the relevant pieces of data to the pool
@@ -522,6 +578,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
 			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
 			    &id) == 0);
+
 			if (id >= children) {
 				nvlist_t **newchild;
 
@@ -542,17 +599,82 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 
 		}
 
+		/*
+		 * If we have information about all the top-levels then
+		 * clean up the nvlist which we've constructed. This
+		 * means removing any extraneous devices that are
+		 * beyond the valid range or adding devices to the end
+		 * of our array which appear to be missing.
+		 */
+		if (valid_top_config) {
+			if (max_id < children) {
+				for (c = max_id; c < children; c++)
+					nvlist_free(child[c]);
+				children = max_id;
+			} else if (max_id > children) {
+				nvlist_t **newchild;
+
+				newchild = zfs_alloc(hdl, (max_id) *
+				    sizeof (nvlist_t *));
+				if (newchild == NULL)
+					goto nomem;
+
+				for (c = 0; c < children; c++)
+					newchild[c] = child[c];
+
+				free(child);
+				child = newchild;
+				children = max_id;
+			}
+		}
+
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 		    &guid) == 0);
 
 		/*
+		 * The vdev namespace may contain holes as a result of
+		 * device removal. We must add them back into the vdev
+		 * tree before we process any missing devices.
+		 */
+		if (holes > 0) {
+			ASSERT(valid_top_config);
+
+			for (c = 0; c < children; c++) {
+				nvlist_t *holey;
+
+				if (child[c] != NULL ||
+				    !vdev_is_hole(hole_array, holes, c))
+					continue;
+
+				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
+				    0) != 0)
+					goto nomem;
+
+				/*
+				 * Holes in the namespace are treated as
+				 * "hole" top-level vdevs and have a
+				 * special flag set on them.
+				 */
+				if (nvlist_add_string(holey,
+				    ZPOOL_CONFIG_TYPE,
+				    VDEV_TYPE_HOLE) != 0 ||
+				    nvlist_add_uint64(holey,
+				    ZPOOL_CONFIG_ID, c) != 0 ||
+				    nvlist_add_uint64(holey,
+				    ZPOOL_CONFIG_GUID, 0ULL) != 0)
+					goto nomem;
+				child[c] = holey;
+			}
+		}
+
+		/*
 		 * Look for any missing top-level vdevs.  If this is the case,
 		 * create a faked up 'missing' vdev as a placeholder.  We cannot
 		 * simply compress the child array, because the kernel performs
 		 * certain checks to make sure the vdev IDs match their location
 		 * in the configuration.
 		 */
-		for (c = 0; c < children; c++)
+		for (c = 0; c < children; c++) {
 			if (child[c] == NULL) {
 				nvlist_t *missing;
 				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
@@ -570,6 +692,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 				}
 				child[c] = missing;
 			}
+		}
 
 		/*
 		 * Put all of this pool's top-level vdevs into a root vdev.
@@ -636,8 +759,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
 			continue;
 		}
 
-		if ((nvl = refresh_config(hdl, config)) == NULL)
-			goto error;
+		if ((nvl = refresh_config(hdl, config)) == NULL) {
+			nvlist_free(config);
+			config = NULL;
+			continue;
+		}
 
 		nvlist_free(config);
 		config = nvl;
@@ -777,6 +903,212 @@ zpool_read_label(int fd, nvlist_t **config)
 	return (0);
 }
 
+typedef struct rdsk_node {
+	char *rn_name;
+	int rn_dfd;
+	libzfs_handle_t *rn_hdl;
+	nvlist_t *rn_config;
+	avl_tree_t *rn_avl;
+	avl_node_t rn_node;
+	boolean_t rn_nozpool;
+} rdsk_node_t;
+
+static int
+slice_cache_compare(const void *arg1, const void *arg2)
+{
+	const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
+	const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
+	char *nm1slice, *nm2slice;
+	int rv;
+
+	/*
+	 * slices zero and two are the most likely to provide results,
+	 * so put those first
+	 */
+	nm1slice = strstr(nm1, "s0");
+	nm2slice = strstr(nm2, "s0");
+	if (nm1slice && !nm2slice) {
+		return (-1);
+	}
+	if (!nm1slice && nm2slice) {
+		return (1);
+	}
+	nm1slice = strstr(nm1, "s2");
+	nm2slice = strstr(nm2, "s2");
+	if (nm1slice && !nm2slice) {
+		return (-1);
+	}
+	if (!nm1slice && nm2slice) {
+		return (1);
+	}
+
+	rv = strcmp(nm1, nm2);
+	if (rv == 0)
+		return (0);
+	return (rv > 0 ? 1 : -1);
+}
+
+static void
+check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
+    diskaddr_t size, uint_t blksz)
+{
+	rdsk_node_t tmpnode;
+	rdsk_node_t *node;
+	char sname[MAXNAMELEN];
+
+	tmpnode.rn_name = &sname[0];
+	(void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
+	    diskname, partno);
+	/*
+	 * protect against division by zero for disk labels that
+	 * contain a bogus sector size
+	 */
+	if (blksz == 0)
+		blksz = DEV_BSIZE;
+	/* too small to contain a zpool? */
+	if ((size < (SPA_MINDEVSIZE / blksz)) &&
+	    (node = avl_find(r, &tmpnode, NULL)))
+		node->rn_nozpool = B_TRUE;
+}
+
+static void
+nozpool_all_slices(avl_tree_t *r, const char *sname)
+{
+	char diskname[MAXNAMELEN];
+	char *ptr;
+	int i;
+
+	(void) strncpy(diskname, sname, MAXNAMELEN);
+	if (((ptr = strrchr(diskname, 's')) == NULL) &&
+	    ((ptr = strrchr(diskname, 'p')) == NULL))
+		return;
+	ptr[0] = 's';
+	ptr[1] = '\0';
+	for (i = 0; i < NDKMAP; i++)
+		check_one_slice(r, diskname, i, 0, 1);
+	ptr[0] = 'p';
+	for (i = 0; i <= FD_NUMPART; i++)
+		check_one_slice(r, diskname, i, 0, 1);
+}
+
+static void
+check_slices(avl_tree_t *r, int fd, const char *sname)
+{
+	struct extvtoc vtoc;
+	struct dk_gpt *gpt;
+	char diskname[MAXNAMELEN];
+	char *ptr;
+	int i;
+
+	(void) strncpy(diskname, sname, MAXNAMELEN);
+	if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
+		return;
+	ptr[1] = '\0';
+
+	if (read_extvtoc(fd, &vtoc) >= 0) {
+		for (i = 0; i < NDKMAP; i++)
+			check_one_slice(r, diskname, i,
+			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
+	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
+		/*
+		 * on x86 we'll still have leftover links that point
+		 * to slices s[9-15], so use NDKMAP instead
+		 */
+		for (i = 0; i < NDKMAP; i++)
+			check_one_slice(r, diskname, i,
+			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
+		/* nodes p[1-4] are never used with EFI labels */
+		ptr[0] = 'p';
+		for (i = 1; i <= FD_NUMPART; i++)
+			check_one_slice(r, diskname, i, 0, 1);
+		efi_free(gpt);
+	}
+}
+
+static void
+zpool_open_func(void *arg)
+{
+	rdsk_node_t *rn = arg;
+	struct stat64 statbuf;
+	nvlist_t *config;
+	int fd;
+
+	if (rn->rn_nozpool)
+		return;
+	if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
+		/* symlink to a device that's no longer there */
+		if (errno == ENOENT)
+			nozpool_all_slices(rn->rn_avl, rn->rn_name);
+		return;
+	}
+	/*
+	 * Ignore failed stats.  We only want regular
+	 * files, character devs and block devs.
+	 */
+	if (fstat64(fd, &statbuf) != 0 ||
+	    (!S_ISREG(statbuf.st_mode) &&
+	    !S_ISCHR(statbuf.st_mode) &&
+	    !S_ISBLK(statbuf.st_mode))) {
+		(void) close(fd);
+		return;
+	}
+	/* this file is too small to hold a zpool */
+	if (S_ISREG(statbuf.st_mode) &&
+	    statbuf.st_size < SPA_MINDEVSIZE) {
+		(void) close(fd);
+		return;
+	} else if (!S_ISREG(statbuf.st_mode)) {
+		/*
+		 * Try to read the disk label first so we don't have to
+		 * open a bunch of minor nodes that can't have a zpool.
+		 */
+		check_slices(rn->rn_avl, fd, rn->rn_name);
+	}
+
+	if ((zpool_read_label(fd, &config)) != 0) {
+		(void) close(fd);
+		(void) no_memory(rn->rn_hdl);
+		return;
+	}
+	(void) close(fd);
+
+
+	rn->rn_config = config;
+	if (config != NULL) {
+		assert(rn->rn_nozpool == B_FALSE);
+	}
+}
+
+/*
+ * Given a file descriptor, clear (zero) the label information.  This function
+ * is currently only used in the appliance stack as part of the ZFS sysevent
+ * module.
+ */
+int
+zpool_clear_label(int fd)
+{
+	struct stat64 statbuf;
+	int l;
+	vdev_label_t *label;
+	uint64_t size;
+
+	if (fstat64(fd, &statbuf) == -1)
+		return (0);
+	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+
+	if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
+		return (-1);
+
+	for (l = 0; l < VDEV_LABELS; l++) {
+		if (pwrite64(fd, label, sizeof (vdev_label_t),
+		    label_offset(size, l)) != sizeof (vdev_label_t))
+			return (-1);
+	}
+
+	free(label);
+	return (0);
+}
+
 /*
  * Given a list of directories to search, find all pools stored on disk.  This
  * includes partial pools which are not available to import.  If no args are
@@ -785,30 +1117,28 @@ zpool_read_label(int fd, nvlist_t **config)
  * to import a specific pool.
  */
 static nvlist_t *
-zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
-    boolean_t active_ok, char *poolname, uint64_t guid)
+zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 {
-	int i;
+	int i, dirs = iarg->paths;
 	DIR *dirp = NULL;
 	struct dirent64 *dp;
 	char path[MAXPATHLEN];
-	char *end;
+	char *end, **dir = iarg->path;
 	size_t pathleft;
-	struct stat64 statbuf;
-	nvlist_t *ret = NULL, *config;
+	nvlist_t *ret = NULL;
 	static char *default_dir = "/dev/dsk";
-	int fd;
 	pool_list_t pools = { 0 };
 	pool_entry_t *pe, *penext;
 	vdev_entry_t *ve, *venext;
 	config_entry_t *ce, *cenext;
 	name_entry_t *ne, *nenext;
+	avl_tree_t slice_cache;
+	rdsk_node_t *slice;
+	void *cookie;
 
-	verify(poolname == NULL || guid == 0);
-
-	if (argc == 0) {
-		argc = 1;
-		argv = &default_dir;
+	if (dirs == 0) {
+		dirs = 1;
+		dir = &default_dir;
 	}
 
 	/*
@@ -816,15 +1146,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
 	 * possible device, organizing the information according to pool GUID
 	 * and toplevel GUID.
 	 */
-	for (i = 0; i < argc; i++) {
+	for (i = 0; i < dirs; i++) {
+		tpool_t *t;
 		char *rdsk;
 		int dfd;
 
 		/* use realpath to normalize the path */
-		if (realpath(argv[i], path) == 0) {
+		if (realpath(dir[i], path) == 0) {
 			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
-			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
-			    argv[i]);
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
 			goto error;
 		}
 		end = &path[strlen(path)];
@@ -851,6 +1181,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
 			goto error;
 		}
 
+		avl_create(&slice_cache, slice_cache_compare,
+		    sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
 		/*
 		 * This is not MT-safe, but we have no MT consumers of libzfs
 		 */
@@ -860,46 +1192,53 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
 			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
 				continue;
 
-			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
-				continue;
-
-			/*
-			 * Ignore failed stats.  We only want regular
-			 * files, character devs and block devs.
-			 */
-			if (fstat64(fd, &statbuf) != 0 ||
-			    (!S_ISREG(statbuf.st_mode) &&
-			    !S_ISCHR(statbuf.st_mode) &&
-			    !S_ISBLK(statbuf.st_mode))) {
-				(void) close(fd);
-				continue;
-			}
-
-			if ((zpool_read_label(fd, &config)) != 0) {
-				(void) close(fd);
-				(void) no_memory(hdl);
-				goto error;
-			}
-
-			(void) close(fd);
-
-			if (config != NULL) {
+			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+			slice->rn_name = zfs_strdup(hdl, name);
+			slice->rn_avl = &slice_cache;
+			slice->rn_dfd = dfd;
+			slice->rn_hdl = hdl;
+			slice->rn_nozpool = B_FALSE;
+			avl_add(&slice_cache, slice);
+		}
+		/*
+		 * create a thread pool to do all of this in parallel;
+		 * rn_nozpool is not protected, so this is racy in that
+		 * multiple tasks could decide that the same slice can
+		 * not hold a zpool, which is benign.  Also choose
+		 * double the number of processors; we hold a lot of
+		 * locks in the kernel, so going beyond this doesn't
+		 * buy us much.
+		 */
+		t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
+		    0, NULL);
+		for (slice = avl_first(&slice_cache); slice;
+		    (slice = avl_walk(&slice_cache, slice,
+		    AVL_AFTER)))
+			(void) tpool_dispatch(t, zpool_open_func, slice);
+		tpool_wait(t);
+		tpool_destroy(t);
+
+		cookie = NULL;
+		while ((slice = avl_destroy_nodes(&slice_cache,
+		    &cookie)) != NULL) {
+			if (slice->rn_config != NULL) {
+				nvlist_t *config = slice->rn_config;
 				boolean_t matched = B_TRUE;
 
-				if (poolname != NULL) {
+				if (iarg->poolname != NULL) {
 					char *pname;
 
 					matched = nvlist_lookup_string(config,
 					    ZPOOL_CONFIG_POOL_NAME,
 					    &pname) == 0 &&
-					    strcmp(poolname, pname) == 0;
-				} else if (guid != 0) {
+					    strcmp(iarg->poolname, pname) == 0;
+				} else if (iarg->guid != 0) {
 					uint64_t this_guid;
 
 					matched = nvlist_lookup_uint64(config,
 					    ZPOOL_CONFIG_POOL_GUID,
 					    &this_guid) == 0 &&
-					    guid == this_guid;
+					    iarg->guid == this_guid;
 				}
 				if (!matched) {
 					nvlist_free(config);
@@ -907,17 +1246,20 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
 					continue;
 				}
 				/* use the non-raw path for the config */
-				(void) strlcpy(end, name, pathleft);
+				(void) strlcpy(end, slice->rn_name, pathleft);
 				if (add_config(hdl, &pools, path, config) != 0)
 					goto error;
 			}
+			free(slice->rn_name);
+			free(slice);
 		}
+		avl_destroy(&slice_cache);
 
 		(void) closedir(dirp);
 		dirp = NULL;
 	}
 
-	ret = get_configs(hdl, &pools, active_ok);
+	ret = get_configs(hdl, &pools, iarg->can_be_active);
 
 error:
 	for (pe = pools.pools; pe != NULL; pe = penext) {
@@ -951,27 +1293,12 @@ error:
 nvlist_t *
 zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
 {
-	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
-}
+	importargs_t iarg = { 0 };
 
-nvlist_t *
-zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
-    char *pool)
-{
-	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
-}
+	iarg.paths = argc;
+	iarg.path = argv;
 
-nvlist_t *
-zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
-    uint64_t guid)
-{
-	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
-}
-
-nvlist_t *
-zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
-{
-	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
+	return (zpool_find_import_impl(hdl, &iarg));
 }
 
 /*
@@ -1093,6 +1420,46 @@ zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
 	return (pools);
 }
 
+static int
+name_or_guid_exists(zpool_handle_t *zhp, void *data)
+{
+	importargs_t *import = data;
+	int found = 0;
+
+	if (import->poolname != NULL) {
+		char *pool_name;
+
+		verify(nvlist_lookup_string(zhp->zpool_config,
+		    ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
+		if (strcmp(pool_name, import->poolname) == 0)
+			found = 1;
+	} else {
+		uint64_t pool_guid;
+
+		verify(nvlist_lookup_uint64(zhp->zpool_config,
+		    ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
+		if (pool_guid == import->guid)
+			found = 1;
+	}
+
+	zpool_close(zhp);
+	return (found);
+}
+
+nvlist_t *
+zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
+{
+	verify(import->poolname == NULL || import->guid == 0);
+
+	if (import->unique)
+		import->exists = zpool_iter(hdl, name_or_guid_exists, import);
+
+	if (import->cachefile != NULL)
+		return (zpool_find_import_cached(hdl, import->cachefile,
+		    import->poolname, import->guid));
+
+	return (zpool_find_import_impl(hdl, import));
+}
 
 boolean_t
 find_guid(nvlist_t *nv, uint64_t guid)
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
index 7810e5d68..0675ec229 100644
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -44,17 +43,14 @@
  *
  * 	zfs_is_shared_nfs()
  * 	zfs_is_shared_smb()
- * 	zfs_is_shared_iscsi()
  * 	zfs_share_proto()
  * 	zfs_shareall();
- * 	zfs_share_iscsi()
  * 	zfs_unshare_nfs()
  * 	zfs_unshare_smb()
  * 	zfs_unshareall_nfs()
  *	zfs_unshareall_smb()
  *	zfs_unshareall()
  *	zfs_unshareall_bypath()
- * 	zfs_unshare_iscsi()
  *
  * The following functions are available for pool consumers, and will
  * mount/unmount and share/unshare all datasets within pool:
@@ -89,11 +85,6 @@ static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
     zfs_share_proto_t);
 
-static int (*iscsitgt_zfs_share)(const char *);
-static int (*iscsitgt_zfs_unshare)(const char *);
-static int (*iscsitgt_zfs_is_shared)(const char *);
-static int (*iscsitgt_svc_online)();
-
 /*
  * The share protocols table must be in the same order as the zfs_share_prot_t
  * enum in libzfs_impl.h
@@ -125,29 +116,6 @@ zfs_share_proto_t share_all_proto[] = {
 	PROTO_END
 };
 
-#pragma init(zfs_iscsi_init)
-static void
-zfs_iscsi_init(void)
-{
-	void *libiscsitgt;
-
-	if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1",
-	    RTLD_LAZY | RTLD_GLOBAL)) == NULL ||
-	    (iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
-	    "iscsitgt_zfs_share")) == NULL ||
-	    (iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
-	    "iscsitgt_zfs_unshare")) == NULL ||
-	    (iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
-	    "iscsitgt_zfs_is_shared")) == NULL ||
-	    (iscsitgt_svc_online = (int (*)(const char *))dlsym(libiscsitgt,
-	    "iscsitgt_svc_online")) == NULL) {
-		iscsitgt_zfs_share = NULL;
-		iscsitgt_zfs_unshare = NULL;
-		iscsitgt_zfs_is_shared = NULL;
-		iscsitgt_svc_online = NULL;
-	}
-}
-
 /*
  * Search the sharetab for the given mountpoint and protocol, returning
  * a zfs_share_type_t value.
@@ -345,6 +313,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 		} else if (errno == EPERM) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "Insufficient privileges"));
+		} else if (errno == ENOTSUP) {
+			char buf[256];
+			int spa_version;
+
+			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
+			(void) snprintf(buf, sizeof (buf),
+			    dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
+			    "file system on a version %d pool. Pool must be"
+			    " upgraded to mount this file system."),
+			    (u_longlong_t)zfs_prop_get_int(zhp,
+			    ZFS_PROP_VERSION), spa_version);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
 		} else {
 			zfs_error_aux(hdl, strerror(errno));
 		}
@@ -445,7 +425,7 @@ zfs_is_shared(zfs_handle_t *zhp)
 	zfs_share_proto_t *curr_proto;
 
 	if (ZFS_IS_VOLUME(zhp))
-		return (zfs_is_shared_iscsi(zhp));
+		return (B_FALSE);
 
 	for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
 	    curr_proto++)
@@ -458,7 +438,7 @@ int
 zfs_share(zfs_handle_t *zhp)
 {
 	if (ZFS_IS_VOLUME(zhp))
-		return (zfs_share_iscsi(zhp));
+		return (0);
 
 	return (zfs_share_proto(zhp, share_all_proto));
 }
@@ -467,7 +447,7 @@ int
 zfs_unshare(zfs_handle_t *zhp)
 {
 	if (ZFS_IS_VOLUME(zhp))
-		return (zfs_unshare_iscsi(zhp));
+		return (0);
 
 	return (zfs_unshareall(zhp));
 }
@@ -999,81 +979,6 @@ remove_mountpoint(zfs_handle_t *zhp)
 	}
 }
 
-boolean_t
-zfs_is_shared_iscsi(zfs_handle_t *zhp)
-{
-
-	/*
-	 * If iscsi deamon isn't running then we aren't shared
-	 */
-	if (iscsitgt_svc_online && iscsitgt_svc_online() == 1)
-		return (B_FALSE);
-	else
-		return (iscsitgt_zfs_is_shared != NULL &&
-		    iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
-}
-
-int
-zfs_share_iscsi(zfs_handle_t *zhp)
-{
-	char shareopts[ZFS_MAXPROPLEN];
-	const char *dataset = zhp->zfs_name;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	/*
-	 * Return success if there are no share options.
-	 */
-	if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
-	    sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
-	    strcmp(shareopts, "off") == 0)
-		return (0);
-
-	if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0) {
-		int error = EZFS_SHAREISCSIFAILED;
-
-		/*
-		 * If service isn't availabele and EPERM was
-		 * returned then use special error.
-		 */
-		if (iscsitgt_svc_online && errno == EPERM &&
-		    (iscsitgt_svc_online() != 0))
-			error = EZFS_ISCSISVCUNAVAIL;
-
-		return (zfs_error_fmt(hdl, error,
-		    dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
-	}
-
-	return (0);
-}
-
-int
-zfs_unshare_iscsi(zfs_handle_t *zhp)
-{
-	const char *dataset = zfs_get_name(zhp);
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	/*
-	 * Return if the volume is not shared
-	 */
-	if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI)
-		return (0);
-
-	/*
-	 * If this fails with ENODEV it indicates that zvol wasn't shared so
-	 * we should return success in that case.
-	 */
-	if (iscsitgt_zfs_unshare == NULL ||
-	    (iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV)) {
-		if (errno == EPERM)
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "Insufficient privileges to unshare iscsi"));
-		return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
-		    dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
-	}
-
-	return (0);
-}
-
 typedef struct mount_cbdata {
 	zfs_handle_t	**cb_datasets;
 	int 		cb_used;
@@ -1215,28 +1120,6 @@ out:
 	return (ret);
 }
 
-
-static int
-zvol_cb(const char *dataset, void *data)
-{
-	libzfs_handle_t *hdl = data;
-	zfs_handle_t *zhp;
-
-	/*
-	 * Ignore snapshots and ignore failures from non-existant datasets.
-	 */
-	if (strchr(dataset, '@') != NULL ||
-	    (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
-		return (0);
-
-	if (zfs_unshare_iscsi(zhp) != 0)
-		return (-1);
-
-	zfs_close(zhp);
-
-	return (0);
-}
-
 static int
 mountpoint_compare(const void *a, const void *b)
 {
@@ -1246,6 +1129,8 @@ mountpoint_compare(const void *a, const void *b)
 	return (strcmp(mountb, mounta));
 }
 
+/* alias for 2002/240 */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
 /*
  * Unshare and unmount all datasets within the given pool.  We don't want to
  * rely on traversing the DSL to discover the filesystems within the pool,
@@ -1253,7 +1138,6 @@ mountpoint_compare(const void *a, const void *b)
  * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
  * gather all the filesystems that are currently mounted.
  */
-#pragma weak zpool_unmount_datasets = zpool_disable_datasets
 int
 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
 {
@@ -1267,12 +1151,6 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
 	int ret = -1;
 	int flags = (force ? MS_FORCE : 0);
 
-	/*
-	 * First unshare all zvols.
-	 */
-	if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
-		return (-1);
-
 	namelen = strlen(zhp->zpool_name);
 
 	rewind(hdl->libzfs_mnttab);
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index fd734d8b4..7836e5873 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -20,33 +20,27 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
-#include <alloca.h>
-#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <devid.h>
-#include <dirent.h>
 #include <fcntl.h>
 #include <libintl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
-#include <zone.h>
 #include <sys/efi_partition.h>
 #include <sys/vtoc.h>
 #include <sys/zfs_ioctl.h>
-#include <sys/zio.h>
-#include <strings.h>
 #include <dlfcn.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "libzfs_impl.h"
+#include "zfs_comutil.h"
 
 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
 
@@ -193,6 +187,8 @@ zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
 	case VDEV_STATE_CANT_OPEN:
 		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
 			return (gettext("FAULTED"));
+		else if (aux == VDEV_AUX_SPLIT_POOL)
+			return (gettext("SPLIT"));
 		else
 			return (gettext("UNAVAIL"));
 	case VDEV_STATE_FAULTED:
@@ -273,8 +269,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
 
 		switch (prop) {
 		case ZPOOL_PROP_SIZE:
-		case ZPOOL_PROP_USED:
-		case ZPOOL_PROP_AVAILABLE:
+		case ZPOOL_PROP_ALLOCATED:
+		case ZPOOL_PROP_FREE:
 			(void) zfs_nicenum(intval, buf, len);
 			break;
 
@@ -283,11 +279,18 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
 			    (u_longlong_t)intval);
 			break;
 
+		case ZPOOL_PROP_DEDUPRATIO:
+			(void) snprintf(buf, len, "%llu.%02llux",
+			    (u_longlong_t)(intval / 100),
+			    (u_longlong_t)(intval % 100));
+			break;
+
 		case ZPOOL_PROP_HEALTH:
 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 			verify(nvlist_lookup_uint64_array(nvroot,
-			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
+			    == 0);
 
 			(void) strlcpy(buf, zpool_state_to_name(intval,
 			    vs->vs_aux), len);
@@ -1004,9 +1007,6 @@ zpool_destroy(zpool_handle_t *zhp)
 	    ZFS_TYPE_FILESYSTEM)) == NULL)
 		return (-1);
 
-	if (zpool_remove_zvol_links(zhp) != 0)
-		return (-1);
-
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
@@ -1072,7 +1072,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "device '%s' contains an EFI label and "
 				    "cannot be used on root pools."),
-				    zpool_vdev_name(hdl, NULL, spares[s]));
+				    zpool_vdev_name(hdl, NULL, spares[s],
+				    B_FALSE));
 				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
 			}
 		}
@@ -1167,9 +1168,6 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 
-	if (zpool_remove_zvol_links(zhp) != 0)
-		return (-1);
-
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot export '%s'"), zhp->zpool_name);
 
@@ -1208,6 +1206,127 @@ zpool_export_force(zpool_handle_t *zhp)
 	return (zpool_export_common(zhp, B_TRUE, B_TRUE));
 }
 
+static void
+zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
+    nvlist_t *rbi)
+{
+	uint64_t rewindto;
+	int64_t loss = -1;
+	struct tm t;
+	char timestr[128];
+
+	if (!hdl->libzfs_printerr || rbi == NULL)
+		return;
+
+	if (nvlist_lookup_uint64(rbi, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
+		return;
+	(void) nvlist_lookup_int64(rbi, ZPOOL_CONFIG_REWIND_TIME, &loss);
+
+	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
+	    strftime(timestr, 128, 0, &t) != 0) {
+		if (dryrun) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Would be able to return %s "
+			    "to its state as of %s.\n"),
+			    name, timestr);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Pool %s returned to its state as of %s.\n"),
+			    name, timestr);
+		}
+		if (loss > 120) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s approximately %lld "),
+			    dryrun ? "Would discard" : "Discarded",
+			    (loss + 30) / 60);
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "minutes of transactions.\n"));
+		} else if (loss > 0) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s approximately %lld "),
+			    dryrun ? "Would discard" : "Discarded", loss);
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "seconds of transactions.\n"));
+		}
+	}
+}
+
+void
+zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
+    nvlist_t *config)
+{
+	int64_t loss = -1;
+	uint64_t edata = UINT64_MAX;
+	uint64_t rewindto;
+	struct tm t;
+	char timestr[128];
+
+	if (!hdl->libzfs_printerr)
+		return;
+
+	if (reason >= 0)
+		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
+	else
+		(void) printf(dgettext(TEXT_DOMAIN, "\t"));
+
+	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
+	if (nvlist_lookup_uint64(config,
+	    ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
+		goto no_info;
+
+	(void) nvlist_lookup_int64(config, ZPOOL_CONFIG_REWIND_TIME, &loss);
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
+	    &edata);
+
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Recovery is possible, but will result in some data loss.\n"));
+
+	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
+	    strftime(timestr, 128, 0, &t) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "\tReturning the pool to its state as of %s\n"
+		    "\tshould correct the problem.  "),
+		    timestr);
+	} else {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "\tReverting the pool to an earlier state "
+		    "should correct the problem.\n\t"));
+	}
+
+	if (loss > 120) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Approximately %lld minutes of data\n"
+		    "\tmust be discarded, irreversibly.  "), (loss + 30) / 60);
+	} else if (loss > 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Approximately %lld seconds of data\n"
+		    "\tmust be discarded, irreversibly.  "), loss);
+	}
+	if (edata != 0 && edata != UINT64_MAX) {
+		if (edata == 1) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "After rewind, at least\n"
+			    "\tone persistent user-data error will remain.  "));
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "After rewind, several\n"
+			    "\tpersistent user-data errors will remain.  "));
+		}
+	}
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
+	    reason >= 0 ? "clear" : "import", name);
+
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "A scrub of the pool\n"
+	    "\tis strongly recommended after recovery.\n"));
+	return;
+
+no_info:
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Destroy and re-create the pool from\n\ta backup source.\n"));
+}
+
 /*
  * zpool_import() is a contracted interface. Should be kept the same
  * if possible.
@@ -1257,8 +1376,11 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
     nvlist_t *props, boolean_t importfaulted)
 {
 	zfs_cmd_t zc = { 0 };
+	zpool_rewind_policy_t policy;
+	nvlist_t *nvi = NULL;
 	char *thename;
 	char *origname;
+	uint64_t returned_size;
 	int ret;
 	char errbuf[1024];
 
@@ -1302,11 +1424,30 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
 		nvlist_free(props);
 		return (-1);
 	}
+	returned_size =  zc.zc_nvlist_conf_size + 512;
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, returned_size) != 0) {
+		nvlist_free(props);
+		return (-1);
+	}
 
 	zc.zc_cookie = (uint64_t)importfaulted;
 	ret = 0;
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
 		char desc[1024];
+
+		(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
+		zpool_get_rewind_policy(config, &policy);
+		/*
+		 * Dry-run failed, but we print out what success
+		 * looks like if we found a best txg
+		 */
+		if ((policy.zrp_request & ZPOOL_TRY_REWIND) && nvi) {
+			zpool_rewind_exclaim(hdl, newname ? origname : thename,
+			    B_TRUE, nvi);
+			nvlist_free(nvi);
+			return (-1);
+		}
+
 		if (newname == NULL)
 			(void) snprintf(desc, sizeof (desc),
 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
@@ -1328,8 +1469,19 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
 			break;
 
+		case EROFS:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is read only"));
+			(void) zfs_error(hdl, EZFS_BADDEV, desc);
+			break;
+
 		default:
+			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
 			(void) zpool_standard_error(hdl, errno, desc);
+			zpool_explain_recover(hdl,
+			    newname ? origname : thename, -errno, nvi);
+			nvlist_free(nvi);
+			break;
 		}
 
 		ret = -1;
@@ -1339,13 +1491,20 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
 		/*
 		 * This should never fail, but play it safe anyway.
 		 */
-		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
+		if (zpool_open_silent(hdl, thename, &zhp) != 0)
 			ret = -1;
-		} else if (zhp != NULL) {
-			ret = zpool_create_zvol_links(zhp);
+		else if (zhp != NULL)
 			zpool_close(zhp);
+		(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
+		zpool_get_rewind_policy(config, &policy);
+		if (policy.zrp_request &
+		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
+			zpool_rewind_exclaim(hdl, newname ? origname : thename,
+			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
+			    nvi);
 		}
-
+		nvlist_free(nvi);
+		return (0);
 	}
 
 	zcmd_free_nvlists(&zc);
@@ -1355,28 +1514,83 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
 }
 
 /*
- * Scrub the pool.
+ * Scan the pool.
  */
 int
-zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
+zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	zc.zc_cookie = type;
+	zc.zc_cookie = func;
 
-	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
+	    (errno == ENOENT && func != POOL_SCAN_NONE))
 		return (0);
 
-	(void) snprintf(msg, sizeof (msg),
-	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+	if (func == POOL_SCAN_SCRUB) {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+	} else if (func == POOL_SCAN_NONE) {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
+		    zc.zc_name);
+	} else {
+		assert(!"unexpected result");
+	}
 
-	if (errno == EBUSY)
-		return (zfs_error(hdl, EZFS_RESILVERING, msg));
-	else
+	if (errno == EBUSY) {
+		nvlist_t *nvroot;
+		pool_scan_stat_t *ps = NULL;
+		uint_t psc;
+
+		verify(nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+		(void) nvlist_lookup_uint64_array(nvroot,
+		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
+		if (ps && ps->pss_func == POOL_SCAN_SCRUB)
+			return (zfs_error(hdl, EZFS_SCRUBBING, msg));
+		else
+			return (zfs_error(hdl, EZFS_RESILVERING, msg));
+	} else if (errno == ENOENT) {
+		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
+	} else {
 		return (zpool_standard_error(hdl, errno, msg));
+	}
+}
+
+/*
+ * This provides a very minimal check whether a given string is likely a
+ * c#t#d# style string.  Users of this are expected to do their own
+ * verification of the s# part.
+ */
+#define	CTD_CHECK(str)  (str && str[0] == 'c' && isdigit(str[1]))
+
+/*
+ * More elaborate version for ones which may start with "/dev/dsk/"
+ * and the like.
+ */
+static int
+ctd_check_path(char *str) {
+	/*
+	 * If it starts with a slash, check the last component.
+	 */
+	if (str && str[0] == '/') {
+		char *tmp = strrchr(str, '/');
+
+		/*
+		 * If it ends in "/old", check the second-to-last
+		 * component of the string instead.
+		 */
+		if (tmp != str && strcmp(tmp, "/old") == 0) {
+			for (tmp--; *tmp != '/'; tmp--)
+				;
+		}
+		str = tmp + 1;
+	}
+	return (CTD_CHECK(str));
 }
 
 /*
@@ -1433,25 +1647,99 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
 			break;
 
 		/*
-		 * Search for the requested value. We special case the search
-		 * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise,
-		 * all other searches are simple string compares.
+		 * Search for the requested value. Special cases:
+		 *
+		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
+		 *   "s0" or "s0/old".  The "s0" part is hidden from the user,
+		 *   but included in the string, so this matches around it.
+		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
+		 *
+		 * Otherwise, all other searches are simple string compares.
 		 */
-		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) {
+		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
+		    ctd_check_path(val)) {
 			uint64_t wholedisk = 0;
 
 			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 			    &wholedisk);
 			if (wholedisk) {
+				int slen = strlen(srchval);
+				int vlen = strlen(val);
+
+				if (slen != vlen - 2)
+					break;
+
+				/*
+				 * make_leaf_vdev() should only set
+				 * wholedisk for ZPOOL_CONFIG_PATHs which
+				 * will include "/dev/dsk/", giving plenty of
+				 * room for the indices used next.
+				 */
+				ASSERT(vlen >= 6);
+
 				/*
-				 * For whole disks, the internal path has 's0',
-				 * but the path passed in by the user doesn't.
+				 * strings identical except trailing "s0"
 				 */
-				if (strlen(srchval) == strlen(val) - 2 &&
-				    strncmp(srchval, val, strlen(srchval)) == 0)
+				if (strcmp(&val[vlen - 2], "s0") == 0 &&
+				    strncmp(srchval, val, slen) == 0)
 					return (nv);
+
+				/*
+				 * strings identical except trailing "s0/old"
+				 */
+				if (strcmp(&val[vlen - 6], "s0/old") == 0 &&
+				    strcmp(&srchval[slen - 4], "/old") == 0 &&
+				    strncmp(srchval, val, slen - 4) == 0)
+					return (nv);
+
 				break;
 			}
+		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
+			char *type, *idx, *end, *p;
+			uint64_t id, vdev_id;
+
+			/*
+			 * Determine our vdev type, keeping in mind
+			 * that the srchval is composed of a type and
+			 * vdev id pair (i.e. mirror-4).
+			 */
+			if ((type = strdup(srchval)) == NULL)
+				return (NULL);
+
+			if ((p = strrchr(type, '-')) == NULL) {
+				free(type);
+				break;
+			}
+			idx = p + 1;
+			*p = '\0';
+
+			/*
+			 * If the types don't match then keep looking.
+			 */
+			if (strncmp(val, type, strlen(val)) != 0) {
+				free(type);
+				break;
+			}
+
+			verify(strncmp(type, VDEV_TYPE_RAIDZ,
+			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
+			    strncmp(type, VDEV_TYPE_MIRROR,
+			    strlen(VDEV_TYPE_MIRROR)) == 0);
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+
+			errno = 0;
+			vdev_id = strtoull(idx, &end, 10);
+
+			free(type);
+			if (errno != 0)
+				return (NULL);
+
+			/*
+			 * Now verify that we have the correct vdev id.
+			 */
+			if (vdev_id == id)
+				return (nv);
 		}
 
 		/*
@@ -1537,6 +1825,18 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
 	return (ret);
 }
 
+/*
+ * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
+ */
+boolean_t
+zpool_vdev_is_interior(const char *name)
+{
+	if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
+	    strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
 nvlist_t *
 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
@@ -1551,6 +1851,8 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
 	guid = strtoull(path, &end, 10);
 	if (guid != 0 && *end == '\0') {
 		verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
+	} else if (zpool_vdev_is_interior(path)) {
+		verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
 	} else if (path[0] != '/') {
 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
 		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
@@ -1721,34 +2023,6 @@ zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
 }
 
 /*
- * Returns TRUE if the given guid corresponds to the given type.
- * This is used to check for hot spares (INUSE or not), and level 2 cache
- * devices.
- */
-static boolean_t
-is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
-{
-	uint64_t target_guid;
-	nvlist_t *nvroot;
-	nvlist_t **list;
-	uint_t count;
-	int i;
-
-	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) == 0);
-	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
-		for (i = 0; i < count; i++) {
-			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
-			    &target_guid) == 0);
-			if (guid == target_guid)
-				return (B_TRUE);
-		}
-	}
-
-	return (B_FALSE);
-}
-
-/*
  * If the device has being dynamically expanded then we need to relabel
  * the disk to use the new unallocated space.
  */
@@ -1816,8 +2090,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-	if (avail_spare ||
-	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
+	if (avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	if (flags & ZFS_ONLINE_EXPAND ||
@@ -1848,8 +2121,15 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
 	zc.zc_cookie = VDEV_STATE_ONLINE;
 	zc.zc_obj = flags;
 
-	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
+		if (errno == EINVAL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
+			    "from this pool into a new one.  Use '%s' "
+			    "instead"), "zpool detach");
+			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
+		}
 		return (zpool_standard_error(hdl, errno, msg));
+	}
 
 	*newstate = zc.zc_cookie;
 	return (0);
@@ -1877,8 +2157,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-	if (avail_spare ||
-	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
+	if (avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	zc.zc_cookie = VDEV_STATE_OFFLINE;
@@ -1910,7 +2189,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
  * Mark the given vdev faulted.
  */
 int
-zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
+zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
@@ -1922,6 +2201,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_guid = guid;
 	zc.zc_cookie = VDEV_STATE_FAULTED;
+	zc.zc_obj = aux;
 
 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
@@ -1944,7 +2224,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
  * Mark the given vdev degraded.
  */
 int
-zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
+zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
@@ -1956,6 +2236,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_guid = guid;
 	zc.zc_cookie = VDEV_STATE_DEGRADED;
+	zc.zc_obj = aux;
 
 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
@@ -2053,7 +2334,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
 
-	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
+	if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
 		return (-1);
 
 	/*
@@ -2241,6 +2522,257 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
 }
 
 /*
+ * Find a mirror vdev in the source nvlist.
+ *
+ * The mchild array contains a list of disks in one of the top-level mirrors
+ * of the source pool.  The schild array contains a list of disks that the
+ * user specified on the command line.  We loop over the mchild array to
+ * see if any entry in the schild array matches.
+ *
+ * If a disk in the mchild array is found in the schild array, we return
+ * the index of that entry.  Otherwise we return -1.
+ */
+static int
+find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
+    nvlist_t **schild, uint_t schildren)
+{
+	uint_t mc;
+
+	for (mc = 0; mc < mchildren; mc++) {
+		uint_t sc;
+		char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
+		    mchild[mc], B_FALSE);
+
+		for (sc = 0; sc < schildren; sc++) {
+			char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
+			    schild[sc], B_FALSE);
+			boolean_t result = (strcmp(mpath, spath) == 0);
+
+			free(spath);
+			if (result) {
+				free(mpath);
+				return (mc);
+			}
+		}
+
+		free(mpath);
+	}
+
+	return (-1);
+}
+
+/*
+ * Split a mirror pool.  If newroot points to null, then a new nvlist
+ * is generated and it is the responsibility of the caller to free it.
+ */
+int
+zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
+    nvlist_t *props, splitflags_t flags)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
+	nvlist_t **varray = NULL, *zc_props = NULL;
+	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	uint64_t vers;
+	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
+	int retval = 0;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
+
+	if (!zpool_name_valid(hdl, B_FALSE, newname))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+
+	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
+		(void) fprintf(stderr, gettext("Internal error: unable to "
+		    "retrieve pool configuration\n"));
+		return (-1);
+	}
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
+	    == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
+
+	if (props) {
+		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
+		    props, vers, B_TRUE, msg)) == NULL)
+			return (-1);
+	}
+
+	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Source pool is missing vdev tree"));
+		if (zc_props)
+			nvlist_free(zc_props);
+		return (-1);
+	}
+
+	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
+	vcount = 0;
+
+	if (*newroot == NULL ||
+	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
+	    &newchild, &newchildren) != 0)
+		newchildren = 0;
+
+	for (c = 0; c < children; c++) {
+		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
+		char *type;
+		nvlist_t **mchild, *vdev;
+		uint_t mchildren;
+		int entry;
+
+		/*
+		 * Unlike cache & spares, slogs are stored in the
+		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
+		 */
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+		    &is_log);
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+		    &is_hole);
+		if (is_log || is_hole) {
+			/*
+			 * Create a hole vdev and put it in the config.
+			 */
+			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
+				goto out;
+			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
+			    VDEV_TYPE_HOLE) != 0)
+				goto out;
+			if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
+			    1) != 0)
+				goto out;
+			if (lastlog == 0)
+				lastlog = vcount;
+			varray[vcount++] = vdev;
+			continue;
+		}
+		lastlog = 0;
+		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
+		    == 0);
+		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Source pool must be composed only of mirrors\n"));
+			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+			goto out;
+		}
+
+		verify(nvlist_lookup_nvlist_array(child[c],
+		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
+
+		/* find or add an entry for this top-level vdev */
+		if (newchildren > 0 &&
+		    (entry = find_vdev_entry(zhp, mchild, mchildren,
+		    newchild, newchildren)) >= 0) {
+			/* We found a disk that the user specified. */
+			vdev = mchild[entry];
+			++found;
+		} else {
+			/* User didn't specify a disk for this vdev. */
+			vdev = mchild[mchildren - 1];
+		}
+
+		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
+			goto out;
+	}
+
+	/* did we find every disk the user specified? */
+	if (found != newchildren) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
+		    "include at most one disk from each mirror"));
+		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+		goto out;
+	}
+
+	/* Prepare the nvlist for populating. */
+	if (*newroot == NULL) {
+		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
+			goto out;
+		freelist = B_TRUE;
+		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0)
+			goto out;
+	} else {
+		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
+	}
+
+	/* Add all the children we found */
+	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
+	    lastlog == 0 ? vcount : lastlog) != 0)
+		goto out;
+
+	/*
+	 * If we're just doing a dry run, exit now with success.
+	 */
+	if (flags.dryrun) {
+		memory_err = B_FALSE;
+		freelist = B_FALSE;
+		goto out;
+	}
+
+	/* now build up the config list & call the ioctl */
+	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
+		goto out;
+
+	if (nvlist_add_nvlist(newconfig,
+	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
+	    nvlist_add_string(newconfig,
+	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
+	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
+		goto out;
+
+	/*
+	 * The new pool is automatically part of the namespace unless we
+	 * explicitly export it.
+	 */
+	if (!flags.import)
+		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
+	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
+		goto out;
+	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
+		goto out;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
+		retval = zpool_standard_error(hdl, errno, msg);
+		goto out;
+	}
+
+	freelist = B_FALSE;
+	memory_err = B_FALSE;
+
+out:
+	if (varray != NULL) {
+		int v;
+
+		for (v = 0; v < vcount; v++)
+			nvlist_free(varray[v]);
+		free(varray);
+	}
+	zcmd_free_nvlists(&zc);
+	if (zc_props)
+		nvlist_free(zc_props);
+	if (newconfig)
+		nvlist_free(newconfig);
+	if (freelist) {
+		nvlist_free(*newroot);
+		*newroot = NULL;
+	}
+
+	if (retval != 0)
+		return (retval);
+
+	if (memory_err)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+/*
  * Remove the given device.  Currently, this is supported only for hot spares
  * and level 2 cache devices.
  */
@@ -2250,24 +2782,34 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare, l2cache;
+	boolean_t avail_spare, l2cache, islog;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	uint64_t version;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
-	    NULL)) == 0)
+	    &islog)) == 0)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
-
-	if (!avail_spare && !l2cache) {
+	/*
+	 * XXX - this should just go away.
+	 */
+	if (!avail_spare && !l2cache && !islog) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "only inactive hot spares or cache devices "
-		    "can be removed"));
+		    "only inactive hot spares, cache, top-level, "
+		    "or log devices can be removed"));
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 	}
 
+	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
+	if (islog && version < SPA_VERSION_HOLES) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool must be upgrade to support log removal"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
@@ -2280,13 +2822,15 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
  * Clear the errors for the pool, or the particular device if specified.
  */
 int
-zpool_clear(zpool_handle_t *zhp, const char *path)
+zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
+	zpool_rewind_policy_t policy;
 	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	nvlist_t *nvi = NULL;
 
 	if (path)
 		(void) snprintf(msg, sizeof (msg),
@@ -2314,9 +2858,31 @@ zpool_clear(zpool_handle_t *zhp, const char *path)
 		    &zc.zc_guid) == 0);
 	}
 
-	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
+	zpool_get_rewind_policy(rewindnvl, &policy);
+	zc.zc_cookie = policy.zrp_request;
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 8192) != 0)
+		return (-1);
+
+	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, rewindnvl) != 0)
+		return (-1);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0 ||
+	    ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
+	    errno != EPERM && errno != EACCES)) {
+		if (policy.zrp_request &
+		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
+			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
+			zpool_rewind_exclaim(hdl, zc.zc_name,
+			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
+			    nvi);
+			nvlist_free(nvi);
+		}
+		zcmd_free_nvlists(&zc);
 		return (0);
+	}
 
+	zcmd_free_nvlists(&zc);
 	return (zpool_standard_error(hdl, errno, msg));
 }
 
@@ -2336,6 +2902,7 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_guid = guid;
+	zc.zc_cookie = ZPOOL_NO_REWIND;
 
 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
 		return (0);
@@ -2344,173 +2911,6 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
 }
 
 /*
- * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
- * hierarchy.
- */
-int
-zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
-    void *data)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	char (*paths)[MAXPATHLEN];
-	size_t size = 4;
-	int curr, fd, base, ret = 0;
-	DIR *dirp;
-	struct dirent *dp;
-	struct stat st;
-
-	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
-		return (errno == ENOENT ? 0 : -1);
-
-	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
-		int err = errno;
-		(void) close(base);
-		return (err == ENOENT ? 0 : -1);
-	}
-
-	/*
-	 * Oddly this wasn't a directory -- ignore that failure since we
-	 * know there are no links lower in the (non-existant) hierarchy.
-	 */
-	if (!S_ISDIR(st.st_mode)) {
-		(void) close(base);
-		return (0);
-	}
-
-	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
-		(void) close(base);
-		return (-1);
-	}
-
-	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
-	curr = 0;
-
-	while (curr >= 0) {
-		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
-			goto err;
-
-		if (S_ISDIR(st.st_mode)) {
-			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
-				goto err;
-
-			if ((dirp = fdopendir(fd)) == NULL) {
-				(void) close(fd);
-				goto err;
-			}
-
-			while ((dp = readdir(dirp)) != NULL) {
-				if (dp->d_name[0] == '.')
-					continue;
-
-				if (curr + 1 == size) {
-					paths = zfs_realloc(hdl, paths,
-					    size * sizeof (paths[0]),
-					    size * 2 * sizeof (paths[0]));
-					if (paths == NULL) {
-						(void) closedir(dirp);
-						(void) close(fd);
-						goto err;
-					}
-
-					size *= 2;
-				}
-
-				(void) strlcpy(paths[curr + 1], paths[curr],
-				    sizeof (paths[curr + 1]));
-				(void) strlcat(paths[curr], "/",
-				    sizeof (paths[curr]));
-				(void) strlcat(paths[curr], dp->d_name,
-				    sizeof (paths[curr]));
-				curr++;
-			}
-
-			(void) closedir(dirp);
-
-		} else {
-			if ((ret = cb(paths[curr], data)) != 0)
-				break;
-		}
-
-		curr--;
-	}
-
-	free(paths);
-	(void) close(base);
-
-	return (ret);
-
-err:
-	free(paths);
-	(void) close(base);
-	return (-1);
-}
-
-typedef struct zvol_cb {
-	zpool_handle_t *zcb_pool;
-	boolean_t zcb_create;
-} zvol_cb_t;
-
-/*ARGSUSED*/
-static int
-do_zvol_create(zfs_handle_t *zhp, void *data)
-{
-	int ret = 0;
-
-	if (ZFS_IS_VOLUME(zhp)) {
-		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
-		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
-	}
-
-	if (ret == 0)
-		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
-
-	zfs_close(zhp);
-
-	return (ret);
-}
-
-/*
- * Iterate over all zvols in the pool and make any necessary minor nodes.
- */
-int
-zpool_create_zvol_links(zpool_handle_t *zhp)
-{
-	zfs_handle_t *zfp;
-	int ret;
-
-	/*
-	 * If the pool is unavailable, just return success.
-	 */
-	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
-	    zhp->zpool_name)) == NULL)
-		return (0);
-
-	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
-
-	zfs_close(zfp);
-	return (ret);
-}
-
-static int
-do_zvol_remove(const char *dataset, void *data)
-{
-	zpool_handle_t *zhp = data;
-
-	return (zvol_remove_link(zhp->zpool_hdl, dataset));
-}
-
-/*
- * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
- * by examining the /dev links so that a corrupted pool doesn't impede this
- * operation.
- */
-int
-zpool_remove_zvol_links(zpool_handle_t *zhp)
-{
-	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
-}
-
-/*
  * Convert from a devid string to a path.
  */
 static char *
@@ -2602,7 +3002,8 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
  * of these checks.
  */
 char *
-zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
+    boolean_t verbose)
 {
 	char *path, *devid;
 	uint64_t value;
@@ -2625,7 +3026,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
 		 * open a misbehaving device, which can have undesirable
 		 * effects.
 		 */
-		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &vsc) != 0 ||
 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
 		    zhp != NULL &&
@@ -2662,10 +3063,23 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
 
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 		    &value) == 0 && value) {
+			int pathlen = strlen(path);
 			char *tmp = zfs_strdup(hdl, path);
-			if (tmp == NULL)
-				return (NULL);
-			tmp[strlen(path) - 2] = '\0';
+
+			/*
+			 * If it starts with c#, and ends with "s0", chop
+			 * the "s0" off, or if it ends with "s0/old", remove
+			 * the "s0" from the middle.
+			 */
+			if (CTD_CHECK(tmp)) {
+				if (strcmp(&tmp[pathlen - 2], "s0") == 0) {
+					tmp[pathlen - 2] = '\0';
+				} else if (pathlen > 6 &&
+				    strcmp(&tmp[pathlen - 6], "s0/old") == 0) {
+					(void) strcpy(&tmp[pathlen - 6],
+					    "/old");
+				}
+			}
 			return (tmp);
 		}
 	} else {
@@ -2681,6 +3095,20 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
 			    (u_longlong_t)value);
 			path = buf;
 		}
+
+		/*
+		 * We identify each top-level vdev by using a <type-id>
+		 * naming convention.
+		 */
+		if (verbose) {
+			uint64_t id;
+
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+			(void) snprintf(buf, sizeof (buf), "%s-%llu", path,
+			    (u_longlong_t)id);
+			path = buf;
+		}
 	}
 
 	return (zfs_strdup(hdl, path));
@@ -2899,7 +3327,7 @@ get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
  * into 'records'.  'leftover' is set to the number of bytes that weren't
  * processed as there wasn't a complete record.
  */
-static int
+int
 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
     nvlist_t ***records, uint_t *numrecords)
 {
@@ -3218,6 +3646,7 @@ supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
 	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
 	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
 	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
+	    strcmp(type, VDEV_TYPE_HOLE) == 0 ||
 	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "vdev type '%s' is not supported"), type);
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 1ffb6294c..672e004ef 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -20,14 +20,12 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
-#include <libdevinfo.h>
 #include <libintl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -36,21 +34,394 @@
 #include <stddef.h>
 #include <fcntl.h>
 #include <sys/mount.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <sys/avl.h>
-#include <stddef.h>
+#include <pthread.h>
+#include <umem.h>
 
 #include <libzfs.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
+#include "zfs_fletcher.h"
 #include "libzfs_impl.h"
+#include <sha2.h>
+#include <sys/zio_checksum.h>
+#include <sys/ddt.h>
 
-#include <fletcher.c> /* XXX */
+/* in libzfs_dataset.c */
+extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
 
 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
-    int, avl_tree_t *, char **);
+    int, const char *, nvlist_t *, avl_tree_t *, char **);
+
+static const zio_cksum_t zero_cksum = { 0 };
+
+typedef struct dedup_arg {
+	int	inputfd;
+	int	outputfd;
+	libzfs_handle_t  *dedup_hdl;
+} dedup_arg_t;
+
+typedef struct dataref {
+	uint64_t ref_guid;
+	uint64_t ref_object;
+	uint64_t ref_offset;
+} dataref_t;
+
+typedef struct dedup_entry {
+	struct dedup_entry	*dde_next;
+	zio_cksum_t dde_chksum;
+	uint64_t dde_prop;
+	dataref_t dde_ref;
+} dedup_entry_t;
+
+#define	MAX_DDT_PHYSMEM_PERCENT		20
+#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128
+
+typedef struct dedup_table {
+	dedup_entry_t	**dedup_hash_array;
+	umem_cache_t	*ddecache;
+	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
+	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
+	uint64_t	ddt_count;
+	int		numhashbits;
+	boolean_t	ddt_full;
+} dedup_table_t;
+
+static int
+high_order_bit(uint64_t n)
+{
+	int count;
+
+	for (count = 0; n != 0; count++)
+		n >>= 1;
+	return (count);
+}
+
+static size_t
+ssread(void *buf, size_t len, FILE *stream)
+{
+	size_t outlen;
+
+	if ((outlen = fread(buf, len, 1, stream)) == 0)
+		return (0);
+
+	return (outlen);
+}
+
+static void
+ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
+    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
+{
+	dedup_entry_t	*dde;
+
+	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
+		if (ddt->ddt_full == B_FALSE) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Dedup table full.  Deduplication will continue "
+			    "with existing table entries"));
+			ddt->ddt_full = B_TRUE;
+		}
+		return;
+	}
+
+	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
+	    != NULL) {
+		assert(*ddepp == NULL);
+		dde->dde_next = NULL;
+		dde->dde_chksum = *cs;
+		dde->dde_prop = prop;
+		dde->dde_ref = *dr;
+		*ddepp = dde;
+		ddt->cur_ddt_size += sizeof (dedup_entry_t);
+		ddt->ddt_count++;
+	}
+}
+
+/*
+ * Using the specified dedup table, do a lookup for an entry with
+ * the checksum cs.  If found, return the block's reference info
+ * in *dr. Otherwise, insert a new entry in the dedup table, using
+ * the reference information specified by *dr.
+ *
+ * return value:  true - entry was found
+ *		  false - entry was not found
+ */
+static boolean_t
+ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
+    uint64_t prop, dataref_t *dr)
+{
+	uint32_t hashcode;
+	dedup_entry_t **ddepp;
+
+	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
+
+	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
+	    ddepp = &((*ddepp)->dde_next)) {
+		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
+		    (*ddepp)->dde_prop == prop) {
+			*dr = (*ddepp)->dde_ref;
+			return (B_TRUE);
+		}
+	}
+	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
+	return (B_FALSE);
+}
+
+static int
+cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+{
+	fletcher_4_incremental_native(buf, len, zc);
+	return (write(outfd, buf, len));
+}
+
+/*
+ * This function is started in a separate thread when the dedup option
+ * has been requested.  The main send thread determines the list of
+ * snapshots to be included in the send stream and makes the ioctl calls
+ * for each one.  But instead of having the ioctl send the output to the
+ * the output fd specified by the caller of zfs_send()), the
+ * ioctl is told to direct the output to a pipe, which is read by the
+ * alternate thread running THIS function.  This function does the
+ * dedup'ing by:
+ *  1. building a dedup table (the DDT)
+ *  2. doing checksums on each data block and inserting a record in the DDT
+ *  3. looking for matching checksums, and
+ *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
+ *      a duplicate block is found.
+ * The output of this function then goes to the output fd requested
+ * by the caller of zfs_send().
+ */
+static void *
+cksummer(void *arg)
+{
+	dedup_arg_t *dda = arg;
+	char *buf = malloc(1<<20);
+	dmu_replay_record_t thedrr;
+	dmu_replay_record_t *drr = &thedrr;
+	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
+	struct drr_end *drre = &thedrr.drr_u.drr_end;
+	struct drr_object *drro = &thedrr.drr_u.drr_object;
+	struct drr_write *drrw = &thedrr.drr_u.drr_write;
+	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
+	FILE *ofp;
+	int outfd;
+	dmu_replay_record_t wbr_drr = {0};
+	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
+	dedup_table_t ddt;
+	zio_cksum_t stream_cksum;
+	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
+	uint64_t numbuckets;
+
+	ddt.max_ddt_size =
+	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
+	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+
+	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+
+	/*
+	 * numbuckets must be a power of 2.  Increase number to
+	 * a power of 2 if necessary.
+	 */
+	if (!ISP2(numbuckets))
+		numbuckets = 1 << high_order_bit(numbuckets);
+
+	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
+	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
+	ddt.numhashbits = high_order_bit(numbuckets) - 1;
+	ddt.ddt_full = B_FALSE;
+
+	/* Initialize the write-by-reference block. */
+	wbr_drr.drr_type = DRR_WRITE_BYREF;
+	wbr_drr.drr_payloadlen = 0;
+
+	outfd = dda->outputfd;
+	ofp = fdopen(dda->inputfd, "r");
+	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+		{
+			int	fflags;
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+			/* set the DEDUP feature flag for this stream */
+			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
+			    DMU_BACKUP_FEATURE_DEDUPPROPS);
+			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
+
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
+				int sz = drr->drr_payloadlen;
+
+				if (sz > 1<<20) {
+					free(buf);
+					buf = malloc(sz);
+				}
+				(void) ssread(buf, sz, ofp);
+				if (ferror(stdin))
+					perror("fread");
+				if (cksum_and_write(buf, sz, &stream_cksum,
+				    outfd) == -1)
+					goto out;
+			}
+			break;
+		}
+
+		case DRR_END:
+		{
+			/* use the recalculated checksum */
+			ZIO_SET_CHECKSUM(&drre->drr_checksum,
+			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
+			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
+			if ((write(outfd, drr,
+			    sizeof (dmu_replay_record_t))) == -1)
+				goto out;
+			break;
+		}
+
+		case DRR_OBJECT:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			if (drro->drr_bonuslen > 0) {
+				(void) ssread(buf,
+				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+				    ofp);
+				if (cksum_and_write(buf,
+				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+				    &stream_cksum, outfd) == -1)
+					goto out;
+			}
+			break;
+		}
+
+		case DRR_SPILL:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			(void) ssread(buf, drrs->drr_length, ofp);
+			if (cksum_and_write(buf, drrs->drr_length,
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
+		case DRR_FREEOBJECTS:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
+		case DRR_WRITE:
+		{
+			dataref_t	dataref;
+
+			(void) ssread(buf, drrw->drr_length, ofp);
+
+			/*
+			 * Use the existing checksum if it's dedup-capable,
+			 * else calculate a SHA256 checksum for it.
+			 */
+
+			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
+			    zero_cksum) ||
+			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
+				SHA256_CTX	ctx;
+				zio_cksum_t	tmpsha256;
+
+				SHA256Init(&ctx);
+				SHA256Update(&ctx, buf, drrw->drr_length);
+				SHA256Final(&tmpsha256, &ctx);
+				drrw->drr_key.ddk_cksum.zc_word[0] =
+				    BE_64(tmpsha256.zc_word[0]);
+				drrw->drr_key.ddk_cksum.zc_word[1] =
+				    BE_64(tmpsha256.zc_word[1]);
+				drrw->drr_key.ddk_cksum.zc_word[2] =
+				    BE_64(tmpsha256.zc_word[2]);
+				drrw->drr_key.ddk_cksum.zc_word[3] =
+				    BE_64(tmpsha256.zc_word[3]);
+				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
+				drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
+			}
+
+			dataref.ref_guid = drrw->drr_toguid;
+			dataref.ref_object = drrw->drr_object;
+			dataref.ref_offset = drrw->drr_offset;
+
+			if (ddt_update(dda->dedup_hdl, &ddt,
+			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
+			    &dataref)) {
+				/* block already present in stream */
+				wbr_drrr->drr_object = drrw->drr_object;
+				wbr_drrr->drr_offset = drrw->drr_offset;
+				wbr_drrr->drr_length = drrw->drr_length;
+				wbr_drrr->drr_toguid = drrw->drr_toguid;
+				wbr_drrr->drr_refguid = dataref.ref_guid;
+				wbr_drrr->drr_refobject =
+				    dataref.ref_object;
+				wbr_drrr->drr_refoffset =
+				    dataref.ref_offset;
+
+				wbr_drrr->drr_checksumtype =
+				    drrw->drr_checksumtype;
+				wbr_drrr->drr_checksumflags =
+				    drrw->drr_checksumtype;
+				wbr_drrr->drr_key.ddk_cksum =
+				    drrw->drr_key.ddk_cksum;
+				wbr_drrr->drr_key.ddk_prop =
+				    drrw->drr_key.ddk_prop;
+
+				if (cksum_and_write(&wbr_drr,
+				    sizeof (dmu_replay_record_t), &stream_cksum,
+				    outfd) == -1)
+					goto out;
+			} else {
+				/* block not previously seen */
+				if (cksum_and_write(drr,
+				    sizeof (dmu_replay_record_t), &stream_cksum,
+				    outfd) == -1)
+					goto out;
+				if (cksum_and_write(buf,
+				    drrw->drr_length,
+				    &stream_cksum, outfd) == -1)
+					goto out;
+			}
+			break;
+		}
+
+		case DRR_FREE:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
+		default:
+			(void) printf("INVALID record type 0x%x\n",
+			    drr->drr_type);
+			/* should never happen, so assert */
+			assert(B_FALSE);
+		}
+	}
+out:
+	umem_cache_destroy(ddt.ddecache);
+	free(ddt.dedup_hash_array);
+	free(buf);
+	(void) fclose(ofp);
+
+	return (NULL);
+}
 
 /*
  * Routines for dealing with the AVL tree of fs-nvlists
@@ -173,6 +544,7 @@ typedef struct send_data {
 	nvlist_t *snapprops;
 	const char *fromsnap;
 	const char *tosnap;
+	boolean_t recursive;
 
 	/*
 	 * The header nvlist is of the following format:
@@ -240,27 +612,50 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
 		zfs_prop_t prop = zfs_name_to_prop(propname);
 		nvlist_t *propnv;
 
-		assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
+		if (!zfs_prop_user(propname)) {
+			/*
+			 * Realistically, this should never happen.  However,
+			 * we want the ability to add DSL properties without
+			 * needing to make incompatible version changes.  We
+			 * need to ignore unknown properties to allow older
+			 * software to still send datasets containing these
+			 * properties, with the unknown properties elided.
+			 */
+			if (prop == ZPROP_INVAL)
+				continue;
 
-		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
-			continue;
+			if (zfs_prop_readonly(prop))
+				continue;
+		}
 
 		verify(nvpair_value_nvlist(elem, &propnv) == 0);
 		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
 		    prop == ZFS_PROP_REFQUOTA ||
 		    prop == ZFS_PROP_REFRESERVATION) {
-			/* these guys are modifyable, but have no source */
+			char *source;
 			uint64_t value;
 			verify(nvlist_lookup_uint64(propnv,
 			    ZPROP_VALUE, &value) == 0);
 			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
 				continue;
+			/*
+			 * May have no source before SPA_VERSION_RECVD_PROPS,
+			 * but is still modifiable.
+			 */
+			if (nvlist_lookup_string(propnv,
+			    ZPROP_SOURCE, &source) == 0) {
+				if ((strcmp(source, zhp->zfs_name) != 0) &&
+				    (strcmp(source,
+				    ZPROP_SOURCE_VAL_RECVD) != 0))
+					continue;
+			}
 		} else {
 			char *source;
 			if (nvlist_lookup_string(propnv,
 			    ZPROP_SOURCE, &source) != 0)
 				continue;
-			if (strcmp(source, zhp->zfs_name) != 0)
+			if ((strcmp(source, zhp->zfs_name) != 0) &&
+			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
 				continue;
 		}
 
@@ -289,7 +684,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
 {
 	send_data_t *sd = arg;
 	nvlist_t *nvfs, *nv;
-	int rv;
+	int rv = 0;
 	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
 	char guidstring[64];
@@ -331,7 +726,8 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
 	nvlist_free(nvfs);
 
 	/* iterate over children */
-	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
+	if (sd->recursive)
+		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
 
 	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
 
@@ -341,7 +737,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
 
 static int
 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
-    const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
+    const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
 {
 	zfs_handle_t *zhp;
 	send_data_t sd = { 0 };
@@ -354,6 +750,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
 	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
 	sd.fromsnap = fromsnap;
 	sd.tosnap = tosnap;
+	sd.recursive = recursive;
 
 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
 		nvlist_free(sd.fss);
@@ -415,7 +812,7 @@ zfs_snapshot_compare(const void *larg, const void *rarg)
 		return (0);
 }
 
-static int
+int
 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
 {
 	int ret = 0;
@@ -446,13 +843,16 @@ typedef struct send_dump_data {
 	/* these are all just the short snapname (the part after the @) */
 	const char *fromsnap;
 	const char *tosnap;
-	char lastsnap[ZFS_MAXNAMELEN];
+	char prevsnap[ZFS_MAXNAMELEN];
 	boolean_t seenfrom, seento, replicate, doall, fromorigin;
 	boolean_t verbose;
 	int outfd;
 	boolean_t err;
 	nvlist_t *fss;
 	avl_tree_t *fsavl;
+	snapfilter_cb_t *filter_cb;
+	void *filter_cb_arg;
+	nvlist_t *debugnv;
 } send_dump_data_t;
 
 /*
@@ -461,10 +861,11 @@ typedef struct send_dump_data {
  */
 static int
 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
-    int outfd)
+    int outfd, boolean_t enoent_ok, boolean_t *got_enoent, nvlist_t *debugnv)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *thisdbg;
 
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
@@ -475,11 +876,26 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
 	zc.zc_cookie = outfd;
 	zc.zc_obj = fromorigin;
 
+	*got_enoent = B_FALSE;
+
+	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
+	if (fromsnap && fromsnap[0] != '\0') {
+		VERIFY(0 == nvlist_add_string(thisdbg,
+		    "fromsnap", fromsnap));
+	}
+
 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
 		char errbuf[1024];
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "warning: cannot send '%s'"), zhp->zfs_name);
 
+		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
+		if (debugnv) {
+			VERIFY(0 == nvlist_add_nvlist(debugnv,
+			    zhp->zfs_name, thisdbg));
+		}
+		nvlist_free(thisdbg);
+
 		switch (errno) {
 
 		case EXDEV:
@@ -488,6 +904,10 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 
 		case ENOENT:
+			if (enoent_ok) {
+				*got_enoent = B_TRUE;
+				return (0);
+			}
 			if (zfs_dataset_exists(hdl, zc.zc_name,
 			    ZFS_TYPE_SNAPSHOT)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -515,6 +935,10 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
 		}
 	}
 
+	if (debugnv)
+		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
+	nvlist_free(thisdbg);
+
 	return (0);
 }
 
@@ -524,13 +948,17 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
 	send_dump_data_t *sdd = arg;
 	const char *thissnap;
 	int err;
+	boolean_t got_enoent;
+	boolean_t isfromsnap, istosnap;
+	boolean_t exclude = B_FALSE;
 
 	thissnap = strchr(zhp->zfs_name, '@') + 1;
+	isfromsnap = (sdd->fromsnap != NULL &&
+	    strcmp(sdd->fromsnap, thissnap) == 0);
 
-	if (sdd->fromsnap && !sdd->seenfrom &&
-	    strcmp(sdd->fromsnap, thissnap) == 0) {
+	if (!sdd->seenfrom && isfromsnap) {
 		sdd->seenfrom = B_TRUE;
-		(void) strcpy(sdd->lastsnap, thissnap);
+		(void) strcpy(sdd->prevsnap, thissnap);
 		zfs_close(zhp);
 		return (0);
 	}
@@ -540,20 +968,63 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
 		return (0);
 	}
 
+	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
+	if (istosnap)
+		sdd->seento = B_TRUE;
+
+	if (!sdd->doall && !isfromsnap && !istosnap) {
+		if (sdd->replicate) {
+			char *snapname;
+			nvlist_t *snapprops;
+			/*
+			 * Filter out all intermediate snapshots except origin
+			 * snapshots needed to replicate clones.
+			 */
+			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
+			    zhp->zfs_dmustats.dds_guid, &snapname);
+
+			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
+			    "snapprops", &snapprops));
+			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
+			    thissnap, &snapprops));
+			exclude = !nvlist_exists(snapprops, "is_clone_origin");
+		} else {
+			exclude = B_TRUE;
+		}
+	}
+
+	/*
+	 * If a filter function exists, call it to determine whether
+	 * this snapshot will be sent.
+	 */
+	if (exclude || (sdd->filter_cb != NULL &&
+	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
+		/*
+		 * This snapshot is filtered out.  Don't send it, and don't
+		 * set prevsnap, so it will be as if this snapshot didn't
+		 * exist, and the next accepted snapshot will be sent as
+		 * an incremental from the last accepted one, or as the
+		 * first (and full) snapshot in the case of a replication,
+		 * non-incremental send.
+		 */
+		zfs_close(zhp);
+		return (0);
+	}
+
 	/* send it */
 	if (sdd->verbose) {
 		(void) fprintf(stderr, "sending from @%s to %s\n",
-		    sdd->lastsnap, zhp->zfs_name);
+		    sdd->prevsnap, zhp->zfs_name);
 	}
 
-	err = dump_ioctl(zhp, sdd->lastsnap,
-	    sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
-	    sdd->outfd);
+	err = dump_ioctl(zhp, sdd->prevsnap,
+	    sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
+	    sdd->outfd, B_TRUE, &got_enoent, sdd->debugnv);
 
-	if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
-		sdd->seento = B_TRUE;
-
-	(void) strcpy(sdd->lastsnap, thissnap);
+	if (got_enoent)
+		err = 0;
+	else
+		(void) strcpy(sdd->prevsnap, thissnap);
 	zfs_close(zhp);
 	return (err);
 }
@@ -592,51 +1063,32 @@ dump_filesystem(zfs_handle_t *zhp, void *arg)
 		}
 	}
 
-	if (sdd->doall) {
-		sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
-		if (sdd->fromsnap == NULL || missingfrom)
-			sdd->seenfrom = B_TRUE;
+	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
+	if (sdd->fromsnap == NULL || missingfrom)
+		sdd->seenfrom = B_TRUE;
 
-		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
-		if (!sdd->seenfrom) {
+	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
+	if (!sdd->seenfrom) {
+		(void) fprintf(stderr,
+		    "WARNING: could not send %s@%s:\n"
+		    "incremental source (%s@%s) does not exist\n",
+		    zhp->zfs_name, sdd->tosnap,
+		    zhp->zfs_name, sdd->fromsnap);
+		sdd->err = B_TRUE;
+	} else if (!sdd->seento) {
+		if (sdd->fromsnap) {
 			(void) fprintf(stderr,
 			    "WARNING: could not send %s@%s:\n"
-			    "incremental source (%s@%s) does not exist\n",
+			    "incremental source (%s@%s) "
+			    "is not earlier than it\n",
 			    zhp->zfs_name, sdd->tosnap,
 			    zhp->zfs_name, sdd->fromsnap);
-			sdd->err = B_TRUE;
-		} else if (!sdd->seento) {
-			if (sdd->fromsnap) {
-				(void) fprintf(stderr,
-				    "WARNING: could not send %s@%s:\n"
-				    "incremental source (%s@%s) "
-				    "is not earlier than it\n",
-				    zhp->zfs_name, sdd->tosnap,
-				    zhp->zfs_name, sdd->fromsnap);
-			} else {
-				(void) fprintf(stderr, "WARNING: "
-				    "could not send %s@%s: does not exist\n",
-				    zhp->zfs_name, sdd->tosnap);
-			}
-			sdd->err = B_TRUE;
-		}
-	} else {
-		zfs_handle_t *snapzhp;
-		char snapname[ZFS_MAXNAMELEN];
-
-		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
-		    zfs_get_name(zhp), sdd->tosnap);
-		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
-		if (snapzhp == NULL) {
-			rv = -1;
 		} else {
-			rv = dump_ioctl(snapzhp,
-			    missingfrom ? NULL : sdd->fromsnap,
-			    sdd->fromorigin || missingfrom,
-			    sdd->outfd);
-			sdd->seento = B_TRUE;
-			zfs_close(snapzhp);
+			(void) fprintf(stderr, "WARNING: "
+			    "could not send %s@%s: does not exist\n",
+			    zhp->zfs_name, sdd->tosnap);
 		}
+		sdd->err = B_TRUE;
 	}
 
 	return (rv);
@@ -652,6 +1104,29 @@ dump_filesystems(zfs_handle_t *rzhp, void *arg)
 	if (!sdd->replicate)
 		return (dump_filesystem(rzhp, sdd));
 
+	/* Mark the clone origin snapshots. */
+	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
+	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
+		nvlist_t *nvfs;
+		uint64_t origin_guid = 0;
+
+		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
+		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
+		if (origin_guid != 0) {
+			char *snapname;
+			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
+			    origin_guid, &snapname);
+			if (origin_nv != NULL) {
+				nvlist_t *snapprops;
+				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
+				    "snapprops", &snapprops));
+				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
+				    snapname, &snapprops));
+				VERIFY(0 == nvlist_add_boolean(
+				    snapprops, "is_clone_origin"));
+			}
+		}
+	}
 again:
 	needagain = progress = B_FALSE;
 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
@@ -661,7 +1136,6 @@ again:
 		zfs_handle_t *zhp;
 		int err;
 		uint64_t origin_guid = 0;
-		nvlist_t *origin_nv;
 
 		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
@@ -670,15 +1144,19 @@ again:
 		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
 
-		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
-		if (origin_nv &&
-		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
-			/*
-			 * origin has not been sent yet;
-			 * skip this clone.
-			 */
-			needagain = B_TRUE;
-			continue;
+		if (origin_guid != 0) {
+			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
+			    origin_guid, NULL);
+			if (origin_nv != NULL &&
+			    nvlist_lookup_boolean(origin_nv,
+			    "sent") == ENOENT) {
+				/*
+				 * origin has not been sent yet;
+				 * skip this clone.
+				 */
+				needagain = B_TRUE;
+				continue;
+			}
 		}
 
 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
@@ -709,21 +1187,37 @@ again:
  *	 is TRUE.
  *
  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
- * uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION)
+ * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
  * if "replicate" is set.  If "doall" is set, dump all the intermediate
- * snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall"
- * case too.
+ * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
+ * case too. If "props" is set, send properties.
  */
 int
 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
-    boolean_t replicate, boolean_t doall, boolean_t fromorigin,
-    boolean_t verbose, int outfd)
+    sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
+    void *cb_arg, nvlist_t **debugnvp)
 {
 	char errbuf[1024];
 	send_dump_data_t sdd = { 0 };
 	int err;
 	nvlist_t *fss = NULL;
 	avl_tree_t *fsavl = NULL;
+	char holdtag[128];
+	static uint64_t holdseq;
+	int spa_version;
+	boolean_t holdsnaps = B_FALSE;
+	pthread_t tid;
+	int pipefd[2];
+	dedup_arg_t dda = { 0 };
+	int featureflags = 0;
+
+	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
+		uint64_t version;
+		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+		if (version >= ZPL_VERSION_SA) {
+			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+		}
+	}
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot send '%s'"), zhp->zfs_name);
@@ -734,15 +1228,48 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
 	}
 
-	if (replicate || doall) {
+	if (zfs_spa_version(zhp, &spa_version) == 0 &&
+	    spa_version >= SPA_VERSION_USERREFS)
+		holdsnaps = B_TRUE;
+
+	if (flags.dedup) {
+		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
+		    DMU_BACKUP_FEATURE_DEDUPPROPS);
+		if (err = pipe(pipefd)) {
+			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
+			    errbuf));
+		}
+		dda.outputfd = outfd;
+		dda.inputfd = pipefd[1];
+		dda.dedup_hdl = zhp->zfs_hdl;
+		if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
+			(void) close(pipefd[0]);
+			(void) close(pipefd[1]);
+			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_THREADCREATEFAILED, errbuf));
+		}
+	}
+
+	if (flags.replicate || flags.doall || flags.props) {
 		dmu_replay_record_t drr = { 0 };
 		char *packbuf = NULL;
 		size_t buflen = 0;
 		zio_cksum_t zc = { 0 };
 
-		assert(fromsnap || doall);
+		if (holdsnaps) {
+			(void) snprintf(holdtag, sizeof (holdtag),
+			    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
+			++holdseq;
+			err = zfs_hold_range(zhp, fromsnap, tosnap,
+			    holdtag, flags.replicate, B_TRUE, filter_func,
+			    cb_arg);
+			if (err)
+				goto err_out;
+		}
 
-		if (replicate) {
+		if (flags.replicate || flags.props) {
 			nvlist_t *hdrnv;
 
 			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
@@ -751,45 +1278,65 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
 				    "fromsnap", fromsnap));
 			}
 			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
+			if (!flags.replicate) {
+				VERIFY(0 == nvlist_add_boolean(hdrnv,
+				    "not_recursive"));
+			}
 
 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
-			    fromsnap, tosnap, &fss, &fsavl);
-			if (err)
-				return (err);
+			    fromsnap, tosnap, flags.replicate, &fss, &fsavl);
+			if (err) {
+				if (holdsnaps) {
+					(void) zfs_release_range(zhp, fromsnap,
+					    tosnap, holdtag, flags.replicate);
+				}
+				goto err_out;
+			}
 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
 			    NV_ENCODE_XDR, 0);
-			nvlist_free(hdrnv);
+			if (debugnvp)
+				*debugnvp = hdrnv;
+			else
+				nvlist_free(hdrnv);
 			if (err) {
 				fsavl_destroy(fsavl);
 				nvlist_free(fss);
-				return (zfs_standard_error(zhp->zfs_hdl,
-				    err, errbuf));
+				if (holdsnaps) {
+					(void) zfs_release_range(zhp, fromsnap,
+					    tosnap, holdtag, flags.replicate);
+				}
+				goto stderr_out;
 			}
 		}
 
 		/* write first begin record */
 		drr.drr_type = DRR_BEGIN;
 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
-		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
+		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo,
+		    DMU_COMPOUNDSTREAM);
+		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo,
+		    featureflags);
 		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
 		    sizeof (drr.drr_u.drr_begin.drr_toname),
 		    "%s@%s", zhp->zfs_name, tosnap);
 		drr.drr_payloadlen = buflen;
-		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
-		err = write(outfd, &drr, sizeof (drr));
+		err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
 
 		/* write header nvlist */
-		if (err != -1) {
-			fletcher_4_incremental_native(packbuf, buflen, &zc);
-			err = write(outfd, packbuf, buflen);
+		if (err != -1 && packbuf != NULL) {
+			err = cksum_and_write(packbuf, buflen, &zc, outfd);
 		}
 		free(packbuf);
 		if (err == -1) {
 			fsavl_destroy(fsavl);
 			nvlist_free(fss);
-			return (zfs_standard_error(zhp->zfs_hdl,
-			    errno, errbuf));
+			if (holdsnaps) {
+				(void) zfs_release_range(zhp, fromsnap, tosnap,
+				    holdtag, flags.replicate);
+			}
+			err = errno;
+			goto stderr_out;
 		}
 
 		/* write end record */
@@ -801,8 +1348,12 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
 			if (err == -1) {
 				fsavl_destroy(fsavl);
 				nvlist_free(fss);
-				return (zfs_standard_error(zhp->zfs_hdl,
-				    errno, errbuf));
+				err = errno;
+				if (holdsnaps) {
+					(void) zfs_release_range(zhp, fromsnap,
+					    tosnap, holdtag, flags.replicate);
+				}
+				goto stderr_out;
 			}
 		}
 	}
@@ -810,18 +1361,30 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
 	/* dump each stream */
 	sdd.fromsnap = fromsnap;
 	sdd.tosnap = tosnap;
-	sdd.outfd = outfd;
-	sdd.replicate = replicate;
-	sdd.doall = doall;
-	sdd.fromorigin = fromorigin;
+	if (flags.dedup)
+		sdd.outfd = pipefd[0];
+	else
+		sdd.outfd = outfd;
+	sdd.replicate = flags.replicate;
+	sdd.doall = flags.doall;
+	sdd.fromorigin = flags.fromorigin;
 	sdd.fss = fss;
 	sdd.fsavl = fsavl;
-	sdd.verbose = verbose;
+	sdd.verbose = flags.verbose;
+	sdd.filter_cb = filter_func;
+	sdd.filter_cb_arg = cb_arg;
+	if (debugnvp)
+		sdd.debugnv = *debugnvp;
 	err = dump_filesystems(zhp, &sdd);
 	fsavl_destroy(fsavl);
 	nvlist_free(fss);
 
-	if (replicate || doall) {
+	if (flags.dedup) {
+		(void) close(pipefd[0]);
+		(void) pthread_join(tid, NULL);
+	}
+
+	if (flags.replicate || flags.doall || flags.props) {
 		/*
 		 * write final end record.  NB: want to do this even if
 		 * there was some error, because it might not be totally
@@ -829,6 +1392,10 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
 		 */
 		dmu_replay_record_t drr = { 0 };
 		drr.drr_type = DRR_END;
+		if (holdsnaps) {
+			(void) zfs_release_range(zhp, fromsnap, tosnap,
+			    holdtag, flags.replicate);
+		}
 		if (write(outfd, &drr, sizeof (drr)) == -1) {
 			return (zfs_standard_error(zhp->zfs_hdl,
 			    errno, errbuf));
@@ -836,6 +1403,16 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
 	}
 
 	return (err || sdd.err);
+
+stderr_out:
+	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
+err_out:
+	if (flags.dedup) {
+		(void) pthread_cancel(tid);
+		(void) pthread_join(tid, NULL);
+		(void) close(pipefd[0]);
+	}
+	return (err);
 }
 
 /*
@@ -1017,12 +1594,13 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
 	changelist_free(clp);
 
 	/*
-	 * Deferred destroy should always succeed. Since we can't tell
-	 * if it destroyed the dataset or just marked it for deferred
-	 * destroy, always do the rename just in case.
+	 * Deferred destroy might destroy the snapshot or only mark it to be
+	 * destroyed later, and it returns success in either case.
 	 */
-	if (err != 0 || defer)
+	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
+	    ZFS_TYPE_SNAPSHOT))) {
 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
+	}
 
 	return (err);
 }
@@ -1040,6 +1618,7 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg)
 
 	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
 		(void) strcpy(gtnd->name, zhp->zfs_name);
+		zfs_close(zhp);
 		return (EEXIST);
 	}
 	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
@@ -1130,18 +1709,22 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
 
 static int
 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
-    recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
+    recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
+    nvlist_t *renamed)
 {
 	nvlist_t *local_nv;
 	avl_tree_t *local_avl;
 	nvpair_t *fselem, *nextfselem;
-	char *tosnap, *fromsnap;
+	char *fromsnap;
 	char newname[ZFS_MAXNAMELEN];
 	int error;
-	boolean_t needagain, progress;
+	boolean_t needagain, progress, recursive;
+	char *s1, *s2;
 
 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
-	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
+
+	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
+	    ENOENT);
 
 	if (flags.dryrun)
 		return (0);
@@ -1150,7 +1733,7 @@ again:
 	needagain = progress = B_FALSE;
 
 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
-	    &local_nv, &local_avl)) != 0)
+	    recursive, &local_nv, &local_avl)) != 0)
 		return (error);
 
 	/*
@@ -1273,7 +1856,7 @@ again:
 			    stream_snapname, &props)) {
 				zfs_cmd_t zc = { 0 };
 
-				zc.zc_cookie = B_TRUE; /* clear current props */
+				zc.zc_cookie = B_TRUE; /* received */
 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
 				    "%s@%s", fsname, nvpair_name(snapelem));
 				if (zcmd_write_src_nvlist(hdl, &zc,
@@ -1321,10 +1904,13 @@ again:
 			continue;
 		}
 
-		if (fromguid == 0 && flags.verbose) {
-			(void) printf("local fs %s does not have fromsnap "
-			    "(%s in stream); must have been deleted locally; "
-			    "ignoring\n", fsname, fromsnap);
+		if (fromguid == 0) {
+			if (flags.verbose) {
+				(void) printf("local fs %s does not have "
+				    "fromsnap (%s in stream); must have "
+				    "been deleted locally; ignoring\n",
+				    fsname, fromsnap);
+			}
 			continue;
 		}
 
@@ -1333,11 +1919,19 @@ again:
 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
 		    "parentfromsnap", &stream_parent_fromsnap_guid));
 
-		/* check for rename */
+		s1 = strrchr(fsname, '/');
+		s2 = strrchr(stream_fsname, '/');
+
+		/*
+		 * Check for rename. If the exact receive path is specified, it
+		 * does not count as a rename, but we still need to check the
+		 * datasets beneath it.
+		 */
 		if ((stream_parent_fromsnap_guid != 0 &&
+		    parent_fromsnap_guid != 0 &&
 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
-		    strcmp(strrchr(fsname, '/'),
-		    strrchr(stream_fsname, '/')) != 0) {
+		    ((flags.isprefix || strcmp(tofs, fsname) != 0) &&
+		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
 			nvlist_t *parent;
 			char tryname[ZFS_MAXNAMELEN];
 
@@ -1365,8 +1959,16 @@ again:
 				}
 			}
 
+			newname[0] = '\0';
+
 			error = recv_rename(hdl, fsname, tryname,
 			    strlen(tofs)+1, newname, flags);
+
+			if (renamed != NULL && newname[0] != '\0') {
+				VERIFY(0 == nvlist_add_boolean(renamed,
+				    newname));
+			}
+
 			if (error)
 				needagain = B_TRUE;
 			else
@@ -1395,37 +1997,28 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
 	nvlist_t *stream_nv = NULL;
 	avl_tree_t *stream_avl = NULL;
 	char *fromsnap = NULL;
+	char *cp;
 	char tofs[ZFS_MAXNAMELEN];
+	char sendfs[ZFS_MAXNAMELEN];
 	char errbuf[1024];
 	dmu_replay_record_t drre;
 	int error;
 	boolean_t anyerr = B_FALSE;
 	boolean_t softerr = B_FALSE;
+	boolean_t recursive;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot receive"));
 
-	if (strchr(destname, '@')) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "can not specify snapshot name for multi-snapshot stream"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
 	assert(drr->drr_type == DRR_BEGIN);
 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
-	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
+	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
+	    DMU_COMPOUNDSTREAM);
 
 	/*
 	 * Read in the nvlist from the stream.
 	 */
 	if (drr->drr_payloadlen != 0) {
-		if (!flags.isprefix) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "must use -d to receive replication "
-			    "(send -R) stream"));
-			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-		}
-
 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
 		    &stream_nv, flags.byteswap, zc);
 		if (error) {
@@ -1434,6 +2027,16 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
 		}
 	}
 
+	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
+	    ENOENT);
+
+	if (recursive && strchr(destname, '@')) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "cannot specify snapshot name for multi-snapshot stream"));
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+
 	/*
 	 * Read in the end record and verify checksum.
 	 */
@@ -1477,21 +2080,73 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
 		}
 
 		if (fromsnap != NULL) {
+			nvlist_t *renamed = NULL;
+			nvpair_t *pair = NULL;
+
 			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
 			if (flags.isprefix) {
-				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
-				    "/@");
+				struct drr_begin *drrb = &drr->drr_u.drr_begin;
+				int i;
+
+				if (flags.istail) {
+					cp = strrchr(drrb->drr_toname, '/');
+					if (cp == NULL) {
+						(void) strlcat(tofs, "/",
+						    ZFS_MAXNAMELEN);
+						i = 0;
+					} else {
+						i = (cp - drrb->drr_toname);
+					}
+				} else {
+					i = strcspn(drrb->drr_toname, "/@");
+				}
 				/* zfs_receive_one() will create_parents() */
-				(void) strlcat(tofs,
-				    &drr->drr_u.drr_begin.drr_toname[i],
+				(void) strlcat(tofs, &drrb->drr_toname[i],
 				    ZFS_MAXNAMELEN);
 				*strchr(tofs, '@') = '\0';
 			}
-			softerr = recv_incremental_replication(hdl, tofs,
-			    flags, stream_nv, stream_avl);
+
+			if (recursive && !flags.dryrun && !flags.nomount) {
+				VERIFY(0 == nvlist_alloc(&renamed,
+				    NV_UNIQUE_NAME, 0));
+			}
+
+			softerr = recv_incremental_replication(hdl, tofs, flags,
+			    stream_nv, stream_avl, renamed);
+
+			/* Unmount renamed filesystems before receiving. */
+			while ((pair = nvlist_next_nvpair(renamed,
+			    pair)) != NULL) {
+				zfs_handle_t *zhp;
+				prop_changelist_t *clp = NULL;
+
+				zhp = zfs_open(hdl, nvpair_name(pair),
+				    ZFS_TYPE_FILESYSTEM);
+				if (zhp != NULL) {
+					clp = changelist_gather(zhp,
+					    ZFS_PROP_MOUNTPOINT, 0, 0);
+					zfs_close(zhp);
+					if (clp != NULL) {
+						softerr |=
+						    changelist_prefix(clp);
+						changelist_free(clp);
+					}
+				}
+			}
+
+			nvlist_free(renamed);
 		}
 	}
 
+	/*
+	 * Get the fs specified by the first path in the stream (the top level
+	 * specified by 'zfs send') and pass it to each invocation of
+	 * zfs_receive_one().
+	 */
+	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
+	    ZFS_MAXNAMELEN);
+	if ((cp = strchr(sendfs, '@')) != NULL)
+		*cp = '\0';
 
 	/* Finally, receive each contained stream */
 	do {
@@ -1503,7 +2158,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
 		 * recv_skip() and return 0).
 		 */
 		error = zfs_receive_impl(hdl, destname, flags, fd,
-		    stream_avl, top_zfs);
+		    sendfs, stream_nv, stream_avl, top_zfs);
 		if (error == ENODATA) {
 			error = 0;
 			break;
@@ -1517,7 +2172,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
 		 * renames again.
 		 */
 		softerr = recv_incremental_replication(hdl, tofs, flags,
-		    stream_nv, stream_avl);
+		    stream_nv, stream_avl, NULL);
 	}
 
 out:
@@ -1531,11 +2186,28 @@ out:
 	return (error);
 }
 
+static void
+trunc_prop_errs(int truncated)
+{
+	ASSERT(truncated != 0);
+
+	if (truncated == 1)
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "1 more property could not be set\n"));
+	else
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%d more properties could not be set\n"), truncated);
+}
+
 static int
 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
 {
 	dmu_replay_record_t *drr;
 	void *buf = malloc(1<<20);
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive:"));
 
 	/* XXX would be great to use lseek if possible... */
 	drr = buf;
@@ -1548,7 +2220,11 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
 		switch (drr->drr_type) {
 		case DRR_BEGIN:
 			/* NB: not to be used on v2 stream packages */
-			assert(drr->drr_payloadlen == 0);
+			if (drr->drr_payloadlen != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid substream header"));
+				return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+			}
 			break;
 
 		case DRR_END:
@@ -1574,13 +2250,23 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
 			(void) recv_read(hdl, fd, buf,
 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
 			break;
-
+		case DRR_SPILL:
+			if (byteswap) {
+				drr->drr_u.drr_write.drr_length =
+				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
+			}
+			(void) recv_read(hdl, fd, buf,
+			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
+			break;
+		case DRR_WRITE_BYREF:
 		case DRR_FREEOBJECTS:
 		case DRR_FREE:
 			break;
 
 		default:
-			assert(!"invalid record type");
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid record type"));
+			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 		}
 	}
 
@@ -1594,27 +2280,33 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
 static int
 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
     recvflags_t flags, dmu_replay_record_t *drr,
-    dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
-    char **top_zfs)
+    dmu_replay_record_t *drr_noswap, const char *sendfs,
+    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs)
 {
 	zfs_cmd_t zc = { 0 };
 	time_t begin_time;
-	int ioctl_err, ioctl_errno, err, choplen;
+	int ioctl_err, ioctl_errno, err;
 	char *cp;
 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
 	char errbuf[1024];
-	char chopprefix[ZFS_MAXNAMELEN];
+	char prop_errbuf[1024];
+	const char *chopprefix;
 	boolean_t newfs = B_FALSE;
 	boolean_t stream_wantsnewfs;
 	uint64_t parent_snapguid = 0;
 	prop_changelist_t *clp = NULL;
 	nvlist_t *snapprops_nvlist = NULL;
+	zprop_errflags_t prop_errflags;
+	boolean_t recursive;
 
 	begin_time = time(NULL);
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot receive"));
 
+	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
+	    ENOENT);
+
 	if (stream_avl != NULL) {
 		char *snapname;
 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
@@ -1645,6 +2337,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 			return (-1);
 	}
 
+	cp = NULL;
+
 	/*
 	 * Determine how much of the snapshot name stored in the stream
 	 * we are going to tack on to the name they specified on the
@@ -1653,38 +2347,77 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 	 * If they specified a snapshot, chop the entire name stored in
 	 * the stream.
 	 */
-	(void) strcpy(chopprefix, drrb->drr_toname);
-	if (flags.isprefix) {
+	if (flags.istail) {
+		/*
+		 * A filesystem was specified with -e. We want to tack on only
+		 * the tail of the sent snapshot path.
+		 */
+		if (strchr(tosnap, '@')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "argument - snapshot not allowed with -e"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+
+		chopprefix = strrchr(sendfs, '/');
+
+		if (chopprefix == NULL) {
+			/*
+			 * The tail is the poolname, so we need to
+			 * prepend a path separator.
+			 */
+			int len = strlen(drrb->drr_toname);
+			cp = malloc(len + 2);
+			cp[0] = '/';
+			(void) strcpy(&cp[1], drrb->drr_toname);
+			chopprefix = cp;
+		} else {
+			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
+		}
+	} else if (flags.isprefix) {
 		/*
-		 * They specified a fs with -d, we want to tack on
-		 * everything but the pool name stored in the stream
+		 * A filesystem was specified with -d. We want to tack on
+		 * everything but the first element of the sent snapshot path
+		 * (all but the pool name).
 		 */
 		if (strchr(tosnap, '@')) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
 			    "argument - snapshot not allowed with -d"));
 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 		}
-		cp = strchr(chopprefix, '/');
-		if (cp == NULL)
-			cp = strchr(chopprefix, '@');
-		*cp = '\0';
+
+		chopprefix = strchr(drrb->drr_toname, '/');
+		if (chopprefix == NULL)
+			chopprefix = strchr(drrb->drr_toname, '@');
 	} else if (strchr(tosnap, '@') == NULL) {
 		/*
-		 * If they specified a filesystem without -d, we want to
-		 * tack on everything after the fs specified in the
-		 * first name from the stream.
+		 * If a filesystem was specified without -d or -e, we want to
+		 * tack on everything after the fs specified by 'zfs send'.
 		 */
-		cp = strchr(chopprefix, '@');
-		*cp = '\0';
+		chopprefix = drrb->drr_toname + strlen(sendfs);
+	} else {
+		/* A snapshot was specified as an exact path (no -d or -e). */
+		if (recursive) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cannot specify snapshot name for multi-snapshot "
+			    "stream"));
+			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+		}
+		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
 	}
-	choplen = strlen(chopprefix);
+
+	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
+	ASSERT(chopprefix > drrb->drr_toname);
+	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
+	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
+	    chopprefix[0] == '\0');
 
 	/*
 	 * Determine name of destination snapshot, store in zc_value.
 	 */
+	(void) strcpy(zc.zc_top_ds, tosnap);
 	(void) strcpy(zc.zc_value, tosnap);
-	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
-	    sizeof (zc.zc_value));
+	(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
+	free(cp);
 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
 		zcmd_free_nvlists(&zc);
 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
@@ -1742,7 +2475,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		(void) strcpy(zc.zc_name, zc.zc_value);
 		*strchr(zc.zc_name, '@') = '\0';
 
-		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+		/*
+		 * If the exact receive path was specified and this is the
+		 * topmost path in the stream, then if the fs does not exist we
+		 * should look no further.
+		 */
+		if ((flags.isprefix || (*(chopprefix = drrb->drr_toname +
+		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
+		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
 			char snap[ZFS_MAXNAMELEN];
 			(void) strcpy(snap, strchr(zc.zc_value, '@'));
 			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
@@ -1758,6 +2498,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 
 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
 		zfs_handle_t *zhp;
+
 		/*
 		 * Destination fs exists.  Therefore this should either
 		 * be an incremental, or the stream specifies a new fs
@@ -1765,7 +2506,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		 * away (and have therefore specified -F and removed any
 		 * snapshots).
 		 */
-
 		if (stream_wantsnewfs) {
 			if (!flags.force) {
 				zcmd_free_nvlists(&zc);
@@ -1819,12 +2559,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 				return (-1);
 			}
 		}
-		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
-		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
-			zfs_close(zhp);
-			zcmd_free_nvlists(&zc);
-			return (-1);
-		}
 		zfs_close(zhp);
 	} else {
 		/*
@@ -1848,7 +2582,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		 */
 		*cp = '\0';
 
-		if (flags.isprefix && !flags.dryrun &&
+		if (flags.isprefix && !flags.istail && !flags.dryrun &&
 		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
 			zcmd_free_nvlists(&zc);
 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
@@ -1873,21 +2607,59 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		return (recv_skip(hdl, infd, flags.byteswap));
 	}
 
+	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
+	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
+
 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
 	ioctl_errno = errno;
+	prop_errflags = (zprop_errflags_t)zc.zc_obj;
+
+	if (err == 0) {
+		nvlist_t *prop_errors;
+		VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
+		    zc.zc_nvlist_dst_size, &prop_errors, 0));
+
+		nvpair_t *prop_err = NULL;
+
+		while ((prop_err = nvlist_next_nvpair(prop_errors,
+		    prop_err)) != NULL) {
+			char tbuf[1024];
+			zfs_prop_t prop;
+			int intval;
+
+			prop = zfs_name_to_prop(nvpair_name(prop_err));
+			(void) nvpair_value_int32(prop_err, &intval);
+			if (strcmp(nvpair_name(prop_err),
+			    ZPROP_N_MORE_ERRORS) == 0) {
+				trunc_prop_errs(intval);
+				break;
+			} else {
+				(void) snprintf(tbuf, sizeof (tbuf),
+				    dgettext(TEXT_DOMAIN,
+				    "cannot receive %s property on %s"),
+				    nvpair_name(prop_err), zc.zc_name);
+				zfs_setprop_error(hdl, prop, intval, tbuf);
+			}
+		}
+		nvlist_free(prop_errors);
+	}
+
+	zc.zc_nvlist_dst = 0;
+	zc.zc_nvlist_dst_size = 0;
 	zcmd_free_nvlists(&zc);
 
 	if (err == 0 && snapprops_nvlist) {
 		zfs_cmd_t zc2 = { 0 };
 
 		(void) strcpy(zc2.zc_name, zc.zc_value);
+		zc2.zc_cookie = B_TRUE; /* received */
 		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
 			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
 			zcmd_free_nvlists(&zc2);
 		}
 	}
 
-	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
+	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
 		/*
 		 * It may be that this snapshot already exists,
 		 * in which case we want to consume & ignore it
@@ -1895,7 +2667,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		 */
 		avl_tree_t *local_avl;
 		nvlist_t *local_nv, *fs;
-		char *cp = strchr(zc.zc_value, '@');
+		cp = strchr(zc.zc_value, '@');
 
 		/*
 		 * XXX Do this faster by just iterating over snaps in
@@ -1903,7 +2675,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		 * get a strange "does not exist" error message.
 		 */
 		*cp = '\0';
-		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
+		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
 		    &local_nv, &local_avl) == 0) {
 			*cp = '@';
 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
@@ -1915,14 +2687,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 					(void) printf("snap %s already exists; "
 					    "ignoring\n", zc.zc_value);
 				}
-				ioctl_err = recv_skip(hdl, infd,
+				err = ioctl_err = recv_skip(hdl, infd,
 				    flags.byteswap);
 			}
 		}
 		*cp = '@';
 	}
 
-
 	if (ioctl_err != 0) {
 		switch (ioctl_errno) {
 		case ENODEV:
@@ -1961,18 +2732,25 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 			    "invalid stream (checksum mismatch)"));
 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
 			break;
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to receive this stream."));
+			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+		case EDQUOT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination %s space quota exceeded"), zc.zc_name);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			break;
 		default:
 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
 		}
 	}
 
 	/*
-	 * Mount or recreate the /dev links for the target filesystem
-	 * (if created, or if we tore them down to do an incremental
-	 * restore), and the /dev links for the new snapshot (if
-	 * created). Also mount any children of the target filesystem
-	 * if we did a replication receive (indicated by stream_avl
-	 * being non-NULL).
+	 * Mount the target filesystem (if created).  Also mount any
+	 * children of the target filesystem if we did a replication
+	 * receive (indicated by stream_avl being non-NULL).
 	 */
 	cp = strchr(zc.zc_value, '@');
 	if (cp && (ioctl_err == 0 || !newfs)) {
@@ -1984,10 +2762,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		if (h != NULL) {
 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
 				*cp = '@';
-				err = zvol_create_link(hdl, h->zfs_name);
-				if (err == 0 && ioctl_err == 0)
-					err = zvol_create_link(hdl,
-					    zc.zc_value);
 			} else if (newfs || stream_avl) {
 				/*
 				 * Track the first/top of hierarchy fs,
@@ -2006,6 +2780,19 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		changelist_free(clp);
 	}
 
+	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
+		    "failed to clear unreceived properties on %s"),
+		    zc.zc_name);
+		(void) fprintf(stderr, "\n");
+	}
+	if (prop_errflags & ZPROP_ERR_NORESTORE) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
+		    "failed to restore original properties on %s"),
+		    zc.zc_name);
+		(void) fprintf(stderr, "\n");
+	}
+
 	if (err || ioctl_err)
 		return (-1);
 
@@ -2028,13 +2815,16 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 
 static int
 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
-    int infd, avl_tree_t *stream_avl, char **top_zfs)
+    int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
+    char **top_zfs)
 {
 	int err;
 	dmu_replay_record_t drr, drr_noswap;
 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
 	char errbuf[1024];
 	zio_cksum_t zcksum = { 0 };
+	uint64_t featureflags;
+	int hdrtype;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot receive"));
@@ -2072,7 +2862,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
 		drr.drr_type = BSWAP_32(drr.drr_type);
 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
-		drrb->drr_version = BSWAP_64(drrb->drr_version);
+		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
 		drrb->drr_type = BSWAP_32(drrb->drr_type);
 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
@@ -2086,23 +2876,45 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
+	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
+
+	if (!DMU_STREAM_SUPPORTED(featureflags) ||
+	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "stream has unsupported feature, feature flags = %lx"),
+		    featureflags);
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
 	if (strchr(drrb->drr_toname, '@') == NULL) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
 		    "stream (bad snapshot name)"));
 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
-	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
+	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
+		char nonpackage_sendfs[ZFS_MAXNAMELEN];
+		if (sendfs == NULL) {
+			/*
+			 * We were not called from zfs_receive_package(). Get
+			 * the fs specified by 'zfs send'.
+			 */
+			char *cp;
+			(void) strlcpy(nonpackage_sendfs,
+			    drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
+			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
+				*cp = '\0';
+			sendfs = nonpackage_sendfs;
+		}
 		return (zfs_receive_one(hdl, infd, tosnap, flags,
-		    &drr, &drr_noswap, stream_avl, top_zfs));
-	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
+		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
+		    top_zfs));
+	} else {
+		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+		    DMU_COMPOUNDSTREAM);
 		return (zfs_receive_package(hdl, infd, tosnap, flags,
 		    &drr, &zcksum, top_zfs));
-	} else {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "stream is unsupported version %llu"),
-		    drrb->drr_version);
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 }
 
@@ -2119,7 +2931,8 @@ zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
 	char *top_zfs = NULL;
 	int err;
 
-	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
+	err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
+	    stream_avl, &top_zfs);
 
 	if (err == 0 && !flags.nomount && top_zfs) {
 		zfs_handle_t *zhp;
diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
index 44faf02af..24725ec04 100644
--- a/lib/libzfs/libzfs_status.c
+++ b/lib/libzfs/libzfs_status.c
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -138,7 +137,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
 			if (find_vdev_problem(child[c], func))
 				return (B_TRUE);
 	} else {
-		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
+		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &c) == 0);
 
 		if (func(vs->vs_state, vs->vs_aux,
@@ -173,7 +172,8 @@ check_status(nvlist_t *config, boolean_t isimport)
 {
 	nvlist_t *nvroot;
 	vdev_stat_t *vs;
-	uint_t vsc;
+	pool_scan_stat_t *ps = NULL;
+	uint_t vsc, psc;
 	uint64_t nerr;
 	uint64_t version;
 	uint64_t stateval;
@@ -184,15 +184,24 @@ check_status(nvlist_t *config, boolean_t isimport)
 	    &version) == 0);
 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &vsc) == 0);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
 	    &stateval) == 0);
-	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+	/*
+	 * Currently resilvering a vdev
+	 */
+	(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
+	    (uint64_t **)&ps, &psc);
+	if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
+	    ps->pss_state == DSS_SCANNING)
+		return (ZPOOL_STATUS_RESILVERING);
 
 	/*
 	 * Pool last accessed by another system.
 	 */
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
 	    stateval == POOL_STATE_ACTIVE)
 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
@@ -289,12 +298,6 @@ check_status(nvlist_t *config, boolean_t isimport)
 		return (ZPOOL_STATUS_REMOVED_DEV);
 
 	/*
-	 * Currently resilvering
-	 */
-	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
-		return (ZPOOL_STATUS_RESILVERING);
-
-	/*
 	 * Outdated, but usable, version
 	 */
 	if (version < SPA_VERSION)
@@ -328,3 +331,68 @@ zpool_import_status(nvlist_t *config, char **msgid)
 
 	return (ret);
 }
+
+static void
+dump_ddt_stat(const ddt_stat_t *dds, int h)
+{
+	char refcnt[6];
+	char blocks[6], lsize[6], psize[6], dsize[6];
+	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
+
+	if (dds == NULL || dds->dds_blocks == 0)
+		return;
+
+	if (h == -1)
+		(void) strcpy(refcnt, "Total");
+	else
+		zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
+
+	zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
+	zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
+	zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
+	zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
+	zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
+	zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
+	zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
+	zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
+
+	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
+	    refcnt,
+	    blocks, lsize, psize, dsize,
+	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
+}
+
+/*
+ * Print the DDT histogram and the column totals.
+ */
+void
+zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
+{
+	int h;
+
+	(void) printf("\n");
+
+	(void) printf("bucket   "
+	    "           allocated             "
+	    "          referenced          \n");
+	(void) printf("______   "
+	    "______________________________   "
+	    "______________________________\n");
+
+	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
+	    "refcnt",
+	    "blocks", "LSIZE", "PSIZE", "DSIZE",
+	    "blocks", "LSIZE", "PSIZE", "DSIZE");
+
+	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
+	    "------",
+	    "------", "-----", "-----", "-----",
+	    "------", "-----", "-----", "-----");
+
+	for (h = 0; h < 64; h++)
+		dump_ddt_stat(&ddh->ddh_stat[h], h);
+
+	dump_ddt_stat(dds_total, -1);
+
+	(void) printf("\n");
+}
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 4da0fb44b..2e73f76ea 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -94,8 +93,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
 	case EZFS_VOLTOOBIG:
 		return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
 		    "this system"));
-	case EZFS_VOLHASDATA:
-		return (dgettext(TEXT_DOMAIN, "volume has data"));
 	case EZFS_INVALIDNAME:
 		return (dgettext(TEXT_DOMAIN, "invalid name"));
 	case EZFS_BADRESTORE:
@@ -138,16 +135,12 @@ libzfs_error_description(libzfs_handle_t *hdl)
 		return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
 	case EZFS_SHARESMBFAILED:
 		return (dgettext(TEXT_DOMAIN, "smb add share failed"));
-	case EZFS_ISCSISVCUNAVAIL:
-		return (dgettext(TEXT_DOMAIN,
-		    "iscsitgt service need to be enabled by "
-		    "a privileged user"));
-	case EZFS_DEVLINKS:
-		return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
 	case EZFS_PERM:
 		return (dgettext(TEXT_DOMAIN, "permission denied"));
 	case EZFS_NOSPC:
 		return (dgettext(TEXT_DOMAIN, "out of space"));
+	case EZFS_FAULT:
+		return (dgettext(TEXT_DOMAIN, "bad address"));
 	case EZFS_IO:
 		return (dgettext(TEXT_DOMAIN, "I/O error"));
 	case EZFS_INTR:
@@ -161,12 +154,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
 		return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
 	case EZFS_NOHISTORY:
 		return (dgettext(TEXT_DOMAIN, "no history available"));
-	case EZFS_UNSHAREISCSIFAILED:
-		return (dgettext(TEXT_DOMAIN,
-		    "iscsitgtd failed request to unshare"));
-	case EZFS_SHAREISCSIFAILED:
-		return (dgettext(TEXT_DOMAIN,
-		    "iscsitgtd failed request to share"));
 	case EZFS_POOLPROPS:
 		return (dgettext(TEXT_DOMAIN, "failed to retrieve "
 		    "pool properties"));
@@ -218,6 +205,20 @@ libzfs_error_description(libzfs_handle_t *hdl)
 	case EZFS_REFTAG_HOLD:
 		return (dgettext(TEXT_DOMAIN, "tag already exists on this "
 		    "dataset"));
+	case EZFS_TAGTOOLONG:
+		return (dgettext(TEXT_DOMAIN, "tag too long"));
+	case EZFS_PIPEFAILED:
+		return (dgettext(TEXT_DOMAIN, "pipe create failed"));
+	case EZFS_THREADCREATEFAILED:
+		return (dgettext(TEXT_DOMAIN, "thread create failed"));
+	case EZFS_POSTSPLIT_ONLINE:
+		return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
+		    "into a new one"));
+	case EZFS_SCRUBBING:
+		return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
+		    "use 'zpool scrub -s' to cancel current scrub"));
+	case EZFS_NO_SCRUB:
+		return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:
@@ -306,6 +307,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
 		zfs_verror(hdl, EZFS_IO, fmt, ap);
 		return (-1);
 
+	case EFAULT:
+		zfs_verror(hdl, EZFS_FAULT, fmt, ap);
+		return (-1);
+
 	case EINTR:
 		zfs_verror(hdl, EZFS_INTR, fmt, ap);
 		return (-1);
@@ -378,7 +383,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 		zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
 		break;
 	default:
-		zfs_error_aux(hdl, strerror(errno));
+		zfs_error_aux(hdl, strerror(error));
 		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
 		break;
 	}
@@ -610,6 +615,7 @@ libzfs_fini(libzfs_handle_t *hdl)
 	if (hdl->libzfs_log_str)
 		(void) free(hdl->libzfs_log_str);
 	zpool_free_handles(hdl);
+	libzfs_fru_clear(hdl, B_TRUE);
 	namespace_clear(hdl);
 	libzfs_mnttab_fini(hdl);
 	free(hdl);
@@ -686,7 +692,7 @@ int
 zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
 {
 	if (len == 0)
-		len = 2048;
+		len = 4*1024;
 	zc->zc_nvlist_dst_size = len;
 	if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
 	    zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
@@ -812,6 +818,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 	    "PROPERTY"));
 	cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
 	    "VALUE"));
+	cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN,
+	    "RECEIVED"));
 	cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
 	    "SOURCE"));
 
@@ -825,7 +833,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 	 * inheriting from the longest name.  This is acceptable because in the
 	 * majority of cases 'SOURCE' is the last column displayed, and we don't
 	 * use the width anyway.  Note that the 'VALUE' column can be oversized,
-	 * if the name of the property is much longer the any values we find.
+	 * if the name of the property is much longer than any values we find.
 	 */
 	for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
 		/*
@@ -856,6 +864,11 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 		    pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
 			cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
 
+		/* 'RECEIVED' column. */
+		if (pl != cbp->cb_proplist &&
+		    pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD])
+			cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width;
+
 		/*
 		 * 'NAME' and 'SOURCE' columns
 		 */
@@ -871,7 +884,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 	/*
 	 * Now go through and print the headers.
 	 */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < ZFS_GET_NCOLS; i++) {
 		switch (cbp->cb_columns[i]) {
 		case GET_COL_NAME:
 			title = dgettext(TEXT_DOMAIN, "NAME");
@@ -882,6 +895,9 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 		case GET_COL_VALUE:
 			title = dgettext(TEXT_DOMAIN, "VALUE");
 			break;
+		case GET_COL_RECVD:
+			title = dgettext(TEXT_DOMAIN, "RECEIVED");
+			break;
 		case GET_COL_SOURCE:
 			title = dgettext(TEXT_DOMAIN, "SOURCE");
 			break;
@@ -890,7 +906,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 		}
 
 		if (title != NULL) {
-			if (i == 3 || cbp->cb_columns[i + 1] == 0)
+			if (i == (ZFS_GET_NCOLS - 1) ||
+			    cbp->cb_columns[i + 1] == GET_COL_NONE)
 				(void) printf("%s", title);
 			else
 				(void) printf("%-*s  ",
@@ -908,7 +925,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
 void
 zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
     const char *propname, const char *value, zprop_source_t sourcetype,
-    const char *source)
+    const char *source, const char *recvd_value)
 {
 	int i;
 	const char *str;
@@ -923,7 +940,7 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
 	if (cbp->cb_first)
 		zprop_print_headers(cbp, cbp->cb_type);
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < ZFS_GET_NCOLS; i++) {
 		switch (cbp->cb_columns[i]) {
 		case GET_COL_NAME:
 			str = name;
@@ -960,14 +977,21 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
 				    "inherited from %s", source);
 				str = buf;
 				break;
+			case ZPROP_SRC_RECEIVED:
+				str = "received";
+				break;
 			}
 			break;
 
+		case GET_COL_RECVD:
+			str = (recvd_value == NULL ? "-" : recvd_value);
+			break;
+
 		default:
 			continue;
 		}
 
-		if (cbp->cb_columns[i + 1] == 0)
+		if (cbp->cb_columns[i + 1] == GET_COL_NONE)
 			(void) printf("%s", str);
 		else if (cbp->cb_scripted)
 			(void) printf("%s\t", str);
@@ -975,7 +999,6 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
 			(void) printf("%-*s  ",
 			    cbp->cb_colwidths[cbp->cb_columns[i]],
 			    str);
-
 	}
 
 	(void) printf("\n");
@@ -1037,7 +1060,7 @@ zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
 		return (-1);
 	}
 
-	/* Rely on stroull() to process the numeric portion.  */
+	/* Rely on strtoull() to process the numeric portion.  */
 	errno = 0;
 	*num = strtoull(value, &end, 10);
author	Brian Behlendorf <[email protected]>	2010-05-28 13:45:14 -0700
committer	Brian Behlendorf <[email protected]>	2010-05-28 13:45:14 -0700
commit	428870ff734fdaccc342b33fc53cf94724409a46 (patch)
tree	164e83c0ceda52a843795ed7cd9e95637d02c177 /lib/libzfs
parent	6119cb885a976e175a6e827894accf657ff1984f (diff)