Merge branch 'gcc-branch' into refs/top-bases/zfs-branch

author: Brian Behlendorf <[email protected]> 2008-12-11 11:25:42 -0800
committer: Brian Behlendorf <[email protected]> 2008-12-11 11:25:42 -0800
commit: 756a122d375586b0fd2d2ac34e7a6d1e7404d977 (patch)
tree: 278cb353aadac7f40cd6b51d5690fb777bfcabdf /lib/libzfs
parent: 764d9b1916581d3223ae504278e0d660323577c2 (diff)
parent: 11cf0f3f8c85345c26f029626bc8eaff518cb9f4 (diff)
12 files changed, 16372 insertions, 0 deletions
diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h
new file mode 100644
index 000000000..c650865f3
--- /dev/null
+++ b/lib/libzfs/include/libzfs.h
@@ -0,0 +1,570 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBZFS_H
+#define	_LIBZFS_H
+
+#include <assert.h>
+#include <libnvpair.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <sys/fs/zfs.h>
+#include <sys/avl.h>
+#include <ucred.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Miscellaneous ZFS constants
+ */
+#define	ZFS_MAXNAMELEN		MAXNAMELEN
+#define	ZPOOL_MAXNAMELEN	MAXNAMELEN
+#define	ZFS_MAXPROPLEN		MAXPATHLEN
+#define	ZPOOL_MAXPROPLEN	MAXPATHLEN
+
+/*
+ * libzfs errors
+ */
+enum {
+	EZFS_NOMEM = 2000,	/* out of memory */
+	EZFS_BADPROP,		/* invalid property value */
+	EZFS_PROPREADONLY,	/* cannot set readonly property */
+	EZFS_PROPTYPE,		/* property does not apply to dataset type */
+	EZFS_PROPNONINHERIT,	/* property is not inheritable */
+	EZFS_PROPSPACE,		/* bad quota or reservation */
+	EZFS_BADTYPE,		/* dataset is not of appropriate type */
+	EZFS_BUSY,		/* pool or dataset is busy */
+	EZFS_EXISTS,		/* pool or dataset already exists */
+	EZFS_NOENT,		/* no such pool or dataset */
+	EZFS_BADSTREAM,		/* bad backup stream */
+	EZFS_DSREADONLY,	/* dataset is readonly */
+	EZFS_VOLTOOBIG,		/* volume is too large for 32-bit system */
+	EZFS_VOLHASDATA,	/* volume already contains data */
+	EZFS_INVALIDNAME,	/* invalid dataset name */
+	EZFS_BADRESTORE,	/* unable to restore to destination */
+	EZFS_BADBACKUP,		/* backup failed */
+	EZFS_BADTARGET,		/* bad attach/detach/replace target */
+	EZFS_NODEVICE,		/* no such device in pool */
+	EZFS_BADDEV,		/* invalid device to add */
+	EZFS_NOREPLICAS,	/* no valid replicas */
+	EZFS_RESILVERING,	/* currently resilvering */
+	EZFS_BADVERSION,	/* unsupported version */
+	EZFS_POOLUNAVAIL,	/* pool is currently unavailable */
+	EZFS_DEVOVERFLOW,	/* too many devices in one vdev */
+	EZFS_BADPATH,		/* must be an absolute path */
+	EZFS_CROSSTARGET,	/* rename or clone across pool or dataset */
+	EZFS_ZONED,		/* used improperly in local zone */
+	EZFS_MOUNTFAILED,	/* failed to mount dataset */
+	EZFS_UMOUNTFAILED,	/* failed to unmount dataset */
+	EZFS_UNSHARENFSFAILED,	/* unshare(1M) failed */
+	EZFS_SHARENFSFAILED,	/* share(1M) failed */
+	EZFS_DEVLINKS,		/* failed to create zvol links */
+	EZFS_PERM,		/* permission denied */
+	EZFS_NOSPC,		/* out of space */
+	EZFS_IO,		/* I/O error */
+	EZFS_INTR,		/* signal received */
+	EZFS_ISSPARE,		/* device is a hot spare */
+	EZFS_INVALCONFIG,	/* invalid vdev configuration */
+	EZFS_RECURSIVE,		/* recursive dependency */
+	EZFS_NOHISTORY,		/* no history object */
+	EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
+	EZFS_SHAREISCSIFAILED,	/* iscsitgtd failed request to share */
+	EZFS_POOLPROPS,		/* couldn't retrieve pool props */
+	EZFS_POOL_NOTSUP,	/* ops not supported for this type of pool */
+	EZFS_POOL_INVALARG,	/* invalid argument for this pool operation */
+	EZFS_NAMETOOLONG,	/* dataset name is too long */
+	EZFS_OPENFAILED,	/* open of device failed */
+	EZFS_NOCAP,		/* couldn't get capacity */
+	EZFS_LABELFAILED,	/* write of label failed */
+	EZFS_ISCSISVCUNAVAIL,	/* iscsi service unavailable */
+	EZFS_BADWHO,		/* invalid permission who */
+	EZFS_BADPERM,		/* invalid permission */
+	EZFS_BADPERMSET,	/* invalid permission set name */
+	EZFS_NODELEGATION,	/* delegated administration is disabled */
+	EZFS_PERMRDONLY,	/* permissions are readonly */
+	EZFS_UNSHARESMBFAILED,	/* failed to unshare over smb */
+	EZFS_SHARESMBFAILED,	/* failed to share over smb */
+	EZFS_BADCACHE,		/* bad cache file */
+	EZFS_ISL2CACHE,		/* device is for the level 2 ARC */
+	EZFS_VDEVNOTSUP,	/* unsupported vdev type */
+	EZFS_NOTSUP,		/* ops not supported on this dataset */
+	EZFS_ACTIVE_SPARE,	/* pool has active shared spare devices */
+	EZFS_UNKNOWN
+};
+
+/*
+ * The following data structures are all part
+ * of the zfs_allow_t data structure which is
+ * used for printing 'allow' permissions.
+ * It is a linked list of zfs_allow_t's which
+ * then contain AVL trees for user/group/sets/...
+ * and each one of the entries in those trees has
+ * AVL trees for the permissions they belong to and
+ * whether they are local, descendent or local+descendent
+ * permissions.  The AVL trees are used primarily for
+ * sorting purposes, but also so that we can quickly find
+ * a given user and/or permission.
+ */
+typedef struct zfs_perm_node {
+	avl_node_t z_node;
+	char z_pname[MAXPATHLEN];
+} zfs_perm_node_t;
+
+typedef struct zfs_allow_node {
+	avl_node_t z_node;
+	char z_key[MAXPATHLEN];		/* name, such as joe */
+	avl_tree_t z_localdescend;	/* local+descendent perms */
+	avl_tree_t z_local;		/* local permissions */
+	avl_tree_t z_descend;		/* descendent permissions */
+} zfs_allow_node_t;
+
+typedef struct zfs_allow {
+	struct zfs_allow *z_next;
+	char z_setpoint[MAXPATHLEN];
+	avl_tree_t z_sets;
+	avl_tree_t z_crperms;
+	avl_tree_t z_user;
+	avl_tree_t z_group;
+	avl_tree_t z_everyone;
+} zfs_allow_t;
+
+/*
+ * Basic handle types
+ */
+typedef struct zfs_handle zfs_handle_t;
+typedef struct zpool_handle zpool_handle_t;
+typedef struct libzfs_handle libzfs_handle_t;
+
+/*
+ * Library initialization
+ */
+extern libzfs_handle_t *libzfs_init(void);
+extern void libzfs_fini(libzfs_handle_t *);
+
+extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
+extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
+
+extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+
+extern int libzfs_errno(libzfs_handle_t *);
+extern const char *libzfs_error_action(libzfs_handle_t *);
+extern const char *libzfs_error_description(libzfs_handle_t *);
+
+/*
+ * Basic handle functions
+ */
+extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
+extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
+extern void zpool_close(zpool_handle_t *);
+extern const char *zpool_get_name(zpool_handle_t *);
+extern int zpool_get_state(zpool_handle_t *);
+extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t);
+extern void zpool_free_handles(libzfs_handle_t *);
+
+/*
+ * Iterate over all active pools in the system.
+ */
+typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
+extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
+
+/*
+ * Functions to create and destroy pools
+ */
+extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
+    nvlist_t *, nvlist_t *);
+extern int zpool_destroy(zpool_handle_t *);
+extern int zpool_add(zpool_handle_t *, nvlist_t *);
+
+/*
+ * Functions to manipulate pool and vdev state
+ */
+extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
+extern int zpool_clear(zpool_handle_t *, const char *);
+
+extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
+    vdev_state_t *);
+extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
+extern int zpool_vdev_attach(zpool_handle_t *, const char *,
+    const char *, nvlist_t *, int);
+extern int zpool_vdev_detach(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+
+extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
+
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
+    boolean_t *, boolean_t *);
+extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
+
+/*
+ * Functions to manage pool properties
+ */
+extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
+extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
+    size_t proplen, zprop_source_t *);
+extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
+    zprop_source_t *);
+
+extern const char *zpool_prop_to_name(zpool_prop_t);
+extern const char *zpool_prop_values(zpool_prop_t);
+
+/*
+ * Pool health statistics.
+ */
+typedef enum {
+	/*
+	 * The following correspond to faults as defined in the (fault.fs.zfs.*)
+	 * event namespace.  Each is associated with a corresponding message ID.
+	 */
+	ZPOOL_STATUS_CORRUPT_CACHE,	/* corrupt /kernel/drv/zpool.cache */
+	ZPOOL_STATUS_MISSING_DEV_R,	/* missing device with replicas */
+	ZPOOL_STATUS_MISSING_DEV_NR,	/* missing device with no replicas */
+	ZPOOL_STATUS_CORRUPT_LABEL_R,	/* bad device label with replicas */
+	ZPOOL_STATUS_CORRUPT_LABEL_NR,	/* bad device label with no replicas */
+	ZPOOL_STATUS_BAD_GUID_SUM,	/* sum of device guids didn't match */
+	ZPOOL_STATUS_CORRUPT_POOL,	/* pool metadata is corrupted */
+	ZPOOL_STATUS_CORRUPT_DATA,	/* data errors in user (meta)data */
+	ZPOOL_STATUS_FAILING_DEV,	/* device experiencing errors */
+	ZPOOL_STATUS_VERSION_NEWER,	/* newer on-disk version */
+	ZPOOL_STATUS_HOSTID_MISMATCH,	/* last accessed by another system */
+	ZPOOL_STATUS_IO_FAILURE_WAIT,	/* failed I/O, failmode 'wait' */
+	ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
+	ZPOOL_STATUS_FAULTED_DEV_R,	/* faulted device with replicas */
+	ZPOOL_STATUS_FAULTED_DEV_NR,	/* faulted device with no replicas */
+	ZPOOL_STATUS_BAD_LOG,		/* cannot read log chain(s) */
+
+	/*
+	 * The following are not faults per se, but still an error possibly
+	 * requiring administrative attention.  There is no corresponding
+	 * message ID.
+	 */
+	ZPOOL_STATUS_VERSION_OLDER,	/* older on-disk version */
+	ZPOOL_STATUS_RESILVERING,	/* device being resilvered */
+	ZPOOL_STATUS_OFFLINE_DEV,	/* device offline */
+
+	/*
+	 * Finally, the following indicates a healthy pool.
+	 */
+	ZPOOL_STATUS_OK
+} zpool_status_t;
+
+extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
+extern zpool_status_t zpool_import_status(nvlist_t *, char **);
+
+/*
+ * Statistics and configuration functions.
+ */
+extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
+extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
+extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
+
+/*
+ * Import and export functions
+ */
+extern int zpool_export(zpool_handle_t *, boolean_t);
+extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
+    char *altroot);
+extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
+    nvlist_t *, boolean_t);
+
+/*
+ * Search for pools to import
+ */
+extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
+extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
+    char *, uint64_t);
+extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
+    char *);
+extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
+    uint64_t);
+extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);
+
+/*
+ * Miscellaneous pool functions
+ */
+struct zfs_cmd;
+
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
+extern int zpool_upgrade(zpool_handle_t *, uint64_t);
+extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
+extern void zpool_set_history_str(const char *subcommand, int argc,
+    char **argv, char *history_str);
+extern int zpool_stage_history(libzfs_handle_t *, const char *);
+extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
+    size_t len);
+extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
+extern int zpool_get_physpath(zpool_handle_t *, char *);
+/*
+ * Basic handle manipulations.  These functions do not create or destroy the
+ * underlying datasets, only the references to them.
+ */
+extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
+extern void zfs_close(zfs_handle_t *);
+extern zfs_type_t zfs_get_type(const zfs_handle_t *);
+extern const char *zfs_get_name(const zfs_handle_t *);
+extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *);
+
+/*
+ * Property management functions.  Some functions are shared with the kernel,
+ * and are found in sys/fs/zfs.h.
+ */
+
+/*
+ * zfs dataset property management
+ */
+extern const char *zfs_prop_default_string(zfs_prop_t);
+extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
+extern const char *zfs_prop_column_name(zfs_prop_t);
+extern boolean_t zfs_prop_align_right(zfs_prop_t);
+
+extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
+    nvlist_t *, uint64_t, zfs_handle_t *, const char *);
+
+extern const char *zfs_prop_to_name(zfs_prop_t);
+extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
+extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
+    zprop_source_t *, char *, size_t, boolean_t);
+extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
+    zprop_source_t *, char *, size_t);
+extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
+extern int zfs_prop_inherit(zfs_handle_t *, const char *);
+extern const char *zfs_prop_values(zfs_prop_t);
+extern int zfs_prop_is_string(zfs_prop_t prop);
+extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
+
+typedef struct zprop_list {
+	int		pl_prop;
+	char		*pl_user_prop;
+	struct zprop_list *pl_next;
+	boolean_t	pl_all;
+	size_t		pl_width;
+	boolean_t	pl_fixed;
+} zprop_list_t;
+
+extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
+
+#define	ZFS_MOUNTPOINT_NONE	"none"
+#define	ZFS_MOUNTPOINT_LEGACY	"legacy"
+
+/*
+ * zpool property management
+ */
+extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **);
+extern const char *zpool_prop_default_string(zpool_prop_t);
+extern uint64_t zpool_prop_default_numeric(zpool_prop_t);
+extern const char *zpool_prop_column_name(zpool_prop_t);
+extern boolean_t zpool_prop_align_right(zpool_prop_t);
+
+/*
+ * Functions shared by zfs and zpool property management.
+ */
+extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all,
+    boolean_t ordered, zfs_type_t type);
+extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
+    zfs_type_t);
+extern void zprop_free_list(zprop_list_t *);
+
+/*
+ * Functions for printing zfs or zpool properties
+ */
+typedef struct zprop_get_cbdata {
+	int cb_sources;
+	int cb_columns[4];
+	int cb_colwidths[5];
+	boolean_t cb_scripted;
+	boolean_t cb_literal;
+	boolean_t cb_first;
+	zprop_list_t *cb_proplist;
+	zfs_type_t cb_type;
+} zprop_get_cbdata_t;
+
+void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
+    const char *, const char *, zprop_source_t, const char *);
+
+#define	GET_COL_NAME		1
+#define	GET_COL_PROPERTY	2
+#define	GET_COL_VALUE		3
+#define	GET_COL_SOURCE		4
+
+/*
+ * Iterator functions.
+ */
+typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
+extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
+extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
+
+/*
+ * Functions to create and destroy datasets.
+ */
+extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
+    nvlist_t *);
+extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
+extern int zfs_destroy(zfs_handle_t *);
+extern int zfs_destroy_snaps(zfs_handle_t *, char *);
+extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
+extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
+extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
+extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
+extern int zfs_send(zfs_handle_t *, const char *, const char *,
+    boolean_t, boolean_t, boolean_t, boolean_t, int);
+extern int zfs_promote(zfs_handle_t *);
+
+typedef struct recvflags {
+	/* print informational messages (ie, -v was specified) */
+	int verbose : 1;
+
+	/* the destination is a prefix, not the exact fs (ie, -d) */
+	int isprefix : 1;
+
+	/* do not actually do the recv, just check if it would work (ie, -n) */
+	int dryrun : 1;
+
+	/* rollback/destroy filesystems as necessary (eg, -F) */
+	int force : 1;
+
+	/* set "canmount=off" on all modified filesystems */
+	int canmountoff : 1;
+
+	/* byteswap flag is used internally; callers need not specify */
+	int byteswap : 1;
+} recvflags_t;
+
+extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t,
+    int, avl_tree_t *);
+
+/*
+ * Miscellaneous functions.
+ */
+extern const char *zfs_type_to_name(zfs_type_t);
+extern void zfs_refresh_properties(zfs_handle_t *);
+extern int zfs_name_valid(const char *, zfs_type_t);
+extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
+    zfs_type_t);
+extern int zfs_spa_version(zfs_handle_t *, int *);
+
+/*
+ * dataset permission functions.
+ */
+extern int zfs_perm_set(zfs_handle_t *, nvlist_t *);
+extern int zfs_perm_remove(zfs_handle_t *, nvlist_t *);
+extern int zfs_build_perms(zfs_handle_t *, char *, char *,
+    zfs_deleg_who_type_t, zfs_deleg_inherit_t, nvlist_t **nvlist_t);
+extern int zfs_perm_get(zfs_handle_t *, zfs_allow_t **);
+extern void zfs_free_allows(zfs_allow_t *);
+extern void zfs_deleg_permissions(void);
+
+/*
+ * Mount support functions.
+ */
+extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
+extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
+extern int zfs_mount(zfs_handle_t *, const char *, int);
+extern int zfs_unmount(zfs_handle_t *, const char *, int);
+extern int zfs_unmountall(zfs_handle_t *, int);
+
+/*
+ * Share support functions.
+ */
+extern boolean_t zfs_is_shared(zfs_handle_t *);
+extern int zfs_share(zfs_handle_t *);
+extern int zfs_unshare(zfs_handle_t *);
+
+/*
+ * Protocol-specific share support functions.
+ */
+extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **);
+extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **);
+extern int zfs_share_nfs(zfs_handle_t *);
+extern int zfs_share_smb(zfs_handle_t *);
+extern int zfs_shareall(zfs_handle_t *);
+extern int zfs_unshare_nfs(zfs_handle_t *, const char *);
+extern int zfs_unshare_smb(zfs_handle_t *, const char *);
+extern int zfs_unshareall_nfs(zfs_handle_t *);
+extern int zfs_unshareall_smb(zfs_handle_t *);
+extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
+extern int zfs_unshareall(zfs_handle_t *);
+extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
+extern int zfs_share_iscsi(zfs_handle_t *);
+extern int zfs_unshare_iscsi(zfs_handle_t *);
+extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
+extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *,
+    void *, void *, int, zfs_share_op_t);
+
+/*
+ * When dealing with nvlists, verify() is extremely useful
+ */
+#ifdef NDEBUG
+#define	verify(EX)	((void)(EX))
+#else
+#define	verify(EX)	assert(EX)
+#endif
+
+/*
+ * Utility function to convert a number to a human-readable form.
+ */
+extern void zfs_nicenum(uint64_t, char *, size_t);
+extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
+
+/*
+ * Given a device or file, determine if it is part of a pool.
+ */
+extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
+    boolean_t *);
+
+/*
+ * ftyp special.  Read the label from a given device.
+ */
+extern int zpool_read_label(int, nvlist_t **);
+
+/*
+ * Create and remove zvol /dev links.
+ */
+extern int zpool_create_zvol_links(zpool_handle_t *);
+extern int zpool_remove_zvol_links(zpool_handle_t *);
+
+/* is this zvol valid for use as a dump device? */
+extern int zvol_check_dump_config(char *);
+
+/*
+ * Enable and disable datasets within a pool by mounting/unmounting and
+ * sharing/unsharing them.
+ */
+extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
+extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBZFS_H */
diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h
new file mode 100644
index 000000000..9f1f66d51
--- /dev/null
+++ b/lib/libzfs/include/libzfs_impl.h
@@ -0,0 +1,193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBFS_IMPL_H
+#define	_LIBFS_IMPL_H
+
+#include <sys/dmu.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_acl.h>
+#include <sys/spa.h>
+#include <sys/nvpair.h>
+
+#include <libuutil.h>
+#include <libzfs.h>
+#include <libshare.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef	VERIFY
+#undef	VERIFY
+#endif
+#define	VERIFY	verify
+
+struct libzfs_handle {
+	int libzfs_error;
+	int libzfs_fd;
+	FILE *libzfs_mnttab;
+	FILE *libzfs_sharetab;
+	zpool_handle_t *libzfs_pool_handles;
+	uu_avl_pool_t *libzfs_ns_avlpool;
+	uu_avl_t *libzfs_ns_avl;
+	uint64_t libzfs_ns_gen;
+	int libzfs_desc_active;
+	char libzfs_action[1024];
+	char libzfs_desc[1024];
+	char *libzfs_log_str;
+	int libzfs_printerr;
+	void *libzfs_sharehdl; /* libshare handle */
+	uint_t libzfs_shareflags;
+};
+#define	ZFSSHARE_MISS	0x01	/* Didn't find entry in cache */
+
+struct zfs_handle {
+	libzfs_handle_t *zfs_hdl;
+	zpool_handle_t *zpool_hdl;
+	char zfs_name[ZFS_MAXNAMELEN];
+	zfs_type_t zfs_type; /* type including snapshot */
+	zfs_type_t zfs_head_type; /* type excluding snapshot */
+	dmu_objset_stats_t zfs_dmustats;
+	nvlist_t *zfs_props;
+	nvlist_t *zfs_user_props;
+	boolean_t zfs_mntcheck;
+	char *zfs_mntopts;
+};
+
+/*
+ * This is different from checking zfs_type, because it will also catch
+ * snapshots of volumes.
+ */
+#define	ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME)
+
+struct zpool_handle {
+	libzfs_handle_t *zpool_hdl;
+	zpool_handle_t *zpool_next;
+	char zpool_name[ZPOOL_MAXNAMELEN];
+	int zpool_state;
+	size_t zpool_config_size;
+	nvlist_t *zpool_config;
+	nvlist_t *zpool_old_config;
+	nvlist_t *zpool_props;
+	diskaddr_t zpool_start_block;
+};
+
+typedef  enum {
+	PROTO_NFS = 0,
+	PROTO_SMB = 1,
+	PROTO_END = 2
+} zfs_share_proto_t;
+
+/*
+ * The following can be used as a bitmask and any new values
+ * added must preserve that capability.
+ */
+typedef enum {
+	SHARED_NOT_SHARED = 0x0,
+	SHARED_ISCSI = 0x1,
+	SHARED_NFS = 0x2,
+	SHARED_SMB = 0x4
+} zfs_share_type_t;
+
+int zfs_error(libzfs_handle_t *, int, const char *);
+int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
+void zfs_error_aux(libzfs_handle_t *, const char *, ...);
+void *zfs_alloc(libzfs_handle_t *, size_t);
+void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t);
+char *zfs_strdup(libzfs_handle_t *, const char *);
+int no_memory(libzfs_handle_t *);
+
+int zfs_standard_error(libzfs_handle_t *, int, const char *);
+int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+int zpool_standard_error(libzfs_handle_t *, int, const char *);
+int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+
+int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
+    size_t *);
+
+
+int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t,
+    nvlist_t *, char **, uint64_t *, const char *);
+int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp,
+    zfs_type_t type);
+
+/*
+ * Use this changelist_gather() flag to force attempting mounts
+ * on each change node regardless of whether or not it is currently
+ * mounted.
+ */
+#define	CL_GATHER_MOUNT_ALWAYS	1
+
+typedef struct prop_changelist prop_changelist_t;
+
+int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t);
+int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *);
+int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *);
+int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *);
+int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **);
+void zcmd_free_nvlists(zfs_cmd_t *);
+
+int changelist_prefix(prop_changelist_t *);
+int changelist_postfix(prop_changelist_t *);
+void changelist_rename(prop_changelist_t *, const char *, const char *);
+void changelist_remove(prop_changelist_t *, const char *);
+void changelist_free(prop_changelist_t *);
+prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int);
+int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *);
+int changelist_haszonedchild(prop_changelist_t *);
+
+void remove_mountpoint(zfs_handle_t *);
+int create_parents(libzfs_handle_t *, char *, int);
+boolean_t isa_child_of(const char *dataset, const char *parent);
+
+zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
+
+int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
+
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
+int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
+boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
+
+void namespace_clear(libzfs_handle_t *);
+
+/*
+ * libshare (sharemgr) interfaces used internally.
+ */
+
+extern int zfs_init_libshare(libzfs_handle_t *, int);
+extern void zfs_uninit_libshare(libzfs_handle_t *);
+extern int zfs_parse_options(char *, zfs_share_proto_t);
+
+extern int zfs_unshare_proto(zfs_handle_t *zhp,
+    const char *, zfs_share_proto_t *);
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBFS_IMPL_H */
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
new file mode 100644
index 000000000..8823b2e69
--- /dev/null
+++ b/lib/libzfs/libzfs_changelist.c
@@ -0,0 +1,717 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Portions Copyright 2007 Ramprakash Jelari
+ */
+
+#include <libintl.h>
+#include <libuutil.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <zone.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+/*
+ * Structure to keep track of dataset state.  Before changing the 'sharenfs' or
+ * 'mountpoint' property, we record whether the filesystem was previously
+ * mounted/shared.  This prior state dictates whether we remount/reshare the
+ * dataset after the property has been changed.
+ *
+ * The interface consists of the following sequence of functions:
+ *
+ * 	changelist_gather()
+ * 	changelist_prefix()
+ * 	< change property >
+ * 	changelist_postfix()
+ * 	changelist_free()
+ *
+ * Other interfaces:
+ *
+ * changelist_remove() - remove a node from a gathered list
+ * changelist_rename() - renames all datasets appropriately when doing a rename
+ * changelist_unshare() - unshares all the nodes in a given changelist
+ * changelist_haszonedchild() - check if there is any child exported to
+ *				a local zone
+ */
+typedef struct prop_changenode {
+	zfs_handle_t		*cn_handle;	/* dataset; closed on free */
+	int			cn_shared;	/* zfs_is_shared() at gather */
+	int			cn_mounted;	/* mounted/forced at gather */
+	int			cn_zoned;	/* 'zoned' property value */
+	boolean_t		cn_needpost;	/* is postfix() needed? */
+	uu_list_node_t		cn_listnode;	/* linkage in cl_list */
+} prop_changenode_t;
+
+struct prop_changelist {
+	zfs_prop_t		cl_prop;	/* property acted upon */
+	zfs_prop_t		cl_realprop;	/* property requested */
+	zfs_prop_t		cl_shareprop;  /* used with sharenfs/sharesmb */
+	uu_list_pool_t		*cl_pool;	/* pool backing cl_list */
+	uu_list_t		*cl_list;	/* prop_changenode_t entries */
+	boolean_t		cl_waslegacy;	/* was 'legacy' or 'none' */
+	boolean_t		cl_allchildren;	/* set for 'zoned' changes */
+	boolean_t		cl_alldependents; /* set for renames */
+	int			cl_mflags;	/* Mount flags */
+	int			cl_gflags;	/* Gather request flags */
+	boolean_t		cl_haszonedchild; /* zoned child seen */
+	boolean_t		cl_sorted;	/* list kept in sorted order */
+};
+
+/*
+ * For 'mountpoint', unmount filesystems as necessary; for 'sharesmb', unshare
+ * so stale resource names are removed.  'sharenfs' is skipped because it can
+ * be re-shared with new options without interrupting service.  Returns 0, or
+ * -1 after a failure, in which case changelist_postfix() has already run.
+ */
+int
+changelist_prefix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	int ret = 0;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB)
+		return (0);
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+
+		/* once anything fails, un-flag all remaining nodes */
+		if (ret == -1) {
+			cn->cn_needpost = B_FALSE;
+			continue;
+		}
+
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		if (ZFS_IS_VOLUME(cn->cn_handle)) {
+			switch (clp->cl_realprop) {
+			case ZFS_PROP_NAME:
+				/*
+				 * If this was a rename, unshare the zvol, and
+				 * remove the /dev/zvol links.
+				 */
+				(void) zfs_unshare_iscsi(cn->cn_handle);
+
+				if (zvol_remove_link(cn->cn_handle->zfs_hdl,
+				    cn->cn_handle->zfs_name) != 0) {
+					ret = -1;
+					cn->cn_needpost = B_FALSE;
+					(void) zfs_share_iscsi(cn->cn_handle);
+				}
+				break;
+
+			case ZFS_PROP_VOLSIZE:
+				/*
+				 * If this was a change to the volume size, we
+				 * need to unshare and reshare the volume.
+				 */
+				(void) zfs_unshare_iscsi(cn->cn_handle);
+				break;
+			default:
+				break;
+			}
+		} else {
+			/*
+			 * Do the property specific processing.
+			 */
+			switch (clp->cl_prop) {
+			case ZFS_PROP_MOUNTPOINT:
+				if (zfs_unmount(cn->cn_handle, NULL,
+				    clp->cl_mflags) != 0) {
+					ret = -1;
+					cn->cn_needpost = B_FALSE;
+				}
+				break;
+			case ZFS_PROP_SHARESMB:
+				(void) zfs_unshare_smb(cn->cn_handle, NULL);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	if (ret == -1)
+		(void) changelist_postfix(clp);
+
+	return (ret);
+}
+
+/*
+ * Counterpart of changelist_prefix(): go through and remount and/or reshare
+ * the datasets as necessary.  In changelist_gather() we recorded whether each
+ * dataset was previously shared or mounted; the action taken depends on that
+ * state, on the current share properties, and on whether the mountpoint was
+ * previously 'legacy' or 'none' (in which case we always remount).  Nodes
+ * whose cn_needpost flag is clear are skipped.  Returns 0 if every step
+ * succeeded, or -1 if any of them failed.
+ */
+int
+changelist_postfix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	char shareopts[ZFS_MAXPROPLEN];
+	int errors = 0;
+	libzfs_handle_t *hdl;
+
+	/*
+	 * If we're changing the mountpoint, attempt to destroy the underlying
+	 * mountpoint.  All other datasets will have inherited from this dataset
+	 * (in which case their mountpoints exist in the filesystem in the new
+	 * location), or have explicit mountpoints set (in which case they won't
+	 * be in the changelist).
+	 */
+	if ((cn = uu_list_last(clp->cl_list)) == NULL)
+		return (0);
+
+	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT)
+		remove_mountpoint(cn->cn_handle);
+
+	/*
+	 * It is possible that the changelist_prefix() used libshare
+	 * to unshare some entries. Since libshare caches data, an
+	 * attempt to reshare during postfix can fail unless libshare
+	 * is uninitialized here so that it will reinitialize later.
+	 */
+	if (cn->cn_handle != NULL) {
+		hdl = cn->cn_handle->zfs_hdl;
+		assert(hdl != NULL);
+		zfs_uninit_libshare(hdl);
+	}
+
+	/*
+	 * We walk the datasets in reverse, because we want to mount any parent
+	 * datasets before mounting the children.  We walk all datasets even if
+	 * there are errors.
+	 */
+	for (cn = uu_list_last(clp->cl_list); cn != NULL;
+	    cn = uu_list_prev(clp->cl_list, cn)) {
+
+		boolean_t sharenfs;
+		boolean_t sharesmb;
+
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		/* Only do post-processing if it's required */
+		if (!cn->cn_needpost)
+			continue;
+		cn->cn_needpost = B_FALSE;
+
+		zfs_refresh_properties(cn->cn_handle);
+
+		if (ZFS_IS_VOLUME(cn->cn_handle)) {
+			/*
+			 * If we're doing a rename, recreate the /dev/zvol
+			 * links.
+			 */
+			if (clp->cl_realprop == ZFS_PROP_NAME &&
+			    zvol_create_link(cn->cn_handle->zfs_hdl,
+			    cn->cn_handle->zfs_name) != 0) {
+				errors++;
+			} else if (cn->cn_shared ||
+			    clp->cl_prop == ZFS_PROP_SHAREISCSI) {
+				if (zfs_prop_get(cn->cn_handle,
+				    ZFS_PROP_SHAREISCSI, shareopts,
+				    sizeof (shareopts), NULL, NULL, 0,
+				    B_FALSE) == 0 &&
+				    strcmp(shareopts, "off") == 0) {
+					errors +=
+					    zfs_unshare_iscsi(cn->cn_handle);
+				} else {
+					errors +=
+					    zfs_share_iscsi(cn->cn_handle);
+				}
+			}
+
+			continue;
+		}
+
+		/*
+		 * Remount if previously mounted or mountpoint was legacy,
+		 * or sharenfs or sharesmb  property is set.
+		 */
+		sharenfs = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		sharesmb = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARESMB,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		if ((cn->cn_mounted || clp->cl_waslegacy || sharenfs ||
+		    sharesmb) && !zfs_is_mounted(cn->cn_handle, NULL) &&
+		    zfs_mount(cn->cn_handle, NULL, 0) != 0)
+			errors++;
+
+		/*
+		 * We always re-share even if the filesystem is currently
+		 * shared, so that we can adopt any new options.
+		 */
+		if (sharenfs)
+			errors += zfs_share_nfs(cn->cn_handle);
+		else if (cn->cn_shared || clp->cl_waslegacy)
+			errors += zfs_unshare_nfs(cn->cn_handle, NULL);
+		if (sharesmb)
+			errors += zfs_share_smb(cn->cn_handle);
+		else if (cn->cn_shared || clp->cl_waslegacy)
+			errors += zfs_unshare_smb(cn->cn_handle, NULL);
+	}
+
+	return (errors ? -1 : 0);
+}
+
+/*
+ * Is "dataset" the same as "parent", or "parent" extended by '/' or '@'?
+ */
+boolean_t
+isa_child_of(const char *dataset, const char *parent)
+{
+	size_t plen = strlen(parent);
+	char next;
+
+	/* "parent" must be a prefix that ends on a component boundary */
+	if (strncmp(dataset, parent, plen) != 0)
+		return (B_FALSE);
+
+	next = dataset[plen];
+	if (next == '@' || next == '/' || next == '\0')
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * If we rename a filesystem, child filesystem handles are no longer valid
+ * since we identify each dataset by its name in the ZFS namespace.  As a
+ * result, we have to go through and fix up all the names appropriately.  We
+ * could do this automatically if libzfs kept track of all open handles, but
+ * this is a lot less work.
+ */
+void
+changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
+{
+	prop_changenode_t *cn;
+	char newname[ZFS_MAXNAMELEN];
+	size_t srclen = strlen(src);
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+		char *name = cn->cn_handle->zfs_name;
+
+		/* Do not rename a clone that's not in the source hierarchy. */
+		if (!isa_child_of(name, src))
+			continue;
+
+		/* Destroy the previous mountpoint if needed. */
+		remove_mountpoint(cn->cn_handle);
+
+		/* dst + old suffix, bounded so a long result cannot overflow */
+		(void) strlcpy(newname, dst, sizeof (newname));
+		(void) strlcat(newname, name + srclen, sizeof (newname));
+
+		(void) strlcpy(name, newname,
+		    sizeof (cn->cn_handle->zfs_name));
+	}
+}
+
+/*
+ * Given a gathered changelist for the 'sharenfs' or 'sharesmb' property,
+ * unshare all the datasets in the list.
+ */
+int
+changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
+{
+	prop_changenode_t *node = NULL;
+	int status = 0;
+
+	/* only share-property changelists have anything to unshare */
+	if (clp->cl_prop == ZFS_PROP_SHARENFS ||
+	    clp->cl_prop == ZFS_PROP_SHARESMB)
+		node = uu_list_first(clp->cl_list);
+
+	/* keep going after a failure, but remember that one occurred */
+	for (; node != NULL; node = uu_list_next(clp->cl_list, node))
+		if (zfs_unshare_proto(node->cn_handle, NULL, proto) != 0)
+			status = -1;
+
+	return (status);
+}
+
+/*
+ * Report whether any child in the changelist is exported to a local zone.
+ * The answer was recorded by change_one() while changelist_gather() walked
+ * the children; it is only ever set when running in the global zone.
+ */
+int
+changelist_haszonedchild(prop_changelist_t *clp)
+{
+	return (clp->cl_haszonedchild);
+}
+
+/*
+ * Remove a node from a gathered list.
+ */
+void
+changelist_remove(prop_changelist_t *clp, const char *name)
+{
+	prop_changenode_t *node = uu_list_first(clp->cl_list);
+
+	/* find the first node whose dataset name matches exactly */
+	while (node != NULL && strcmp(node->cn_handle->zfs_name, name) != 0)
+		node = uu_list_next(clp->cl_list, node);
+	if (node == NULL)
+		return;
+
+	/* unlink it, then release the handle and the node itself */
+	uu_list_remove(clp->cl_list, node);
+	zfs_close(node->cn_handle);
+	free(node);
+}
+
+/*
+ * Release any memory associated with a changelist.
+ */
+void
+changelist_free(prop_changelist_t *clp)
+{
+	if (clp->cl_list != NULL) {
+		prop_changenode_t *node;
+		void *walk = NULL;
+
+		/* drain the list, closing each handle before freeing it */
+		for (node = uu_list_teardown(clp->cl_list, &walk);
+		    node != NULL;
+		    node = uu_list_teardown(clp->cl_list, &walk)) {
+			zfs_close(node->cn_handle);
+			free(node);
+		}
+		uu_list_destroy(clp->cl_list);
+	}
+	if (clp->cl_pool != NULL)
+		uu_list_pool_destroy(clp->cl_pool);
+	free(clp);
+}
+
+static int
+change_one(zfs_handle_t *zhp, void *data)
+{
+	prop_changelist_t *clp = data;
+	char property[ZFS_MAXPROPLEN];
+	char where[64];
+	prop_changenode_t *cn;
+	zprop_source_t sourcetype;
+	zprop_source_t share_sourcetype;
+
+	/*
+	 * Iterator callback; zhp is either stored in a new node or closed.
+	 * We only want to unmount/unshare those filesystems that may inherit
+	 * from the target filesystem, so a dataset with a locally set value
+	 * is skipped along with its children, since changing the property
+	 * will not affect them.  If this is a rename, we take every dataset
+	 * regardless, since we need them unmounted in order to do the rename.
+	 * A volume being renamed is added even if the property lookup fails.
+	 */
+
+	if (!(ZFS_IS_VOLUME(zhp) && clp->cl_realprop == ZFS_PROP_NAME) &&
+	    zfs_prop_get(zhp, clp->cl_prop, property,
+	    sizeof (property), &sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/*
+	 * If we are "watching" sharenfs or sharesmb, also look up the
+	 * companion property tracked in cl_shareprop; its source counts
+	 * toward the inheritance test below.
+	 */
+	if (clp->cl_shareprop != ZPROP_INVAL &&
+	    zfs_prop_get(zhp, clp->cl_shareprop, property,
+	    sizeof (property), &share_sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (clp->cl_alldependents || clp->cl_allchildren ||
+	    sourcetype == ZPROP_SRC_DEFAULT ||
+	    sourcetype == ZPROP_SRC_INHERITED ||
+	    (clp->cl_shareprop != ZPROP_INVAL &&
+	    (share_sourcetype == ZPROP_SRC_DEFAULT ||
+	    share_sourcetype == ZPROP_SRC_INHERITED))) {
+		if ((cn = zfs_alloc(zfs_get_handle(zhp),
+		    sizeof (prop_changenode_t))) == NULL) {
+			zfs_close(zhp);
+			return (-1);
+		}
+
+		cn->cn_handle = zhp;
+		cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+		    zfs_is_mounted(zhp, NULL);
+		cn->cn_shared = zfs_is_shared(zhp);
+		cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+		cn->cn_needpost = B_TRUE;
+
+		/* Indicate if any child is exported to a local zone. */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			clp->cl_haszonedchild = B_TRUE;
+
+		uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+
+		if (clp->cl_sorted) {
+			uu_list_index_t idx;
+
+			(void) uu_list_find(clp->cl_list, cn, NULL,
+			    &idx);
+			uu_list_insert(clp->cl_list, cn, idx);
+		} else {
+			ASSERT(!clp->cl_alldependents);
+			verify(uu_list_insert_before(clp->cl_list,
+			    uu_list_first(clp->cl_list), cn) == 0);
+		}
+
+		if (!clp->cl_alldependents)
+			return (zfs_iter_children(zhp, change_one, data));
+	} else {
+		zfs_close(zhp);
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+static int
+compare_mountpoints(const void *a, const void *b, void *unused)
+{
+	const prop_changenode_t *lhs = a;
+	const prop_changenode_t *rhs = b;
+
+	char lpath[MAXPATHLEN];
+	char rpath[MAXPATHLEN];
+
+	boolean_t lok, rok;
+
+	/*
+	 * Unsharing and unmounting must happen in mountpoint order, which
+	 * lets the mountpoint hierarchy differ from the dataset hierarchy
+	 * and still be changed.  Datasets for which no mountpoint can be
+	 * fetched (volumes, snapshots) do not affect the change; they all
+	 * compare equal to each other and less than a dataset that has one.
+	 */
+	lok = (zfs_prop_get(lhs->cn_handle, ZFS_PROP_MOUNTPOINT, lpath,
+	    sizeof (lpath), NULL, NULL, 0, B_FALSE) == 0);
+	rok = (zfs_prop_get(rhs->cn_handle, ZFS_PROP_MOUNTPOINT, rpath,
+	    sizeof (rpath), NULL, NULL, 0, B_FALSE) == 0);
+
+	/* two real mountpoints: reverse lexical order */
+	if (lok && rok)
+		return (strcmp(rpath, lpath));
+
+	/* neither has one: equal; otherwise the one without sorts first */
+	if (lok == rok)
+		return (0);
+
+	return (lok ? 1 : -1);
+}
+
+/*
+ * Given a ZFS handle and a property, construct a complete list of datasets
+ * that need to be modified as part of this process.  Unless the property is
+ * (or is treated as) 'mountpoint', 'sharenfs', 'sharesmb' or 'shareiscsi',
+ * this just returns an empty list.  Otherwise, we add every child (or, for a
+ * rename, dependent) that inherits the property, recording whether it was
+ * mounted or shared beforehand, and then the dataset itself.  NULL on error.
+ */
+prop_changelist_t *
+changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
+    int mnt_flags)
+{
+	prop_changelist_t *clp;
+	prop_changenode_t *cn;
+	zfs_handle_t *temp;
+	char property[ZFS_MAXPROPLEN];
+	uu_compare_fn_t *compare = NULL;
+
+	if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
+		return (NULL);
+
+	/*
+	 * For mountpoint-related tasks, we want to sort everything by
+	 * mountpoint, so that we mount and unmount them in the appropriate
+	 * order, regardless of their position in the hierarchy.
+	 */
+	if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
+	    prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS ||
+	    prop == ZFS_PROP_SHARESMB) {
+		compare = compare_mountpoints;
+		clp->cl_sorted = B_TRUE;
+	}
+
+	clp->cl_pool = uu_list_pool_create("changelist_pool",
+	    sizeof (prop_changenode_t),
+	    offsetof(prop_changenode_t, cn_listnode),
+	    compare, 0);
+	if (clp->cl_pool == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	clp->cl_list = uu_list_create(clp->cl_pool, NULL,
+	    clp->cl_sorted ? UU_LIST_SORTED : 0);
+	clp->cl_gflags = gather_flags;
+	clp->cl_mflags = mnt_flags;
+
+	if (clp->cl_list == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * If this is a rename or the 'zoned' property, we pretend we're
+	 * changing the mountpoint and flag it so we can catch all children in
+	 * change_one().
+	 *
+	 * Flag cl_alldependents to catch all children plus the dependents
+	 * (clones) that are not in the hierarchy.
+	 */
+	if (prop == ZFS_PROP_NAME) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_alldependents = B_TRUE;
+	} else if (prop == ZFS_PROP_ZONED) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_allchildren = B_TRUE;
+	} else if (prop == ZFS_PROP_CANMOUNT) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VOLSIZE) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VERSION) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else {
+		clp->cl_prop = prop;
+	}
+	clp->cl_realprop = prop;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARENFS &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB &&
+	    clp->cl_prop != ZFS_PROP_SHAREISCSI)
+		return (clp);
+
+	/*
+	 * If watching SHARENFS or SHARESMB then also watch its companion.
+	 * NOTE(review): cl_shareprop is otherwise never set to ZPROP_INVAL.
+	 */
+	if (clp->cl_prop == ZFS_PROP_SHARENFS)
+		clp->cl_shareprop = ZFS_PROP_SHARESMB;
+	else if (clp->cl_prop == ZFS_PROP_SHARESMB)
+		clp->cl_shareprop = ZFS_PROP_SHARENFS;
+
+	if (clp->cl_alldependents) {
+		if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) {
+			changelist_free(clp);
+			return (NULL);
+		}
+	} else if (zfs_iter_children(zhp, change_one, clp) != 0) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * We have to re-open ourselves because we auto-close all the handles
+	 * and can't tell the difference.
+	 */
+	if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
+	    ZFS_TYPE_DATASET)) == NULL) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * Always add ourself to the list.  We add ourselves to the end so that
+	 * we're the last to be unmounted.
+	 */
+	if ((cn = zfs_alloc(zhp->zfs_hdl,
+	    sizeof (prop_changenode_t))) == NULL) {
+		zfs_close(temp);
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	cn->cn_handle = temp;
+	cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+	    zfs_is_mounted(temp, NULL);
+	cn->cn_shared = zfs_is_shared(temp);
+	cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	cn->cn_needpost = B_TRUE;
+
+	uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+	if (clp->cl_sorted) {
+		uu_list_index_t idx;
+		(void) uu_list_find(clp->cl_list, cn, NULL, &idx);
+		uu_list_insert(clp->cl_list, cn, idx);
+	} else {
+		verify(uu_list_insert_after(clp->cl_list,
+		    uu_list_last(clp->cl_list), cn) == 0);
+	}
+
+	/*
+	 * If the mountpoint property was previously 'legacy', or 'none',
+	 * record it as the behavior of changelist_postfix() will be different.
+	 */
+	if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) &&
+	    (zfs_prop_get(zhp, prop, property, sizeof (property),
+	    NULL, NULL, 0, B_FALSE) == 0 &&
+	    (strcmp(property, "legacy") == 0 ||
+	    strcmp(property, "none") == 0))) {
+		/*
+		 * do not automatically mount ex-legacy datasets if
+		 * we specifically set canmount to noauto
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) !=
+		    ZFS_CANMOUNT_NOAUTO)
+			clp->cl_waslegacy = B_TRUE;
+	}
+
+	return (clp);
+}
diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c
new file mode 100644
index 000000000..781153225
--- /dev/null
+++ b/lib/libzfs/libzfs_config.c
@@ -0,0 +1,360 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The pool configuration repository is stored in /etc/zfs/zpool.cache as a
+ * single packed nvlist.  While it would be nice to just read in this
+ * file from userland, this wouldn't work from a local zone.  So we have to have
+ * a zpool ioctl to return the complete configuration for all pools.  In the
+ * global zone, this will be identical to reading the file and unpacking it in
+ * userland.
+ */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <libuutil.h>
+
+#include "libzfs_impl.h"
+
+typedef struct config_node {
+	char		*cn_name;	/* copy of the nvpair name; owned */
+	nvlist_t	*cn_config;	/* private copy of that config */
+	uu_avl_node_t	cn_avl;		/* linkage in libzfs_ns_avl */
+} config_node_t;
+
+/* ARGSUSED */
+static int
+config_node_compare(const void *a, const void *b, void *unused)
+{
+	const config_node_t *lhs = a;
+	const config_node_t *rhs = b;
+	int order;
+
+	/*
+	 * Order nodes by name.  strcmp() only promises the sign of its
+	 * result, so normalize it to exactly -1, 0 or 1.
+	 */
+	order = strcmp(lhs->cn_name, rhs->cn_name);
+	if (order == 0)
+		return (0);
+
+	return (order < 0 ? -1 : 1);
+}
+
+void
+namespace_clear(libzfs_handle_t *hdl)
+{
+	config_node_t *node;
+	void *walk = NULL;
+
+	if (hdl->libzfs_ns_avl != NULL) {
+		for (;;) {	/* free every cached node, then the tree */
+			node = uu_avl_teardown(hdl->libzfs_ns_avl, &walk);
+			if (node == NULL)
+				break;
+			nvlist_free(node->cn_config);
+			free(node->cn_name);
+			free(node);
+		}
+		uu_avl_destroy(hdl->libzfs_ns_avl);
+		hdl->libzfs_ns_avl = NULL;
+	}
+	if (hdl->libzfs_ns_avlpool != NULL) {
+		uu_avl_pool_destroy(hdl->libzfs_ns_avlpool);
+		hdl->libzfs_ns_avlpool = NULL;
+	}
+}
+
+/*
+ * Load the pool namespace, or reload it if stale; returns 0 on success.
+ */
+static int
+namespace_reload(libzfs_handle_t *hdl)
+{
+	nvlist_t *config;
+	config_node_t *cn;
+	nvpair_t *elem;
+	zfs_cmd_t zc = { 0 };
+	void *cookie;
+
+	if (hdl->libzfs_ns_gen == 0) {
+		/*
+		 * This is the first time we've accessed the configuration
+		 * cache.  Initialize the AVL tree and then fall through to the
+		 * common code.
+		 */
+		if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool",
+		    sizeof (config_node_t),
+		    offsetof(config_node_t, cn_avl),
+		    config_node_compare, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
+
+		if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool,
+		    NULL, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
+	}
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 32768) != 0)
+		return (-1);
+
+	for (;;) {
+		zc.zc_cookie = hdl->libzfs_ns_gen;
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
+			switch (errno) {
+			case EEXIST:
+				/*
+				 * The namespace hasn't changed.
+				 */
+				zcmd_free_nvlists(&zc);
+				return (0);
+
+			case ENOMEM:	/* grow the buffer and retry */
+				if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+					zcmd_free_nvlists(&zc);
+					return (-1);
+				}
+				break;
+
+			default:
+				zcmd_free_nvlists(&zc);
+				return (zfs_standard_error(hdl, errno,
+				    dgettext(TEXT_DOMAIN, "failed to read "
+				    "pool configuration")));
+			}
+		} else {
+			hdl->libzfs_ns_gen = zc.zc_cookie;
+			break;
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/*
+	 * Clear out any existing configuration information.
+	 */
+	cookie = NULL;
+	while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) {
+		nvlist_free(cn->cn_config);
+		free(cn->cn_name);
+		free(cn);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
+		nvlist_t *child;
+		uu_avl_index_t where;
+
+		if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if ((cn->cn_name = zfs_strdup(hdl,
+		    nvpair_name(elem))) == NULL) {
+			free(cn);
+			nvlist_free(config);
+			return (-1);
+		}
+
+		verify(nvpair_value_nvlist(elem, &child) == 0);
+		if (nvlist_dup(child, &cn->cn_config, 0) != 0) {
+			free(cn->cn_name);
+			free(cn);
+			nvlist_free(config);
+			return (no_memory(hdl));
+		}
+		verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where)
+		    == NULL);
+
+		uu_avl_insert(hdl->libzfs_ns_avl, cn, where);
+	}
+
+	nvlist_free(config);
+	return (0);
+}
+
+/*
+ * Retrieve the pool's configuration, an nvlist describing the vdevs and their
+ * statistics.  If 'oldconfig' is non-NULL, it receives the previous config.
+ */
+nvlist_t *
+zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig)
+{
+	if (oldconfig)
+		*oldconfig = zhp->zpool_old_config;
+	return (zhp->zpool_config);
+}
+
+/*
+ * Refresh the vdev statistics associated with the given pool; iostat uses
+ * this to show configuration changes and the delta since the previous call.
+ * Returns -1 on buffer errors, else 0.  If the ioctl itself fails, the pool
+ * is marked UNAVAIL, and ENOENT/EINVAL (pool gone) also set '*missing'.
+ */
+int
+zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	nvlist_t *config;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	*missing = B_FALSE;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zhp->zpool_config_size == 0)
+		zhp->zpool_config_size = 1 << 16;
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0)
+		return (-1);
+
+	for (;;) {
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS,
+		    &zc) == 0) {
+			/*
+			 * The real error is returned in the zc_cookie field.
+			 */
+			error = zc.zc_cookie;
+			break;
+		}
+
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else {
+			/* test errno before cleanup can disturb it */
+			if (errno == ENOENT || errno == EINVAL)
+				*missing = B_TRUE;
+			zcmd_free_nvlists(&zc);
+			zhp->zpool_state = POOL_STATE_UNAVAIL;
+			return (0);
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+	zhp->zpool_config_size = zc.zc_nvlist_dst_size;
+
+	if (zhp->zpool_config != NULL) {
+		uint64_t oldtxg, newtxg;
+
+		verify(nvlist_lookup_uint64(zhp->zpool_config,
+		    ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0);
+		verify(nvlist_lookup_uint64(config,
+		    ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0);
+
+		if (zhp->zpool_old_config != NULL)
+			nvlist_free(zhp->zpool_old_config);
+
+		if (oldtxg != newtxg) {
+			nvlist_free(zhp->zpool_config);
+			zhp->zpool_old_config = NULL;
+		} else {
+			zhp->zpool_old_config = zhp->zpool_config;
+		}
+	}
+
+	zhp->zpool_config = config;
+	if (error)
+		zhp->zpool_state = POOL_STATE_UNAVAIL;
+	else
+		zhp->zpool_state = POOL_STATE_ACTIVE;
+
+	return (0);
+}
+
+/*
+ * Iterate over all pools in the system.
+ */
+int
+zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
+{
+	config_node_t *node;
+	int rv;
+
+	/* make sure the cached namespace is current before walking it */
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	node = uu_avl_first(hdl->libzfs_ns_avl);
+	while (node != NULL) {
+		zpool_handle_t *pool;
+
+		if (zpool_open_silent(hdl, node->cn_name, &pool) != 0)
+			return (-1);
+
+		/* a NULL handle without an error means: skip this pool */
+		if (pool != NULL && (rv = func(pool, data)) != 0)
+			return (rv);
+
+		node = uu_avl_next(hdl->libzfs_ns_avl, node);
+	}
+	return (0);
+}
+
+/*
+ * Iterate over root datasets, calling the given function for each.  The zfs
+ * handle passed each time must be explicitly closed by the callback.
+ */
+int
+zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
+{
+	config_node_t *node;
+	int rv;
+
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	node = uu_avl_first(hdl->libzfs_ns_avl);
+	while (node != NULL) {
+		zfs_handle_t *root = make_dataset_handle(hdl, node->cn_name);
+
+		/* entries whose handle cannot be made are passed over */
+		if (root != NULL && (rv = func(root, data)) != 0)
+			return (rv);
+
+		node = uu_avl_next(hdl->libzfs_ns_avl, node);
+	}
+
+	return (0);
+}
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
new file mode 100644
index 000000000..3f49652ab
--- /dev/null
+++ b/lib/libzfs/libzfs_dataset.c
@@ -0,0 +1,4267 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libdevinfo.h>
+#include <libintl.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <zone.h>
+#include <fcntl.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/avl.h>
+#include <priv.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stddef.h>
+#include <ucred.h>
+
+#include <sys/spa.h>
+#include <sys/zap.h>
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+#include "zfs_deleg.h"
+
+static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
+
+/*
+ * Map one dataset type (a single value, never a mask) to its localized,
+ * human readable name.  Any other value yields NULL.
+ */
+const char *
+zfs_type_to_name(zfs_type_t type)
+{
+	if (type == ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+
+	if (type == ZFS_TYPE_SNAPSHOT)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+
+	if (type == ZFS_TYPE_VOLUME)
+		return (dgettext(TEXT_DOMAIN, "volume"));
+
+	return (NULL);
+}
+
+/*
+ * Describe the dataset named by 'path' when it could not be opened and its
+ * real type is therefore unknown.  The answer is a best guess derived from
+ * the mask of acceptable 'types' and from the path itself.
+ */
+static const char *
+path_to_str(const char *path, int types)
+{
+	/*
+	 * A snapshot is reported when it is the only acceptable type, or when
+	 * it is one of several acceptable types and the path carries a '@'.
+	 */
+	if (types == ZFS_TYPE_SNAPSHOT)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+	if ((types & ZFS_TYPE_SNAPSHOT) != 0 && strchr(path, '@') != NULL)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+
+	/*
+	 * From here on the snapshot bit plays no part in the decision.  We
+	 * cannot know a priori whether the name would have been a filesystem
+	 * or a volume, so "filesystem" wins whenever it is acceptable and
+	 * "volume" is the only remaining possibility otherwise.
+	 */
+	types &= ~ZFS_TYPE_SNAPSHOT;
+
+	if ((types & ZFS_TYPE_FILESYSTEM) != 0)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+
+	assert(types & ZFS_TYPE_VOLUME);
+	return (dgettext(TEXT_DOMAIN, "volume"));
+}
+
+/*
+ * Validate a ZFS path before any attempt is made to open the dataset, so that
+ * a meaningful reason can be reported.  When 'hdl' is non-NULL the reason is
+ * recorded on it through zfs_error_aux().
+ *
+ * Returns 0 when the name is NOT valid and -1 (i.e. non-zero / "true") when
+ * it is acceptable.
+ */
+static int
+zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
+    boolean_t modifying)
+{
+	namecheck_err_t why;
+	char what;
+	int has_at;
+
+	if (dataset_namecheck(path, &why, &what) != 0) {
+		/* Nobody to tell: just report the name as invalid. */
+		if (hdl == NULL)
+			return (0);
+
+		switch (why) {
+		case NAME_ERR_TOOLONG:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "name is too long"));
+			break;
+
+		case NAME_ERR_LEADING_SLASH:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "leading slash in name"));
+			break;
+
+		case NAME_ERR_EMPTY_COMPONENT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty component in name"));
+			break;
+
+		case NAME_ERR_TRAILING_SLASH:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "trailing slash in name"));
+			break;
+
+		case NAME_ERR_INVALCHAR:
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "invalid character "
+			    "'%c' in name"), what);
+			break;
+
+		case NAME_ERR_MULTIPLE_AT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "multiple '@' delimiters in name"));
+			break;
+
+		case NAME_ERR_NOLETTER:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool doesn't begin with a letter"));
+			break;
+
+		case NAME_ERR_RESERVED:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "name is reserved"));
+			break;
+
+		case NAME_ERR_DISKLIKE:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "reserved disk name"));
+			break;
+
+		default:
+			break;
+		}
+
+		return (0);
+	}
+
+	has_at = (strchr(path, '@') != NULL);
+
+	/* A '@' is only tolerated when snapshots are among the types. */
+	if (has_at && (type & ZFS_TYPE_SNAPSHOT) == 0) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot delimiter '@' in filesystem name"));
+		return (0);
+	}
+
+	/* A name that must be a snapshot needs its '@'. */
+	if (!has_at && type == ZFS_TYPE_SNAPSHOT) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing '@' delimiter in snapshot name"));
+		return (0);
+	}
+
+	/* '%' is rejected only for names that are about to be modified. */
+	if (modifying && strchr(path, '%') != NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid character %c in name"), '%');
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Check 'name' against the naming rules for 'type' without reporting any
+ * error text (no library handle is supplied to the validators).  Pool names
+ * go through zpool_name_valid(); everything else through
+ * zfs_validate_name().  The result is non-zero for an acceptable name.
+ */
+int
+zfs_name_valid(const char *name, zfs_type_t type)
+{
+	int valid;
+
+	if (type == ZFS_TYPE_POOL)
+		valid = zpool_name_valid(NULL, B_FALSE, name);
+	else
+		valid = zfs_validate_name(NULL, name, type, B_FALSE);
+
+	return (valid);
+}
+
+/*
+ * Build a new nvlist holding only the user-defined properties found in the
+ * raw DSL property list 'props'.  Returns NULL (after reporting no_memory on
+ * the handle) if the list cannot be allocated or populated.
+ */
+static nvlist_t *
+process_user_props(zfs_handle_t *zhp, nvlist_t *props)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *userprops;
+	nvlist_t *value;
+	nvpair_t *pair;
+
+	if (nvlist_alloc(&userprops, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	for (pair = nvlist_next_nvpair(props, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(props, pair)) {
+		/* Native properties are not our concern here. */
+		if (!zfs_prop_user(nvpair_name(pair)))
+			continue;
+
+		verify(nvpair_value_nvlist(pair, &value) == 0);
+
+		if (nvlist_add_nvlist(userprops, nvpair_name(pair),
+		    value) != 0) {
+			nvlist_free(userprops);
+			(void) no_memory(hdl);
+			return (NULL);
+		}
+	}
+
+	return (userprops);
+}
+
+/*
+ * Open the pool 'pool_name' and push the resulting handle onto the front of
+ * the library handle's list of pool handles.  Returns the new handle, or
+ * NULL if the pool could not be opened (the list is then left untouched).
+ */
+static zpool_handle_t *
+zpool_add_handle(zfs_handle_t *zhp, const char *pool_name)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zpool_handle_t *pool;
+
+	pool = zpool_open_canfail(hdl, pool_name);
+	if (pool == NULL)
+		return (NULL);
+
+	if (hdl->libzfs_pool_handles != NULL)
+		pool->zpool_next = hdl->libzfs_pool_handles;
+	hdl->libzfs_pool_handles = pool;
+
+	return (pool);
+}
+
+/*
+ * Search the library handle's list of pool handles for one whose name
+ * matches 'pool_name' over the first 'len' bytes.  Returns the matching
+ * handle, or NULL if the list holds none.
+ */
+static zpool_handle_t *
+zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len)
+{
+	zpool_handle_t *cur;
+
+	for (cur = zhp->zfs_hdl->libzfs_pool_handles; cur != NULL;
+	    cur = cur->zpool_next) {
+		if (strncmp(pool_name, zpool_get_name(cur), len) == 0)
+			break;
+	}
+
+	return (cur);
+}
+
+/*
+ * Returns a handle to the pool that contains the provided dataset.
+ * If a handle to that pool already exists then that handle is returned.
+ * Otherwise, a new handle is created and added to the list of handles.
+ *
+ * Returns NULL if the pool cannot be opened or if the scratch buffer for
+ * the pool name cannot be allocated.
+ */
+static zpool_handle_t *
+zpool_handle(zfs_handle_t *zhp)
+{
+	char *pool_name;
+	int len;
+	zpool_handle_t *zph;
+
+	/*
+	 * The pool name is everything up to the first '/' or '@'.  'len'
+	 * includes room for the terminating NUL, which also makes the
+	 * strncmp() in zpool_find_handle() an exact-name comparison.
+	 */
+	len = strcspn(zhp->zfs_name, "/@") + 1;
+	pool_name = zfs_alloc(zhp->zfs_hdl, len);
+	/* NOTE(review): assumes zfs_alloc() yields NULL on failure. */
+	if (pool_name == NULL)
+		return (NULL);
+	(void) strlcpy(pool_name, zhp->zfs_name, len);
+
+	zph = zpool_find_handle(zhp, pool_name, len);
+	if (zph == NULL)
+		zph = zpool_add_handle(zhp, pool_name);
+
+	free(pool_name);
+	return (zph);
+}
+
+/*
+ * Close every pool handle on the library handle's list and leave the list
+ * empty.
+ */
+void
+zpool_free_handles(libzfs_handle_t *hdl)
+{
+	zpool_handle_t *cur, *following;
+
+	for (cur = hdl->libzfs_pool_handles; cur != NULL; cur = following) {
+		/* Fetch the link before the node is closed. */
+		following = cur->zpool_next;
+		zpool_close(cur);
+	}
+
+	hdl->libzfs_pool_handles = NULL;
+}
+
+/*
+ * Fetch the objset statistics and property list for the dataset named in the
+ * handle and store them in the handle, replacing whatever it held before.
+ * Returns 0 on success and -1 on failure; on failure the previously stored
+ * property lists are left in place.
+ */
+static int
+get_stats(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t cmd = { 0 };
+	nvlist_t *all, *user;
+	int rc;
+
+	(void) strlcpy(cmd.zc_name, zhp->zfs_name, sizeof (cmd.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(hdl, &cmd, 0) != 0)
+		return (-1);
+
+	/*
+	 * ENOMEM means the destination buffer was too small: grow it and
+	 * retry.  Any other failure, or a failure to grow, is fatal.
+	 */
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &cmd) != 0) {
+		if (errno != ENOMEM ||
+		    zcmd_expand_dst_nvlist(hdl, &cmd) != 0) {
+			zcmd_free_nvlists(&cmd);
+			return (-1);
+		}
+	}
+
+	zhp->zfs_dmustats = cmd.zc_objset_stats; /* structure assignment */
+
+	rc = zcmd_read_dst_nvlist(hdl, &cmd, &all);
+	zcmd_free_nvlists(&cmd);
+	if (rc != 0)
+		return (-1);
+
+	user = process_user_props(zhp, all);
+	if (user == NULL) {
+		nvlist_free(all);
+		return (-1);
+	}
+
+	/* Swap the fresh lists in for the old ones. */
+	nvlist_free(zhp->zfs_props);
+	nvlist_free(zhp->zfs_user_props);
+	zhp->zfs_props = all;
+	zhp->zfs_user_props = user;
+
+	return (0);
+}
+
+/*
+ * Refresh the properties currently stored in the handle by calling
+ * get_stats() again.  The result of get_stats() is deliberately discarded,
+ * so the caller is not told when the refresh fails.
+ */
+void
+zfs_refresh_properties(zfs_handle_t *zhp)
+{
+	(void) get_stats(zhp);
+}
+
+/*
+ * Makes a handle from the given dataset name.  Used by zfs_open() and
+ * zfs_iter_* to create child handles on the fly.
+ *
+ * Returns NULL if the handle cannot be allocated, if the dataset's stats
+ * cannot be read (errno is left as get_stats() set it), or if the dataset
+ * was found in an inconsistent state and was successfully destroyed (errno
+ * is then ENOENT).
+ */
+zfs_handle_t *
+make_dataset_handle(libzfs_handle_t *hdl, const char *path)
+{
+	zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t));
+	char *logstr;
+
+	if (zhp == NULL)
+		return (NULL);
+
+	zhp->zfs_hdl = hdl;
+
+	/*
+	 * Preserve history log string.
+	 * any changes performed here will be
+	 * logged as an internal event.
+	 */
+	logstr = zhp->zfs_hdl->libzfs_log_str;
+	zhp->zfs_hdl->libzfs_log_str = NULL;
+top:
+	(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
+
+	if (get_stats(zhp) != 0) {
+		int err = errno;
+
+		zhp->zfs_hdl->libzfs_log_str = logstr;
+		/*
+		 * After a rollback retry the handle may still own the
+		 * property lists from the previous pass; release them
+		 * (both are NULL on the first pass).  errno is restored
+		 * because callers inspect it after a NULL return.
+		 */
+		nvlist_free(zhp->zfs_props);
+		nvlist_free(zhp->zfs_user_props);
+		free(zhp);
+		errno = err;
+		return (NULL);
+	}
+
+	if (zhp->zfs_dmustats.dds_inconsistent) {
+		zfs_cmd_t zc = { 0 };
+
+		/*
+		 * If it is dds_inconsistent, then we've caught it in
+		 * the middle of a 'zfs receive' or 'zfs destroy', and
+		 * it is inconsistent from the ZPL's point of view, so
+		 * can't be mounted.  However, it could also be that we
+		 * have crashed in the middle of one of those
+		 * operations, in which case we need to get rid of the
+		 * inconsistent state.  We do that by either rolling
+		 * back to the previous snapshot (which will fail if
+		 * there is none), or destroying the filesystem.  Note
+		 * that if we are still in the middle of an active
+		 * 'receive' or 'destroy', then the rollback and destroy
+		 * will fail with EBUSY and we will drive on as usual.
+		 */
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
+			(void) zvol_remove_link(hdl, zhp->zfs_name);
+			zc.zc_objset_type = DMU_OST_ZVOL;
+		} else {
+			zc.zc_objset_type = DMU_OST_ZFS;
+		}
+
+		/*
+		 * If we can successfully destroy it, pretend that it
+		 * never existed.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
+			zhp->zfs_hdl->libzfs_log_str = logstr;
+			/* get_stats() populated these; don't leak them. */
+			nvlist_free(zhp->zfs_props);
+			nvlist_free(zhp->zfs_user_props);
+			free(zhp);
+			errno = ENOENT;
+			return (NULL);
+		}
+		/* If we can successfully roll it back, reget the stats */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
+			goto top;
+	}
+
+	/*
+	 * We've managed to open the dataset and gather statistics.  Determine
+	 * the high-level type.
+	 */
+	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();
+
+	if (zhp->zfs_dmustats.dds_is_snapshot)
+		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();	/* we should never see any other types */
+
+	zhp->zfs_hdl->libzfs_log_str = logstr;
+	/* May be NULL if the containing pool could not be opened. */
+	zhp->zpool_hdl = zpool_handle(zhp);
+	return (zhp);
+}
+
+/*
+ * Open the snapshot, filesystem, or volume named by 'path'.  'types' is a
+ * mask of the dataset types the caller is willing to accept.  On any
+ * failure (bad name, dataset cannot be opened, or its type is not in the
+ * mask) an error is recorded on 'hdl' and NULL is returned.
+ */
+zfs_handle_t *
+zfs_open(libzfs_handle_t *hdl, const char *path, int types)
+{
+	char errbuf[1024];
+	zfs_handle_t *handle;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
+
+	/* Reject malformed names up front (0 means "not valid"). */
+	if (zfs_validate_name(hdl, path, ZFS_TYPE_DATASET, B_FALSE) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid dataset name"));
+		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+		return (NULL);
+	}
+
+	/*
+	 * Gathering the stats doubles as the existence check; errno is
+	 * cleared first so that it reflects only this attempt.
+	 */
+	errno = 0;
+	handle = make_dataset_handle(hdl, path);
+	if (handle == NULL) {
+		(void) zfs_standard_error(hdl, errno, errbuf);
+		return (NULL);
+	}
+
+	if ((types & handle->zfs_type) != 0)
+		return (handle);
+
+	/* The dataset exists but is not of an acceptable type. */
+	(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+	zfs_close(handle);
+	return (NULL);
+}
+
+/*
+ * Release a ZFS handle: free the mount options string, both property lists
+ * and finally the handle itself.
+ */
+void
+zfs_close(zfs_handle_t *zhp)
+{
+	nvlist_t *props = zhp->zfs_props;
+	nvlist_t *userprops = zhp->zfs_user_props;
+
+	/* free() ignores a NULL pointer, so no guard is required. */
+	free(zhp->zfs_mntopts);
+	nvlist_free(props);
+	nvlist_free(userprops);
+	free(zhp);
+}
+
+/*
+ * Store the version property of the pool containing this dataset in
+ * '*spa_version'.  Returns 0 on success, or -1 (leaving '*spa_version'
+ * untouched) when the dataset handle has no pool handle attached.
+ */
+int
+zfs_spa_version(zfs_handle_t *zhp, int *spa_version)
+{
+	zpool_handle_t *pool = zhp->zpool_hdl;
+
+	if (pool != NULL) {
+		*spa_version = zpool_get_prop_int(pool,
+		    ZPOOL_PROP_VERSION, NULL);
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Select the reservation property appropriate for the pool's SPA version:
+ * refreservation from SPA_VERSION_REFRESERVATION onwards, reservation
+ * before that.  Returns 0 on success, -1 if the version is unavailable.
+ */
+static int
+zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop)
+{
+	int version;
+
+	if (zfs_spa_version(zhp, &version) < 0)
+		return (-1);
+
+	*resv_prop = (version >= SPA_VERSION_REFRESERVATION) ?
+	    ZFS_PROP_REFRESERVATION : ZFS_PROP_RESERVATION;
+
+	return (0);
+}
+
+/*
+ * Given an nvlist of properties to set, validates that they are correct, and
+ * parses any numeric properties (index, boolean, etc) if they are specified as
+ * strings.
+ *
+ * hdl:    library handle on which errors are recorded.
+ * type:   type of the dataset the properties are destined for.
+ * nvl:    the properties to validate; it is only read.
+ * zoned:  non-zero if the dataset's 'zoned' property is on.
+ * zhp:    handle of the existing dataset being modified, or NULL; the
+ *         checks against current values only run when it is non-NULL.
+ * errbuf: message prefix passed to zfs_error() on failure.
+ *
+ * Returns a newly allocated nvlist holding the validated values, or NULL
+ * after an error has been recorded on 'hdl'.
+ */
+nvlist_t *
+zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
+    uint64_t zoned, zfs_handle_t *zhp, const char *errbuf)
+{
+	nvpair_t *elem;
+	uint64_t intval;
+	char *strval;
+	zfs_prop_t prop;
+	nvlist_t *ret;
+	/* -1 means "not specified in nvl" for both of these. */
+	int chosen_normal = -1;
+	int chosen_utf = -1;
+
+	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
+		const char *propname = nvpair_name(elem);
+
+		/*
+		 * Make sure this property is valid and applies to this type.
+		 */
+		if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
+			if (!zfs_prop_user(propname)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid property '%s'"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			/*
+			 * If this is a user property, make sure it's a
+			 * string, and that it's less than ZAP_MAXNAMELEN.
+			 */
+			if (nvpair_type(elem) != DATA_TYPE_STRING) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be a string"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property name '%s' is too long"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			/* User properties are copied through verbatim. */
+			(void) nvpair_value_string(elem, &strval);
+			if (nvlist_add_string(ret, propname, strval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+			continue;
+		}
+
+		/* No native property may be set on a snapshot. */
+		if (type == ZFS_TYPE_SNAPSHOT) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "this property can not be modified for snapshots"));
+			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
+			goto error;
+		}
+
+		if (!zfs_prop_valid_for_type(prop, type)) {
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "'%s' does not "
+			    "apply to datasets of this type"), propname);
+			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
+			goto error;
+		}
+
+		/*
+		 * A readonly property is rejected unless it is a set-once
+		 * property and no existing dataset handle was supplied.
+		 */
+		if (zfs_prop_readonly(prop) &&
+		    (!zfs_prop_setonce(prop) || zhp != NULL)) {
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "'%s' is readonly"),
+			    propname);
+			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
+			goto error;
+		}
+
+		if (zprop_parse_value(hdl, elem, prop, type, ret,
+		    &strval, &intval, errbuf) != 0)
+			goto error;
+
+		/*
+		 * Perform some additional checks for specific properties.
+		 */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+		{
+			int version;
+
+			if (zhp == NULL)
+				break;
+			version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+			if (intval < version) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "Can not downgrade; already at version %u"),
+				    version);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+		}
+
+		case ZFS_PROP_RECORDSIZE:
+		case ZFS_PROP_VOLBLOCKSIZE:
+			/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
+			if (intval < SPA_MINBLOCKSIZE ||
+			    intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be power of 2 from %u "
+				    "to %uk"), propname,
+				    (uint_t)SPA_MINBLOCKSIZE,
+				    (uint_t)SPA_MAXBLOCKSIZE >> 10);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZFS_PROP_SHAREISCSI:
+			if (strcmp(strval, "off") != 0 &&
+			    strcmp(strval, "on") != 0 &&
+			    strcmp(strval, "type=disk") != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be 'on', 'off', or 'type=disk'"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			break;
+
+		case ZFS_PROP_MOUNTPOINT:
+		{
+			namecheck_err_t why;
+
+			/* 'none' and 'legacy' skip the zone checks below. */
+			if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 ||
+			    strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0)
+				break;
+
+			if (mountpoint_namecheck(strval, &why)) {
+				switch (why) {
+				case NAME_ERR_LEADING_SLASH:
+					zfs_error_aux(hdl,
+					    dgettext(TEXT_DOMAIN,
+					    "'%s' must be an absolute path, "
+					    "'none', or 'legacy'"), propname);
+					break;
+				case NAME_ERR_TOOLONG:
+					zfs_error_aux(hdl,
+					    dgettext(TEXT_DOMAIN,
+					    "component of '%s' is too long"),
+					    propname);
+					break;
+				default:
+					break;
+				}
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+		}
+
+			/*FALLTHRU*/
+
+		case ZFS_PROP_SHARESMB:
+		case ZFS_PROP_SHARENFS:
+			/*
+			 * For the mountpoint and sharenfs or sharesmb
+			 * properties, check if it can be set in a
+			 * global/non-global zone based on
+			 * the zoned property value:
+			 *
+			 *		global zone	    non-global zone
+			 * --------------------------------------------------
+			 * zoned=on	mountpoint (no)	    mountpoint (yes)
+			 *		sharenfs (no)	    sharenfs (no)
+			 *		sharesmb (no)	    sharesmb (no)
+			 *
+			 * zoned=off	mountpoint (yes)	N/A
+			 *		sharenfs (yes)
+			 *		sharesmb (yes)
+			 */
+			if (zoned) {
+				if (getzoneid() == GLOBAL_ZONEID) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set on "
+					    "dataset in a non-global zone"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_ZONED,
+					    errbuf);
+					goto error;
+				} else if (prop == ZFS_PROP_SHARENFS ||
+				    prop == ZFS_PROP_SHARESMB) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set in "
+					    "a non-global zone"), propname);
+					(void) zfs_error(hdl, EZFS_ZONED,
+					    errbuf);
+					goto error;
+				}
+			} else if (getzoneid() != GLOBAL_ZONEID) {
+				/*
+				 * If zoned property is 'off', this must be in
+				 * a global zone. If not, something is wrong.
+				 */
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' cannot be set while dataset "
+				    "'zoned' property is set"), propname);
+				(void) zfs_error(hdl, EZFS_ZONED, errbuf);
+				goto error;
+			}
+
+			/*
+			 * At this point, it is legitimate to set the
+			 * property. Now we want to make sure that the
+			 * property value is valid if it is sharenfs or
+			 * sharesmb.
+			 */
+			if ((prop == ZFS_PROP_SHARENFS ||
+			    prop == ZFS_PROP_SHARESMB) &&
+			    strcmp(strval, "on") != 0 &&
+			    strcmp(strval, "off") != 0) {
+				zfs_share_proto_t proto;
+
+				if (prop == ZFS_PROP_SHARESMB)
+					proto = PROTO_SMB;
+				else
+					proto = PROTO_NFS;
+
+				/*
+				 * Must be a valid sharing protocol
+				 * option string so init the libshare
+				 * in order to enable the parser and
+				 * then parse the options. We use the
+				 * control API since we don't care about
+				 * the current configuration and don't
+				 * want the overhead of loading it
+				 * until we actually do something.
+				 */
+
+				if (zfs_init_libshare(hdl,
+				    SA_INIT_CONTROL_API) != SA_OK) {
+					/*
+					 * An error occurred so we can't do
+					 * anything
+					 */
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set: problem "
+					    "in share initialization"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+
+				if (zfs_parse_options(strval, proto) != SA_OK) {
+					/*
+					 * There was an error in parsing so
+					 * deal with it by issuing an error
+					 * message and leaving after
+					 * uninitializing the libshare
+					 * interface.
+					 */
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set to invalid "
+					    "options"), propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					zfs_uninit_libshare(hdl);
+					goto error;
+				}
+				zfs_uninit_libshare(hdl);
+			}
+
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			/* Remembered for the cross-check after the loop. */
+			chosen_utf = (int)intval;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			/* Remembered for the cross-check after the loop. */
+			chosen_normal = (int)intval;
+			break;
+		default:
+			break;
+		}
+
+		/*
+		 * For changes to existing volumes, we have some additional
+		 * checks to enforce.
+		 */
+		if (type == ZFS_TYPE_VOLUME && zhp != NULL) {
+			uint64_t volsize = zfs_prop_get_int(zhp,
+			    ZFS_PROP_VOLSIZE);
+			uint64_t blocksize = zfs_prop_get_int(zhp,
+			    ZFS_PROP_VOLBLOCKSIZE);
+			char buf[64];
+
+			switch (prop) {
+			case ZFS_PROP_RESERVATION:
+			case ZFS_PROP_REFRESERVATION:
+				if (intval > volsize) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' is greater than current "
+					    "volume size"), propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+				break;
+
+			case ZFS_PROP_VOLSIZE:
+				/*
+				 * NOTE(review): assumes the volume's current
+				 * block size is never zero -- confirm.
+				 */
+				if (intval % blocksize != 0) {
+					zfs_nicenum(blocksize, buf,
+					    sizeof (buf));
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' must be a multiple of "
+					    "volume block size (%s)"),
+					    propname, buf);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+
+				if (intval == 0) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be zero"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If normalization was chosen, but no UTF8 choice was made,
+	 * enforce rejection of non-UTF8 names.
+	 *
+	 * If normalization was chosen, but rejecting non-UTF8 names
+	 * was explicitly not chosen, it is an error.
+	 */
+	if (chosen_normal > 0 && chosen_utf < 0) {
+		if (nvlist_add_uint64(ret,
+		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) {
+			(void) no_memory(hdl);
+			goto error;
+		}
+	} else if (chosen_normal > 0 && chosen_utf == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' must be set 'on' if normalization chosen"),
+		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
+		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		goto error;
+	}
+
+	/*
+	 * If this is an existing volume, and someone is setting the volsize,
+	 * make sure that it matches the reservation, or add it if necessary.
+	 */
+	if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
+	    nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+	    &intval) == 0) {
+		uint64_t old_volsize = zfs_prop_get_int(zhp,
+		    ZFS_PROP_VOLSIZE);
+		uint64_t old_reservation;
+		uint64_t new_reservation;
+		zfs_prop_t resv_prop;
+
+		if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
+			goto error;
+		old_reservation = zfs_prop_get_int(zhp, resv_prop);
+
+		/*
+		 * Only when the reservation currently equals the volume size
+		 * and the caller did not supply a reservation of their own
+		 * is the reservation set to the new volume size.
+		 */
+		if (old_volsize == old_reservation &&
+		    nvlist_lookup_uint64(ret, zfs_prop_to_name(resv_prop),
+		    &new_reservation) != 0) {
+			if (nvlist_add_uint64(ret,
+			    zfs_prop_to_name(resv_prop), intval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+		}
+	}
+	return (ret);
+
+error:
+	nvlist_free(ret);
+	return (NULL);
+}
+
+/*
+ * Resolve the delegation target 'who' to a numeric id.
+ *
+ * who:      user name, group name, numeric id, or "everyone"; may be NULL
+ *           only for the everyone/create/named-set types.
+ * who_type: in/out.  On input the caller's idea of the entry type; when the
+ *           type is ZFS_DELEG_WHO_UNKNOWN it is updated to the type that was
+ *           actually resolved (a user/group name mismatch is also corrected).
+ * ret_who:  receives the uid/gid, or -1 for everyone/create/named-set.
+ *
+ * Returns 0 on success or EZFS_BADWHO if 'who' cannot be resolved.
+ */
+static int
+zfs_get_perm_who(const char *who, zfs_deleg_who_type_t *who_type,
+    uint64_t *ret_who)
+{
+	struct passwd *pwd;
+	struct group *grp;
+	uid_t id;
+
+	if (*who_type == ZFS_DELEG_EVERYONE || *who_type == ZFS_DELEG_CREATE ||
+	    *who_type == ZFS_DELEG_NAMED_SET) {
+		*ret_who = -1;
+		return (0);
+	}
+
+	/* Every remaining type needs an explicit name or id. */
+	if (who == NULL)
+		return (EZFS_BADWHO);
+
+	if (*who_type == ZFS_DELEG_WHO_UNKNOWN &&
+	    strcmp(who, "everyone") == 0) {
+		*ret_who = -1;
+		*who_type = ZFS_DELEG_EVERYONE;
+		return (0);
+	}
+
+	pwd = getpwnam(who);
+	grp = getgrnam(who);
+
+	if ((*who_type == ZFS_DELEG_USER) && pwd) {
+		*ret_who = pwd->pw_uid;
+	} else if ((*who_type == ZFS_DELEG_GROUP) && grp) {
+		*ret_who = grp->gr_gid;
+	} else if (pwd) {
+		*ret_who = pwd->pw_uid;
+		*who_type = ZFS_DELEG_USER;
+	} else if (grp) {
+		*ret_who = grp->gr_gid;
+		*who_type = ZFS_DELEG_GROUP;
+	} else {
+		char *end;
+
+		/*
+		 * Not a known name: accept a plain decimal id.  errno must
+		 * be cleared first, otherwise a stale value (for instance
+		 * one left behind by the failed lookups above) would make a
+		 * perfectly good number look like a conversion error.  A
+		 * string with no digits at all is rejected as well.
+		 */
+		errno = 0;
+		id = strtol(who, &end, 10);
+		if (errno != 0 || end == who || *end != '\0') {
+			return (EZFS_BADWHO);
+		} else {
+			*ret_who = id;
+			if (*who_type == ZFS_DELEG_WHO_UNKNOWN)
+				*who_type = ZFS_DELEG_USER;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Add the entry 'name' to 'who_nvp'.  With a permission list the entry is
+ * an embedded nvlist; without one it is a bare boolean marker.
+ */
+static void
+zfs_perms_add_to_nvlist(nvlist_t *who_nvp, char *name, nvlist_t *perms_nvp)
+{
+	if (perms_nvp == NULL) {
+		verify(nvlist_add_boolean(who_nvp, name) == 0);
+		return;
+	}
+
+	verify(nvlist_add_nvlist(who_nvp, name, perms_nvp) == 0);
+}
+
+/*
+ * Add the who-key entries for one target and one inheritance flavour to
+ * 'who_nvp'.  An entry keyed by 'who_type' carries the plain permissions and
+ * an entry keyed by toupper(who_type) carries the permission sets.  When
+ * neither list has any content, both entries are still added.
+ */
+static void
+helper(zfs_deleg_who_type_t who_type, uint64_t whoid, char *whostr,
+    zfs_deleg_inherit_t inherit, nvlist_t *who_nvp, nvlist_t *perms_nvp,
+    nvlist_t *sets_nvp)
+{
+	char key[ZFS_MAX_DELEG_NAME];
+	boolean_t want_perms, want_sets;
+	void *who_arg;
+
+	/* Named sets are keyed by their name, everything else by id. */
+	if (who_type == ZFS_DELEG_NAMED_SET)
+		who_arg = whostr;
+	else
+		who_arg = (void *)&whoid;
+
+	want_perms = (nvlist_next_nvpair(perms_nvp, NULL) != NULL);
+	want_sets = (nvlist_next_nvpair(sets_nvp, NULL) != NULL);
+
+	if (!want_perms && !want_sets) {
+		want_perms = B_TRUE;
+		want_sets = B_TRUE;
+	}
+
+	if (want_perms) {
+		zfs_deleg_whokey(key, who_type, inherit, who_arg);
+		zfs_perms_add_to_nvlist(who_nvp, key, perms_nvp);
+	}
+
+	if (want_sets) {
+		zfs_deleg_whokey(key, toupper(who_type), inherit, who_arg);
+		zfs_perms_add_to_nvlist(who_nvp, key, sets_nvp);
+	}
+}
+
+/*
+ * Add the entries for one delegation target to 'who_nvp'.  Named sets and
+ * create-time permissions get a single entry with no inheritance flag; all
+ * other targets get one entry per inheritance bit present in 'inherit'.
+ */
+static void
+zfs_perms_add_who_nvlist(nvlist_t *who_nvp, uint64_t whoid, void *whostr,
+    nvlist_t *perms_nvp, nvlist_t *sets_nvp,
+    zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit)
+{
+	if (who_type == ZFS_DELEG_NAMED_SET || who_type == ZFS_DELEG_CREATE) {
+		helper(who_type, whoid, whostr, 0,
+		    who_nvp, perms_nvp, sets_nvp);
+		return;
+	}
+
+	if ((inherit & ZFS_DELEG_PERM_LOCAL) != 0)
+		helper(who_type, whoid, whostr, ZFS_DELEG_LOCAL,
+		    who_nvp, perms_nvp, sets_nvp);
+
+	if ((inherit & ZFS_DELEG_PERM_DESCENDENT) != 0)
+		helper(who_type, whoid, whostr, ZFS_DELEG_DESCENDENT,
+		    who_nvp, perms_nvp, sets_nvp);
+}
+
+/*
+ * Construct nvlist to pass down to kernel for setting/removing permissions.
+ *
+ * The nvlist is constructed as a series of nvpairs with an optional embedded
+ * nvlist of permissions to remove or set.  The topmost nvpairs are the actual
+ * base attribute names stored in the dsl.
+ * Arguments:
+ *
+ * whostr:   is a comma separated list of users, groups, or a single set name.
+ *           whostr may be null for everyone or create perms.  The string is
+ *           tokenized in place.
+ * who_type: is the type of entry in whostr.  Typically this will be
+ *           ZFS_DELEG_WHO_UNKNOWN.
+ * perms:    comma separated list of permissions.  May be null if user
+ *           is requested to remove permissions by who.  The string is
+ *           tokenized in place.
+ * inherit:  Specifies the inheritance of the permissions.  Will be either
+ *           ZFS_DELEG_PERM_LOCAL and/or  ZFS_DELEG_PERM_DESCENDENT.
+ * nvp       The constructed nvlist to pass to zfs_perm_set().
+ *           The output nvp will look something like this.
+ *              ul$1234 -> {create ; destroy }
+ *              Ul$1234 -> { @myset }
+ *              s-$@myset - { snapshot; checksum; compression }
+ *
+ * Returns 0 on success with *nvp set (the caller owns it).  On failure *nvp
+ * is NULL and the result is 1 for an allocation failure or the value of
+ * zfs_error() otherwise.
+ */
+int
+zfs_build_perms(zfs_handle_t *zhp, char *whostr, char *perms,
+    zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit, nvlist_t **nvp)
+{
+	nvlist_t *who_nvp = NULL;
+	nvlist_t *perms_nvp = NULL;
+	nvlist_t *sets_nvp = NULL;
+	char errbuf[1024];
+	char *who_tok = NULL, *perm;
+	int error;
+
+	*nvp = NULL;
+
+	if (perms != NULL) {
+		if (nvlist_alloc(&perms_nvp, NV_UNIQUE_NAME, 0) != 0)
+			return (1);
+		if (nvlist_alloc(&sets_nvp, NV_UNIQUE_NAME, 0) != 0) {
+			nvlist_free(perms_nvp);
+			return (1);
+		}
+	}
+
+	if (nvlist_alloc(&who_nvp, NV_UNIQUE_NAME, 0) != 0) {
+		who_nvp = NULL;
+		error = 1;
+		goto out;
+	}
+
+	if (who_type == ZFS_DELEG_NAMED_SET) {
+		namecheck_err_t why;
+		char what;
+
+		if (permset_namecheck(whostr, &why, &what) != 0) {
+			switch (why) {
+			case NAME_ERR_NO_AT:
+				zfs_error_aux(zhp->zfs_hdl,
+				    dgettext(TEXT_DOMAIN,
+				    "set definition must begin with an '@' "
+				    "character"));
+				break;
+			default:
+				break;
+			}
+			error = zfs_error(zhp->zfs_hdl,
+			    EZFS_BADPERMSET, whostr);
+			goto out;
+		}
+	}
+
+	/*
+	 * Build up nvlist(s) of permissions.  Two nvlists are maintained.
+	 * The first nvlist perms_nvp will have normal permissions and the
+	 * other sets_nvp will have only permission set names in it.
+	 *
+	 * strtok() must not be primed with a NULL string: that would resume
+	 * whatever tokenization happened to run last in this process.
+	 */
+	if (perms != NULL) {
+		for (perm = strtok(perms, ","); perm != NULL;
+		    perm = strtok(NULL, ",")) {
+			const char *perm_canonical =
+			    zfs_deleg_canonicalize_perm(perm);
+
+			if (perm_canonical) {
+				verify(nvlist_add_boolean(perms_nvp,
+				    perm_canonical) == 0);
+			} else if (perm[0] == '@') {
+				verify(nvlist_add_boolean(sets_nvp,
+				    perm) == 0);
+			} else {
+				error = zfs_error(zhp->zfs_hdl,
+				    EZFS_BADPERM, perm);
+				goto out;
+			}
+		}
+	}
+
+	if (whostr && who_type != ZFS_DELEG_CREATE) {
+		who_tok = strtok(whostr, ",");
+		if (who_tok == NULL) {
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN, "Who string is NULL: %s"),
+			    whostr);
+			error = zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf);
+			goto out;
+		}
+	}
+
+	/*
+	 * Now create the nvlist(s).  The body runs once even without a who
+	 * token, which is how the everyone/create entries get added.
+	 */
+	do {
+		uint64_t who_id;
+
+		error = zfs_get_perm_who(who_tok, &who_type,
+		    &who_id);
+		if (error) {
+			/* who_tok may be NULL here; never hand that to %s */
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "Unable to determine uid/gid for "
+			    "%s "), who_tok != NULL ? who_tok : "");
+			error = zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf);
+			goto out;
+		}
+
+		/*
+		 * add entries for both local and descendent when required
+		 */
+		zfs_perms_add_who_nvlist(who_nvp, who_id, who_tok,
+		    perms_nvp, sets_nvp, who_type, inherit);
+
+		/* Only keep tokenizing if whostr was actually tokenized. */
+	} while (who_tok != NULL && (who_tok = strtok(NULL, ",")) != NULL);
+
+	*nvp = who_nvp;
+	who_nvp = NULL;		/* ownership has passed to the caller */
+	error = 0;
+
+out:
+	/*
+	 * perms_nvp and sets_nvp are scratch lists: nvlist_add_nvlist()
+	 * stored copies of them in who_nvp, so they are released on the
+	 * success path as well.
+	 */
+	if (who_nvp != NULL)
+		nvlist_free(who_nvp);
+	if (perms_nvp != NULL)
+		nvlist_free(perms_nvp);
+	if (sets_nvp != NULL)
+		nvlist_free(sets_nvp);
+	return (error);
+}
+
+/*
+ * Common back end for zfs_perm_set() and zfs_perm_remove(): pass the
+ * delegation nvlist 'nvp' to the kernel with ZFS_IOC_SET_FSACL.  'unset' is
+ * stored in zc_perm_action (zfs_perm_set() passes B_FALSE, zfs_perm_remove()
+ * passes B_TRUE).
+ *
+ * Returns 0 on success, -1 if the nvlist could not be written into the
+ * command, or the result of zfs_error()/zfs_standard_error() when the ioctl
+ * fails.
+ */
+static int
+zfs_perm_set_common(zfs_handle_t *zhp, nvlist_t *nvp, boolean_t unset)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	int saved_errno;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "Cannot update 'allows' for '%s'"),
+	    zhp->zfs_name);
+
+	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, nvp))
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	zc.zc_perm_action = unset;
+
+	error = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SET_FSACL, &zc);
+
+	/* Capture errno before the cleanup below has a chance to change it. */
+	saved_errno = errno;
+
+	/*
+	 * Release the command's nvlist buffers on every path, including the
+	 * generic failure path.
+	 */
+	zcmd_free_nvlists(&zc);
+
+	if (error == 0)
+		return (0);
+
+	if (saved_errno == ENOTSUP) {
+		/* Translated text is data, not a format string. */
+		(void) snprintf(errbuf, sizeof (errbuf), "%s",
+		    gettext("Pool must be upgraded to use 'allow/unallow'"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION, errbuf));
+	}
+
+	return (zfs_standard_error(zhp->zfs_hdl, saved_errno, errbuf));
+}
+
+/*
+ * Apply the delegation nvlist 'nvp' to the dataset; thin wrapper around
+ * zfs_perm_set_common() with the 'unset' flag cleared.
+ */
+int
+zfs_perm_set(zfs_handle_t *zhp, nvlist_t *nvp)
+{
+	int rc;
+
+	rc = zfs_perm_set_common(zhp, nvp, B_FALSE);
+	return (rc);
+}
+
+/*
+ * Counterpart of zfs_perm_set(): thin wrapper around zfs_perm_set_common()
+ * with the 'unset' flag raised.
+ */
+int
+zfs_perm_remove(zfs_handle_t *zhp, nvlist_t *perms)
+{
+	int rc;
+
+	rc = zfs_perm_set_common(zhp, perms, B_TRUE);
+	return (rc);
+}
+
+/*
+ * AVL comparison callback: orders two nodes by strcmp() of their z_pname
+ * strings and normalizes the result to exactly -1, 0 or 1.
+ */
+static int
+perm_compare(const void *arg1, const void *arg2)
+{
+	const zfs_perm_node_t *lhs = arg1;
+	const zfs_perm_node_t *rhs = arg2;
+	int order = strcmp(lhs->z_pname, rhs->z_pname);
+
+	if (order == 0)
+		return (0);
+
+	return (order > 0 ? 1 : -1);
+}
+
+/*
+ * Free every zfs_perm_node_t held in 'tree', then destroy the tree itself.
+ */
+static void
+zfs_destroy_perm_tree(avl_tree_t *tree)
+{
+	void *cookie = NULL;
+	zfs_perm_node_t *node;
+
+	for (node = avl_destroy_nodes(tree, &cookie); node != NULL;
+	    node = avl_destroy_nodes(tree, &cookie)) {
+		free(node);
+	}
+
+	avl_destroy(tree);
+}
+
+/*
+ * Tear down a tree of zfs_allow_node_t entries: each node's three permission
+ * subtrees are destroyed before the node is freed, and finally the tree
+ * itself is destroyed.
+ */
+static void
+zfs_destroy_tree(avl_tree_t *tree)
+{
+	void *cookie = NULL;
+	zfs_allow_node_t *node;
+
+	for (node = avl_destroy_nodes(tree, &cookie); node != NULL;
+	    node = avl_destroy_nodes(tree, &cookie)) {
+		zfs_destroy_perm_tree(&node->z_localdescend);
+		zfs_destroy_perm_tree(&node->z_local);
+		zfs_destroy_perm_tree(&node->z_descend);
+		free(node);
+	}
+
+	avl_destroy(tree);
+}
+
+/*
+ * Release a linked list of zfs_allow_t structures (chained through z_next),
+ * destroying the five who-trees of every element.  A NULL list is a no-op.
+ */
+void
+zfs_free_allows(zfs_allow_t *allow)
+{
+	zfs_allow_t *cur;
+	zfs_allow_t *next;
+
+	for (cur = allow; cur != NULL; cur = next) {
+		zfs_destroy_tree(&cur->z_sets);
+		zfs_destroy_tree(&cur->z_crperms);
+		zfs_destroy_tree(&cur->z_user);
+		zfs_destroy_tree(&cur->z_group);
+		zfs_destroy_tree(&cur->z_everyone);
+
+		/* Fetch the successor before the element is released. */
+		next = cur->z_next;
+		free(cur);
+	}
+}
+
+/*
+ * Allocate a zfs_allow_t for 'setpoint', create its five (empty) who-trees
+ * and, when 'prev' is non-NULL, link the new element after it.
+ *
+ * Returns the new element, or NULL if zfs_alloc() fails.
+ */
+static zfs_allow_t *
+zfs_alloc_perm_tree(zfs_handle_t *zhp, zfs_allow_t *prev, char *setpoint)
+{
+	zfs_allow_t *ptree;
+	avl_tree_t *trees[5];
+	int i;
+
+	ptree = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_allow_t));
+	if (ptree == NULL)
+		return (NULL);
+
+	(void) strlcpy(ptree->z_setpoint, setpoint, sizeof (ptree->z_setpoint));
+
+	/* All five trees share the same comparator and node layout. */
+	trees[0] = &ptree->z_sets;
+	trees[1] = &ptree->z_crperms;
+	trees[2] = &ptree->z_user;
+	trees[3] = &ptree->z_group;
+	trees[4] = &ptree->z_everyone;
+
+	for (i = 0; i < 5; i++) {
+		avl_create(trees[i], perm_compare, sizeof (zfs_allow_node_t),
+		    offsetof(zfs_allow_node_t, z_node));
+	}
+
+	ptree->z_next = NULL;
+	if (prev != NULL)
+		prev->z_next = ptree;
+
+	return (ptree);
+}
+
+/*
+ * Add a permission to the appropriate AVL permission tree of 'allownode'.
+ * The appropriate tree may not be the requested one.  For example, if 'ld'
+ * indicates a local permission but the same permission already exists as a
+ * descendent permission, it is removed from the descendent tree and added
+ * to the local+descendent tree instead.
+ *
+ * Returns 0 on success, or -1 if a new node could not be allocated.
+ */
+static int
+zfs_coalesce_perm(zfs_handle_t *zhp, zfs_allow_node_t *allownode,
+    char *perm, char ld)
+{
+	zfs_perm_node_t key;
+	zfs_perm_node_t *existing, *counterpart;
+	zfs_perm_node_t *newnode;
+	avl_index_t where, where2;
+	avl_tree_t *target, *alternate;
+
+	(void) strlcpy(key.z_pname, perm, sizeof (key.z_pname));
+
+	/* Choose the requested tree and the tree it may have to merge with. */
+	if (ld == ZFS_DELEG_NA) {
+		target = &allownode->z_localdescend;
+		alternate = &allownode->z_descend;
+	} else if (ld == ZFS_DELEG_LOCAL) {
+		target = &allownode->z_local;
+		alternate = &allownode->z_descend;
+	} else {
+		target = &allownode->z_descend;
+		alternate = &allownode->z_local;
+	}
+
+	existing = avl_find(target, &key, &where);
+	counterpart = avl_find(alternate, &key, &where2);
+
+	if (counterpart != NULL) {
+		/*
+		 * The permission is present in the other tree: drop it there
+		 * and, unless the requested tree already holds it, redirect
+		 * the insertion to the local+descendent tree.
+		 */
+		avl_remove(alternate, counterpart);
+		free(counterpart);
+		if (existing == NULL)
+			target = &allownode->z_localdescend;
+	}
+
+	/* Already recorded in the requested tree; nothing to insert. */
+	if (existing != NULL)
+		return (0);
+
+	newnode = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_perm_node_t));
+	if (newnode == NULL)
+		return (-1);
+
+	*newnode = key;
+	avl_add(target, newnode);
+	return (0);
+}
+
+/*
+ * Uggh, this is going to be a bit complicated.
+ * We have an nvlist coming out of the kernel that
+ * will indicate where the permission is set and then
+ * it will contain all of the various "who's", and what
+ * their permissions are.  To further complicate this
+ * we will then have to coalesce the local, descendent
+ * and local+descendent permissions where appropriate.
+ * The kernel only knows about a permission as being local
+ * or descendent, but not both.
+ *
+ * In order to make this easier for zfs_main to deal with
+ * a series of AVL trees will be used to maintain
+ * all of this, primarily for sorting purposes as well
+ * as the ability to quickly locate a specific entry.
+ *
+ * What we end up with are trees for sets, create perms,
+ * users, groups and everyone.  With each of those trees
+ * we have subtrees for local, descendent and local+descendent
+ * permissions.
+ *
+ * On success 0 is returned and *zfs_perms points at a list of zfs_allow_t
+ * structures (one per set point), or is NULL when the kernel returned no
+ * entries; the list is released with zfs_free_allows().  On failure a
+ * non-zero value is returned.  Once list construction has begun, a failure
+ * frees the partial list and resets *zfs_perms to NULL; failures before that
+ * point leave *zfs_perms untouched.
+ */
+int
+zfs_perm_get(zfs_handle_t *zhp, zfs_allow_t **zfs_perms)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	nvlist_t *nvlist;
+	nvlist_t *permnv, *sourcenv;
+	nvpair_t *who_pair, *source_pair;
+	nvpair_t *perm_pair;
+	char errbuf[1024];
+	zfs_allow_t *zallowp, *newallowp;
+	char  ld;
+	char *nvpname;
+	uid_t	uid;
+	gid_t	gid;
+	avl_tree_t *tree;
+	avl_index_t where;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+		return (-1);
+
+	/* Retry with a larger destination buffer for as long as ENOMEM. */
+	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else if (errno == ENOTSUP) {
+			zcmd_free_nvlists(&zc);
+			/* Translated text is data, not a format string. */
+			(void) snprintf(errbuf, sizeof (errbuf), "%s",
+			    gettext("Pool must be upgraded to use 'allow'"));
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_BADVERSION, errbuf));
+		} else {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &nvlist) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	source_pair = nvlist_next_nvpair(nvlist, NULL);
+
+	if (source_pair == NULL) {
+		/* Nothing is delegated; release the unpacked nvlist. */
+		nvlist_free(nvlist);
+		*zfs_perms = NULL;
+		return (0);
+	}
+
+	*zfs_perms = zfs_alloc_perm_tree(zhp, NULL, nvpair_name(source_pair));
+	if (*zfs_perms == NULL) {
+		/*
+		 * Allocation failure is an error, not an empty result, and
+		 * the unpacked nvlist must not be leaked.
+		 */
+		nvlist_free(nvlist);
+		return (-1);
+	}
+
+	zallowp = *zfs_perms;
+
+	for (;;) {
+		struct passwd *pwd;
+		struct group *grp;
+		zfs_allow_node_t *allownode;
+		zfs_allow_node_t  findallownode;
+		zfs_allow_node_t *newallownode;
+
+		(void) strlcpy(zallowp->z_setpoint,
+		    nvpair_name(source_pair),
+		    sizeof (zallowp->z_setpoint));
+
+		if ((error = nvpair_value_nvlist(source_pair, &sourcenv)) != 0)
+			goto abort;
+
+		/*
+		 * Make sure nvlist is composed correctly
+		 */
+		if (zfs_deleg_verify_nvlist(sourcenv)) {
+			goto abort;
+		}
+
+		who_pair = nvlist_next_nvpair(sourcenv, NULL);
+		if (who_pair == NULL) {
+			goto abort;
+		}
+
+		do {
+			error = nvpair_value_nvlist(who_pair, &permnv);
+			if (error) {
+				goto abort;
+			}
+
+			/*
+			 * First build up the key to use
+			 * for looking up in the various
+			 * who trees.
+			 */
+			ld = nvpair_name(who_pair)[1];
+			nvpname = nvpair_name(who_pair);
+			switch (nvpair_name(who_pair)[0]) {
+			case ZFS_DELEG_USER:
+			case ZFS_DELEG_USER_SETS:
+				tree = &zallowp->z_user;
+				uid = atol(&nvpname[3]);
+				pwd = getpwuid(uid);
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "user %s",
+				    (pwd) ? pwd->pw_name :
+				    &nvpair_name(who_pair)[3]);
+				break;
+			case ZFS_DELEG_GROUP:
+			case ZFS_DELEG_GROUP_SETS:
+				tree = &zallowp->z_group;
+				gid = atol(&nvpname[3]);
+				grp = getgrgid(gid);
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "group %s",
+				    (grp) ? grp->gr_name :
+				    &nvpair_name(who_pair)[3]);
+				break;
+			case ZFS_DELEG_CREATE:
+			case ZFS_DELEG_CREATE_SETS:
+				tree = &zallowp->z_crperms;
+				(void) strlcpy(findallownode.z_key, "",
+				    sizeof (findallownode.z_key));
+				break;
+			case ZFS_DELEG_EVERYONE:
+			case ZFS_DELEG_EVERYONE_SETS:
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "everyone");
+				tree = &zallowp->z_everyone;
+				break;
+			case ZFS_DELEG_NAMED_SET:
+			case ZFS_DELEG_NAMED_SET_SETS:
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "%s",
+				    &nvpair_name(who_pair)[3]);
+				tree = &zallowp->z_sets;
+				break;
+			default:
+				/*
+				 * Unknown who type: neither 'tree' nor the
+				 * lookup key has been set, so the entry
+				 * cannot be placed anywhere.
+				 */
+				goto abort;
+			}
+
+			/*
+			 * Place who in tree
+			 */
+			allownode = avl_find(tree, &findallownode, &where);
+			if (allownode == NULL) {
+				if ((newallownode = zfs_alloc(zhp->zfs_hdl,
+				    sizeof (zfs_allow_node_t))) == NULL) {
+					goto abort;
+				}
+				avl_create(&newallownode->z_localdescend,
+				    perm_compare,
+				    sizeof (zfs_perm_node_t),
+				    offsetof(zfs_perm_node_t, z_node));
+				avl_create(&newallownode->z_local,
+				    perm_compare,
+				    sizeof (zfs_perm_node_t),
+				    offsetof(zfs_perm_node_t, z_node));
+				avl_create(&newallownode->z_descend,
+				    perm_compare,
+				    sizeof (zfs_perm_node_t),
+				    offsetof(zfs_perm_node_t, z_node));
+				(void) strlcpy(newallownode->z_key,
+				    findallownode.z_key,
+				    sizeof (findallownode.z_key));
+				avl_insert(tree, newallownode, where);
+				allownode = newallownode;
+			}
+
+			/*
+			 * Now iterate over the permissions and
+			 * place them in the appropriate local,
+			 * descendent or local+descendent tree.
+			 *
+			 * The permissions are added to the tree
+			 * via zfs_coalesce_perm().
+			 */
+			perm_pair = nvlist_next_nvpair(permnv, NULL);
+			if (perm_pair == NULL)
+				goto abort;
+			do {
+				if (zfs_coalesce_perm(zhp, allownode,
+				    nvpair_name(perm_pair), ld) != 0)
+					goto abort;
+			} while ((perm_pair = nvlist_next_nvpair(permnv,
+			    perm_pair)));
+		} while ((who_pair = nvlist_next_nvpair(sourcenv, who_pair)));
+
+		source_pair = nvlist_next_nvpair(nvlist, source_pair);
+		if (source_pair == NULL)
+			break;
+
+		/*
+		 * allocate another node from the link list of
+		 * zfs_allow_t structures
+		 */
+		newallowp = zfs_alloc_perm_tree(zhp, zallowp,
+		    nvpair_name(source_pair));
+		if (newallowp == NULL) {
+			goto abort;
+		}
+		zallowp = newallowp;
+	}
+	nvlist_free(nvlist);
+	return (0);
+abort:
+	zfs_free_allows(*zfs_perms);
+	/* Do not hand the caller a pointer to the list that was just freed. */
+	*zfs_perms = NULL;
+	nvlist_free(nvlist);
+	return (-1);
+}
+
+/*
+ * Map a delegation note identifier to its translated, human-readable text.
+ * Unknown identifiers and ZFS_DELEG_NOTE_NONE yield the translation of the
+ * empty string.
+ */
+static char *
+zfs_deleg_perm_note(zfs_deleg_note_t note)
+{
+	/*
+	 * Don't put newlines on end of lines
+	 */
+	switch (note) {
+	/* These four permissions carry the same note. */
+	case ZFS_DELEG_NOTE_CREATE:
+	case ZFS_DELEG_NOTE_DESTROY:
+	case ZFS_DELEG_NOTE_SNAPSHOT:
+	case ZFS_DELEG_NOTE_ROLLBACK:
+		return (dgettext(TEXT_DOMAIN,
+		    "Must also have the 'mount' ability"));
+
+	case ZFS_DELEG_NOTE_CLONE:
+		return (dgettext(TEXT_DOMAIN, "Must also have the 'create' "
+		    "ability and 'mount'\n"
+		    "\t\t\t\tability in the origin file system"));
+
+	case ZFS_DELEG_NOTE_PROMOTE:
+		return (dgettext(TEXT_DOMAIN, "Must also have the 'mount'\n"
+		    "\t\t\t\tand 'promote' ability in the origin file system"));
+
+	case ZFS_DELEG_NOTE_RENAME:
+		return (dgettext(TEXT_DOMAIN, "Must also have the 'mount' "
+		    "and 'create' \n\t\t\t\tability in the new parent"));
+
+	case ZFS_DELEG_NOTE_RECEIVE:
+		return (dgettext(TEXT_DOMAIN, "Must also have the 'mount'"
+		    " and 'create' ability"));
+
+	case ZFS_DELEG_NOTE_USERPROP:
+		return (dgettext(TEXT_DOMAIN,
+		    "Allows changing any user property"));
+
+	case ZFS_DELEG_NOTE_ALLOW:
+		return (dgettext(TEXT_DOMAIN,
+		    "Must also have the permission that is being\n"
+		    "\t\t\t\tallowed"));
+
+	case ZFS_DELEG_NOTE_MOUNT:
+		return (dgettext(TEXT_DOMAIN,
+		    "Allows mount/umount of ZFS datasets"));
+
+	case ZFS_DELEG_NOTE_SHARE:
+		return (dgettext(TEXT_DOMAIN,
+		    "Allows sharing file systems over NFS or SMB\n"
+		    "\t\t\t\tprotocols"));
+
+	case ZFS_DELEG_NOTE_NONE:
+	default:
+		break;
+	}
+
+	return (dgettext(TEXT_DOMAIN, ""));
+}
+
+/*
+ * Category of a delegated permission, used for the TYPE column printed by
+ * zfs_deleg_permissions().
+ */
+typedef enum {
+	ZFS_DELEG_SUBCOMMAND,	/* displayed as "subcommand" */
+	ZFS_DELEG_PROP,		/* displayed as "property" */
+	ZFS_DELEG_OTHER		/* displayed as "other" */
+} zfs_deleg_perm_type_t;
+
+/*
+ * Classify a permission name: "userprop" is reported as ZFS_DELEG_OTHER,
+ * every other name as ZFS_DELEG_SUBCOMMAND.
+ */
+zfs_deleg_perm_type_t
+zfs_deleg_perm_type(const char *perm)
+{
+	zfs_deleg_perm_type_t type = ZFS_DELEG_SUBCOMMAND;
+
+	if (strcmp(perm, "userprop") == 0)
+		type = ZFS_DELEG_OTHER;
+
+	return (type);
+}
+
+/*
+ * Return the translated display name of a permission type, or "" for a
+ * value outside the enumeration.
+ */
+static char *
+zfs_deleg_perm_type_str(zfs_deleg_perm_type_t type)
+{
+	if (type == ZFS_DELEG_SUBCOMMAND)
+		return (dgettext(TEXT_DOMAIN, "subcommand"));
+
+	if (type == ZFS_DELEG_PROP)
+		return (dgettext(TEXT_DOMAIN, "property"));
+
+	if (type == ZFS_DELEG_OTHER)
+		return (dgettext(TEXT_DOMAIN, "other"));
+
+	return ("");
+}
+
+/*
+ * zprop_iter() callback: print one table row on stderr for every property
+ * that zfs_prop_delegatable() accepts.  Always asks the iteration to
+ * continue.
+ */
+/*ARGSUSED*/
+static int
+zfs_deleg_prop_cb(int prop, void *cb)
+{
+	if (!zfs_prop_delegatable(prop))
+		return (ZPROP_CONT);
+
+	(void) fprintf(stderr, "%-15s %-15s\n", zfs_prop_to_name(prop),
+	    zfs_deleg_perm_type_str(ZFS_DELEG_PROP));
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Print on stderr a table of the permissions that can be delegated: first
+ * the entries of zfs_deleg_perm_tab, then the delegatable properties.
+ */
+void
+zfs_deleg_permissions(void)
+{
+	int idx = 0;
+
+	(void) fprintf(stderr, "\n%-15s %-15s\t%s\n\n", "NAME",
+	    "TYPE", "NOTES");
+
+	/*
+	 * Subcommands come first; the table ends with a NULL z_perm.
+	 */
+	while (zfs_deleg_perm_tab[idx].z_perm != NULL) {
+		const char *name = zfs_deleg_perm_tab[idx].z_perm;
+		char *type_str =
+		    zfs_deleg_perm_type_str(zfs_deleg_perm_type(name));
+		char *note_str =
+		    zfs_deleg_perm_note(zfs_deleg_perm_tab[idx].z_note);
+
+		(void) fprintf(stderr, "%-15s %-15s\t%s\n",
+		    name, type_str, note_str);
+		idx++;
+	}
+
+	(void) zprop_iter(zfs_deleg_prop_cb, NULL, B_FALSE, B_TRUE,
+	    ZFS_TYPE_DATASET|ZFS_TYPE_VOLUME);
+}
+
+/*
+ * Given a property name and value, set the property for the given dataset.
+ *
+ * The name/value pair is validated with zfs_valid_proplist(), the affected
+ * datasets are gathered into a changelist, and the change is applied with
+ * ZFS_IOC_SET_PROP.  Unless canmount is being set to noauto, the changelist
+ * is prefixed before and postfixed after the ioctl.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret = -1;
+	prop_changelist_t *cl = NULL;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *nvl = NULL, *realprops;
+	zfs_prop_t prop;
+	boolean_t do_prefix;
+	uint64_t idx;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
+	    zhp->zfs_name);
+
+	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
+	    nvlist_add_string(nvl, propname, propval) != 0) {
+		(void) no_memory(hdl);
+		goto error;
+	}
+
+	if ((realprops = zfs_valid_proplist(hdl, zhp->zfs_type, nvl,
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL)
+		goto error;
+
+	/* From here on work with the validated list. */
+	nvlist_free(nvl);
+	nvl = realprops;
+
+	prop = zfs_name_to_prop(propname);
+
+	if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL)
+		goto error;
+
+	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
+		goto error;
+	}
+
+	/*
+	 * If the dataset's canmount property is being set to noauto,
+	 * then we want to prevent unmounting & remounting it.
+	 */
+	do_prefix = !((prop == ZFS_PROP_CANMOUNT) &&
+	    (zprop_string_to_index(prop, propval, &idx,
+	    ZFS_TYPE_DATASET) == 0) && (idx == ZFS_CANMOUNT_NOAUTO));
+
+	if (do_prefix && (ret = changelist_prefix(cl)) != 0)
+		goto error;
+
+	/*
+	 * Execute the corresponding ioctl() to set this property.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) {
+		/*
+		 * 'ret' may have been cleared to 0 by a successful
+		 * changelist_prefix() above; make sure this failure is not
+		 * reported to the caller as success.
+		 */
+		ret = -1;
+		goto error;
+	}
+
+	ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
+	if (ret != 0) {
+		switch (errno) {
+
+		case ENOSPC:
+			/*
+			 * For quotas and reservations, ENOSPC indicates
+			 * something different; setting a quota or reservation
+			 * doesn't use any disk space.
+			 */
+			switch (prop) {
+			case ZFS_PROP_QUOTA:
+			case ZFS_PROP_REFQUOTA:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is less than current used or "
+				    "reserved space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+				break;
+
+			case ZFS_PROP_RESERVATION:
+			case ZFS_PROP_REFRESERVATION:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is greater than available space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+				break;
+
+			default:
+				(void) zfs_standard_error(hdl, errno, errbuf);
+				break;
+			}
+			break;
+
+		case EBUSY:
+			if (prop == ZFS_PROP_VOLBLOCKSIZE)
+				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
+			else
+				(void) zfs_standard_error(hdl, EBUSY, errbuf);
+			break;
+
+		case EROFS:
+			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool and or dataset must be upgraded to set this "
+			    "property or value"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+
+		case ERANGE:
+			if (prop == ZFS_PROP_COMPRESSION) {
+				(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property setting is not allowed on "
+				    "bootable datasets"));
+				(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+			} else {
+				(void) zfs_standard_error(hdl, errno, errbuf);
+			}
+			break;
+
+		case EOVERFLOW:
+			/*
+			 * This platform can't address a volume this big.
+			 */
+#ifdef _ILP32
+			if (prop == ZFS_PROP_VOLSIZE) {
+				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
+				break;
+			}
+#endif
+			/* FALLTHROUGH */
+		default:
+			(void) zfs_standard_error(hdl, errno, errbuf);
+		}
+	} else {
+		if (do_prefix)
+			ret = changelist_postfix(cl);
+
+		/*
+		 * Refresh the statistics so the new property value
+		 * is reflected.
+		 */
+		if (ret == 0)
+			(void) get_stats(zhp);
+	}
+
+error:
+	/* Shared cleanup for the success path and every failure path. */
+	nvlist_free(nvl);
+	zcmd_free_nvlists(&zc);
+	if (cl)
+		changelist_free(cl);
+	return (ret);
+}
+
+/*
+ * Given a property, inherit the value from the parent dataset.
+ *
+ * User properties are handled directly with a single ZFS_IOC_INHERIT_PROP
+ * call.  Native properties are first checked (not read-only, inheritable,
+ * valid for this dataset type, not blocked by zone usage) and then applied
+ * with the affected datasets wrapped in a changelist prefix/postfix.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	prop_changelist_t *cl;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	zfs_prop_t prop;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
+
+	if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
+		/*
+		 * For user properties, the amount of work we have to do is very
+		 * small, so just do it here.
+		 */
+		if (!zfs_prop_user(propname)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+		(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
+
+		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0)
+			return (zfs_standard_error(hdl, errno, errbuf));
+
+		return (0);
+	}
+
+	/*
+	 * Verify that this property is inheritable.
+	 */
+	if (zfs_prop_readonly(prop))
+		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
+
+	if (!zfs_prop_inheritable(prop))
+		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
+
+	/*
+	 * Check to see if the value applies to this type
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
+
+	/*
+	 * Normalize the name, to get rid of shorthand abbreviations.
+	 */
+	propname = zfs_prop_to_name(prop);
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
+
+	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
+	}
+
+	/*
+	 * Determine datasets which will be affected by this change, if any.
+	 */
+	if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL)
+		return (-1);
+
+	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
+		goto error;
+	}
+
+	if ((ret = changelist_prefix(cl)) != 0)
+		goto error;
+
+	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) {
+		/*
+		 * Report the failure, then leave through the common exit so
+		 * that the changelist is released.
+		 */
+		ret = zfs_standard_error(hdl, errno, errbuf);
+		goto error;
+	}
+
+	if ((ret = changelist_postfix(cl)) != 0)
+		goto error;
+
+	/*
+	 * Refresh the statistics so the new property is reflected.
+	 */
+	(void) get_stats(zhp);
+
+error:
+	changelist_free(cl);
+	return (ret);
+}
+
+/*
+ * True DSL properties are stored in an nvlist.  The following two functions
+ * extract them appropriately.
+ *
+ * getprop_uint64() returns the numeric value recorded for 'prop' in
+ * zhp->zfs_props.  When the property has no entry, the numeric default is
+ * returned and *source is set to "".
+ */
+static uint64_t
+getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
+{
+	nvlist_t *entry;
+	uint64_t value;
+
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(prop), &entry) != 0) {
+		/* No entry for this property: fall back to the default. */
+		*source = "";
+		return (zfs_prop_default_numeric(prop));
+	}
+
+	/* *source stays NULL when the entry carries no source string. */
+	*source = NULL;
+	verify(nvlist_lookup_uint64(entry, ZPROP_VALUE, &value) == 0);
+	(void) nvlist_lookup_string(entry, ZPROP_SOURCE, source);
+
+	return (value);
+}
+
+/*
+ * String counterpart of getprop_uint64(): returns the string value recorded
+ * for 'prop' in zhp->zfs_props.  When the property has no entry, the string
+ * default (or "" if there is none) is returned and *source is set to "".
+ */
+static char *
+getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
+{
+	nvlist_t *entry;
+	char *value;
+
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(prop), &entry) != 0) {
+		/* No entry for this property: fall back to the default. */
+		value = (char *)zfs_prop_default_string(prop);
+		if (value == NULL)
+			value = "";
+		*source = "";
+		return (value);
+	}
+
+	/* *source stays NULL when the entry carries no source string. */
+	*source = NULL;
+	verify(nvlist_lookup_string(entry, ZPROP_VALUE, &value) == 0);
+	(void) nvlist_lookup_string(entry, ZPROP_SOURCE, source);
+
+	return (value);
+}
+
+/*
+ * Internal function for getting a numeric property.  Both zfs_prop_get() and
+ * zfs_prop_get_int() are built using this interface.
+ *
+ * Certain properties can be overridden using 'mount -o'.  In this case, scan
+ * the contents of the /etc/mnttab entry, searching for the appropriate options.
+ * If they differ from the on-disk values, report the current values and mark
+ * the source "temporary".
+ *
+ * On success 0 is returned with the value stored in *val and the raw source
+ * string (possibly NULL) in *source; *src is only written, when non-NULL, for
+ * a temporary mount-option override.  A non-zero value is returned on error.
+ */
+static int
+get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
+    char **source, uint64_t *val)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *zplprops = NULL;
+	struct mnttab mnt;
+	char *mntopt_on = NULL;
+	char *mntopt_off = NULL;
+
+	*source = NULL;
+
+	/* Pick the on/off mount options that can override this property. */
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+		mntopt_on = MNTOPT_ATIME;
+		mntopt_off = MNTOPT_NOATIME;
+		break;
+
+	case ZFS_PROP_DEVICES:
+		mntopt_on = MNTOPT_DEVICES;
+		mntopt_off = MNTOPT_NODEVICES;
+		break;
+
+	case ZFS_PROP_EXEC:
+		mntopt_on = MNTOPT_EXEC;
+		mntopt_off = MNTOPT_NOEXEC;
+		break;
+
+	case ZFS_PROP_READONLY:
+		mntopt_on = MNTOPT_RO;
+		mntopt_off = MNTOPT_RW;
+		break;
+
+	case ZFS_PROP_SETUID:
+		mntopt_on = MNTOPT_SETUID;
+		mntopt_off = MNTOPT_NOSETUID;
+		break;
+
+	case ZFS_PROP_XATTR:
+		mntopt_on = MNTOPT_XATTR;
+		mntopt_off = MNTOPT_NOXATTR;
+		break;
+
+	case ZFS_PROP_NBMAND:
+		mntopt_on = MNTOPT_NBMAND;
+		mntopt_off = MNTOPT_NONBMAND;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Because looking up the mount options is potentially expensive
+	 * (iterating over all of /etc/mnttab), we defer its calculation until
+	 * we're looking up a property which requires its presence.
+	 */
+	if (!zhp->zfs_mntcheck &&
+	    (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
+		struct mnttab entry, search = { 0 };
+		FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;
+
+		search.mnt_special = (char *)zhp->zfs_name;
+		search.mnt_fstype = MNTTYPE_ZFS;
+		rewind(mnttab);
+
+		if (getmntany(mnttab, &entry, &search) == 0) {
+			zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
+			    entry.mnt_mntopts);
+			if (zhp->zfs_mntopts == NULL)
+				return (-1);
+		}
+
+		zhp->zfs_mntcheck = B_TRUE;
+	}
+
+	/* Only mnt_mntopts is filled in before 'mnt' goes to hasmntopt(). */
+	if (zhp->zfs_mntopts == NULL)
+		mnt.mnt_mntopts = "";
+	else
+		mnt.mnt_mntopts = zhp->zfs_mntopts;
+
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+	case ZFS_PROP_DEVICES:
+	case ZFS_PROP_EXEC:
+	case ZFS_PROP_READONLY:
+	case ZFS_PROP_SETUID:
+	case ZFS_PROP_XATTR:
+	case ZFS_PROP_NBMAND:
+		*val = getprop_uint64(zhp, prop, source);
+
+		if (hasmntopt(&mnt, mntopt_on) && !*val) {
+			*val = B_TRUE;
+			if (src)
+				*src = ZPROP_SRC_TEMPORARY;
+		} else if (hasmntopt(&mnt, mntopt_off) && *val) {
+			*val = B_FALSE;
+			if (src)
+				*src = ZPROP_SRC_TEMPORARY;
+		}
+		break;
+
+	case ZFS_PROP_CANMOUNT:
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val != ZFS_CANMOUNT_ON)
+			*source = zhp->zfs_name;
+		else
+			*source = "";	/* default */
+		break;
+
+	case ZFS_PROP_QUOTA:
+	case ZFS_PROP_REFQUOTA:
+	case ZFS_PROP_RESERVATION:
+	case ZFS_PROP_REFRESERVATION:
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val == 0)
+			*source = "";	/* default */
+		else
+			*source = zhp->zfs_name;
+		break;
+
+	case ZFS_PROP_MOUNTED:
+		*val = (zhp->zfs_mntopts != NULL);
+		break;
+
+	case ZFS_PROP_NUMCLONES:
+		*val = zhp->zfs_dmustats.dds_num_clones;
+		break;
+
+	case ZFS_PROP_VERSION:
+	case ZFS_PROP_NORMALIZE:
+	case ZFS_PROP_UTF8ONLY:
+	case ZFS_PROP_CASE:
+		if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) ||
+		    zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+			return (-1);
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) {
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "unable to get %s property"),
+			    zfs_prop_to_name(prop));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION,
+			    dgettext(TEXT_DOMAIN, "internal error")));
+		}
+		if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 ||
+		    nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop),
+		    val) != 0) {
+			/*
+			 * The nvlist may have been unpacked successfully even
+			 * though the lookup failed; release it in that case.
+			 */
+			if (zplprops)
+				nvlist_free(zplprops);
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "unable to get %s property"),
+			    zfs_prop_to_name(prop));
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOMEM,
+			    dgettext(TEXT_DOMAIN, "internal error")));
+		}
+		if (zplprops)
+			nvlist_free(zplprops);
+		zcmd_free_nvlists(&zc);
+		break;
+
+	default:
+		switch (zfs_prop_get_type(prop)) {
+		case PROP_TYPE_NUMBER:
+		case PROP_TYPE_INDEX:
+			*val = getprop_uint64(zhp, prop, source);
+			/*
+			 * If we tried to use a default value for a
+			 * readonly property, it means that it was not
+			 * present; return an error.
+			 */
+			if (zfs_prop_readonly(prop) &&
+			    *source && (*source)[0] == '\0') {
+				return (-1);
+			}
+			break;
+
+		case PROP_TYPE_STRING:
+		default:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "cannot get non-numeric property"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "internal error")));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Calculate the source type, given the raw source string.
+ *
+ * Nothing is changed when the caller supplied no 'statbuf' or when *srctype
+ * is already ZPROP_SRC_TEMPORARY.  Otherwise a NULL source maps to NONE, an
+ * empty source to DEFAULT, the dataset's own name to LOCAL, and any other
+ * string to INHERITED, in which case the string is copied into 'statbuf'.
+ */
+static void
+get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
+    char *statbuf, size_t statlen)
+{
+	if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY)
+		return;
+
+	if (source == NULL) {
+		*srctype = ZPROP_SRC_NONE;
+		return;
+	}
+
+	if (source[0] == '\0') {
+		*srctype = ZPROP_SRC_DEFAULT;
+		return;
+	}
+
+	if (strcmp(source, zhp->zfs_name) == 0) {
+		*srctype = ZPROP_SRC_LOCAL;
+		return;
+	}
+
+	(void) strlcpy(statbuf, source, statlen);
+	*srctype = ZPROP_SRC_INHERITED;
+}
+
+/*
+ * Retrieve a property from the given object.  If 'literal' is specified, then
+ * numbers are left as exact values.  Otherwise, numbers are converted to a
+ * human-readable form.
+ *
+ * Returns 0 on success, or -1 on error.
+ */
+int
+zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
+    zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
+{
+	char *source = NULL;
+	uint64_t val;
+	char *str;
+	const char *strval;
+
+	/*
+	 * Check to see if this property applies to our object
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (-1);
+
+	if (src)
+		*src = ZPROP_SRC_NONE;
+
+	switch (prop) {
+	case ZFS_PROP_CREATION:
+		/*
+		 * 'creation' is a time_t stored in the statistics.  We convert
+		 * this into a string unless 'literal' is specified.
+		 */
+		{
+			val = getprop_uint64(zhp, prop, &source);
+			time_t time = (time_t)val;
+			struct tm t;
+
+			if (literal ||
+			    localtime_r(&time, &t) == NULL ||
+			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
+			    &t) == 0)
+				(void) snprintf(propbuf, proplen, "%llu", (u_longlong_t) val);
+		}
+		break;
+
+	case ZFS_PROP_MOUNTPOINT:
+		/*
+		 * Getting the precise mountpoint can be tricky.
+		 *
+		 *  - for 'none' or 'legacy', return those values.
+		 *  - for inherited mountpoints, we want to take everything
+		 *    after our ancestor and append it to the inherited value.
+		 *
+		 * If the pool has an alternate root, we want to prepend that
+		 * root to any values we return.
+		 */
+
+		str = getprop_string(zhp, prop, &source);
+
+		if (str[0] == '/') {
+			char buf[MAXPATHLEN];
+			char *root = buf;
+			const char *relpath = zhp->zfs_name + strlen(source);
+
+			if (relpath[0] == '/')
+				relpath++;
+
+			if ((zpool_get_prop(zhp->zpool_hdl,
+			    ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) ||
+			    (strcmp(root, "-") == 0))
+				root[0] = '\0';
+			/*
+			 * Special case an alternate root of '/'. This will
+			 * avoid having multiple leading slashes in the
+			 * mountpoint path.
+			 */
+			if (strcmp(root, "/") == 0)
+				root++;
+
+			/*
+			 * If the mountpoint is '/' then skip over this
+			 * if we are obtaining either an alternate root or
+			 * an inherited mountpoint.
+			 */
+			if (str[1] == '\0' && (root[0] != '\0' ||
+			    relpath[0] != '\0'))
+				str++;
+
+			if (relpath[0] == '\0')
+				(void) snprintf(propbuf, proplen, "%s%s",
+				    root, str);
+			else
+				(void) snprintf(propbuf, proplen, "%s%s%s%s",
+				    root, str, relpath[0] == '@' ? "" : "/",
+				    relpath);
+		} else {
+			/* 'legacy' or 'none' */
+			(void) strlcpy(propbuf, str, proplen);
+		}
+
+		break;
+
+	case ZFS_PROP_ORIGIN:
+		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
+		    proplen);
+		/*
+		 * If there is no parent at all, return failure to indicate that
+		 * it doesn't apply to this dataset.
+		 */
+		if (propbuf[0] == '\0')
+			return (-1);
+		break;
+
+	case ZFS_PROP_QUOTA:
+	case ZFS_PROP_REFQUOTA:
+	case ZFS_PROP_RESERVATION:
+	case ZFS_PROP_REFRESERVATION:
+
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+
+		/*
+		 * If quota or reservation is 0, we translate this into 'none'
+		 * (unless literal is set), and indicate that it's the default
+		 * value.  Otherwise, we print the number nicely and indicate
+		 * that its set locally.
+		 */
+		if (val == 0) {
+			if (literal)
+				(void) strlcpy(propbuf, "0", proplen);
+			else
+				(void) strlcpy(propbuf, "none", proplen);
+		} else {
+			if (literal)
+				(void) snprintf(propbuf, proplen, "%llu",
+				    (u_longlong_t)val);
+			else
+				zfs_nicenum(val, propbuf, proplen);
+		}
+		break;
+
+	case ZFS_PROP_COMPRESSRATIO:
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
+		    val / 100, (longlong_t)val % 100);
+		break;
+
+	case ZFS_PROP_TYPE:
+		switch (zhp->zfs_type) {
+		case ZFS_TYPE_FILESYSTEM:
+			str = "filesystem";
+			break;
+		case ZFS_TYPE_VOLUME:
+			str = "volume";
+			break;
+		case ZFS_TYPE_SNAPSHOT:
+			str = "snapshot";
+			break;
+		default:
+			abort();
+		}
+		(void) snprintf(propbuf, proplen, "%s", str);
+		break;
+
+	case ZFS_PROP_MOUNTED:
+		/*
+		 * The 'mounted' property is a pseudo-property that described
+		 * whether the filesystem is currently mounted.  Even though
+		 * it's a boolean value, the typical values of "on" and "off"
+		 * don't make sense, so we translate to "yes" and "no".
+		 */
+		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
+		    src, &source, &val) != 0)
+			return (-1);
+		if (val)
+			(void) strlcpy(propbuf, "yes", proplen);
+		else
+			(void) strlcpy(propbuf, "no", proplen);
+		break;
+
+	case ZFS_PROP_NAME:
+		/*
+		 * The 'name' property is a pseudo-property derived from the
+		 * dataset name.  It is presented as a real property to simplify
+		 * consumers.
+		 */
+		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
+		break;
+
+	default:
+		switch (zfs_prop_get_type(prop)) {
+		case PROP_TYPE_NUMBER:
+			if (get_numeric_property(zhp, prop, src,
+			    &source, &val) != 0)
+				return (-1);
+			if (literal)
+				(void) snprintf(propbuf, proplen, "%llu",
+				    (u_longlong_t)val);
+			else
+				zfs_nicenum(val, propbuf, proplen);
+			break;
+
+		case PROP_TYPE_STRING:
+			(void) strlcpy(propbuf,
+			    getprop_string(zhp, prop, &source), proplen);
+			break;
+
+		case PROP_TYPE_INDEX:
+			if (get_numeric_property(zhp, prop, src,
+			    &source, &val) != 0)
+				return (-1);
+			if (zfs_prop_index_to_string(prop, val, &strval) != 0)
+				return (-1);
+			(void) strlcpy(propbuf, strval, proplen);
+			break;
+
+		default:
+			abort();
+		}
+	}
+
+	get_source(zhp, src, source, statbuf, statlen);
+
+	return (0);
+}
+
+/*
+ * Utility function to get the given numeric property.  Does no validation that
+ * the given property is the appropriate type; should only be used with
+ * hard-coded property types.
+ *
+ * The status of get_numeric_property() is deliberately discarded, so a failed
+ * lookup cannot be reported to the caller.  'val' therefore starts at 0 so
+ * that a lookup which fails without storing anything yields a well-defined 0
+ * instead of an indeterminate stack value.
+ */
+uint64_t
+zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
+{
+	char *source;
+	uint64_t val = 0;
+
+	(void) get_numeric_property(zhp, prop, NULL, &source, &val);
+
+	return (val);
+}
+
+/*
+ * Set the given numeric property to 'val'.
+ *
+ * The value is handed to zfs_prop_set() as an exact decimal string.  The
+ * abbreviated form produced by zfs_nicenum() is meant for display (it is what
+ * zfs_prop_get() emits when 'literal' is not requested) and would not
+ * reliably round-trip to the same number.
+ *
+ * Returns whatever zfs_prop_set() returns.
+ */
+int
+zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val)
+{
+	char buf[64];
+
+	(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)val);
+	return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf));
+}
+
+/*
+ * Numeric counterpart of zfs_prop_get(): the property value is stored in
+ * '*value' rather than being formatted into a string.
+ *
+ * If 'src' is non-NULL it is reset to ZPROP_SRC_NONE before the lookup; the
+ * source information is then filled in through get_source() using 'src',
+ * 'statbuf' and 'statlen'.
+ *
+ * Returns 0 on success, -1 if the lookup fails, or the result of
+ * zfs_error_fmt() when the property does not apply to this dataset type.
+ */
+int
+zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
+    zprop_source_t *src, char *statbuf, size_t statlen)
+{
+	char *source;
+
+	/* Reject properties that do not apply to this kind of dataset. */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE,
+		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
+		    zfs_prop_to_name(prop)));
+
+	if (src != NULL)
+		*src = ZPROP_SRC_NONE;
+
+	if (get_numeric_property(zhp, prop, src, &source, value) == 0) {
+		get_source(zhp, src, source, statbuf, statlen);
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Accessor for the dataset name stored in the handle.  The returned pointer
+ * refers to storage inside 'zhp'.
+ */
+const char *
+zfs_get_name(const zfs_handle_t *zhp)
+{
+	const char *name = zhp->zfs_name;
+
+	return (name);
+}
+
+/*
+ * Accessor for the dataset type recorded in the handle.
+ */
+zfs_type_t
+zfs_get_type(const zfs_handle_t *zhp)
+{
+	zfs_type_t type = zhp->zfs_type;
+
+	return (type);
+}
+
+/*
+ * Invoke 'func' on every child dataset of the given filesystem.
+ *
+ * Handles that are not filesystems have no children to visit, so 0 is
+ * returned immediately.  Each child handle is passed to 'func' together with
+ * 'data'; this function never closes that handle itself.  Iteration stops at
+ * the first non-zero return from 'func', and that value is returned.
+ */
+int
+zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *child;
+	int err;
+
+	if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM)
+		return (0);
+
+	/*
+	 * The ioctl overwrites zc_name with the child it found, so the
+	 * parent name has to be put back before asking for the next one.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0) {
+		/*
+		 * Private dataset names are skipped.  A failure to build the
+		 * handle is silently ignored, as the only plausible
+		 * explanation is that the pool has since been removed.
+		 */
+		if (!dataset_name_hidden(zc.zc_name) &&
+		    (child = make_dataset_handle(hdl, zc.zc_name)) != NULL) {
+			if ((err = func(child, data)) != 0)
+				return (err);
+		}
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name,
+		    sizeof (zc.zc_name));
+	}
+
+	/*
+	 * ESRCH is the normal end-of-list indication.  ENOENT means the
+	 * underlying dataset was removed after the handle was obtained.
+	 * Neither is reported as an error.
+	 */
+	if (errno == ESRCH || errno == ENOENT)
+		return (0);
+
+	return (zfs_standard_error(hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
+}
+
+/*
+ * Invoke 'func' on every snapshot of the given dataset.
+ *
+ * A snapshot handle has no snapshots of its own, so 0 is returned
+ * immediately in that case.  Each snapshot handle is passed to 'func'
+ * together with 'data'; this function never closes that handle itself.
+ * Iteration stops at the first non-zero return from 'func', and that value
+ * is returned.
+ */
+int
+zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *snap;
+	int err;
+
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
+		return (0);
+
+	/*
+	 * The ioctl overwrites zc_name with the snapshot it found, so the
+	 * parent name has to be put back before asking for the next one.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) {
+		/* Entries for which no handle can be built are skipped. */
+		snap = make_dataset_handle(hdl, zc.zc_name);
+		if (snap != NULL && (err = func(snap, data)) != 0)
+			return (err);
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name,
+		    sizeof (zc.zc_name));
+	}
+
+	/*
+	 * ESRCH is the normal end-of-list indication.  ENOENT means the
+	 * underlying dataset was removed after the handle was obtained;
+	 * that case is silently treated as success.
+	 */
+	if (errno == ESRCH || errno == ENOENT)
+		return (0);
+
+	return (zfs_standard_error(hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
+}
+
+/*
+ * Visit all children of 'zhp': child filesystems first, then snapshots.
+ * The snapshot pass is skipped if the filesystem pass returns non-zero, and
+ * that value is returned to the caller.
+ */
+int
+zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	int ret = zfs_iter_filesystems(zhp, func, data);
+
+	if (ret == 0)
+		ret = zfs_iter_snapshots(zhp, func, data);
+
+	return (ret);
+}
+
+/*
+ * Given a complete name, copy just the portion that refers to the parent
+ * (everything before the last '/') into 'buf'.
+ *
+ * Returns 0 on success.  Returns -1, leaving 'buf' untouched, if 'path'
+ * contains no '/' (it names a pool) or if 'buflen' is 0.
+ *
+ * The result is always NUL-terminated within 'buflen' bytes; a parent name
+ * that does not fit is truncated rather than written past the end of 'buf'.
+ * 'buf' may be the very same pointer as 'path' (check_parents() calls it that
+ * way), in which case the name is simply cut at its last '/'.
+ */
+static int
+parent_name(const char *path, char *buf, size_t buflen)
+{
+	const char *loc;
+	size_t len;
+
+	if (buflen == 0 || (loc = strrchr(path, '/')) == NULL)
+		return (-1);
+
+	/* Clamp so that the terminator below always lands inside 'buf'. */
+	len = (size_t)(loc - path);
+	if (len >= buflen)
+		len = buflen - 1;
+
+	/* strncpy() must not be given overlapping objects; skip self-copy. */
+	if (buf != path)
+		(void) strncpy(buf, path, len);
+	buf[len] = '\0';
+
+	return (0);
+}
+
+/*
+ * If accept_ancestor is false, then check to make sure that the given path has
+ * a parent, and that it exists.  If accept_ancestor is true, then find the
+ * closest existing ancestor for the given path.  In prefixlen return the
+ * length of already existing prefix of the given path.  We also fetch the
+ * 'zoned' property, which is used to validate property settings when creating
+ * new datasets.
+ *
+ * 'prefixlen' may be NULL if the caller does not need the length.
+ *
+ * Returns 0 on success.  On failure an error is recorded on 'hdl' and a
+ * non-zero value is returned (-1, or the result of zfs_error() /
+ * zfs_standard_error()).
+ */
+static int
+check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
+    boolean_t accept_ancestor, int *prefixlen)
+{
+	zfs_cmd_t zc = { 0 };
+	char parent[ZFS_MAXNAMELEN];
+	char *slash;
+	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	/* Localized like the identical prefix built by zfs_create(). */
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), path);
+
+	/* get parent, and check to see if this is just a pool */
+	if (parent_name(path, parent, sizeof (parent)) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "missing dataset name"));
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	}
+
+	/* check to see if the pool exists */
+	if ((slash = strchr(parent, '/')) == NULL)
+		slash = parent + strlen(parent);
+	(void) strncpy(zc.zc_name, parent, slash - parent);
+	zc.zc_name[slash - parent] = '\0';
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
+	    errno == ENOENT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no such pool '%s'"), zc.zc_name);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	/* check to see if the parent dataset exists */
+	while ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
+		if (errno == ENOENT && accept_ancestor) {
+			/*
+			 * Go deeper to find an ancestor, give up on top level.
+			 * 'parent' is shortened in place, one component at a
+			 * time.
+			 */
+			if (parent_name(parent, parent, sizeof (parent)) != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "no such pool '%s'"), zc.zc_name);
+				return (zfs_error(hdl, EZFS_NOENT, errbuf));
+			}
+		} else if (errno == ENOENT) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent does not exist"));
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		} else
+			return (zfs_standard_error(hdl, errno, errbuf));
+	}
+
+	*zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	/* we are in a non-global zone, but parent is in the global zone */
+	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
+		(void) zfs_standard_error(hdl, EPERM, errbuf);
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	/* make sure parent is a filesystem */
+	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "parent is not a filesystem"));
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	zfs_close(zhp);
+	if (prefixlen != NULL)
+		*prefixlen = strlen(parent);
+	return (0);
+}
+
+/*
+ * Report whether a dataset named 'path' exists and its type is one of the
+ * types selected by the 'types' mask.  A name that does not validate for
+ * 'types' is reported as not existing.
+ */
+boolean_t
+zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types)
+{
+	zfs_handle_t *zhp;
+	int ds_type;
+
+	if (!zfs_validate_name(hdl, path, types, B_FALSE))
+		return (B_FALSE);
+
+	/*
+	 * Building a handle fetches the dataset's stats, so success here
+	 * means the dataset exists.
+	 */
+	zhp = make_dataset_handle(hdl, path);
+	if (zhp == NULL)
+		return (B_FALSE);
+
+	ds_type = zhp->zfs_type;
+	zfs_close(zhp);
+
+	return ((types & ds_type) ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Given a path to 'target', create all the ancestors between
+ * the prefixlen portion of the path, and the target itself.
+ * Fail if the initial prefixlen-ancestor does not already exist.
+ *
+ * 'target' is modified in place while the ancestors are walked: each '/'
+ * is temporarily replaced by a NUL so that the leading part can be used as
+ * a dataset name.  On success the string is restored.  On failure it is left
+ * cut at the ancestor that could not be processed, which is the name shown
+ * in the recorded error message.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
+{
+	zfs_handle_t *h;
+	char *cp;
+	const char *opname;
+
+	/* make sure prefix exists */
+	cp = target + prefixlen;
+	if (*cp != '/') {
+		assert(strchr(cp, '/') == NULL);
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+	} else {
+		*cp = '\0';
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		*cp = '/';
+	}
+	if (h == NULL)
+		return (-1);
+	zfs_close(h);
+
+	/*
+	 * Attempt to create, mount, and share any ancestor filesystems,
+	 * up to the prefixlen-long one.
+	 */
+	for (cp = target + prefixlen + 1;
+	    (cp = strchr(cp, '/')); *cp = '/', cp++) {
+		char *logstr;
+
+		*cp = '\0';
+
+		h = make_dataset_handle(hdl, target);
+		if (h) {
+			/* it already exists, nothing to do here */
+			zfs_close(h);
+			continue;
+		}
+
+		/* Clear the log string around the implicit create. */
+		logstr = hdl->libzfs_log_str;
+		hdl->libzfs_log_str = NULL;
+		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
+		    NULL) != 0) {
+			hdl->libzfs_log_str = logstr;
+			opname = dgettext(TEXT_DOMAIN, "create");
+			goto ancestorerr;
+		}
+
+		hdl->libzfs_log_str = logstr;
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		if (h == NULL) {
+			opname = dgettext(TEXT_DOMAIN, "open");
+			goto ancestorerr;
+		}
+
+		/*
+		 * From here on a handle is open; release it before bailing
+		 * out so that a mount or share failure does not leak it.
+		 */
+		if (zfs_mount(h, NULL, 0) != 0) {
+			zfs_close(h);
+			opname = dgettext(TEXT_DOMAIN, "mount");
+			goto ancestorerr;
+		}
+
+		if (zfs_share(h) != 0) {
+			zfs_close(h);
+			opname = dgettext(TEXT_DOMAIN, "share");
+			goto ancestorerr;
+		}
+
+		zfs_close(h);
+	}
+
+	return (0);
+
+ancestorerr:
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+	    "failed to %s ancestor '%s'"), opname, target);
+	return (-1);
+}
+
+/*
+ * Creates non-existing ancestors of the given path.
+ *
+ * The closest existing ancestor is located with check_parents(); the missing
+ * ones are then created by create_parents(), which works on a private,
+ * writable copy of 'path'.  Returns 0 on success and -1 on any failure,
+ * including failure to duplicate the path.
+ */
+int
+zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
+{
+	int prefix;
+	uint64_t zoned;
+	char *path_copy;
+	int rc;
+
+	if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0)
+		return (-1);
+
+	path_copy = strdup(path);
+	if (path_copy == NULL)
+		return (-1);
+
+	rc = create_parents(hdl, path_copy, prefix);
+	free(path_copy);
+
+	return (rc != 0 ? -1 : 0);
+}
+
+/*
+ * Create a new filesystem or volume.
+ *
+ * 'path' is the full name of the dataset to create and 'type' selects
+ * between a filesystem and a volume.  'props' may be NULL; otherwise it is
+ * validated with zfs_valid_proplist() and the validated list is sent along
+ * with the create request.  A volume requires a 'volsize' property.
+ *
+ * Returns 0 on success.  On failure an error is recorded on 'hdl' and a
+ * non-zero value is returned.
+ */
+int
+zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
+    nvlist_t *props)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	int ioctl_errno;
+	uint64_t size = 0;
+	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
+	char errbuf[1024];
+	uint64_t zoned;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), path);
+
+	/* validate the path, taking care to note the extended error message */
+	if (!zfs_validate_name(hdl, path, type, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* validate parents exist */
+	if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0)
+		return (-1);
+
+	/*
+	 * The failure modes when creating a dataset of a different type over
+	 * one that already exists is a little strange.  In particular, if you
+	 * try to create a dataset on top of an existing dataset, the ioctl()
+	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
+	 * first try to see if the dataset exists.
+	 */
+	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
+	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+	}
+
+	if (type == ZFS_TYPE_VOLUME)
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	/*
+	 * From here on 'props' refers to the list returned by
+	 * zfs_valid_proplist(), which this function is responsible for
+	 * freeing on every path.
+	 */
+	if (props && (props = zfs_valid_proplist(hdl, type, props,
+	    zoned, NULL, errbuf)) == 0)
+		return (-1);
+
+	if (type == ZFS_TYPE_VOLUME) {
+		/*
+		 * If we are creating a volume, the size and block size must
+		 * satisfy a few restraints.  First, the blocksize must be a
+		 * valid block size between SPA_{MIN,MAX}BLOCKSIZE.  Second, the
+		 * volsize must be a multiple of the block size, and cannot be
+		 * zero.
+		 */
+		if (props == NULL || nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing volume size"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		if ((ret = nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+		    &blocksize)) != 0) {
+			if (ret == ENOENT) {
+				blocksize = zfs_prop_default_numeric(
+				    ZFS_PROP_VOLBLOCKSIZE);
+			} else {
+				nvlist_free(props);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "missing volume block size"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+			}
+		}
+
+		if (size == 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume size cannot be zero"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		if (size % blocksize != 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume size must be a multiple of volume block "
+			    "size"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+	}
+
+	/* Do not leak the validated list if it cannot be packed. */
+	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+		nvlist_free(props);
+		return (-1);
+	}
+	nvlist_free(props);
+
+	/*
+	 * create the dataset; remember errno right away, because the cleanup
+	 * calls made before it is examined are allowed to change it
+	 */
+	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
+	ioctl_errno = errno;
+
+	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(hdl, path);
+		if (ret) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully created, but device links "
+			    "were not created"));
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/* check for failure */
+	if (ret != 0) {
+		char parent[ZFS_MAXNAMELEN];
+		(void) parent_name(path, parent, sizeof (parent));
+
+		switch (ioctl_errno) {
+		case ENOENT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+		case EINVAL:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent '%s' is not a filesystem"), parent);
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+
+		case EDOM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume block size must be power of 2 from "
+			    "%u to %uk"),
+			    (uint_t)SPA_MINBLOCKSIZE,
+			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
+
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to set this "
+			    "property or value"));
+			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+#ifdef _ILP32
+		case EOVERFLOW:
+			/*
+			 * This platform can't address a volume this big.
+			 */
+			if (type == ZFS_TYPE_VOLUME)
+				return (zfs_error(hdl, EZFS_VOLTOOBIG,
+				    errbuf));
+#endif
+			/* FALLTHROUGH */
+		default:
+			return (zfs_standard_error(hdl, ioctl_errno, errbuf));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Destroys the given dataset.  The caller must make sure that the filesystem
+ * isn't mounted, and that there are no active dependents.
+ *
+ * Returns 0 on success, -1 if a volume could not be unshared or its link
+ * could not be removed, or the result of zfs_standard_error_fmt() if the
+ * destroy request itself fails.
+ */
+int
+zfs_destroy(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (!ZFS_IS_VOLUME(zhp)) {
+		zc.zc_objset_type = DMU_OST_ZFS;
+	} else {
+		/*
+		 * If user doesn't have permissions to unshare volume, then
+		 * abort the request.  This would only happen for a
+		 * non-privileged user.  The link is removed only after the
+		 * unshare has succeeded.
+		 */
+		if (zfs_unshare_iscsi(zhp) != 0 ||
+		    zvol_remove_link(hdl, zhp->zfs_name) != 0)
+			return (-1);
+
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	}
+
+	if (zfs_ioctl(hdl, ZFS_IOC_DESTROY, &zc) != 0)
+		return (zfs_standard_error_fmt(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
+		    zhp->zfs_name));
+
+	remove_mountpoint(zhp);
+
+	return (0);
+}
+
+/*
+ * State shared between zfs_destroy_snaps() and its recursive callback,
+ * zfs_remove_link_cb().
+ */
+struct destroydata {
+	/* snapshot name (the part after '@') looked for in each dataset */
+	char *snapname;
+	/* set to B_TRUE once a dataset with such a snapshot has been seen */
+	boolean_t gotone;
+	/* when B_TRUE, the callback closes the handle it was given */
+	boolean_t closezhp;
+};
+
+/*
+ * Recursive helper for zfs_destroy_snaps().  For 'zhp' and each filesystem
+ * below it, note whether the snapshot "<dataset>@<snapname>" exists and, for
+ * volumes, remove that snapshot's link.
+ *
+ * The handle is closed here only when the callback was entered with
+ * dd->closezhp set, which is the case for every level below the first.
+ */
+static int
+zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct destroydata *dd = arg;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	/* Sample the flag before it is forced on for the children. */
+	boolean_t owned = dd->closezhp;
+	char snap[ZFS_MAXNAMELEN];
+	zfs_handle_t *szhp;
+	int rv;
+
+	(void) strlcpy(snap, zhp->zfs_name, sizeof (snap));
+	(void) strlcat(snap, "@", sizeof (snap));
+	(void) strlcat(snap, dd->snapname, sizeof (snap));
+
+	if ((szhp = make_dataset_handle(hdl, snap)) != NULL) {
+		dd->gotone = B_TRUE;
+		zfs_close(szhp);
+	}
+
+	/*
+	 * NB: this is simply a best-effort.  We don't want to
+	 * return an error, because then we wouldn't visit all
+	 * the volumes.
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME)
+		(void) zvol_remove_link(hdl, snap);
+
+	dd->closezhp = B_TRUE;
+	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
+
+	if (owned)
+		zfs_close(zhp);
+
+	return (rv);
+}
+
+/*
+ * Destroys all snapshots with the given name in zhp & descendants.
+ *
+ * The hierarchy is walked first to remove volume links and to find out
+ * whether any such snapshot exists at all; if none does, ENOENT is reported
+ * without issuing the destroy request.
+ */
+int
+zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	struct destroydata dd = { 0 };
+	zfs_cmd_t zc = { 0 };
+	char errbuf[1024];
+
+	dd.snapname = snapname;
+	(void) zfs_remove_link_cb(zhp, &dd);
+
+	if (!dd.gotone)
+		return (zfs_standard_error_fmt(hdl, ENOENT,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
+		    zhp->zfs_name, snapname));
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+
+	if (zfs_ioctl(hdl, ZFS_IOC_DESTROY_SNAPS, &zc) == 0)
+		return (0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot destroy '%s@%s'"), zc.zc_name, snapname);
+
+	if (errno == EEXIST) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot is cloned"));
+		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+	}
+
+	return (zfs_standard_error(hdl, errno, errbuf));
+}
+
+/*
+ * Clones the given dataset.  The target must be of the same type as the source.
+ *
+ * 'zhp' must be a snapshot handle (asserted).  'target' is the name of the
+ * clone to create and 'props', which may be NULL, holds properties to set on
+ * it; they are validated against the type of the snapshot's dataset.
+ *
+ * Returns 0 on success.  On failure an error is recorded on the library
+ * handle and a non-zero value is returned.  For a volume, the result of
+ * zvol_create_link() on the new clone is returned.
+ */
+int
+zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
+{
+	zfs_cmd_t zc = { 0 };
+	char parent[ZFS_MAXNAMELEN];
+	int ret;
+	int ioctl_errno;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_type_t type;
+	uint64_t zoned;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), target);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* validate parents exist */
+	if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0)
+		return (-1);
+
+	(void) parent_name(target, parent, sizeof (parent));
+
+	/* do the clone */
+	if (ZFS_IS_VOLUME(zhp)) {
+		zc.zc_objset_type = DMU_OST_ZVOL;
+		type = ZFS_TYPE_VOLUME;
+	} else {
+		zc.zc_objset_type = DMU_OST_ZFS;
+		type = ZFS_TYPE_FILESYSTEM;
+	}
+
+	if (props) {
+		if ((props = zfs_valid_proplist(hdl, type, props, zoned,
+		    zhp, errbuf)) == NULL)
+			return (-1);
+
+		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+
+		nvlist_free(props);
+	}
+
+	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
+	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc);
+	/* Capture errno before the cleanup below has a chance to alter it. */
+	ioctl_errno = errno;
+
+	zcmd_free_nvlists(&zc);
+
+	if (ret != 0) {
+		switch (ioctl_errno) {
+
+		case ENOENT:
+			/*
+			 * The parent doesn't exist.  We should have caught this
+			 * above, but there may be a race condition that has
+			 * since destroyed the parent.
+			 *
+			 * At this point, we don't know whether it's the source
+			 * that doesn't exist anymore, or whether the target
+			 * dataset doesn't exist.
+			 */
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
+
+		case EXDEV:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "source and target pools differ"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
+			    errbuf));
+
+		default:
+			return (zfs_standard_error(zhp->zfs_hdl, ioctl_errno,
+			    errbuf));
+		}
+	} else if (ZFS_IS_VOLUME(zhp)) {
+		ret = zvol_create_link(zhp->zfs_hdl, target);
+	}
+
+	return (ret);
+}
+
+/*
+ * State handed by zfs_promote() to its snapshot callbacks.
+ */
+typedef struct promote_data {
+	/*
+	 * Filled by zfs_promote() with the mountpoint property of the
+	 * origin's dataset; not read by the callbacks visible here.
+	 */
+	char cb_mountpoint[MAXPATHLEN];
+	/* name of the clone being promoted */
+	const char *cb_target;
+	/* message prefix used when a name conflict is reported */
+	const char *cb_errbuf;
+	/* creation txg of the origin snapshot */
+	uint64_t cb_pivot_txg;
+} promote_data_t;
+
+/*
+ * Snapshot callback run by zfs_promote() before the promote request.  For
+ * each snapshot created at or before the pivot txg, remove its device link
+ * if it belongs to a volume, and report an error if the promotion target
+ * already has a snapshot with the same name.  Always closes 'zhp'.
+ */
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char conflict[MAXPATHLEN];
+	zfs_handle_t *existing;
+	int rv = 0;
+
+	/* We don't care about snapshots after the pivot point */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
+		/* Remove the device link if it's a zvol. */
+		if (ZFS_IS_VOLUME(zhp))
+			(void) zvol_remove_link(hdl, zhp->zfs_name);
+
+		/* Build "<target>@<snap>" and check for conflicting names */
+		(void) strlcpy(conflict, pd->cb_target, sizeof (conflict));
+		(void) strlcat(conflict, strchr(zhp->zfs_name, '@'),
+		    sizeof (conflict));
+
+		existing = make_dataset_handle(hdl, conflict);
+		if (existing != NULL) {
+			zfs_close(existing);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot name '%s' from origin \n"
+			    "conflicts with '%s' from target"),
+			    zhp->zfs_name, conflict);
+			rv = zfs_error(hdl, EZFS_EXISTS, pd->cb_errbuf);
+		}
+	}
+
+	zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Snapshot callback run by zfs_promote() after the promote request.  Volume
+ * snapshots created at or before the pivot txg get their device link
+ * created again; later snapshots are left alone.  Always closes 'zhp' and
+ * returns 0.
+ */
+static int
+promote_snap_done_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	uint64_t txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
+
+	/* Create the device link if it's a zvol at or before the pivot. */
+	if (txg <= pd->cb_pivot_txg && ZFS_IS_VOLUME(zhp))
+		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Promotes the given clone fs to be the clone parent.
+ *
+ * 'zhp' must not be a snapshot and must have an origin; otherwise
+ * EZFS_BADTYPE is reported.  Before the request is issued, the snapshots of
+ * the origin's dataset are walked with promote_snap_cb() to detect name
+ * conflicts; afterwards promote_snap_done_cb() is run over the snapshots of
+ * the origin's dataset (on failure) or of 'zhp' (on success).
+ *
+ * Returns 0 on success.  On failure an error is recorded on the library
+ * handle and a non-zero value is returned.
+ */
+int
+zfs_promote(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	char parent[MAXPATHLEN];
+	char *cp;
+	int ret;
+	zfs_handle_t *pzhp;
+	promote_data_t pd;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot promote '%s'"), zhp->zfs_name);
+
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots can not be promoted"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+
+	/* An empty origin means this dataset is not a clone. */
+	(void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent));
+	if (parent[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "not a cloned filesystem"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+	/*
+	 * Cut the origin at '@' so that 'parent' names the origin's dataset.
+	 * NOTE(review): assumes the origin always contains '@' -- confirm.
+	 */
+	cp = strchr(parent, '@');
+	*cp = '\0';
+
+	/* Walk the snapshots we will be moving */
+	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+	if (pzhp == NULL)
+		return (-1);
+	/* The origin snapshot's creation txg is the pivot point. */
+	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+	zfs_close(pzhp);
+	pd.cb_target = zhp->zfs_name;
+	pd.cb_errbuf = errbuf;
+	/* From here on 'pzhp' refers to the origin's dataset. */
+	pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
+	if (pzhp == NULL)
+		return (-1);
+	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+	if (ret != 0) {
+		zfs_close(pzhp);
+		return (-1);
+	}
+
+	/* issue the ioctl */
+	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
+	    sizeof (zc.zc_value));
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+
+	if (ret != 0) {
+		/* Saved because the calls below may change errno. */
+		int save_errno = errno;
+
+		/* Re-create the links removed by promote_snap_cb(). */
+		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
+		zfs_close(pzhp);
+
+		/* Every case below returns, so pzhp is closed only once. */
+		switch (save_errno) {
+		case EEXIST:
+			/*
+			 * There is a conflicting snapshot name.  We
+			 * should have caught this above, but they could
+			 * have renamed something in the mean time.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "conflicting snapshot name from parent '%s'"),
+			    parent);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, save_errno, errbuf));
+		}
+	} else {
+		/* On success the walk is over the snapshots of 'zhp'. */
+		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
+	}
+
+	zfs_close(pzhp);
+	return (ret);
+}
+
+/*
+ * Argument block for zfs_create_link_cb().
+ */
+struct createdata {
+	/* snapshot name (the part after '@') appended to each volume name */
+	const char *cd_snapname;
+	/*
+	 * Passed through unchanged to zvol_create_link_common().
+	 * NOTE(review): presumably limits link creation to snapshots that
+	 * exist -- confirm against that function.
+	 */
+	int cd_ifexists;
+};
+
+/*
+ * Recursive callback that requests a device link for the snapshot
+ * "<volume>@<cd_snapname>" of every volume it visits, then descends through
+ * zfs_iter_filesystems().  Always closes 'zhp'; returns the result of the
+ * descent.
+ */
+static int
+zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct createdata *cd = arg;
+	char snap[MAXPATHLEN];
+	int rv;
+
+	/*
+	 * NB: this is simply a best-effort.  We don't want to
+	 * return an error, because then we wouldn't visit all
+	 * the volumes.
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		(void) strlcpy(snap, zhp->zfs_name, sizeof (snap));
+		(void) strlcat(snap, "@", sizeof (snap));
+		(void) strlcat(snap, cd->cd_snapname, sizeof (snap));
+		(void) zvol_create_link_common(zhp->zfs_hdl, snap,
+		    cd->cd_ifexists);
+	}
+
+	rv = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
+	zfs_close(zhp);
+
+	return (rv);
+}
+
+/*
+ * Takes a snapshot of the given dataset.
+ *
+ * 'path' is the full snapshot name ("dataset@snap").  If 'recursive' is set
+ * the request is flagged as recursive, and after it succeeds device links
+ * are requested for the matching snapshots of volumes found below the
+ * dataset.  'props' may be NULL; otherwise it is validated as a set of
+ * snapshot properties and sent along with the request.
+ *
+ * Returns 0 on success.  On failure an error is recorded on 'hdl' and a
+ * non-zero value is returned.
+ */
+int
+zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
+    nvlist_t *props)
+{
+	const char *delim;
+	char parent[ZFS_MAXNAMELEN];
+	zfs_handle_t *zhp;
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	int ioctl_errno;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot snapshot '%s'"), path);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	if (props) {
+		if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
+		    props, B_FALSE, NULL, errbuf)) == NULL)
+			return (-1);
+
+		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+
+		nvlist_free(props);
+	}
+
+	/* make sure the parent exists and is of the appropriate type */
+	delim = strchr(path, '@');
+	(void) strncpy(parent, path, delim - path);
+	parent[delim - path] = '\0';
+
+	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
+	    ZFS_TYPE_VOLUME)) == NULL) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+	zc.zc_cookie = recursive;
+	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc);
+	/*
+	 * Remember why the request failed: the cleanup and message
+	 * formatting done before the error is reported may change errno.
+	 */
+	ioctl_errno = errno;
+
+	zcmd_free_nvlists(&zc);
+
+	/*
+	 * if it was recursive, the one that actually failed will be in
+	 * zc.zc_name.
+	 */
+	if (ret != 0)
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
+
+	if (ret == 0 && recursive) {
+		struct createdata cd;
+
+		cd.cd_snapname = delim + 1;
+		cd.cd_ifexists = B_FALSE;
+		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
+	}
+	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(zhp->zfs_hdl, path);
+		if (ret != 0) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully snapshotted, but device links "
+			    "were not created"));
+			zfs_close(zhp);
+			return (-1);
+		}
+	}
+
+	/* Only a failed snapshot request can still be pending here. */
+	if (ret != 0)
+		(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
+
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * Destroy any more recent snapshots.  We invoke this callback on any dependents
+ * of the snapshot first.  If the 'cb_dependent' member is non-zero, then this
+ * is a dependent and we should just destroy it without checking the transaction
+ * group.
+ *
+ * The structure below carries the state for that callback
+ * (rollback_destroy()).
+ */
+typedef struct rollback_data {
+	const char	*cb_target;		/* the snapshot */
+	uint64_t	cb_create;		/* creation txg of the target */
+	boolean_t	cb_error;		/* set if any destroy failed */
+	boolean_t	cb_dependent;		/* visiting a dependent */
+	boolean_t	cb_force;		/* use MS_FORCE for dependents */
+} rollback_data_t;
+
+/*
+ * Iteration callback used during rollback.
+ *
+ * When not visiting a dependent, 'zhp' is destroyed only if it is a
+ * snapshot other than the target whose creation txg is newer than the
+ * target's; its dependents are destroyed first by re-entering this callback
+ * with cb_dependent set.  When visiting a dependent, the dataset is
+ * destroyed unconditionally, bracketed by a changelist.
+ *
+ * Failures are accumulated in cbp->cb_error; the function itself always
+ * returns 0 so that the iteration continues, and always closes 'zhp'.
+ */
+static int
+rollback_destroy(zfs_handle_t *zhp, void *data)
+{
+	rollback_data_t *cbp = data;
+
+	if (!cbp->cb_dependent) {
+		if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 &&
+		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
+		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
+		    cbp->cb_create) {
+			char *logstr;
+
+			cbp->cb_dependent = B_TRUE;
+			cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE,
+			    rollback_destroy, cbp);
+			cbp->cb_dependent = B_FALSE;
+
+			/* Clear the log string around the implicit destroy. */
+			logstr = zhp->zfs_hdl->libzfs_log_str;
+			zhp->zfs_hdl->libzfs_log_str = NULL;
+			cbp->cb_error |= zfs_destroy(zhp);
+			zhp->zfs_hdl->libzfs_log_str = logstr;
+		}
+	} else {
+		/* We must destroy this clone; first unmount it */
+		prop_changelist_t *clp;
+
+		clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+		    cbp->cb_force ? MS_FORCE: 0);
+		if (clp == NULL || changelist_prefix(clp) != 0) {
+			cbp->cb_error = B_TRUE;
+			/* A gathered list must be released even on failure. */
+			if (clp != NULL)
+				changelist_free(clp);
+			zfs_close(zhp);
+			return (0);
+		}
+		if (zfs_destroy(zhp) != 0)
+			cbp->cb_error = B_TRUE;
+		else
+			changelist_remove(clp, zhp->zfs_name);
+		(void) changelist_postfix(clp);
+		changelist_free(clp);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Given a dataset, rollback to a specific snapshot, discarding any
+ * data changes since then and making it the active dataset.
+ *
+ * Any snapshots more recent than the target are destroyed, along with
+ * their dependents.
+ *
+ * 'zhp' must be a filesystem or volume handle (asserted).  'snap' is the
+ * target snapshot.  'force' is handed to the destroy callback, which uses it
+ * to force-unmount dependent clones.
+ *
+ * Returns 0 on success.  Returns -1 if destroying the newer snapshots or
+ * preparing a volume fails; otherwise returns the non-zero result of the
+ * rollback ioctl, zvol_create_link() or zfs_prop_set_int().
+ */
+int
+zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
+{
+	rollback_data_t cb = { 0 };
+	int err;
+	zfs_cmd_t zc = { 0 };
+	boolean_t restore_resv = 0;
+	uint64_t old_volsize = 0, new_volsize;
+	zfs_prop_t resv_prop;
+
+	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
+	    zhp->zfs_type == ZFS_TYPE_VOLUME);
+
+	/*
+	 * Destroy all more recent snapshots and their dependents.
+	 */
+	cb.cb_force = force;
+	cb.cb_target = snap->zfs_name;
+	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
+	/* failures are reported through cb.cb_error, not the return value */
+	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
+
+	if (cb.cb_error)
+		return (-1);
+
+	/*
+	 * Now that we have verified that the snapshot is the latest,
+	 * rollback to the given snapshot.
+	 */
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		/*
+		 * Remove the volume's minor node before rolling back, and
+		 * remember whether the reservation currently tracks volsize
+		 * so it can be re-synchronized afterwards.
+		 */
+		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+			return (-1);
+		if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
+			return (-1);
+		old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
+		restore_resv =
+		    (old_volsize == zfs_prop_get_int(zhp, resv_prop));
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	/*
+	 * We rely on zfs_iter_children() to verify that there are no
+	 * newer snapshots for the given dataset.  Therefore, we can
+	 * simply pass the name on to the ioctl() call.  There is still
+	 * an unlikely race condition where the user has taken a
+	 * snapshot since we verified that this was the most recent.
+	 *
+	 */
+	if ((err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) {
+		(void) zfs_standard_error_fmt(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
+		    zhp->zfs_name);
+		return (err);
+	}
+
+	/*
+	 * For volumes, if the pre-rollback volsize matched the pre-
+	 * rollback reservation and the volsize has changed then set
+	 * the reservation property to the post-rollback volsize.
+	 * Make a new handle since the rollback closed the dataset.
+	 *
+	 * Note that the local 'zhp' is reassigned to the new handle here;
+	 * only that new handle is closed below.  If the new handle cannot
+	 * be created the whole step is skipped and 'err' (0 at this point)
+	 * is returned.
+	 */
+	if ((zhp->zfs_type == ZFS_TYPE_VOLUME) &&
+	    (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) {
+		if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) {
+			zfs_close(zhp);
+			return (err);
+		}
+		if (restore_resv) {
+			new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
+			if (old_volsize != new_volsize)
+				err = zfs_prop_set_int(zhp, resv_prop,
+				    new_volsize);
+		}
+		zfs_close(zhp);
+	}
+	return (err);
+}
+
+/*
+ * Invoke 'func' on every dependent of the given dataset: its children in the
+ * namespace as well as data dependents (snapshots and clones).  The list of
+ * names is computed by get_dependents() in libzfs_graph.c; this routine only
+ * opens a handle for each name and hands it to the callback.
+ *
+ * Names for which no handle can be created are silently skipped.  Iteration
+ * stops at the first non-zero callback result, which is then returned.
+ * Returns -1 if the dependent list cannot be obtained, 0 otherwise.
+ */
+int
+zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion,
+    zfs_iter_f func, void *data)
+{
+	char **names;
+	size_t nnames;
+	size_t n;
+	int rv = 0;
+
+	if (get_dependents(zhp->zfs_hdl, allowrecursion, zhp->zfs_name,
+	    &names, &nnames) != 0)
+		return (-1);
+
+	for (n = 0; n < nnames && rv == 0; n++) {
+		zfs_handle_t *dep;
+
+		dep = make_dataset_handle(zhp->zfs_hdl, names[n]);
+		if (dep != NULL)
+			rv = func(dep, data);
+	}
+
+	/* the name array and every string in it belong to us */
+	for (n = 0; n < nnames; n++)
+		free(names[n]);
+	free(names);
+
+	return (rv);
+}
+
+/*
+ * Renames the given dataset.
+ *
+ * For a snapshot handle, 'target' may be a full snapshot name within the
+ * same dataset, or an abbreviated form ("newsnap" or "@newsnap") which is
+ * expanded against the current dataset name.  For any other dataset type,
+ * 'target' must be a valid name in the same pool whose parents exist, and it
+ * may not lie underneath the dataset being renamed.
+ *
+ * 'recursive' is only accepted for snapshots; it renames the snapshot of the
+ * same name in the descendant filesystems as well, removing zvol links
+ * beforehand and recreating them afterwards.  For the non-recursive case the
+ * affected datasets are handled through a changelist around the rename.
+ *
+ * Returns 0 on success (including when the name is unchanged) and non-zero
+ * on failure.
+ */
+int
+zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
+{
+	int ret;
+	zfs_cmd_t zc = { 0 };
+	char *delim;
+	prop_changelist_t *cl = NULL;
+	zfs_handle_t *zhrp = NULL;
+	char *parentname = NULL;
+	char parent[ZFS_MAXNAMELEN];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+
+	/* if we have the same exact name, just return success */
+	if (strcmp(zhp->zfs_name, target) == 0)
+		return (0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot rename to '%s'"), target);
+
+	/*
+	 * Make sure the target name is valid
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		if ((strchr(target, '@') == NULL) ||
+		    *target == '@') {
+			/*
+			 * Snapshot target name is abbreviated,
+			 * reconstruct full dataset name
+			 */
+			(void) strlcpy(parent, zhp->zfs_name,
+			    sizeof (parent));
+			delim = strchr(parent, '@');
+			/*
+			 * Keep the '@' when the target does not supply one,
+			 * drop it when the target brings its own.
+			 */
+			if (strchr(target, '@') == NULL)
+				*(++delim) = '\0';
+			else
+				*delim = '\0';
+			(void) strlcat(parent, target, sizeof (parent));
+			target = parent;
+		} else {
+			/*
+			 * Make sure we're renaming within the same dataset.
+			 */
+			delim = strchr(target, '@');
+			if (strncmp(zhp->zfs_name, target, delim - target)
+			    != 0 || zhp->zfs_name[delim - target] != '@') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "snapshots must be part of same "
+				    "dataset"));
+				return (zfs_error(hdl, EZFS_CROSSTARGET,
+				    errbuf));
+			}
+		}
+		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	} else {
+		uint64_t unused;
+		size_t namelen;
+
+		if (recursive) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "recursive rename must be a snapshot"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		}
+
+		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+		/* validate parents */
+		if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0)
+			return (-1);
+
+		(void) parent_name(target, parent, sizeof (parent));
+
+		/* make sure we're in the same pool */
+		verify((delim = strchr(target, '/')) != NULL);
+		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
+		    zhp->zfs_name[delim - target] != '/') {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "datasets must be within same pool"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+		}
+
+		/*
+		 * new name cannot be a child of the current dataset name.
+		 * A plain prefix comparison is not enough: "pool/ab" starts
+		 * with "pool/a" without being underneath it.  The target's
+		 * parent must either be the current name itself or continue
+		 * with a '/' right after it.  When the prefix matches,
+		 * 'parent' is at least 'namelen' characters long, so reading
+		 * parent[namelen] is safe.
+		 */
+		namelen = strlen(zhp->zfs_name);
+		if (strncmp(parent, zhp->zfs_name, namelen) == 0 &&
+		    (parent[namelen] == '/' || parent[namelen] == '\0')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "New dataset name cannot be a descendent of "
+			    "current dataset name"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+	}
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
+
+	if (getzoneid() == GLOBAL_ZONEID &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
+	}
+
+	if (recursive) {
+		struct destroydata dd;
+
+		/*
+		 * Split "fs@snap" into the filesystem name (parentname) and
+		 * the snapshot name (delim + 1); 'delim' keeps pointing into
+		 * 'parentname' and is used again on the failure path below.
+		 */
+		parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
+		if (parentname == NULL) {
+			ret = -1;
+			goto error;
+		}
+		delim = strchr(parentname, '@');
+		*delim = '\0';
+		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET);
+		if (zhrp == NULL) {
+			ret = -1;
+			goto error;
+		}
+
+		dd.snapname = delim + 1;
+		dd.gotone = B_FALSE;
+		dd.closezhp = B_TRUE;
+
+		/* We remove any zvol links prior to renaming them */
+		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
+		if (ret) {
+			goto error;
+		}
+	} else {
+		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL)
+			return (-1);
+
+		if (changelist_haszonedchild(cl)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "child dataset with inherited mountpoint is used "
+			    "in a non-global zone"));
+			(void) zfs_error(hdl, EZFS_ZONED, errbuf);
+			ret = -1;
+			goto error;
+		}
+
+		if ((ret = changelist_prefix(cl)) != 0)
+			goto error;
+	}
+
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
+
+	zc.zc_cookie = recursive;
+
+	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) {
+		/*
+		 * if it was recursive, the one that actually failed will
+		 * be in zc.zc_name
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot rename '%s'"), zc.zc_name);
+
+		if (recursive && errno == EEXIST) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "a child dataset already has a snapshot "
+			    "with the new name"));
+			(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
+		} else {
+			(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
+		}
+
+		/*
+		 * On failure, we still want to remount any filesystems that
+		 * were previously mounted, so we don't alter the system state.
+		 */
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = delim + 1;
+			cd.cd_ifexists = B_TRUE;
+			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
+			(void) changelist_postfix(cl);
+		}
+	} else {
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = strchr(target, '@') + 1;
+			cd.cd_ifexists = B_TRUE;
+			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
+			changelist_rename(cl, zfs_get_name(zhp), target);
+			ret = changelist_postfix(cl);
+		}
+	}
+
+error:
+	if (parentname) {
+		free(parentname);
+	}
+	if (zhrp) {
+		zfs_close(zhrp);
+	}
+	if (cl) {
+		changelist_free(cl);
+	}
+	return (ret);
+}
+
+/*
+ * Create the minor node and /dev link for the given zvol dataset.  This is
+ * the strict variant of zvol_create_link_common(): a dataset that does not
+ * exist in the kernel is reported as an error rather than ignored.
+ */
+int
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	int rv;
+
+	rv = zvol_create_link_common(hdl, dataset, B_FALSE);
+	return (rv);
+}
+
+/*
+ * Issue the ioctl that creates the minor node for a zvol, then make sure the
+ * /dev link shows up.
+ *
+ * An already existing minor node (EEXIST) is treated as success.  When
+ * 'ifexists' is non-zero, a dataset unknown to the kernel (ENOENT) is also
+ * quietly treated as success.  Any other ioctl failure is reported through
+ * zfs_standard_error_fmt() and its result returned.
+ *
+ * With a full effective privilege set the device links are created through
+ * di_devlink_init(); if that fails the minor node is removed again and -1 is
+ * returned.  Without full privileges we poll for the link for up to MAX_WAIT
+ * seconds and merely print a notice if it has not appeared.
+ */
+static int
+zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
+{
+	zfs_cmd_t zc = { 0 };
+	di_devlink_handle_t dhdl;
+	priv_set_t *priv_effective;
+	int privileged;
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	/*
+	 * Issue the appropriate ioctl.
+	 */
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+		switch (errno) {
+		case EEXIST:
+			/*
+			 * Silently ignore the case where the link already
+			 * exists.  This allows 'zfs volinit' to be run multiple
+			 * times without errors.
+			 */
+			return (0);
+
+		case ENOENT:
+			/*
+			 * Dataset does not exist in the kernel.  If we
+			 * don't care (see zfs_rename), then ignore the
+			 * error quietly.
+			 */
+			if (ifexists) {
+				return (0);
+			}
+
+			/* FALLTHROUGH */
+
+		default:
+			return (zfs_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset));
+		}
+	}
+
+	/*
+	 * If privileged call devfsadm and wait for the links to
+	 * magically appear.
+	 * Otherwise, print out an informational message.
+	 */
+
+	priv_effective = priv_allocset();
+	(void) getppriv(PRIV_EFFECTIVE, priv_effective);
+	privileged = (priv_isfullset(priv_effective) == B_TRUE);
+	priv_freeset(priv_effective);
+
+	if (privileged) {
+		if ((dhdl = di_devlink_init(ZFS_DRIVER,
+		    DI_MAKE_LINK)) == NULL) {
+			/*
+			 * Capture errno once: the reporting calls below may
+			 * themselves change it.  The strerror() text is
+			 * passed as an argument, never as the format string,
+			 * since zfs_error_aux() takes a printf-style format.
+			 */
+			int err = errno;
+
+			zfs_error_aux(hdl, "%s", strerror(err));
+			(void) zfs_error_fmt(hdl, err,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset);
+			/* undo the minor node we just created */
+			(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
+			return (-1);
+		} else {
+			(void) di_devlink_fini(&dhdl);
+		}
+	} else {
+		char pathname[MAXPATHLEN];
+		struct stat64 statbuf;
+		int i;
+
+#define	MAX_WAIT	10
+
+		/*
+		 * This is the poor mans way of waiting for the link
+		 * to show up.  If after 10 seconds we still don't
+		 * have it, then print out a message.
+		 */
+		(void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
+		    dataset);
+
+		for (i = 0; i != MAX_WAIT; i++) {
+			if (stat64(pathname, &statbuf) == 0)
+				break;
+			(void) sleep(1);
+		}
+		if (i == MAX_WAIT)
+			(void) printf(gettext("%s may not be immediately "
+			    "available\n"), pathname);
+	}
+
+	return (0);
+}
+
+/*
+ * Tear down the minor node (and with it the /dev links) of the given zvol.
+ * Returns 0 on success or when the node is already gone; otherwise returns
+ * the result of zfs_standard_error_fmt().
+ */
+int
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) == 0)
+		return (0);
+
+	/*
+	 * A link that no longer exists is not an error; this lets
+	 * 'zfs volfini' be run repeatedly without complaints.
+	 */
+	if (errno == ENXIO)
+		return (0);
+
+	return (zfs_standard_error_fmt(hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot remove device "
+	    "links for '%s'"), dataset));
+}
+
+/*
+ * Accessor for the user property nvlist stored in the dataset handle.  The
+ * handle's own pointer is returned, not a copy.
+ */
+nvlist_t *
+zfs_get_user_props(zfs_handle_t *zhp)
+{
+	nvlist_t *props = zhp->zfs_user_props;
+
+	return (props);
+}
+
+/*
+ * This function is used by 'zfs list' to determine the exact set of columns to
+ * display, and their maximum widths.  This does two main things:
+ *
+ *      - If this is a list of all properties, then expand the list to include
+ *        all native properties, and set a flag so that for each dataset we look
+ *        for new unique user properties and add them to the list.
+ *
+ *      - For non fixed-width properties, keep track of the maximum width seen
+ *        so that we can size the column appropriately.
+ *
+ * '*plp' is the head of the property list and may be extended in place.
+ * Returns 0 on success, or -1 if the list cannot be expanded or a new entry
+ * cannot be allocated.
+ */
+int
+zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zprop_list_t *entry;
+	zprop_list_t **last, **start;
+	nvlist_t *userprops, *propval;
+	nvpair_t *elem;
+	char *strval;
+	char buf[ZFS_MAXPROPLEN];
+
+	if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0)
+		return (-1);
+
+	userprops = zfs_get_user_props(zhp);
+
+	entry = *plp;
+	/* only bother if "all" was requested and this dataset has user props */
+	if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) {
+		/*
+		 * Go through and add any user properties as necessary.  We
+		 * start by incrementing our list pointer to the first
+		 * non-native property.
+		 */
+		start = plp;
+		while (*start != NULL) {
+			if ((*start)->pl_prop == ZPROP_INVAL)
+				break;
+			start = &(*start)->pl_next;
+		}
+
+		elem = NULL;
+		while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) {
+			/*
+			 * See if we've already found this property in our list.
+			 * 'last' walks the link pointers, so when the search
+			 * fails it is left pointing at the terminating NULL
+			 * link, which is where a new entry gets appended.
+			 */
+			for (last = start; *last != NULL;
+			    last = &(*last)->pl_next) {
+				if (strcmp((*last)->pl_user_prop,
+				    nvpair_name(elem)) == 0)
+					break;
+			}
+
+			if (*last == NULL) {
+				/*
+				 * Not in the list yet: allocate an entry and a
+				 * private copy of the name.  If only the name
+				 * copy fails, the entry itself is released.
+				 * NOTE(review): pl_next and pl_fixed are never
+				 * assigned here, so this presumably relies on
+				 * zfs_alloc() returning zeroed memory -- confirm.
+				 */
+				if ((entry = zfs_alloc(hdl,
+				    sizeof (zprop_list_t))) == NULL ||
+				    ((entry->pl_user_prop = zfs_strdup(hdl,
+				    nvpair_name(elem)))) == NULL) {
+					free(entry);
+					return (-1);
+				}
+
+				entry->pl_prop = ZPROP_INVAL;
+				entry->pl_width = strlen(nvpair_name(elem));
+				entry->pl_all = B_TRUE;
+				*last = entry;
+			}
+		}
+	}
+
+	/*
+	 * Now go through and check the width of any non-fixed columns
+	 */
+	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
+		if (entry->pl_fixed)
+			continue;
+
+		if (entry->pl_prop != ZPROP_INVAL) {
+			/* native property: measure its string form */
+			if (zfs_prop_get(zhp, entry->pl_prop,
+			    buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) {
+				if (strlen(buf) > entry->pl_width)
+					entry->pl_width = strlen(buf);
+			}
+		} else if (nvlist_lookup_nvlist(userprops,
+		    entry->pl_user_prop, &propval)  == 0) {
+			/* user property present on this dataset */
+			verify(nvlist_lookup_string(propval,
+			    ZPROP_VALUE, &strval) == 0);
+			if (strlen(strval) > entry->pl_width)
+				entry->pl_width = strlen(strval);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Ask the kernel, via ZFS_IOC_ISCSI_PERM_CHECK, whether the credential
+ * 'cred' may act on 'dataset'.  The effective uid, effective gid and
+ * group list of the credential are packed into an nvlist that is handed
+ * to the ioctl.
+ *
+ * Returns the ioctl result on completion, the result of no_memory() if the
+ * nvlist cannot be allocated, 1 if the credential is unusable or the nvlist
+ * cannot be populated, and -1 if the nvlist cannot be attached to the
+ * command.  The nvlist is released on every path.
+ */
+int
+zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *nvp;
+	gid_t gid;
+	uid_t uid;
+	const gid_t *groups;
+	int group_cnt;
+	int error;
+
+	if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
+		return (no_memory(hdl));
+
+	uid = ucred_geteuid(cred);
+	gid = ucred_getegid(cred);
+	group_cnt = ucred_getgroups(cred, &groups);
+
+	/* each accessor is compared against -1 in its own return type */
+	if (uid == (uid_t)-1 || gid == (gid_t)-1 || group_cnt == -1) {
+		nvlist_free(nvp);
+		return (1);
+	}
+
+	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) {
+		nvlist_free(nvp);
+		return (1);
+	}
+
+	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) {
+		nvlist_free(nvp);
+		return (1);
+	}
+
+	if (nvlist_add_uint32_array(nvp,
+	    ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) {
+		nvlist_free(nvp);
+		return (1);
+	}
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvp)) {
+		nvlist_free(nvp);
+		return (-1);
+	}
+
+	/*
+	 * NOTE(review): whatever zcmd_write_src_nvlist() attached to 'zc' is
+	 * not released here; confirm whether a matching cleanup call is
+	 * required after the ioctl.
+	 */
+	error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc);
+	nvlist_free(nvp);
+	return (error);
+}
+
+/*
+ * Fill in a share request for 'dataset' / 'path' and pass it to the kernel
+ * through ZFS_IOC_SHARE.  The 'export' and 'sharetab' pointers are carried
+ * in the command as 64-bit integers.  Returns the ioctl result unchanged.
+ */
+int
+zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
+    void *export, void *sharetab, int sharemax, zfs_share_op_t operation)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/* what is being shared */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
+
+	/* how it is being shared */
+	zc.zc_share.z_sharetype = operation;
+	zc.zc_share.z_sharemax = sharemax;
+	zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export;
+	zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab;
+
+	return (ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc));
+}
diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c
new file mode 100644
index 000000000..e7cbf2386
--- /dev/null
+++ b/lib/libzfs/libzfs_graph.c
@@ -0,0 +1,662 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Iterate over all children of the current object.  This includes the normal
+ * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
+ * walk all datasets in the pool, and construct a directed graph of the form:
+ *
+ * 			home
+ *                        |
+ *                   +----+----+
+ *                   |         |
+ *                   v         v             ws
+ *                  bar       baz             |
+ *                             |              |
+ *                             v              v
+ *                          @yesterday ----> foo
+ *
+ * In order to construct this graph, we have to walk every dataset in the pool,
+ * because the clone parent is stored as a property of the child, not the
+ * parent.  The parent only keeps track of the number of clones.
+ *
+ * In the normal case (without clones) this would be rather expensive.  To avoid
+ * unnecessary computation, we first try a walk of the subtree hierarchy
+ * starting from the initial node.  At each dataset, we construct a node in the
+ * graph and an edge leading from its parent.  If we don't see any snapshots
+ * with a non-zero clone count, then we are finished.
+ *
+ * If we do find a cloned snapshot, then we finish the walk of the current
+ * subtree, but indicate that we need to do a complete walk.  We then perform a
+ * global walk of all datasets, avoiding the subtree we already processed.
+ *
+ * At the end of this, we'll end up with a directed graph of all relevant (and
+ * possibly some irrelevant) datasets in the system.  We need to both find our
+ * limiting subgraph and determine a safe ordering in which to destroy the
+ * datasets.  We do a topological ordering of our graph starting at our target
+ * dataset, and then walk the results in reverse.
+ *
+ * It's possible for the graph to have cycles if, for example, the user renames
+ * a clone to be the parent of its origin snapshot.  The user can request to
+ * generate an error in this case, or ignore the cycle and continue.
+ *
+ * When removing datasets, we want to destroy the snapshots in chronological
+ * order (because this is the most efficient method).  In order to accomplish
+ * this, we store the creation transaction group with each vertex and keep each
+ * vertex's edges sorted according to this value.  The topological sort will
+ * automatically walk the snapshots in the correct order.
+ */
+
+#include <assert.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+#include "zfs_namecheck.h"
+
+/* number of edge slots allocated for a newly created vertex */
+#define	MIN_EDGECOUNT	4
+
+/*
+ * Vertex structure.  Indexed by dataset name, this structure maintains a list
+ * of edges to other vertices.
+ */
+struct zfs_edge;
+typedef struct zfs_vertex {
+	/* dataset name; also the key used for hash lookups */
+	char			zv_dataset[ZFS_MAXNAMELEN];
+	/* next vertex on the same hash chain of the graph */
+	struct zfs_vertex	*zv_next;
+	/* 0, or one of the VISIT_* states */
+	int			zv_visited;
+	/* creation txg; edges are sorted by the txg of their destination */
+	uint64_t		zv_txg;
+	/* array of outgoing edges; grows by doubling */
+	struct zfs_edge		**zv_edges;
+	/* number of slots of zv_edges in use */
+	int			zv_edgecount;
+	/* number of slots of zv_edges allocated */
+	int			zv_edgealloc;
+} zfs_vertex_t;
+
+/*
+ * Values stored in zv_visited.  A freshly created vertex carries none of
+ * them.
+ */
+enum {
+	/* the vertex has been used as an edge source / had its children walked */
+	VISIT_SEEN = 1,
+	/* topo_sort() has entered the vertex but not yet emitted it */
+	VISIT_SORT_PRE,
+	/* topo_sort() has written the vertex to the result array */
+	VISIT_SORT_POST
+};
+
+/*
+ * Edge structure.  Simply maintains a pointer to the destination vertex.  There
+ * is no need to store the source vertex, since we only use edges in the context
+ * of the source vertex.
+ *
+ * NOTE(review): 'ze_next' is not referenced by any of the graph routines
+ * visible here; edges are kept in the per-vertex zv_edges array instead.
+ */
+typedef struct zfs_edge {
+	zfs_vertex_t		*ze_dest;
+	struct zfs_edge		*ze_next;
+} zfs_edge_t;
+
+/* number of hash buckets used for every graph built by construct_graph() */
+#define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
+
+/*
+ * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
+ */
+typedef struct zfs_graph {
+	/* array of zg_size hash chains of vertices */
+	zfs_vertex_t		**zg_hash;
+	/* number of hash buckets */
+	size_t			zg_size;
+	/* number of vertices created so far */
+	size_t			zg_nvertex;
+	/* name of the dataset the graph was built for (pointer not copied) */
+	const char		*zg_root;
+	/*
+	 * Clones reported by snapshots walked so far, minus the clones found
+	 * whose origin lies in the walked subtree.  A non-zero value after
+	 * the subtree walk means there are dependents outside the subtree.
+	 */
+	int			zg_clone_count;
+} zfs_graph_t;
+
+/*
+ * Create an edge whose destination is 'dest'.  Returns the new edge, or NULL
+ * if the allocation fails.
+ */
+static zfs_edge_t *
+zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
+{
+	zfs_edge_t *edge;
+
+	if ((edge = zfs_alloc(hdl, sizeof (zfs_edge_t))) != NULL)
+		edge->ze_dest = dest;
+
+	return (edge);
+}
+
+/*
+ * Destroy an edge.  Only the edge itself is released; the destination vertex
+ * it points to is left alone.
+ */
+static void
+zfs_edge_destroy(zfs_edge_t *zep)
+{
+	free(zep);
+}
+
+/*
+ * Create a vertex named 'dataset' with room for MIN_EDGECOUNT edges.  The
+ * name must fit in ZFS_MAXNAMELEN (asserted).  Returns NULL if either the
+ * vertex or its edge array cannot be allocated.
+ */
+static zfs_vertex_t *
+zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_vertex_t *vertex;
+
+	vertex = zfs_alloc(hdl, sizeof (zfs_vertex_t));
+	if (vertex == NULL)
+		return (NULL);
+
+	assert(strlen(dataset) < ZFS_MAXNAMELEN);
+	(void) strlcpy(vertex->zv_dataset, dataset,
+	    sizeof (vertex->zv_dataset));
+
+	vertex->zv_edges = zfs_alloc(hdl, MIN_EDGECOUNT * sizeof (void *));
+	if (vertex->zv_edges != NULL) {
+		vertex->zv_edgealloc = MIN_EDGECOUNT;
+		return (vertex);
+	}
+
+	/* no edge array: give the half-built vertex back */
+	free(vertex);
+	return (NULL);
+}
+
+/*
+ * Release a vertex together with every edge leaving it and the edge array
+ * itself.  Destination vertices are not touched.
+ */
+static void
+zfs_vertex_destroy(zfs_vertex_t *zvp)
+{
+	int remaining = zvp->zv_edgecount;
+
+	while (remaining-- > 0)
+		zfs_edge_destroy(zvp->zv_edges[remaining]);
+
+	free(zvp->zv_edges);
+	free(zvp);
+}
+
+/*
+ * Given a vertex, add an edge to the destination vertex.
+ *
+ * The edge array is doubled when it is full.  Returns 0 on success, or -1 if
+ * the edge or the larger array cannot be allocated; in the latter case the
+ * edge that was just created is released again so it is not leaked, and the
+ * vertex is left unchanged.
+ */
+static int
+zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
+    zfs_vertex_t *dest)
+{
+	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
+
+	if (zep == NULL)
+		return (-1);
+
+	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
+		void *ptr;
+
+		if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
+		    zvp->zv_edgealloc * sizeof (void *),
+		    zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL) {
+			/* the edge is not linked anywhere yet */
+			zfs_edge_destroy(zep);
+			return (-1);
+		}
+
+		zvp->zv_edges = ptr;
+		zvp->zv_edgealloc *= 2;
+	}
+
+	zvp->zv_edges[zvp->zv_edgecount++] = zep;
+
+	return (0);
+}
+
+/*
+ * qsort() comparator for arrays of edge pointers: orders edges by ascending
+ * creation txg of their destination vertex.
+ */
+static int
+zfs_edge_compare(const void *a, const void *b)
+{
+	uint64_t txg_a = (*((zfs_edge_t **)a))->ze_dest->zv_txg;
+	uint64_t txg_b = (*((zfs_edge_t **)b))->ze_dest->zv_txg;
+
+	/* yields -1, 0 or 1 without risking truncation of a difference */
+	return ((txg_a > txg_b) - (txg_a < txg_b));
+}
+
+/*
+ * Order the outgoing edges of a vertex by the creation txg of the vertex each
+ * one leads to.  A vertex without edges is left untouched.
+ */
+static void
+zfs_vertex_sort_edges(zfs_vertex_t *zvp)
+{
+	if (zvp->zv_edgecount != 0) {
+		qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
+		    zfs_edge_compare);
+	}
+}
+
+/*
+ * Build an empty graph rooted at 'dataset' with 'size' hash buckets.  The
+ * bucket count is a parameter so that it could later be chosen from the
+ * number of datasets in the pool.  The root name is referenced, not copied.
+ * Returns NULL if either allocation fails.
+ */
+static zfs_graph_t *
+zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
+{
+	zfs_graph_t *graph;
+
+	if ((graph = zfs_alloc(hdl, sizeof (zfs_graph_t))) == NULL)
+		return (NULL);
+
+	graph->zg_hash = zfs_alloc(hdl, size * sizeof (zfs_vertex_t *));
+	if (graph->zg_hash == NULL) {
+		free(graph);
+		return (NULL);
+	}
+
+	graph->zg_size = size;
+	graph->zg_root = dataset;
+	graph->zg_clone_count = 0;
+
+	return (graph);
+}
+
+/*
+ * Tear down a graph: walk every hash chain, destroying each vertex on it,
+ * then release the bucket array and the graph itself.
+ */
+static void
+zfs_graph_destroy(zfs_graph_t *zgp)
+{
+	size_t bucket;
+
+	for (bucket = 0; bucket < zgp->zg_size; bucket++) {
+		zfs_vertex_t *vp = zgp->zg_hash[bucket];
+
+		while (vp != NULL) {
+			/* fetch the link before the vertex goes away */
+			zfs_vertex_t *following = vp->zv_next;
+
+			zfs_vertex_destroy(vp);
+			vp = following;
+		}
+	}
+
+	free(zgp->zg_hash);
+	free(zgp);
+}
+
+/*
+ * Map a dataset name to a bucket index of the graph.  This is the classic
+ * Bernstein hash (seed 5381, multiplier 33), taken from
+ * usr/src/cmd/sgs/tools/common/strhash.c, reduced modulo the bucket count.
+ */
+static size_t
+zfs_graph_hash(zfs_graph_t *zgp, const char *str)
+{
+	size_t hash = 5381;
+	const char *p;
+
+	for (p = str; *p != '\0'; p++) {
+		int c = *p;
+
+		hash = hash * 33 + c;
+	}
+
+	return (hash % zgp->zg_size);
+}
+
+/*
+ * Return the vertex for 'dataset', creating and inserting it at the head of
+ * its hash chain if it does not exist yet.  An existing vertex whose txg is
+ * still 0 picks up 'txg'; a new vertex always starts with 'txg'.  Returns
+ * NULL only if a new vertex cannot be allocated.
+ */
+static zfs_vertex_t *
+zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
+    uint64_t txg)
+{
+	size_t bucket = zfs_graph_hash(zgp, dataset);
+	zfs_vertex_t *vp = zgp->zg_hash[bucket];
+
+	while (vp != NULL && strcmp(vp->zv_dataset, dataset) != 0)
+		vp = vp->zv_next;
+
+	if (vp != NULL) {
+		if (vp->zv_txg == 0)
+			vp->zv_txg = txg;
+		return (vp);
+	}
+
+	vp = zfs_vertex_create(hdl, dataset);
+	if (vp == NULL)
+		return (NULL);
+
+	vp->zv_txg = txg;
+	vp->zv_next = zgp->zg_hash[bucket];
+	zgp->zg_hash[bucket] = vp;
+	zgp->zg_nvertex++;
+
+	return (vp);
+}
+
+/*
+ * Record the edge 'source' -> 'dest' in the graph, creating either vertex as
+ * needed.  The source vertex is flagged VISIT_SEEN to tell it apart from a
+ * vertex that only exists as the destination of some edge.  A NULL 'dest'
+ * registers 'source' as a stand-alone vertex (e.g. the starting vertex)
+ * without adding an edge.  'txg' is applied to the destination vertex.
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int
+zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
+    const char *dest, uint64_t txg)
+{
+	zfs_vertex_t *from, *to;
+
+	from = zfs_graph_lookup(hdl, zgp, source, 0);
+	if (from == NULL)
+		return (-1);
+	from->zv_visited = VISIT_SEEN;
+
+	if (dest == NULL)
+		return (0);
+
+	to = zfs_graph_lookup(hdl, zgp, dest, txg);
+	if (to == NULL || zfs_vertex_add_edge(hdl, from, to) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Iterate over all children of the given dataset, adding any vertices
+ * as necessary.  Returns -1 if there was an error, or 0 otherwise.
+ * This is a simple recursive algorithm - the ZFS namespace typically
+ * is very flat.  We manually invoke the necessary ioctl() calls to
+ * avoid the overhead and additional semantics of zfs_open().
+ *
+ * Besides building edges, this keeps zg_clone_count up to date: it is
+ * raised by the clone count of every snapshot visited and lowered for every
+ * child found to be a clone whose origin lies under the graph root.
+ */
+static int
+iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_vertex_t *zvp;
+
+	/*
+	 * Look up the source vertex, and avoid it if we've seen it before.
+	 */
+	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
+	if (zvp == NULL)
+		return (-1);
+	if (zvp->zv_visited == VISIT_SEEN)
+		return (0);
+
+	/*
+	 * Iterate over all children
+	 *
+	 * Each pass resets zc_name to the parent before asking for the next
+	 * child; presumably the remaining fields of 'zc' carry the iteration
+	 * position between calls -- TODO confirm.
+	 */
+	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
+
+		/*
+		 * Ignore private dataset names.
+		 */
+		if (dataset_name_hidden(zc.zc_name))
+			continue;
+
+		/*
+		 * Get statistics for this dataset, to determine the type of the
+		 * dataset and clone statistics.  If this fails, the dataset has
+		 * since been removed, and we're pretty much screwed anyway.
+		 */
+		/* clear any origin left over from the previous child */
+		zc.zc_objset_stats.dds_origin[0] = '\0';
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			continue;
+
+		/* the child is a clone: add an edge from its origin snapshot */
+		if (zc.zc_objset_stats.dds_origin[0] != '\0') {
+			if (zfs_graph_add(hdl, zgp,
+			    zc.zc_objset_stats.dds_origin, zc.zc_name,
+			    zc.zc_objset_stats.dds_creation_txg) != 0)
+				return (-1);
+			/*
+			 * Count origins only if they are contained in the graph
+			 */
+			if (isa_child_of(zc.zc_objset_stats.dds_origin,
+			    zgp->zg_root))
+				zgp->zg_clone_count--;
+		}
+
+		/*
+		 * Add an edge between the parent and the child.
+		 */
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+
+		/*
+		 * Recursively visit child
+		 */
+		if (iterate_children(hdl, zgp, zc.zc_name))
+			return (-1);
+	}
+
+	/*
+	 * Now iterate over all snapshots.
+	 */
+	bzero(&zc, sizeof (zc));
+
+	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
+
+		/*
+		 * Get statistics for this dataset, to determine the type of the
+		 * dataset and clone statistics.  If this fails, the dataset has
+		 * since been removed, and we're pretty much screwed anyway.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			continue;
+
+		/*
+		 * Add an edge between the parent and the child.
+		 */
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+
+		/* every clone of this snapshot is a dependent to account for */
+		zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
+	}
+
+	/* covers datasets that had no children or snapshots at all */
+	zvp->zv_visited = VISIT_SEEN;
+
+	return (0);
+}
+
+/*
+ * Decide whether the subtree rooted at 'dataset' has dependents outside of
+ * itself, populating the graph with the subtree along the way.
+ *
+ * Returns B_FALSE when no snapshot in the subtree has clones, or when every
+ * such clone lives inside the subtree.  Returns B_TRUE when there are
+ * external dependents, when 'dataset' itself has clones, or on any error.
+ */
+static boolean_t
+external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+	const char *origin;
+
+	/*
+	 * iterate_children() only looks at the children, so the starting
+	 * dataset has to be examined here for being a clone or having clones.
+	 */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+		return (B_TRUE);
+
+	origin = zc.zc_objset_stats.dds_origin;
+	if (origin[0] != '\0') {
+		if (zfs_graph_add(hdl, zgp, origin, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (B_TRUE);
+		if (isa_child_of(origin, dataset))
+			zgp->zg_clone_count--;
+	}
+
+	/* clones of the dataset itself settle the matter without a walk */
+	if (zc.zc_objset_stats.dds_num_clones != 0)
+		return (B_TRUE);
+
+	if (iterate_children(hdl, zgp, dataset) != 0)
+		return (B_TRUE);
+
+	return (zgp->zg_clone_count != 0);
+}
+
+/*
+ * Construct a complete graph of all necessary vertices.  First, iterate over
+ * only our object's children.  If no cloned snapshots are found, or all of
+ * the cloned snapshots are in this subtree then return a graph of the subtree.
+ * Otherwise, start at the root of the pool and iterate over all datasets.
+ *
+ * A name without a '/' always takes the whole-pool walk.  The starting
+ * dataset is added as a vertex of its own at the end.  Returns the graph, or
+ * NULL on failure (any partially built graph is destroyed).
+ */
+static zfs_graph_t *
+construct_graph(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
+
+	if (zgp == NULL)
+		return (zgp);
+
+	if ((strchr(dataset, '/') == NULL) ||
+	    (external_dependents(hdl, zgp, dataset))) {
+		/*
+		 * Determine pool name and try again.
+		 * The pool name is everything up to the first '/' or '@';
+		 * 'len' includes room for the terminating NUL.
+		 */
+		size_t len = strcspn(dataset, "/@") + 1;
+		char *pool = zfs_alloc(hdl, len);
+
+		if (pool == NULL) {
+			zfs_graph_destroy(zgp);
+			return (NULL);
+		}
+		(void) strlcpy(pool, dataset, len);
+
+		if (iterate_children(hdl, zgp, pool) == -1 ||
+		    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
+			free(pool);
+			zfs_graph_destroy(zgp);
+			return (NULL);
+		}
+		free(pool);
+	}
+
+	if (zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
+
+	return (zgp);
+}
+
+/*
+ * Depth-first topological sort starting at 'zgv'.  Every vertex reachable
+ * from it is appended to 'result' after all of the vertices it points to,
+ * so the deepest vertices come first; '*idx' is the next free slot.  The
+ * 'zv_visited' field doubles as the traversal marker so that no vertex is
+ * emitted twice.
+ *
+ * Meeting a vertex that is still being processed means the graph has a
+ * cycle: with 'allowrecursion' it is skipped, otherwise an EZFS_RECURSIVE
+ * error is raised.  Returns 0 on success and non-zero on error.
+ */
+static int
+topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
+    size_t *idx, zfs_vertex_t *zgv)
+{
+	int e;
+	char *name;
+
+	if (zgv->zv_visited >= VISIT_SORT_PRE) {
+		/*
+		 * Already finished, or in progress with cycles tolerated:
+		 * nothing more to do for this vertex.
+		 */
+		if (zgv->zv_visited != VISIT_SORT_PRE || allowrecursion)
+			return (0);
+
+		/*
+		 * Still in progress further up the call chain, i.e. a
+		 * cyclic dependency that the caller does not accept.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "recursive dependency at '%s'"),
+		    zgv->zv_dataset);
+		return (zfs_error(hdl, EZFS_RECURSIVE,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot determine dependent datasets")));
+	}
+
+	zgv->zv_visited = VISIT_SORT_PRE;
+
+	/* descend in creation-txg order */
+	zfs_vertex_sort_edges(zgv);
+	for (e = 0; e < zgv->zv_edgecount; e++) {
+		zfs_vertex_t *next = zgv->zv_edges[e]->ze_dest;
+
+		if (topo_sort(hdl, allowrecursion, result, idx, next) != 0)
+			return (-1);
+	}
+
+	/* the descent above may already have emitted this vertex */
+	if (zgv->zv_visited == VISIT_SORT_POST)
+		return (0);
+
+	name = zfs_alloc(hdl, strlen(zgv->zv_dataset) + 1);
+	result[*idx] = name;
+	if (name == NULL)
+		return (-1);
+
+	(void) strcpy(name, zgv->zv_dataset);
+	(*idx)++;
+	zgv->zv_visited = VISIT_SORT_POST;
+	return (0);
+}
+
+/*
+ * The only public interface for this file.  Do the dirty work of constructing a
+ * child list for the given object.  Construct the graph, do the topological
+ * sort, and then return the array of strings to the caller.
+ *
+ * The 'allowrecursion' parameter controls behavior when cycles are found.  If
+ * it is set, then the cycle is ignored and the results returned as if the cycle
+ * did not exist.  If it is not set, then the routine will generate an error if
+ * a cycle is found.
+ *
+ * On success 0 is returned, '*result' is an array of '*count' allocated
+ * dataset names (excluding 'dataset' itself) and the caller must free each
+ * string as well as the array.  On failure -1 is returned, nothing is left
+ * allocated, '*result' is NULL and '*count' is 0.
+ */
+int
+get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
+    const char *dataset, char ***result, size_t *count)
+{
+	zfs_graph_t *zgp;
+	zfs_vertex_t *zvp;
+
+	*result = NULL;
+	*count = 0;
+
+	if ((zgp = construct_graph(hdl, dataset)) == NULL)
+		return (-1);
+
+	/* one slot per vertex is the most the sort can ever emit */
+	if ((*result = zfs_alloc(hdl,
+	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
+		free(*result);
+		*result = NULL;
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
+		/*
+		 * The sort may have emitted some names before failing;
+		 * release those as well, not just the array.
+		 */
+		while (*count > 0)
+			free((*result)[--(*count)]);
+		free(*result);
+		*result = NULL;
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	/*
+	 * Get rid of the last entry, which is our starting vertex and not
+	 * strictly a dependent.
+	 */
+	assert(*count > 0);
+	free((*result)[*count - 1]);
+	(*count)--;
+
+	zfs_graph_destroy(zgp);
+
+	return (0);
+}
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
new file mode 100644
index 000000000..d67776889
--- /dev/null
+++ b/lib/libzfs/libzfs_import.c
@@ -0,0 +1,1311 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Pool import support functions.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device.  If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ * 	pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded.  Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed.  Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <devid.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <sys/vdev_impl.h>
+
+#include "libzfs.h"
+#include "libzfs_impl.h"
+
+/*
+ * Intermediate structures used to gather configuration information.
+ */
+/*
+ * One label configuration seen for a toplevel vdev.  Entries are chained per
+ * vdev_entry_t; add_config() keeps at most one entry per distinct txg.
+ *
+ *	ce_txg		ZPOOL_CONFIG_POOL_TXG value read from the label
+ *	ce_config	the label nvlist itself (released when the list is
+ *			torn down in zpool_find_import_impl())
+ *	ce_next		next config for the same toplevel vdev
+ */
+typedef struct config_entry {
+	uint64_t		ce_txg;
+	nvlist_t		*ce_config;
+	struct config_entry	*ce_next;
+} config_entry_t;
+
+/*
+ * One toplevel vdev of a pool, keyed by the ZPOOL_CONFIG_TOP_GUID value found
+ * in device labels.
+ *
+ *	ve_guid		toplevel vdev guid
+ *	ve_configs	list of label configs collected for this vdev
+ *	ve_next		next toplevel vdev of the same pool
+ */
+typedef struct vdev_entry {
+	uint64_t		ve_guid;
+	config_entry_t		*ve_configs;
+	struct vdev_entry	*ve_next;
+} vdev_entry_t;
+
+/*
+ * One pool discovered while scanning labels, keyed by ZPOOL_CONFIG_POOL_GUID.
+ *
+ *	pe_guid		pool guid
+ *	pe_vdevs	list of toplevel vdevs seen for this pool
+ *	pe_next		next discovered pool
+ */
+typedef struct pool_entry {
+	uint64_t		pe_guid;
+	vdev_entry_t		*pe_vdevs;
+	struct pool_entry	*pe_next;
+} pool_entry_t;
+
+/*
+ * A vdev guid -> device path mapping, consulted by fix_paths() to rewrite the
+ * paths stored in a configuration.
+ *
+ *	ne_name		private copy of the path the label was read from
+ *	ne_guid		ZPOOL_CONFIG_GUID of the vdev found at that path
+ *	ne_next		next mapping
+ */
+typedef struct name_entry {
+	char			*ne_name;
+	uint64_t		ne_guid;
+	struct name_entry	*ne_next;
+} name_entry_t;
+
+/*
+ * Everything gathered during a device scan: the discovered pools and the
+ * guid -> path mappings.  Both members are list heads that add_config()
+ * pushes new entries onto.
+ */
+typedef struct pool_list {
+	pool_entry_t		*pools;
+	name_entry_t		*names;
+} pool_list_t;
+
+/*
+ * Open 'path' and build the encoded devid string (devid plus minor name) for
+ * the device behind it.  Returns NULL if the device cannot be opened or if
+ * either piece of devid information is unavailable.  A non-NULL result is
+ * released by the caller with devid_str_free().
+ */
+static char *
+get_devid(const char *path)
+{
+	ddi_devid_t id;
+	char *minor_name = NULL;
+	char *encoded = NULL;
+	int fd = open(path, O_RDONLY);
+
+	if (fd < 0)
+		return (NULL);
+
+	if (devid_get(fd, &id) != 0) {
+		(void) close(fd);
+		return (NULL);
+	}
+
+	/* Only encode when the minor name could be fetched as well. */
+	if (devid_get_minor_name(fd, &minor_name) == 0)
+		encoded = devid_str_encode(id, minor_name);
+	if (minor_name != NULL)
+		devid_str_free(minor_name);
+	devid_free(id);
+	(void) close(fd);
+
+	return (encoded);
+}
+
+
+/*
+ * Go through and fix up any path and/or devid information for the given vdev
+ * configuration.
+ *
+ * 'nv' is a vdev nvlist; interior vdevs are handled by recursing into their
+ * ZPOOL_CONFIG_CHILDREN.  'names' is the list of guid -> path mappings built
+ * by add_config().  Returns 0 on success (including when no mapping matches
+ * and the vdev is left untouched) and -1 if an nvlist update fails.
+ */
+static int
+fix_paths(nvlist_t *nv, name_entry_t *names)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	uint64_t guid;
+	name_entry_t *ne, *best;
+	char *path, *devid;
+	int matched;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (fix_paths(child[c], names) != 0)
+				return (-1);
+		return (0);
+	}
+
+	/*
+	 * This is a leaf (file or disk) vdev.  In either case, go through
+	 * the name list and see if we find a matching guid.  If so, replace
+	 * the path and see if we can calculate a new devid.
+	 *
+	 * There may be multiple names associated with a particular guid, in
+	 * which case we have overlapping slices or multiple paths to the same
+	 * disk.  If this is the case, then we want to pick the path that is
+	 * the most similar to the original, where "most similar" is the number
+	 * of matching characters starting from the end of the path.  This will
+	 * preserve slice numbers even if the disks have been reorganized, and
+	 * will also catch preferred disk names if multiple paths exist.
+	 */
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
+		path = NULL;
+
+	matched = 0;
+	best = NULL;
+	for (ne = names; ne != NULL; ne = ne->ne_next) {
+		if (ne->ne_guid == guid) {
+			const char *src, *dst;
+			int count;
+
+			/* No original path to compare with: first hit wins. */
+			if (path == NULL) {
+				best = ne;
+				break;
+			}
+
+			src = ne->ne_name + strlen(ne->ne_name) - 1;
+			dst = path + strlen(path) - 1;
+			for (count = 0; src >= ne->ne_name && dst >= path;
+			    src--, dst--, count++)
+				if (*src != *dst)
+					break;
+
+			/*
+			 * At this point, 'count' is the number of characters
+			 * matched from the end.
+			 */
+			if (count > matched || best == NULL) {
+				best = ne;
+				matched = count;
+			}
+		}
+	}
+
+	if (best == NULL)
+		return (0);
+
+	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
+		return (-1);
+
+	if ((devid = get_devid(best->ne_name)) == NULL) {
+		/* A stale devid is worse than none at all. */
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+	} else {
+		/*
+		 * nvlist_add_string() copies the value, so the encoded devid
+		 * must be released whether or not the add succeeds.
+		 */
+		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
+			devid_str_free(devid);
+			return (-1);
+		}
+		devid_str_free(devid);
+	}
+
+	return (0);
+}
+
+/*
+ * Add the given configuration to the list of known devices.
+ *
+ * 'config' is the label nvlist read from the device at 'path'.  This routine
+ * takes ownership of 'config' on every path: it is either stored in 'pl' or
+ * freed before returning, so the caller must not touch it afterwards.
+ *
+ * Returns 0 on success (including when the label is ignored) and -1 if an
+ * allocation fails.
+ */
+static int
+add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
+    nvlist_t *config)
+{
+	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	name_entry_t *ne;
+
+	/*
+	 * If this is a hot spare not currently in use or level 2 cache
+	 * device, add it to the list of names to translate, but don't do
+	 * anything else.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &state) == 0 &&
+	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
+		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+			free(ne);
+			nvlist_free(config);
+			return (-1);
+		}
+		ne->ne_guid = vdev_guid;
+		ne->ne_next = pl->names;
+		pl->names = ne;
+
+		/*
+		 * Only the guid -> path mapping is kept for these devices;
+		 * the label itself is not stored anywhere, so release it here
+		 * rather than leaking it.
+		 */
+		nvlist_free(config);
+		return (0);
+	}
+
+	/*
+	 * If we have a valid config but cannot read any of these fields, then
+	 * it means we have a half-initialized label.  In vdev_label_init()
+	 * we write a label with txg == 0 so that we can identify the device
+	 * in case the user refers to the same disk later on.  If we fail to
+	 * create the pool, we'll be left with a label in this state
+	 * which should not be considered part of a valid pool.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &pool_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+	    &vdev_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
+	    &top_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+	    &txg) != 0 || txg == 0) {
+		nvlist_free(config);
+		return (0);
+	}
+
+	/*
+	 * First, see if we know about this pool.  If not, then add it to the
+	 * list of known pools.
+	 */
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+		if (pe->pe_guid == pool_guid)
+			break;
+	}
+
+	if (pe == NULL) {
+		/*
+		 * NOTE(review): pe_vdevs is never assigned here, so this
+		 * presumably relies on zfs_alloc() returning zeroed memory --
+		 * confirm against its definition.
+		 */
+		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		pe->pe_guid = pool_guid;
+		pe->pe_next = pl->pools;
+		pl->pools = pe;
+	}
+
+	/*
+	 * Second, see if we know about this toplevel vdev.  Add it if it's
+	 * missing.
+	 */
+	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+		if (ve->ve_guid == top_guid)
+			break;
+	}
+
+	if (ve == NULL) {
+		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		ve->ve_guid = top_guid;
+		ve->ve_next = pe->pe_vdevs;
+		pe->pe_vdevs = ve;
+	}
+
+	/*
+	 * Third, see if we have a config with a matching transaction group.  If
+	 * so, then we do nothing.  Otherwise, add it to the list of known
+	 * configs.
+	 */
+	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
+		if (ce->ce_txg == txg)
+			break;
+	}
+
+	if (ce == NULL) {
+		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		ce->ce_txg = txg;
+		ce->ce_config = config;
+		ce->ce_next = ve->ve_configs;
+		ve->ve_configs = ce;
+	} else {
+		/* Duplicate (pool, toplevel vdev, txg) tuple: discard it. */
+		nvlist_free(config);
+	}
+
+	/*
+	 * At this point we've successfully added our config to the list of
+	 * known configs.  The last thing to do is add the vdev guid -> path
+	 * mappings so that we can fix up the configuration as necessary before
+	 * doing the import.  'config' has already been stored or freed above,
+	 * so the failure paths below must not free it again.
+	 */
+	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+		return (-1);
+
+	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+		free(ne);
+		return (-1);
+	}
+
+	ne->ne_guid = vdev_guid;
+	ne->ne_next = pl->names;
+	pl->names = ne;
+
+	return (0);
+}
+
+/*
+ * Determine whether a pool called 'name' is currently open on this system
+ * and carries the given pool GUID.  The answer is stored in '*isactive':
+ * B_FALSE when no such pool can be opened, otherwise whether its GUID matches
+ * 'guid'.
+ *
+ * Returns 0 when '*isactive' has been set, or -1 if zpool_open_silent()
+ * reports an error (in which case '*isactive' is left untouched).
+ */
+static int
+pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
+    boolean_t *isactive)
+{
+	zpool_handle_t *pool;
+	uint64_t open_guid;
+
+	if (zpool_open_silent(hdl, name, &pool) != 0)
+		return (-1);
+
+	if (pool != NULL) {
+		verify(nvlist_lookup_uint64(pool->zpool_config,
+		    ZPOOL_CONFIG_POOL_GUID, &open_guid) == 0);
+		zpool_close(pool);
+		*isactive = (open_guid == guid);
+	} else {
+		/* Nothing by that name is open, so it cannot be active. */
+		*isactive = B_FALSE;
+	}
+
+	return (0);
+}
+
+/*
+ * Hand 'config' to the kernel through ZFS_IOC_POOL_TRYIMPORT and return the
+ * nvlist the kernel sends back.  The destination buffer starts at twice the
+ * packed size of 'config' and is expanded for as long as the ioctl fails with
+ * ENOMEM.
+ *
+ * Returns a newly read nvlist on success.  Returns NULL on any failure; a
+ * failing ioctl is additionally reported through zpool_standard_error().
+ * 'config' itself is neither modified nor freed here.
+ */
+static nvlist_t *
+refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *refreshed = NULL;
+	int rc;
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
+		return (NULL);
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc,
+	    zc.zc_nvlist_conf_size * 2) != 0)
+		goto out;
+
+	for (;;) {
+		rc = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, &zc);
+		if (rc == 0 || errno != ENOMEM)
+			break;
+
+		/* Destination buffer too small: grow it and try again. */
+		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0)
+			goto out;
+	}
+
+	if (rc != 0) {
+		(void) zpool_standard_error(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot discover pools"));
+		goto out;
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &refreshed) != 0)
+		refreshed = NULL;
+
+out:
+	zcmd_free_nvlists(&zc);
+	return (refreshed);
+}
+
+/*
+ * Convert our list of pools into the definitive set of configurations.  We
+ * start by picking the best config for each toplevel vdev.  Once that's done,
+ * we assemble the toplevel vdevs into a full config for the pool.  We make a
+ * pass to fix up any incorrect paths, and then add it to the main list to
+ * return to the user.
+ *
+ * 'pl' holds the pools and guid -> path mappings gathered by add_config().
+ * When 'active_ok' is set, each assembled config is returned as-is; otherwise
+ * pools that pool_active() reports as active are dropped and the remaining
+ * configs are passed through refresh_config() first.
+ *
+ * Returns an nvlist holding one config per pool, added under the pool's name,
+ * or NULL if no pool qualified or an error occurred.
+ */
+static nvlist_t *
+get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
+{
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
+	nvlist_t **spares, **l2cache;
+	uint_t i, nspares, nl2cache;
+	boolean_t config_seen;
+	uint64_t best_txg;
+	char *name, *hostname;
+	uint64_t version, guid;
+	uint_t children = 0;
+	nvlist_t **child = NULL;
+	uint_t c;
+	boolean_t isactive;
+	uint64_t hostid;
+	nvlist_t *nvl;
+	boolean_t found_one = B_FALSE;
+
+	if (nvlist_alloc(&ret, 0, 0) != 0)
+		goto nomem;
+
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+		uint64_t id;
+
+		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		config_seen = B_FALSE;
+
+		/*
+		 * Iterate over all toplevel vdevs.  Grab the pool configuration
+		 * from the first one we find, and then go through the rest and
+		 * add them as necessary to the 'vdevs' member of the config.
+		 */
+		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+
+			/*
+			 * Determine the best configuration for this vdev by
+			 * selecting the config with the latest transaction
+			 * group.
+			 *
+			 * NOTE(review): 'tmp' is assigned only inside this
+			 * loop, so the code below presumes every vdev entry
+			 * carries at least one config with txg > 0
+			 * (add_config() rejects labels with txg == 0).
+			 */
+			best_txg = 0;
+			for (ce = ve->ve_configs; ce != NULL;
+			    ce = ce->ce_next) {
+
+				if (ce->ce_txg > best_txg) {
+					tmp = ce->ce_config;
+					best_txg = ce->ce_txg;
+				}
+			}
+
+			if (!config_seen) {
+				/*
+				 * Copy the relevant pieces of data to the pool
+				 * configuration:
+				 *
+				 *	version
+				 * 	pool guid
+				 * 	name
+				 * 	pool state
+				 *	hostid (if available)
+				 *	hostname (if available)
+				 */
+				uint64_t state;
+
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_VERSION, &version) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_VERSION, version) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
+					goto nomem;
+				verify(nvlist_lookup_string(tmp,
+				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
+				if (nvlist_add_string(config,
+				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
+					goto nomem;
+				/*
+				 * 'hostid' stays 0 when the label has none;
+				 * that doubles as the "no hostname either"
+				 * flag when the values are restored after
+				 * refresh_config() below.
+				 */
+				hostid = 0;
+				if (nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
+					if (nvlist_add_uint64(config,
+					    ZPOOL_CONFIG_HOSTID, hostid) != 0)
+						goto nomem;
+					verify(nvlist_lookup_string(tmp,
+					    ZPOOL_CONFIG_HOSTNAME,
+					    &hostname) == 0);
+					if (nvlist_add_string(config,
+					    ZPOOL_CONFIG_HOSTNAME,
+					    hostname) != 0)
+						goto nomem;
+				}
+
+				config_seen = B_TRUE;
+			}
+
+			/*
+			 * Add this top-level vdev to the child array.
+			 */
+			verify(nvlist_lookup_nvlist(tmp,
+			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
+			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+			if (id >= children) {
+				nvlist_t **newchild;
+
+				/*
+				 * Grow the array so that slot 'id' exists.
+				 * NOTE(review): slots that are never filled
+				 * are later tested against NULL, so this
+				 * presumably relies on zfs_alloc() returning
+				 * zeroed memory -- confirm.
+				 */
+				newchild = zfs_alloc(hdl, (id + 1) *
+				    sizeof (nvlist_t *));
+				if (newchild == NULL)
+					goto nomem;
+
+				for (c = 0; c < children; c++)
+					newchild[c] = child[c];
+
+				free(child);
+				child = newchild;
+				children = id + 1;
+			}
+			if (nvlist_dup(nvtop, &child[id], 0) != 0)
+				goto nomem;
+
+		}
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+
+		/*
+		 * Look for any missing top-level vdevs.  If this is the case,
+		 * create a faked up 'missing' vdev as a placeholder.  We cannot
+		 * simply compress the child array, because the kernel performs
+		 * certain checks to make sure the vdev IDs match their location
+		 * in the configuration.
+		 */
+		for (c = 0; c < children; c++)
+			if (child[c] == NULL) {
+				nvlist_t *missing;
+				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
+				    0) != 0)
+					goto nomem;
+				if (nvlist_add_string(missing,
+				    ZPOOL_CONFIG_TYPE,
+				    VDEV_TYPE_MISSING) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_ID, c) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+					nvlist_free(missing);
+					goto nomem;
+				}
+				child[c] = missing;
+			}
+
+		/*
+		 * Put all of this pool's top-level vdevs into a root vdev.
+		 */
+		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
+		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+		    child, children) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+
+		/*
+		 * The child nvlists are released right after being added to
+		 * 'nvroot', and the array is reset so that the next pool (and
+		 * the error path) starts from an empty array.
+		 */
+		for (c = 0; c < children; c++)
+			nvlist_free(child[c]);
+		free(child);
+		children = 0;
+		child = NULL;
+
+		/*
+		 * Go through and fix up any paths and/or devids based on our
+		 * known list of vdev GUID -> path mappings.
+		 */
+		if (fix_paths(nvroot, pl->names) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+
+		/*
+		 * Add the root vdev to this pool's configuration.
+		 */
+		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    nvroot) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+		nvlist_free(nvroot);
+
+		/*
+		 * zdb uses this path to report on active pools that were
+		 * imported or created using -R.
+		 */
+		if (active_ok)
+			goto add_pool;
+
+		/*
+		 * Determine if this pool is currently active, in which case we
+		 * can't actually import it.
+		 */
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+
+		if (pool_active(hdl, name, guid, &isactive) != 0)
+			goto error;
+
+		if (isactive) {
+			nvlist_free(config);
+			config = NULL;
+			continue;
+		}
+
+		/*
+		 * Replace our hand-built config with the one the kernel
+		 * returns for it.
+		 */
+		if ((nvl = refresh_config(hdl, config)) == NULL)
+			goto error;
+
+		nvlist_free(config);
+		config = nvl;
+
+		/*
+		 * Go through and update the paths for spares, now that we have
+		 * them.  From here on 'nvroot' points into 'config' and must
+		 * not be freed separately.
+		 */
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) == 0);
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) == 0) {
+			for (i = 0; i < nspares; i++) {
+				if (fix_paths(spares[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		/*
+		 * Update the paths for l2cache devices.
+		 */
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    &l2cache, &nl2cache) == 0) {
+			for (i = 0; i < nl2cache; i++) {
+				if (fix_paths(l2cache[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		/*
+		 * Restore the original information read from the actual label.
+		 */
+		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
+		    DATA_TYPE_UINT64);
+		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
+		    DATA_TYPE_STRING);
+		if (hostid != 0) {
+			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
+			    hostid) == 0);
+			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
+			    hostname) == 0);
+		}
+
+add_pool:
+		/*
+		 * Add this pool to the list of configs.
+		 */
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		if (nvlist_add_nvlist(ret, name, config) != 0)
+			goto nomem;
+
+		found_one = B_TRUE;
+		nvlist_free(config);
+		config = NULL;
+	}
+
+	/* An empty result is reported as NULL rather than an empty list. */
+	if (!found_one) {
+		nvlist_free(ret);
+		ret = NULL;
+	}
+
+	return (ret);
+
+	/*
+	 * 'nomem' records an out-of-memory error on the handle and then falls
+	 * through to the common cleanup; 'error' is entered directly when the
+	 * failing callee was pool_active() or refresh_config().
+	 */
+nomem:
+	(void) no_memory(hdl);
+error:
+	nvlist_free(config);
+	nvlist_free(ret);
+	for (c = 0; c < children; c++)
+		nvlist_free(child[c]);
+	free(child);
+
+	return (NULL);
+}
+
+/*
+ * Return the byte offset of label number 'l' on a device of 'size' bytes.
+ * 'size' must already be a multiple of sizeof (vdev_label_t).  Labels in the
+ * first half of the VDEV_LABELS set are laid out from offset 0; the others
+ * are laid out so that the set ends at 'size'.
+ */
+static uint64_t
+label_offset(uint64_t size, int l)
+{
+	uint64_t base;
+
+	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
+
+	if (l < VDEV_LABELS / 2)
+		base = 0;
+	else
+		base = size - VDEV_LABELS * sizeof (vdev_label_t);
+
+	return (l * sizeof (vdev_label_t) + base);
+}
+
+/*
+ * Given a file descriptor, read the label information and return an nvlist
+ * describing the configuration, if there is one.
+ *
+ * Each of the VDEV_LABELS label slots is tried in turn and the first one that
+ * unpacks into a plausible config wins: the pool state must be present and no
+ * greater than POOL_STATE_L2CACHE, and unless the device is a spare or an
+ * l2cache device the label must carry a non-zero pool txg.
+ *
+ * Returns -1 only if the label buffer cannot be allocated.  Otherwise returns
+ * 0, with '*config' set to the unpacked nvlist or to NULL if no usable label
+ * was found (including when fstat64() on the descriptor fails).
+ */
+int
+zpool_read_label(int fd, nvlist_t **config)
+{
+	struct stat64 st;
+	vdev_label_t *buf;
+	uint64_t devsize, pool_state, pool_txg;
+	int idx;
+
+	*config = NULL;
+
+	if (fstat64(fd, &st) == -1)
+		return (0);
+	devsize = P2ALIGN_TYPED(st.st_size, sizeof (vdev_label_t), uint64_t);
+
+	buf = malloc(sizeof (vdev_label_t));
+	if (buf == NULL)
+		return (-1);
+
+	for (idx = 0; idx < VDEV_LABELS; idx++) {
+		if (pread64(fd, buf, sizeof (vdev_label_t),
+		    label_offset(devsize, idx)) != sizeof (vdev_label_t))
+			continue;
+
+		if (nvlist_unpack(buf->vl_vdev_phys.vp_nvlist,
+		    sizeof (buf->vl_vdev_phys.vp_nvlist), config, 0) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &pool_state) == 0 && pool_state <= POOL_STATE_L2CACHE) {
+			/* Spares and l2cache devices need no pool txg. */
+			if (pool_state == POOL_STATE_SPARE ||
+			    pool_state == POOL_STATE_L2CACHE ||
+			    (nvlist_lookup_uint64(*config,
+			    ZPOOL_CONFIG_POOL_TXG, &pool_txg) == 0 &&
+			    pool_txg != 0)) {
+				free(buf);
+				return (0);
+			}
+		}
+
+		/* Not a usable label; discard it and try the next slot. */
+		nvlist_free(*config);
+	}
+
+	free(buf);
+	*config = NULL;
+	return (0);
+}
+
+/*
+ * Given a list of directories to search, find all pools stored on disk.  This
+ * includes partial pools which are not available to import.  If no args are
+ * given (argc is 0), then the default directory (/dev/dsk) is searched.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.
+ *
+ * 'active_ok' is passed straight through to get_configs().  Returns the
+ * nvlist produced by get_configs(), or NULL if a directory cannot be opened,
+ * memory runs out, or no pool is found.  All intermediate bookkeeping is
+ * released before returning, on success and on failure alike.
+ */
+static nvlist_t *
+zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
+    boolean_t active_ok, char *poolname, uint64_t guid)
+{
+	int i;
+	DIR *dirp = NULL;
+	struct dirent64 *dp;
+	char path[MAXPATHLEN];
+	char *end;
+	size_t pathleft;
+	struct stat64 statbuf;
+	nvlist_t *ret = NULL, *config;
+	static char *default_dir = "/dev/dsk";
+	int fd;
+	pool_list_t pools = { 0 };
+	pool_entry_t *pe, *penext;
+	vdev_entry_t *ve, *venext;
+	config_entry_t *ce, *cenext;
+	name_entry_t *ne, *nenext;
+
+	verify(poolname == NULL || guid == 0);
+
+	if (argc == 0) {
+		argc = 1;
+		argv = &default_dir;
+	}
+
+	/*
+	 * Go through and read the label configuration information from every
+	 * possible device, organizing the information according to pool GUID
+	 * and toplevel GUID.
+	 */
+	for (i = 0; i < argc; i++) {
+		char *rdsk;
+		int dfd;
+
+		/* use realpath to normalize the path */
+		if (realpath(argv[i], path) == 0) {
+			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    argv[i]);
+			goto error;
+		}
+		/*
+		 * Append a trailing '/' so that entry names can simply be
+		 * copied in at 'end' to form full paths.
+		 */
+		end = &path[strlen(path)];
+		*end++ = '/';
+		*end = 0;
+		pathleft = &path[sizeof (path)] - end;
+
+		/*
+		 * Using raw devices instead of block devices when we're
+		 * reading the labels skips a bunch of slow operations during
+		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
+		 */
+		if (strcmp(path, "/dev/dsk/") == 0)
+			rdsk = "/dev/rdsk/";
+		else
+			rdsk = path;
+
+		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
+		    (dirp = fdopendir(dfd)) == NULL) {
+			/* Save errno before close() can overwrite it. */
+			int err = errno;
+
+			/*
+			 * If open64() succeeded but fdopendir() did not, the
+			 * descriptor is still ours and must be closed here.
+			 */
+			if (dfd >= 0)
+				(void) close(dfd);
+
+			/* strerror() text is data, never a format string. */
+			zfs_error_aux(hdl, "%s", strerror(err));
+			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    rdsk);
+			goto error;
+		}
+
+		/*
+		 * This is not MT-safe, but we have no MT consumers of libzfs
+		 */
+		while ((dp = readdir64(dirp)) != NULL) {
+			const char *name = dp->d_name;
+			if (name[0] == '.' &&
+			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+				continue;
+
+			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
+				continue;
+
+			/*
+			 * Ignore failed stats.  We only want regular
+			 * files, character devs and block devs.
+			 */
+			if (fstat64(fd, &statbuf) != 0 ||
+			    (!S_ISREG(statbuf.st_mode) &&
+			    !S_ISCHR(statbuf.st_mode) &&
+			    !S_ISBLK(statbuf.st_mode))) {
+				(void) close(fd);
+				continue;
+			}
+
+			if ((zpool_read_label(fd, &config)) != 0) {
+				(void) close(fd);
+				(void) no_memory(hdl);
+				goto error;
+			}
+
+			(void) close(fd);
+
+			if (config != NULL) {
+				boolean_t matched = B_TRUE;
+
+				if (poolname != NULL) {
+					char *pname;
+
+					matched = nvlist_lookup_string(config,
+					    ZPOOL_CONFIG_POOL_NAME,
+					    &pname) == 0 &&
+					    strcmp(poolname, pname) == 0;
+				} else if (guid != 0) {
+					uint64_t this_guid;
+
+					matched = nvlist_lookup_uint64(config,
+					    ZPOOL_CONFIG_POOL_GUID,
+					    &this_guid) == 0 &&
+					    guid == this_guid;
+				}
+				if (!matched) {
+					nvlist_free(config);
+					config = NULL;
+					continue;
+				}
+				/* use the non-raw path for the config */
+				(void) strlcpy(end, name, pathleft);
+				/* add_config() takes ownership of 'config'. */
+				if (add_config(hdl, &pools, path, config) != 0)
+					goto error;
+			}
+		}
+
+		(void) closedir(dirp);
+		dirp = NULL;
+	}
+
+	ret = get_configs(hdl, &pools, active_ok);
+
+	/*
+	 * Common exit: reached on success as well, since the intermediate
+	 * lists are no longer needed once get_configs() has run.
+	 */
+error:
+	for (pe = pools.pools; pe != NULL; pe = penext) {
+		penext = pe->pe_next;
+		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
+			venext = ve->ve_next;
+			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
+				cenext = ce->ce_next;
+				if (ce->ce_config)
+					nvlist_free(ce->ce_config);
+				free(ce);
+			}
+			free(ve);
+		}
+		free(pe);
+	}
+
+	for (ne = pools.names; ne != NULL; ne = nenext) {
+		nenext = ne->ne_next;
+		if (ne->ne_name)
+			free(ne->ne_name);
+		free(ne);
+	}
+
+	if (dirp)
+		(void) closedir(dirp);
+
+	return (ret);
+}
+
+/*
+ * Find every pool stored on the devices in the given directories.  Calls
+ * zpool_find_import_impl() with active_ok = B_FALSE and no name or guid
+ * filter.
+ */
+nvlist_t *
+zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
+}
+
+/*
+ * Like zpool_find_import(), but only labels whose pool name equals 'pool'
+ * are considered.
+ */
+nvlist_t *
+zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
+    char *pool)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
+}
+
+/*
+ * Like zpool_find_import(), but only labels whose pool guid equals 'guid'
+ * are considered.  A 'guid' of 0 disables the filter.
+ */
+nvlist_t *
+zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
+    uint64_t guid)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
+}
+
+/*
+ * Like zpool_find_import(), but with active_ok = B_TRUE, so get_configs()
+ * does not drop pools that are currently active and returns the assembled
+ * configs without refreshing them.
+ */
+nvlist_t *
+zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
+}
+
+/*
+ * Given a cache file, return the contents as a list of importable pools.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.
+ *
+ * The file is read in full and unpacked as an nvlist of per-pool configs.
+ * Entries that fail the name/guid filter or that pool_active() reports as
+ * active are skipped; each of the others is passed through refresh_config()
+ * and added to the result under its original pair name.
+ *
+ * Returns the (possibly empty) result nvlist, or NULL on any failure.
+ */
+nvlist_t *
+zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
+    char *poolname, uint64_t guid)
+{
+	struct stat64 st;
+	nvlist_t *cache, *cfg, *fresh;
+	nvlist_t *found;
+	nvpair_t *pair;
+	char *contents;
+	char *cfg_name;
+	uint64_t cfg_guid;
+	boolean_t is_active;
+	int fd;
+
+	verify(poolname == NULL || guid == 0);
+
+	fd = open(cachefile, O_RDONLY);
+	if (fd < 0) {
+		zfs_error_aux(hdl, "%s", strerror(errno));
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
+		return (NULL);
+	}
+
+	if (fstat64(fd, &st) != 0) {
+		zfs_error_aux(hdl, "%s", strerror(errno));
+		(void) close(fd);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
+		return (NULL);
+	}
+
+	contents = zfs_alloc(hdl, st.st_size);
+	if (contents == NULL) {
+		(void) close(fd);
+		return (NULL);
+	}
+
+	/* A short read is treated the same as a failed one. */
+	if (read(fd, contents, st.st_size) != st.st_size) {
+		(void) close(fd);
+		free(contents);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN,
+		    "failed to read cache file contents"));
+		return (NULL);
+	}
+
+	(void) close(fd);
+
+	if (nvlist_unpack(contents, st.st_size, &cache, 0) != 0) {
+		free(contents);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN,
+		    "invalid or corrupt cache file contents"));
+		return (NULL);
+	}
+
+	free(contents);
+
+	/*
+	 * Go through and get the current state of the pools and refresh their
+	 * state.
+	 */
+	if (nvlist_alloc(&found, 0, 0) != 0) {
+		(void) no_memory(hdl);
+		nvlist_free(cache);
+		return (NULL);
+	}
+
+	for (pair = nvlist_next_nvpair(cache, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(cache, pair)) {
+		verify(nvpair_value_nvlist(pair, &cfg) == 0);
+
+		verify(nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME,
+		    &cfg_name) == 0);
+		if (poolname != NULL && strcmp(poolname, cfg_name) != 0)
+			continue;
+
+		verify(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID,
+		    &cfg_guid) == 0);
+		if (guid != 0 && guid != cfg_guid)
+			continue;
+
+		if (pool_active(hdl, cfg_name, cfg_guid, &is_active) != 0)
+			goto fail;
+
+		if (is_active)
+			continue;
+
+		if ((fresh = refresh_config(hdl, cfg)) == NULL)
+			goto fail;
+
+		if (nvlist_add_nvlist(found, nvpair_name(pair), fresh) != 0) {
+			(void) no_memory(hdl);
+			nvlist_free(fresh);
+			goto fail;
+		}
+		/* 'fresh' is released here whether or not it was added. */
+		nvlist_free(fresh);
+	}
+
+	nvlist_free(cache);
+	return (found);
+
+fail:
+	nvlist_free(cache);
+	nvlist_free(found);
+	return (NULL);
+}
+
+
+/*
+ * Search the vdev tree rooted at 'nv' (depth first, through
+ * ZPOOL_CONFIG_CHILDREN) for a vdev whose ZPOOL_CONFIG_GUID equals 'guid'.
+ * Every vdev visited must carry a guid.  Returns B_TRUE if one matches and
+ * B_FALSE otherwise.
+ */
+boolean_t
+find_guid(nvlist_t *nv, uint64_t guid)
+{
+	nvlist_t **kids;
+	uint_t nkids, k;
+	uint64_t this_guid;
+
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &this_guid) == 0);
+	if (this_guid == guid)
+		return (B_TRUE);
+
+	/* A vdev without children is a leaf: nothing more to search. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_FALSE);
+
+	for (k = 0; k < nkids; k++) {
+		if (find_guid(kids[k], guid))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Search state handed to find_aux() through zpool_iter().
+ *
+ *	cb_type		name of the nvlist array under the root vdev to search
+ *			(ZPOOL_CONFIG_SPARES or ZPOOL_CONFIG_L2CACHE)
+ *	cb_guid		vdev guid being looked for
+ *	cb_zhp		set by find_aux() to the still-open handle of the pool
+ *			containing the device; stays NULL if none is found
+ */
+typedef struct aux_cbdata {
+	const char	*cb_type;
+	uint64_t	cb_guid;
+	zpool_handle_t	*cb_zhp;
+} aux_cbdata_t;
+
+/*
+ * zpool_iter() callback: check whether pool 'zhp' lists a device with guid
+ * 'cb_guid' in the 'cb_type' array of its root vdev ('data' is an
+ * aux_cbdata_t).
+ *
+ * On a match the handle is stored in 'cb_zhp' and left open for the caller to
+ * close, and 1 is returned.  Otherwise the handle is closed here and 0 is
+ * returned.
+ */
+static int
+find_aux(zpool_handle_t *zhp, void *data)
+{
+	aux_cbdata_t *want = data;
+	nvlist_t *root;
+	nvlist_t **devs;
+	uint_t ndevs, n;
+	uint64_t dev_guid;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &root) == 0);
+
+	/* This pool has no devices of the requested kind at all. */
+	if (nvlist_lookup_nvlist_array(root, want->cb_type,
+	    &devs, &ndevs) != 0) {
+		zpool_close(zhp);
+		return (0);
+	}
+
+	for (n = 0; n < ndevs; n++) {
+		verify(nvlist_lookup_uint64(devs[n],
+		    ZPOOL_CONFIG_GUID, &dev_guid) == 0);
+		if (dev_guid != want->cb_guid)
+			continue;
+
+		want->cb_zhp = zhp;
+		return (1);
+	}
+
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * Determines if the device open on 'fd' is in use by a pool, based on the
+ * label read from it.
+ *
+ * Returns 0 on success and -1 on error.  On success '*inuse' says whether the
+ * device is in use; it is B_FALSE when the device carries no usable label.
+ * When '*inuse' is B_TRUE, '*state' holds the pool state and '*namestr' the
+ * pool name.  The name is a newly allocated string that must be freed by the
+ * caller.  '*state' and '*namestr' are not written when the device is not in
+ * use.
+ */
+int
+zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
+    boolean_t *inuse)
+{
+	nvlist_t *config;
+	char *name;
+	boolean_t ret;
+	uint64_t guid, vdev_guid;
+	zpool_handle_t *zhp;
+	nvlist_t *pool_config;
+	uint64_t stateval, isspare;
+	aux_cbdata_t cb = { 0 };
+	boolean_t isactive;
+
+	*inuse = B_FALSE;
+
+	if (zpool_read_label(fd, &config) != 0) {
+		(void) no_memory(hdl);
+		return (-1);
+	}
+
+	if (config == NULL)
+		return (0);
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &stateval) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+	    &vdev_guid) == 0);
+
+	/*
+	 * The pool name and guid are required only for devices that are not
+	 * spares or l2cache devices; for those two states 'name' is filled in
+	 * below from the pool that is found to be using the device.
+	 */
+	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+	}
+
+	switch (stateval) {
+	case POOL_STATE_EXPORTED:
+		ret = B_TRUE;
+		break;
+
+	case POOL_STATE_ACTIVE:
+		/*
+		 * For an active pool, we have to determine if it's really part
+		 * of a currently active pool (in which case the pool will exist
+		 * and the guid will be the same), or whether it's part of an
+		 * active pool that was disconnected without being explicitly
+		 * exported.
+		 */
+		if (pool_active(hdl, name, guid, &isactive) != 0) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if (isactive) {
+			/*
+			 * Because the device may have been removed while
+			 * offlined, we only report it as active if the vdev is
+			 * still present in the config.  Otherwise, pretend like
+			 * it's not in use.
+			 */
+			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
+			    (pool_config = zpool_get_config(zhp, NULL))
+			    != NULL) {
+				nvlist_t *nvroot;
+
+				verify(nvlist_lookup_nvlist(pool_config,
+				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+				ret = find_guid(nvroot, vdev_guid);
+			} else {
+				ret = B_FALSE;
+			}
+
+			/*
+			 * If this is an active spare within another pool, we
+			 * treat it like an unused hot spare.  This allows the
+			 * user to create a pool with a hot spare that is
+			 * currently in use within another pool.  Since we
+			 * return B_TRUE, libdiskmgt will continue to prevent
+			 * generic consumers from using the device.
+			 */
+			if (ret && nvlist_lookup_uint64(config,
+			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
+				stateval = POOL_STATE_SPARE;
+
+			if (zhp != NULL)
+				zpool_close(zhp);
+		} else {
+			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
+			ret = B_TRUE;
+		}
+		break;
+
+	case POOL_STATE_SPARE:
+		/*
+		 * For a hot spare, it can be either definitively in use, or
+		 * potentially active.  To determine if it's in use, we iterate
+		 * over all pools in the system and search for one with a spare
+		 * with a matching guid.
+		 *
+		 * Due to the shared nature of spares, we don't actually report
+		 * the potentially active case as in use.  This means the user
+		 * can freely create pools on the hot spares of exported pools,
+		 * but to do otherwise makes the resulting code complicated, and
+		 * we end up having to deal with this case anyway.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		cb.cb_type = ZPOOL_CONFIG_SPARES;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
+			/*
+			 * 'name' is read from the handle find_aux() left open
+			 * in cb.cb_zhp, so that handle is closed only after
+			 * the name has been duplicated below.
+			 */
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = B_TRUE;
+		} else {
+			ret = B_FALSE;
+		}
+		break;
+
+	case POOL_STATE_L2CACHE:
+
+		/*
+		 * Check if any pool is currently using this l2cache device.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = B_TRUE;
+		} else {
+			ret = B_FALSE;
+		}
+		break;
+
+	default:
+		ret = B_FALSE;
+	}
+
+
+	if (ret) {
+		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
+			if (cb.cb_zhp)
+				zpool_close(cb.cb_zhp);
+			nvlist_free(config);
+			return (-1);
+		}
+		*state = (pool_state_t)stateval;
+	}
+
+	if (cb.cb_zhp)
+		zpool_close(cb.cb_zhp);
+
+	nvlist_free(config);
+	*inuse = ret;
+	return (0);
+}
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
new file mode 100644
index 000000000..97a48c3ee
--- /dev/null
+++ b/lib/libzfs/libzfs_mount.c
@@ -0,0 +1,1409 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Routines to manage ZFS mounts.  We separate all the nasty routines that have
+ * to deal with the OS.  The following functions are the main entry points --
+ * they are used by mount and unmount and when changing a filesystem's
+ * mountpoint.
+ *
+ * 	zfs_is_mounted()
+ * 	zfs_mount()
+ * 	zfs_unmount()
+ * 	zfs_unmountall()
+ *
+ * This file also contains the functions used to manage sharing filesystems via
+ * NFS and iSCSI:
+ *
+ * 	zfs_is_shared()
+ * 	zfs_share()
+ * 	zfs_unshare()
+ *
+ * 	zfs_is_shared_nfs()
+ * 	zfs_is_shared_smb()
+ * 	zfs_is_shared_iscsi()
+ * 	zfs_share_proto()
+ * 	zfs_shareall()
+ * 	zfs_share_iscsi()
+ * 	zfs_unshare_nfs()
+ * 	zfs_unshare_smb()
+ * 	zfs_unshareall_nfs()
+ *	zfs_unshareall_smb()
+ *	zfs_unshareall()
+ *	zfs_unshareall_bypath()
+ * 	zfs_unshare_iscsi()
+ *
+ * The following functions are available for pool consumers, and will
+ * mount/unmount and share/unshare all datasets within pool:
+ *
+ * 	zpool_enable_datasets()
+ * 	zpool_disable_datasets()
+ */
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+#include <libshare.h>
+#include <sys/systeminfo.h>
+#define	MAXISALEN	257	/* based on sysinfo(2) man page */
+
+/* Forward declarations for share helpers that are defined later in this file. */
+static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
+zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
+    zfs_share_proto_t);
+
+/*
+ * Entry points looked up in libiscsitgt by zfs_iscsi_init().  Either all four
+ * are set or, if the library or any symbol is missing, all four are NULL.
+ */
+static int (*iscsitgt_zfs_share)(const char *);
+static int (*iscsitgt_zfs_unshare)(const char *);
+static int (*iscsitgt_zfs_is_shared)(const char *);
+static int (*iscsitgt_svc_online)(void);
+
+/*
+ * The share protocols table must be in the same order as the
+ * zfs_share_proto_t enum in libzfs_impl.h
+ */
+typedef struct {
+	zfs_prop_t p_prop;	/* property holding the share options */
+	char *p_name;		/* protocol name as used in the sharetab */
+	int p_share_err;	/* error code reported when sharing fails */
+	int p_unshare_err;	/* presumably the code for unshare failures */
+} proto_table_t;
+
+/* Per-protocol descriptors, indexed directly by a zfs_share_proto_t value. */
+proto_table_t proto_table[PROTO_END] = {
+	{ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
+	{ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
+};
+
+/*
+ * PROTO_END-terminated protocol lists handed to zfs_share_proto(),
+ * zfs_unshare_proto() and zfs_unshareall_proto().
+ */
+
+/* NFS only. */
+zfs_share_proto_t nfs_only[] = {
+	PROTO_NFS,
+	PROTO_END
+};
+
+/* SMB only. */
+zfs_share_proto_t smb_only[] = {
+	PROTO_SMB,
+	PROTO_END
+};
+/* Every protocol present in proto_table. */
+zfs_share_proto_t share_all_proto[] = {
+	PROTO_NFS,
+	PROTO_SMB,
+	PROTO_END
+};
+
+/*
+ * Register zfs_iscsi_init() to run when the library is loaded: via the
+ * constructor attribute under GCC, via #pragma init otherwise.
+ */
+#ifdef __GNUC__
+static void
+zfs_iscsi_init(void) __attribute__((constructor));
+#else
+#pragma init(zfs_iscsi_init)
+#endif
+
+/*
+ * Bind the optional libiscsitgt entry points.  Lookup stops at the first
+ * failure, and on any failure every pointer is reset so that callers only
+ * ever see the complete set or nothing at all.
+ */
+static void
+zfs_iscsi_init(void)
+{
+	void *handle;
+
+	handle = dlopen("/lib/libiscsitgt.so.1", RTLD_LAZY | RTLD_GLOBAL);
+	if (handle == NULL)
+		goto unavailable;
+
+	iscsitgt_zfs_share = (int (*)(const char *))dlsym(handle,
+	    "iscsitgt_zfs_share");
+	if (iscsitgt_zfs_share == NULL)
+		goto unavailable;
+
+	iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(handle,
+	    "iscsitgt_zfs_unshare");
+	if (iscsitgt_zfs_unshare == NULL)
+		goto unavailable;
+
+	iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(handle,
+	    "iscsitgt_zfs_is_shared");
+	if (iscsitgt_zfs_is_shared == NULL)
+		goto unavailable;
+
+	iscsitgt_svc_online = (int (*)(void))dlsym(handle,
+	    "iscsitgt_svc_online");
+	if (iscsitgt_svc_online == NULL)
+		goto unavailable;
+
+	return;
+
+unavailable:
+	iscsitgt_zfs_share = NULL;
+	iscsitgt_zfs_unshare = NULL;
+	iscsitgt_zfs_is_shared = NULL;
+	iscsitgt_svc_online = NULL;
+}
+
+/*
+ * Scan the sharetab stream cached in 'hdl' for a line whose first
+ * tab-separated field equals 'mountpoint' and whose third field equals the
+ * name of 'proto'.  Returns the matching zfs_share_type_t value, or
+ * SHARED_NOT_SHARED when no sharetab is open or no such line exists.
+ */
+static zfs_share_type_t
+is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
+{
+	char line[MAXPATHLEN];
+	char *sep;
+	char *field;
+
+	if (hdl->libzfs_sharetab == NULL)
+		return (SHARED_NOT_SHARED);
+
+	/* always scan from the top of the file */
+	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
+
+	while (fgets(line, sizeof (line), hdl->libzfs_sharetab) != NULL) {
+
+		/* field 1: the mountpoint */
+		sep = strchr(line, '\t');
+		if (sep == NULL)
+			continue;
+		*sep = '\0';
+		if (strcmp(line, mountpoint) != 0)
+			continue;
+
+		/* field 2: skipped, only its terminating tab matters */
+		field = sep + 1;
+		sep = strchr(field, '\t');
+		if (sep == NULL)
+			continue;
+
+		/* field 3: the protocol name */
+		field = sep + 1;
+		sep = strchr(field, '\t');
+		if (sep == NULL)
+			continue;
+		*sep = '\0';
+
+		if (strcmp(field, proto_table[proto].p_name) != 0)
+			continue;
+
+		if (proto == PROTO_NFS)
+			return (SHARED_NFS);
+		if (proto == PROTO_SMB)
+			return (SHARED_SMB);
+		return (0);
+	}
+
+	return (SHARED_NOT_SHARED);
+}
+
+/*
+ * Report whether 'dirname' contains no entries other than "." and "..".
+ * A directory that cannot be opened is reported as empty so that the
+ * subsequent mount attempt fails with a more informative error message.
+ */
+static boolean_t
+dir_is_empty(const char *dirname)
+{
+	DIR *dir;
+	struct dirent64 *ent;
+	boolean_t empty = B_TRUE;
+
+	dir = opendir(dirname);
+	if (dir == NULL)
+		return (B_TRUE);
+
+	/* stop at the first entry that is neither "." nor ".." */
+	while (empty && (ent = readdir64(dir)) != NULL) {
+		if (strcmp(ent->d_name, ".") != 0 &&
+		    strcmp(ent->d_name, "..") != 0)
+			empty = B_FALSE;
+	}
+
+	(void) closedir(dir);
+	return (empty);
+}
+
+/*
+ * Check whether the ZFS dataset 'special' has an entry in the mnttab stream
+ * cached in 'zfs_hdl'.  Returns B_TRUE if it does, B_FALSE otherwise.  When
+ * mounted and 'where' is non-NULL, '*where' receives a zfs_strdup() copy of
+ * the current mountpoint.
+ */
+boolean_t
+is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
+{
+	struct mnttab want = { 0 };
+	struct mnttab found;
+
+	/*
+	 * Match on the special device and filesystem type only; the mountpoint
+	 * is not needed for the lookup, which also lets this find mounts whose
+	 * mountpoint property is 'legacy'.
+	 */
+	want.mnt_special = (char *)special;
+	want.mnt_fstype = MNTTYPE_ZFS;
+
+	rewind(zfs_hdl->libzfs_mnttab);
+	if (getmntany(zfs_hdl->libzfs_mnttab, &found, &want) == 0) {
+		if (where != NULL)
+			*where = zfs_strdup(zfs_hdl, found.mnt_mountp);
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Handle-based wrapper around is_mounted(): looks the dataset up by the name
+ * reported by zfs_get_name().
+ */
+boolean_t
+zfs_is_mounted(zfs_handle_t *zhp, char **where)
+{
+	const char *dataset = zfs_get_name(zhp);
+
+	return (is_mounted(zhp->zfs_hdl, dataset, where));
+}
+
+/*
+ * Decide whether 'zhp' may be mounted.  The mountpoint property is written
+ * to 'buf' whenever the property applies to this dataset type.  Returns
+ * B_FALSE when the property does not apply, the mountpoint is 'none' or
+ * 'legacy', canmount is off, or the dataset is zoned and we are running in
+ * the global zone.  On B_TRUE, and only then, '*source' (if non-NULL)
+ * receives the source of the mountpoint property.
+ */
+static boolean_t
+zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
+    zprop_source_t *source)
+{
+	zprop_source_t origin;
+	char origin_name[ZFS_MAXNAMELEN];
+
+	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
+		return (B_FALSE);
+
+	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
+	    &origin, origin_name, sizeof (origin_name), B_FALSE) == 0);
+
+	/* the checks run in this order and stop at the first that applies */
+	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
+	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0 ||
+	    zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF ||
+	    (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
+	    getzoneid() == GLOBAL_ZONEID))
+		return (B_FALSE);
+
+	if (source != NULL)
+		*source = origin;
+
+	return (B_TRUE);
+}
+
+/*
+ * Mount the given filesystem at its mountpoint property.
+ *
+ * 'options' (may be NULL) is copied into the option string passed to
+ * mount(2); 'flags' is OR-ed with MS_OPTIONSTR.  Datasets that
+ * zfs_is_mountable() rejects are silently skipped.  Returns 0 on success or
+ * skip, otherwise the result of zfs_error_fmt() with EZFS_MOUNTFAILED.
+ */
+int
+zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char mountpoint[ZFS_MAXPROPLEN];
+	char mntopts[MNT_LINE_MAX];
+	struct stat st;
+
+	if (options != NULL)
+		(void) strlcpy(mntopts, options, sizeof (mntopts));
+	else
+		mntopts[0] = '\0';
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
+		return (0);
+
+	/* Create the directory if it doesn't already exist */
+	if (lstat(mountpoint, &st) != 0 && mkdirp(mountpoint, 0755) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to create mountpoint"));
+		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+		    mountpoint));
+	}
+
+	/*
+	 * Refuse to mount on top of a non-empty directory.  The check is
+	 * skipped when MS_OVERLAY is given (it would defeat the point) and
+	 * when the options contain 'remount'.
+	 */
+	if ((flags & MS_OVERLAY) == 0 &&
+	    strstr(mntopts, MNTOPT_REMOUNT) == NULL) {
+		if (!dir_is_empty(mountpoint)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "directory is not empty"));
+			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+			    mountpoint));
+		}
+	}
+
+	/* perform the mount */
+	if (mount(zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags,
+	    MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) == 0)
+		return (0);
+
+	/*
+	 * mount() can fail in far too many ways to enumerate; give the two
+	 * most common errors a friendlier message and fall back to strerror()
+	 * for everything else.
+	 */
+	switch (errno) {
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "mountpoint or dataset is busy"));
+		break;
+	case EPERM:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Insufficient privileges"));
+		break;
+	default:
+		zfs_error_aux(hdl, strerror(errno));
+		break;
+	}
+
+	return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+	    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+	    zhp->zfs_name));
+}
+
+/*
+ * Unmount the filesystem mounted at 'mountpoint' with umount2().  Returns 0
+ * on success; on failure records strerror(errno) as the auxiliary message and
+ * returns the result of zfs_error_fmt() with EZFS_UMOUNTFAILED.
+ */
+static int
+unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
+{
+	if (umount2(mountpoint, flags) == 0)
+		return (0);
+
+	zfs_error_aux(hdl, strerror(errno));
+	return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+	    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+	    mountpoint));
+}
+
+/*
+ * Unmount the given filesystem.
+ *
+ * If 'mountpoint' is NULL the current mountpoint is looked up in the cached
+ * mnttab (filesystems only); if the dataset is not mounted nothing is done.
+ * The filesystem is unshared over all protocols before it is unmounted, and
+ * re-shared if the unmount itself fails.  Returns 0 on success or when there
+ * was nothing to do, -1 on failure.
+ */
+int
+zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
+{
+	struct mnttab search = { 0 }, entry;
+	char *mntpt = NULL;
+
+	/* check to see if need to unmount the filesystem */
+	search.mnt_special = zhp->zfs_name;
+	search.mnt_fstype = MNTTYPE_ZFS;
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
+	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
+
+		/*
+		 * mountpoint may have come from a call to
+		 * getmnt/getmntany if it isn't NULL. If it is NULL,
+		 * we know it comes from getmntany which can then get
+		 * overwritten later. We strdup it to play it safe.
+		 */
+		if (mountpoint == NULL)
+			mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
+		else
+			mntpt = zfs_strdup(zhp->zfs_hdl, mountpoint);
+
+		/*
+		 * Unshare and unmount the filesystem.  The private copy of
+		 * the mountpoint must be released on every exit path.
+		 */
+		if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0) {
+			free(mntpt);
+			return (-1);
+		}
+
+		if (unmount_one(zhp->zfs_hdl, mntpt, flags) != 0) {
+			free(mntpt);
+			/* best effort: restore the shares we just removed */
+			(void) zfs_shareall(zhp);
+			return (-1);
+		}
+		free(mntpt);
+	}
+
+	return (0);
+}
+
+/*
+ * Unmount this filesystem together with any children that inherit its
+ * mountpoint property.  This is done by pretending the mountpoint property
+ * is about to change (gathering a changelist and running its prefix step)
+ * and then simply not remounting anything afterwards.
+ */
+int
+zfs_unmountall(zfs_handle_t *zhp, int flags)
+{
+	prop_changelist_t *changes;
+	int err;
+
+	changes = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags);
+	if (changes != NULL) {
+		err = changelist_prefix(changes);
+		changelist_free(changes);
+		return (err);
+	}
+
+	return (-1);
+}
+
+/*
+ * Report whether the dataset is shared at all.  Volumes are checked through
+ * iSCSI; everything else is checked against each protocol in
+ * share_all_proto.
+ */
+boolean_t
+zfs_is_shared(zfs_handle_t *zhp)
+{
+	zfs_share_type_t shared = 0;
+	zfs_share_proto_t *p = share_all_proto;
+
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_is_shared_iscsi(zhp));
+
+	/* accumulate the share-type bits of every protocol */
+	while (*p != PROTO_END) {
+		shared |= zfs_is_shared_proto(zhp, NULL, *p);
+		p++;
+	}
+
+	if (shared)
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * Share a dataset: volumes go through iSCSI, everything else is shared over
+ * all protocols in share_all_proto.
+ */
+int
+zfs_share(zfs_handle_t *zhp)
+{
+	int rv;
+
+	if (ZFS_IS_VOLUME(zhp))
+		rv = zfs_share_iscsi(zhp);
+	else
+		rv = zfs_share_proto(zhp, share_all_proto);
+
+	return (rv);
+}
+
+/*
+ * Unshare a dataset: volumes go through iSCSI, everything else through
+ * zfs_unshareall().
+ */
+int
+zfs_unshare(zfs_handle_t *zhp)
+{
+	int rv;
+
+	if (ZFS_IS_VOLUME(zhp))
+		rv = zfs_unshare_iscsi(zhp);
+	else
+		rv = zfs_unshareall(zhp);
+
+	return (rv);
+}
+
+/*
+ * Check whether the filesystem is currently shared over 'proto'.  Returns
+ * SHARED_NOT_SHARED if it is not mounted or has no matching sharetab entry,
+ * otherwise the value reported by is_shared().  When shared and 'where' is
+ * non-NULL, '*where' receives the allocated mountpoint string; in every other
+ * case the string is freed here.
+ */
+zfs_share_type_t
+zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
+{
+	char *mountpoint;
+	zfs_share_type_t shared;
+
+	if (!zfs_is_mounted(zhp, &mountpoint))
+		return (SHARED_NOT_SHARED);
+
+	shared = is_shared(zhp->zfs_hdl, mountpoint, proto);
+	if (!shared) {
+		free(mountpoint);
+		return (SHARED_NOT_SHARED);
+	}
+
+	/* hand the mountpoint to the caller only if it asked for it */
+	if (where == NULL)
+		free(mountpoint);
+	else
+		*where = mountpoint;
+
+	return (shared);
+}
+
+/* NFS flavour of zfs_is_shared_proto(), reduced to a yes/no answer. */
+boolean_t
+zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
+{
+	zfs_share_type_t type = zfs_is_shared_proto(zhp, where, PROTO_NFS);
+
+	return (type != SHARED_NOT_SHARED);
+}
+
+/* SMB flavour of zfs_is_shared_proto(), reduced to a yes/no answer. */
+boolean_t
+zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
+{
+	zfs_share_type_t type = zfs_is_shared_proto(zhp, where, PROTO_SMB);
+
+	return (type != SHARED_NOT_SHARED);
+}
+
+/*
+ * Make sure things will work if libshare isn't installed by using
+ * wrapper functions that check to see that the pointers to functions
+ * initialized in _zfs_init_libshare() are actually present.
+ */
+
+/*
+ * libshare entry points, resolved with dlsym() in _zfs_init_libshare().
+ * Each one stays NULL when libshare could not be loaded.
+ */
+static sa_handle_t (*_sa_init)(int);
+static void (*_sa_fini)(sa_handle_t);
+static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
+static int (*_sa_enable_share)(sa_share_t, char *);
+static int (*_sa_disable_share)(sa_share_t, char *);
+static char *(*_sa_errorstr)(int);
+static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
+/*
+ * NOTE(review): declared as taking sa_handle_t *, unlike the other entry
+ * points, yet zfs_init_libshare() passes it an sa_handle_t -- confirm
+ * against the libshare prototype.
+ */
+static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
+static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
+static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
+    char *, char *, zprop_source_t, char *, char *, char *);
+static void (*_sa_update_sharetab_ts)(sa_handle_t);
+
+/*
+ * _zfs_init_libshare()
+ *
+ * Find the libshare.so.1 entry points that we use here and save the
+ * values to be used later. This is triggered by the runtime loader.
+ * Make sure the correct ISA version is loaded.
+ *
+ * The lookup is all-or-nothing: if any symbol is missing, the library is
+ * closed again and every saved pointer is reset to NULL, so that no wrapper
+ * can call into an unloaded library.
+ */
+#ifdef __GNUC__
+static void
+_zfs_init_libshare(void) __attribute__((constructor));
+#else
+#pragma init(_zfs_init_libshare)
+#endif
+static void
+_zfs_init_libshare(void)
+{
+	void *libshare;
+	char path[MAXPATHLEN];
+	char isa[MAXISALEN];
+
+	/* 64-bit builds load libshare from the ISA-specific subdirectory */
+#if defined(_LP64)
+	if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1)
+		isa[0] = '\0';
+#else
+	isa[0] = '\0';
+#endif
+	(void) snprintf(path, MAXPATHLEN,
+	    "/usr/lib/%s/libshare.so.1", isa);
+
+	if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
+		_sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
+		_sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
+		_sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
+		    dlsym(libshare, "sa_find_share");
+		_sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
+		    "sa_enable_share");
+		_sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
+		    "sa_disable_share");
+		_sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr");
+		_sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *))
+		    dlsym(libshare, "sa_parse_legacy_options");
+		_sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
+		    dlsym(libshare, "sa_needs_refresh");
+		_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
+		    dlsym(libshare, "sa_get_zfs_handle");
+		_sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
+		    sa_share_t, char *, char *, zprop_source_t, char *,
+		    char *, char *))dlsym(libshare, "sa_zfs_process_share");
+		_sa_update_sharetab_ts = (void (*)(sa_handle_t))
+		    dlsym(libshare, "sa_update_sharetab_ts");
+		if (_sa_init == NULL || _sa_fini == NULL ||
+		    _sa_find_share == NULL || _sa_enable_share == NULL ||
+		    _sa_disable_share == NULL || _sa_errorstr == NULL ||
+		    _sa_parse_legacy_options == NULL ||
+		    _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
+		    _sa_zfs_process_share == NULL ||
+		    _sa_update_sharetab_ts == NULL) {
+			/*
+			 * Reset every pointer, including _sa_find_share,
+			 * before dropping the library: anything left set
+			 * would dangle after dlclose().
+			 */
+			_sa_init = NULL;
+			_sa_fini = NULL;
+			_sa_find_share = NULL;
+			_sa_disable_share = NULL;
+			_sa_enable_share = NULL;
+			_sa_errorstr = NULL;
+			_sa_parse_legacy_options = NULL;
+			_sa_needs_refresh = NULL;
+			_sa_get_zfs_handle = NULL;
+			_sa_zfs_process_share = NULL;
+			_sa_update_sharetab_ts = NULL;
+			(void) dlclose(libshare);
+		}
+	}
+}
+
+/*
+ * zfs_init_libshare(zhandle, service)
+ *
+ * Make sure zhandle carries an initialized libshare handle.  'service' is
+ * handed unchanged to sa_init().  Returns SA_OK on success, SA_CONFIG_ERR
+ * when libshare is not available, and SA_NO_MEMORY when sa_init() did not
+ * produce a handle.
+ */
+int
+zfs_init_libshare(libzfs_handle_t *zhandle, int service)
+{
+	if (_sa_init == NULL)
+		return (SA_CONFIG_ERR);
+
+	if (zhandle->libzfs_shareflags & ZFSSHARE_MISS) {
+		/*
+		 * An earlier lookup missed the cache, most likely because a
+		 * ZFS dataset was only just created.  Clear the flag and ask
+		 * libshare whether some other process has changed the
+		 * configuration since; if so, throw the handle away and
+		 * build a fresh one.
+		 */
+		zhandle->libzfs_shareflags &= ~ZFSSHARE_MISS;
+		if (_sa_needs_refresh != NULL &&
+		    _sa_needs_refresh(zhandle->libzfs_sharehdl)) {
+			zfs_uninit_libshare(zhandle);
+			zhandle->libzfs_sharehdl = _sa_init(service);
+		}
+	}
+
+	/*
+	 * NOTE(review): zhandle has already been dereferenced above, so the
+	 * NULL test below cannot protect anything; it is kept as found.
+	 */
+	if (zhandle && zhandle->libzfs_sharehdl == NULL)
+		zhandle->libzfs_sharehdl = _sa_init(service);
+
+	if (zhandle->libzfs_sharehdl == NULL)
+		return (SA_NO_MEMORY);
+
+	return (SA_OK);
+}
+
+/*
+ * zfs_uninit_libshare(zhandle)
+ *
+ * Release the libshare handle held by zhandle, if there is one, and clear
+ * it.  A NULL zhandle or an already-cleared handle is a no-op, so calling
+ * this repeatedly is fine.
+ */
+void
+zfs_uninit_libshare(libzfs_handle_t *zhandle)
+{
+	if (zhandle == NULL || zhandle->libzfs_sharehdl == NULL)
+		return;
+
+	if (_sa_fini != NULL)
+		_sa_fini(zhandle->libzfs_sharehdl);
+
+	zhandle->libzfs_sharehdl = NULL;
+}
+
+/*
+ * zfs_parse_options(options, proto)
+ *
+ * Run 'options' through libshare's legacy option parser for the given
+ * protocol.  A NULL group argument is passed, which marks the call as
+ * "parse only".  Returns SA_CONFIG_ERR when libshare is not available.
+ */
+int
+zfs_parse_options(char *options, zfs_share_proto_t proto)
+{
+	if (_sa_parse_legacy_options == NULL)
+		return (SA_CONFIG_ERR);
+
+	return (_sa_parse_legacy_options(NULL, options,
+	    proto_table[proto].p_name));
+}
+
+/*
+ * zfs_sa_find_share(handle, path)
+ *
+ * Guarded call to sa_find_share(): look up the share for 'path' in the
+ * libshare configuration.  Returns NULL when libshare is not available.
+ */
+static sa_share_t
+zfs_sa_find_share(sa_handle_t handle, char *path)
+{
+	if (_sa_find_share == NULL)
+		return (NULL);
+
+	return (_sa_find_share(handle, path));
+}
+
+/*
+ * zfs_sa_enable_share(share, proto)
+ *
+ * Guarded call to sa_enable_share(): enable 'share' for the named protocol.
+ * Returns SA_CONFIG_ERR when libshare is not available.
+ */
+static int
+zfs_sa_enable_share(sa_share_t share, char *proto)
+{
+	if (_sa_enable_share == NULL)
+		return (SA_CONFIG_ERR);
+
+	return (_sa_enable_share(share, proto));
+}
+
+/*
+ * zfs_sa_disable_share(share, proto)
+ *
+ * Guarded call to sa_disable_share(): disable 'share' for the named
+ * protocol.  Returns SA_CONFIG_ERR when libshare is not available.
+ */
+static int
+zfs_sa_disable_share(sa_share_t share, char *proto)
+{
+	if (_sa_disable_share == NULL)
+		return (SA_CONFIG_ERR);
+
+	return (_sa_disable_share(share, proto));
+}
+
+/*
+ * Share 'zhp' over every protocol in the PROTO_END-terminated list 'proto',
+ * using the options stored in each protocol's share property (sharenfs,
+ * sharesmb).  The real work is delegated to libshare.
+ *
+ * Datasets that are not mountable, protocols whose property cannot be read
+ * or is "off", and zoned datasets are skipped and count as success.
+ * Returns 0 on success and -1 on the first failure.
+ */
+static int
+zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char mountpoint[ZFS_MAXPROPLEN];
+	char shareopts[ZFS_MAXPROPLEN];
+	char sourcestr[ZFS_MAXPROPLEN];
+	zprop_source_t sourcetype;
+	zfs_share_proto_t *curr_proto;
+	proto_table_t *entry;
+	sa_share_t share;
+	int ret;
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
+		return (0);
+
+	ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API);
+	if (ret != SA_OK) {
+		(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
+		    zfs_get_name(zhp), _sa_errorstr != NULL ?
+		    _sa_errorstr(ret) : "");
+		return (-1);
+	}
+
+	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
+		entry = &proto_table[*curr_proto];
+
+		/* nothing to do when there are no share options */
+		if (zfs_prop_get(zhp, entry->p_prop,
+		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
+		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
+		    strcmp(shareopts, "off") == 0)
+			continue;
+
+		/*
+		 * A zoned dataset only gets this far inside a local zone
+		 * (zfs_is_mountable() already rejected it in the global
+		 * zone).  Local zones cannot be NFS servers, so skip it
+		 * there as well.
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
+			continue;
+
+		share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
+
+		/*
+		 * A miss may simply mean the filesystem was just created and
+		 * is not in libshare's internal cache yet.  Instead of
+		 * reloading the whole configuration, trust that ZFS has done
+		 * the checking, add the share to the internal configuration
+		 * directly and look it up again.  If adding fails, share
+		 * stays NULL and is reported below.
+		 */
+		if (share == NULL &&
+		    _sa_zfs_process_share(hdl->libzfs_sharehdl,
+		    NULL, NULL, mountpoint, entry->p_name, sourcetype,
+		    shareopts, sourcestr, zhp->zfs_name) == SA_OK) {
+			hdl->libzfs_shareflags |= ZFSSHARE_MISS;
+			share = zfs_sa_find_share(hdl->libzfs_sharehdl,
+			    mountpoint);
+		}
+
+		if (share == NULL ||
+		    zfs_sa_enable_share(share, entry->p_name) != SA_OK) {
+			(void) zfs_error_fmt(hdl, entry->p_share_err,
+			    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+			    zfs_get_name(zhp));
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+
+/* Share the dataset over NFS only. */
+int
+zfs_share_nfs(zfs_handle_t *zhp)
+{
+	zfs_share_proto_t *protocols = nfs_only;
+
+	return (zfs_share_proto(zhp, protocols));
+}
+
+/* Share the dataset over SMB only. */
+int
+zfs_share_smb(zfs_handle_t *zhp)
+{
+	zfs_share_proto_t *protocols = smb_only;
+
+	return (zfs_share_proto(zhp, protocols));
+}
+
+/* Share the dataset over every protocol listed in share_all_proto. */
+int
+zfs_shareall(zfs_handle_t *zhp)
+{
+	zfs_share_proto_t *protocols = share_all_proto;
+
+	return (zfs_share_proto(zhp, protocols));
+}
+
+/*
+ * Unshare a filesystem by mountpoint.
+ *
+ * 'name' is the dataset name used in error messages, 'mountpoint' the path
+ * to look up in the libshare configuration, and 'proto' the protocol to
+ * disable.  Returns 0 on success, otherwise the result of zfs_error_fmt().
+ */
+static int
+unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
+    zfs_share_proto_t proto)
+{
+	sa_share_t share;
+	int err;
+	char *mntpt;
+	/*
+	 * Mountpoint could get trashed if libshare calls getmntany
+	 * which it does during API initialization, so strdup the
+	 * value.
+	 */
+	mntpt = zfs_strdup(hdl, mountpoint);
+
+	/*
+	 * make sure libshare initialized.  Initialization fails with
+	 * SA_CONFIG_ERR precisely when libshare could not be loaded, in
+	 * which case _sa_errorstr is NULL too and must not be called.
+	 */
+	if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
+		free(mntpt);	/* don't need the copy anymore */
+		return (zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+		    name, _sa_errorstr != NULL ? _sa_errorstr(err) : ""));
+	}
+
+	share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
+	free(mntpt);	/* don't need the copy anymore */
+
+	/*
+	 * NOTE(review): both failure paths below report
+	 * EZFS_UNSHARENFSFAILED whatever 'proto' is, although proto_table
+	 * carries a per-protocol p_unshare_err -- confirm this is intended.
+	 */
+	if (share != NULL) {
+		err = zfs_sa_disable_share(share, proto_table[proto].p_name);
+		if (err != SA_OK) {
+			return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+			    name, _sa_errorstr != NULL ?
+			    _sa_errorstr(err) : ""));
+		}
+	} else {
+		return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
+		    name));
+	}
+	return (0);
+}
+
+/*
+ * Unshare the given filesystem over each protocol in the PROTO_END-terminated
+ * list 'proto'.  If 'mountpoint' is NULL the current mountpoint is taken from
+ * the cached mnttab (filesystems only); when none is found there is nothing
+ * to do.  Protocols the mountpoint is not shared over are skipped.  Returns 0
+ * on success, -1 as soon as one unshare fails.
+ */
+int
+zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
+    zfs_share_proto_t *proto)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	struct mnttab want = { 0 };
+	struct mnttab found;
+	zfs_share_proto_t *p;
+	char *path;
+	int rv = 0;
+
+	/* work out which mountpoint, if any, has to be unshared */
+	want.mnt_special = (char *)zfs_get_name(zhp);
+	want.mnt_fstype = MNTTYPE_ZFS;
+	rewind(hdl->libzfs_mnttab);
+
+	if (mountpoint != NULL) {
+		path = zfs_strdup(hdl, mountpoint);
+	} else if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM &&
+	    getmntany(hdl->libzfs_mnttab, &found, &want) == 0) {
+		path = zfs_strdup(hdl, found.mnt_mountp);
+	} else {
+		return (0);
+	}
+
+	for (p = proto; *p != PROTO_END; p++) {
+		if (is_shared(hdl, path, *p) &&
+		    unshare_one(hdl, zhp->zfs_name, path, *p) != 0) {
+			rv = -1;
+			break;
+		}
+	}
+
+	free(path);
+	return (rv);
+}
+
+/* Unshare the dataset (or the given mountpoint) over NFS only. */
+int
+zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
+{
+	zfs_share_proto_t *protocols = nfs_only;
+
+	return (zfs_unshare_proto(zhp, mountpoint, protocols));
+}
+
+/* Unshare the dataset (or the given mountpoint) over SMB only. */
+int
+zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
+{
+	zfs_share_proto_t *protocols = smb_only;
+
+	return (zfs_unshare_proto(zhp, mountpoint, protocols));
+}
+
+/*
+ * Same idea as zfs_unmountall(), but for NFS and SMB unshares: gather a
+ * changelist and unshare every dataset in it over the protocols in 'proto'.
+ * Returns -1 if the changelist cannot be gathered, otherwise the result of
+ * changelist_unshare().
+ *
+ * NOTE(review): the changelist is always gathered for ZFS_PROP_SHARENFS,
+ * even when 'proto' names SMB only -- confirm this is intended.
+ */
+int
+zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
+{
+	prop_changelist_t *changes;
+	int err;
+
+	changes = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
+	if (changes != NULL) {
+		err = changelist_unshare(changes, proto);
+		changelist_free(changes);
+		return (err);
+	}
+
+	return (-1);
+}
+
+/* zfs_unshareall_proto() restricted to NFS. */
+int
+zfs_unshareall_nfs(zfs_handle_t *zhp)
+{
+	zfs_share_proto_t *protocols = nfs_only;
+
+	return (zfs_unshareall_proto(zhp, protocols));
+}
+
+/* zfs_unshareall_proto() restricted to SMB. */
+int
+zfs_unshareall_smb(zfs_handle_t *zhp)
+{
+	zfs_share_proto_t *protocols = smb_only;
+
+	return (zfs_unshareall_proto(zhp, protocols));
+}
+
+/* zfs_unshareall_proto() over every protocol in share_all_proto. */
+int
+zfs_unshareall(zfs_handle_t *zhp)
+{
+	zfs_share_proto_t *protocols = share_all_proto;
+
+	return (zfs_unshareall_proto(zhp, protocols));
+}
+
+/* Unshare the given mountpoint over every protocol in share_all_proto. */
+int
+zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
+{
+	zfs_share_proto_t *protocols = share_all_proto;
+
+	return (zfs_unshare_proto(zhp, mountpoint, protocols));
+}
+
+/*
+ * Remove the mountpoint directory associated with the current dataset, if
+ * appropriate.  The directory is only removed when:
+ *
+ *	- zfs_is_mountable() accepts the dataset (so the mountpoint is
+ *	  neither 'none' nor 'legacy', among other checks)
+ *	- the mountpoint property is the default or inherited
+ *
+ * Any other directories we leave alone.
+ */
+void
+remove_mountpoint(zfs_handle_t *zhp)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	zprop_source_t source;
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
+	    &source))
+		return;
+
+	if (source != ZPROP_SRC_DEFAULT && source != ZPROP_SRC_INHERITED)
+		return;
+
+	/*
+	 * Best effort only: the filesystem may have been removed or moved
+	 * around in the meantime, and a failure here would tell the
+	 * administrator nothing useful, so the result is ignored.
+	 */
+	(void) rmdir(mountpoint);
+}
+
+/*
+ * Report whether the volume is shared via iSCSI.  The answer is "no" when
+ * iscsitgt_svc_online() returns 1 or when libiscsitgt is not available.
+ */
+boolean_t
+zfs_is_shared_iscsi(zfs_handle_t *zhp)
+{
+	/*
+	 * If the iscsi daemon isn't running then we aren't shared.
+	 * NOTE(review): this presumes a return value of 1 from
+	 * iscsitgt_svc_online() means "not online" -- confirm.
+	 */
+	if (iscsitgt_svc_online == NULL || iscsitgt_svc_online() != 1) {
+		return (iscsitgt_zfs_is_shared != NULL &&
+		    iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Share a volume via iSCSI.  Returns 0 when the shareiscsi property cannot
+ * be read or is "off", or when the share succeeds; otherwise the result of
+ * zfs_error_fmt().
+ */
+int
+zfs_share_iscsi(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	const char *dataset = zhp->zfs_name;
+	char shareopts[ZFS_MAXPROPLEN];
+	int error;
+
+	/* nothing to do when there are no share options */
+	if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
+	    sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
+	    strcmp(shareopts, "off") == 0)
+		return (0);
+
+	if (iscsitgt_zfs_share != NULL && iscsitgt_zfs_share(dataset) == 0)
+		return (0);
+
+	/*
+	 * Use the dedicated "service unavailable" error when the failure
+	 * was EPERM and the service check returns non-zero.
+	 */
+	error = EZFS_SHAREISCSIFAILED;
+	if (iscsitgt_svc_online && errno == EPERM &&
+	    (iscsitgt_svc_online() != 0))
+		error = EZFS_ISCSISVCUNAVAIL;
+
+	return (zfs_error_fmt(hdl, error,
+	    dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
+}
+
+/*
+ * Remove the iSCSI share of a volume.  Returns 0 when the volume is not
+ * shared or the unshare succeeds, otherwise the result of zfs_error_fmt()
+ * with EZFS_UNSHAREISCSIFAILED.
+ */
+int
+zfs_unshare_iscsi(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	const char *dataset = zfs_get_name(zhp);
+
+	/*
+	 * Nothing to do if the volume is not shared.
+	 * NOTE(review): this compares a boolean_t result with SHARED_ISCSI;
+	 * presumably the two values coincide -- confirm.
+	 */
+	if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI)
+		return (0);
+
+	/*
+	 * A failure with ENODEV means the zvol wasn't shared in the first
+	 * place, which counts as success.
+	 */
+	if (iscsitgt_zfs_unshare != NULL &&
+	    (iscsitgt_zfs_unshare(dataset) == 0 || errno == ENODEV))
+		return (0);
+
+	if (errno == EPERM)
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Insufficient privileges to unshare iscsi"));
+
+	return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
+	    dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
+}
+
+/* Growable array of dataset handles collected by mount_cb(). */
+typedef struct mount_cbdata {
+	zfs_handle_t	**cb_datasets;	/* collected handles */
+	int 		cb_used;	/* number of slots in use */
+	int		cb_alloc;	/* number of slots allocated */
+} mount_cbdata_t;
+
+/*
+ * zfs_iter_filesystems() callback used by zpool_enable_datasets().
+ *
+ * Appends 'zhp' to the mount_cbdata_t passed in 'data', doubling the array
+ * when it is full, and then recurses into the dataset's child filesystems.
+ * Handles that are neither filesystems nor volumes, or whose canmount
+ * property is 'noauto', are closed and skipped.  A handle that is stored
+ * stays open; it is closed here in every other case, including allocation
+ * failure.  Returns 0 to continue the iteration and -1 if the array cannot
+ * be grown.
+ */
+static int
+mount_cb(zfs_handle_t *zhp, void *data)
+{
+	mount_cbdata_t *cbp = data;
+
+	if (!(zfs_get_type(zhp) & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME))) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (cbp->cb_alloc == cbp->cb_used) {
+		void *ptr;
+
+		if ((ptr = zfs_realloc(zhp->zfs_hdl,
+		    cbp->cb_datasets, cbp->cb_alloc * sizeof (void *),
+		    cbp->cb_alloc * 2 * sizeof (void *))) == NULL) {
+			/* the handle was never stored, so release it here */
+			zfs_close(zhp);
+			return (-1);
+		}
+		cbp->cb_datasets = ptr;
+
+		cbp->cb_alloc *= 2;
+	}
+
+	cbp->cb_datasets[cbp->cb_used++] = zhp;
+
+	return (zfs_iter_filesystems(zhp, mount_cb, cbp));
+}
+
+/*
+ * qsort() comparator for an array of zfs_handle_t pointers.
+ *
+ * Filesystems sort before all other dataset types and are ordered among
+ * themselves by mountpoint property; two non-filesystems are ordered by
+ * dataset name.
+ */
+static int
+dataset_cmp(const void *a, const void *b)
+{
+	zfs_handle_t **za = (zfs_handle_t **)a;
+	zfs_handle_t **zb = (zfs_handle_t **)b;
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+	boolean_t gota, gotb;
+
+	if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
+		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
+		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (gota && gotb)
+		return (strcmp(mounta, mountb));
+
+	if (gota)
+		return (-1);
+	if (gotb)
+		return (1);
+
+	/*
+	 * 'a' and 'b' point at array slots, not at handles, so they must be
+	 * dereferenced before being handed to zfs_get_name().
+	 */
+	return (strcmp(zfs_get_name(*za), zfs_get_name(*zb)));
+}
+
+/*
+ * Mount and share all datasets within the given pool.  This assumes that no
+ * datasets within the pool are currently mounted.  Because users can create
+ * complicated nested hierarchies of mountpoints, we first gather all the
+ * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
+ * we have the list of all filesystems, we iterate over them in order and mount
+ * and/or share each one.
+ */
+#pragma weak zpool_mount_datasets = zpool_enable_datasets
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+	mount_cbdata_t cb = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_handle_t *zfsp;
+	int i, ret = -1;
+	int *good;
+
+	/*
+	 * Gather all non-snap datasets within the pool.
+	 */
+	if ((cb.cb_datasets = zfs_alloc(hdl, 4 * sizeof (void *))) == NULL)
+		return (-1);
+	cb.cb_alloc = 4;
+
+	if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
+		goto out;
+
+	cb.cb_datasets[0] = zfsp;
+	cb.cb_used = 1;
+
+	if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0)
+		goto out;
+
+	/*
+	 * Sort the datasets by mountpoint.
+	 */
+	qsort(cb.cb_datasets, cb.cb_used, sizeof (void *), dataset_cmp);
+
+	/*
+	 * And mount all the datasets, keeping track of which ones
+	 * succeeded or failed.  The allocation is checked like the one at
+	 * the top of this function rather than assumed to succeed; ret is
+	 * still -1 at this point.  Entries of good[] are only ever set to 1,
+	 * so this relies on zfs_alloc() returning zeroed memory --
+	 * TODO confirm.
+	 */
+	good = zfs_alloc(zhp->zpool_hdl, cb.cb_used * sizeof (int));
+	if (good == NULL)
+		goto out;
+
+	ret = 0;
+	for (i = 0; i < cb.cb_used; i++) {
+		if (zfs_mount(cb.cb_datasets[i], mntopts, flags) != 0)
+			ret = -1;
+		else
+			good[i] = 1;
+	}
+
+	/*
+	 * Then share all the ones that need to be shared. This needs
+	 * to be a separate pass in order to avoid excessive reloading
+	 * of the configuration.
+	 */
+	for (i = 0; i < cb.cb_used; i++) {
+		if (good[i] && zfs_share(cb.cb_datasets[i]) != 0)
+			ret = -1;
+	}
+
+	free(good);
+
+out:
+	/* close every handle gathered so far, whether or not we succeeded */
+	for (i = 0; i < cb.cb_used; i++)
+		zfs_close(cb.cb_datasets[i]);
+	free(cb.cb_datasets);
+
+	return (ret);
+}
+
+
+/*
+ * Per-dataset callback used when disabling a pool's datasets: unshares the
+ * named volume over iSCSI.  'data' is the libzfs handle.
+ *
+ * Returns 0 on success or when the dataset is skipped, and -1 if the
+ * unshare fails.
+ */
+static int
+zvol_cb(const char *dataset, void *data)
+{
+	libzfs_handle_t *hdl = data;
+	zfs_handle_t *zhp;
+	int ret;
+
+	/*
+	 * Ignore snapshots and ignore failures from non-existent datasets.
+	 */
+	if (strchr(dataset, '@') != NULL ||
+	    (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
+		return (0);
+
+	ret = (zfs_unshare_iscsi(zhp) != 0) ? -1 : 0;
+
+	/* Close the handle on both paths so a failed unshare cannot leak it. */
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * qsort() comparator for an array of mountpoint strings.  The strcmp()
+ * operands are swapped, so the array ends up in descending order.
+ */
+static int
+mountpoint_compare(const void *a, const void *b)
+{
+	char *const *lhs = a;
+	char *const *rhs = b;
+
+	return (strcmp(*rhs, *lhs));
+}
+
+/*
+ * Unshare and unmount all datasets within the given pool.  We don't want to
+ * rely on traversing the DSL to discover the filesystems within the pool,
+ * because this may be expensive (if not all of them are mounted), and can fail
+ * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
+ * gather all the filesystems that are currently mounted.
+ *
+ * If 'force' is set, MS_FORCE is passed to each unmount.  Returns 0 on
+ * success and -1 as soon as any step fails; nothing that was already
+ * unshared or unmounted at that point is restored.
+ */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+	int used, alloc;
+	struct mnttab entry;
+	size_t namelen;
+	char **mountpoints = NULL;
+	zfs_handle_t **datasets = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	int i;
+	int ret = -1;
+	int flags = (force ? MS_FORCE : 0);
+
+	/*
+	 * First unshare all zvols.
+	 */
+	if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
+		return (-1);
+
+	namelen = strlen(zhp->zpool_name);
+
+	rewind(hdl->libzfs_mnttab);
+	used = alloc = 0;
+	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
+		/*
+		 * Ignore non-ZFS entries.
+		 */
+		if (entry.mnt_fstype == NULL ||
+		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
+			continue;
+
+		/*
+		 * Ignore filesystems not within this pool.  The special
+		 * device must either be the pool name itself or start with
+		 * the pool name followed by '/'.  Both fields are checked
+		 * for NULL because both are dereferenced below.
+		 */
+		if (entry.mnt_mountp == NULL || entry.mnt_special == NULL ||
+		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
+		    (entry.mnt_special[namelen] != '/' &&
+		    entry.mnt_special[namelen] != '\0'))
+			continue;
+
+		/*
+		 * At this point we've found a filesystem within our pool.  Add
+		 * it to our growing list.  The two arrays always have the
+		 * same capacity, which starts at 8 and doubles when full.
+		 */
+		if (used == alloc) {
+			if (alloc == 0) {
+				if ((mountpoints = zfs_alloc(hdl,
+				    8 * sizeof (void *))) == NULL)
+					goto out;
+
+				if ((datasets = zfs_alloc(hdl,
+				    8 * sizeof (void *))) == NULL)
+					goto out;
+
+				alloc = 8;
+			} else {
+				void *ptr;
+
+				if ((ptr = zfs_realloc(hdl, mountpoints,
+				    alloc * sizeof (void *),
+				    alloc * 2 * sizeof (void *))) == NULL)
+					goto out;
+				mountpoints = ptr;
+
+				if ((ptr = zfs_realloc(hdl, datasets,
+				    alloc * sizeof (void *),
+				    alloc * 2 * sizeof (void *))) == NULL)
+					goto out;
+				datasets = ptr;
+
+				alloc *= 2;
+			}
+		}
+
+		if ((mountpoints[used] = zfs_strdup(hdl,
+		    entry.mnt_mountp)) == NULL)
+			goto out;
+
+		/*
+		 * This is allowed to fail, in case there is some I/O error.  It
+		 * is only used to determine if we need to remove the underlying
+		 * mountpoint, so failure is not fatal.
+		 */
+		datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
+
+		used++;
+	}
+
+	/*
+	 * At this point, we have the entire list of filesystems, so sort it by
+	 * mountpoint.  Only 'mountpoints' is reordered; 'datasets' is used
+	 * below without reference to its position.  Skip the call when there
+	 * is nothing to sort, which also avoids passing a NULL array to
+	 * qsort() when no filesystem matched.
+	 */
+	if (used > 1)
+		qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
+
+	/*
+	 * Walk through and first unshare everything.
+	 */
+	for (i = 0; i < used; i++) {
+		zfs_share_proto_t *curr_proto;
+		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
+		    curr_proto++) {
+			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
+			    unshare_one(hdl, mountpoints[i],
+			    mountpoints[i], *curr_proto) != 0)
+				goto out;
+		}
+	}
+
+	/*
+	 * Now unmount everything, removing the underlying directories as
+	 * appropriate.
+	 */
+	for (i = 0; i < used; i++) {
+		if (unmount_one(hdl, mountpoints[i], flags) != 0)
+			goto out;
+	}
+
+	for (i = 0; i < used; i++) {
+		if (datasets[i])
+			remove_mountpoint(datasets[i]);
+	}
+
+	ret = 0;
+out:
+	/* Only the first 'used' slots of either array were ever filled in. */
+	for (i = 0; i < used; i++) {
+		if (datasets[i])
+			zfs_close(datasets[i]);
+		free(mountpoints[i]);
+	}
+	free(datasets);
+	free(mountpoints);
+
+	return (ret);
+}
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
new file mode 100644
index 000000000..ae4b19adb
--- /dev/null
+++ b/lib/libzfs/libzfs_pool.c
@@ -0,0 +1,3062 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <alloca.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <devid.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <sys/efi_partition.h>
+#include <sys/vtoc.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio.h>
+#include <strings.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+/* Forward declaration; read_efi_label() is called by pool_uses_efi() below. */
+static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
+
+/*
+ * Name (with manual section) of the boot loader installation command for the
+ * architecture being compiled: installgrub(1M) when __i386 or __amd64 is
+ * defined, installboot(1M) otherwise.
+ */
+#if defined(__i386) || defined(__amd64)
+#define	BOOTCMD	"installgrub(1M)"
+#else
+#define	BOOTCMD	"installboot(1M)"
+#endif
+
+/*
+ * ====================================================================
+ *   zpool property functions
+ * ====================================================================
+ */
+
+/*
+ * Issue ZFS_IOC_POOL_GET_PROPS for the pool and store the resulting nvlist in
+ * zhp->zpool_props.  Returns 0 on success and -1 on failure.
+ */
+static int
+zpool_get_all_props(zpool_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	int err = -1;
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	/*
+	 * On ENOMEM, expand the destination nvlist buffer and retry the
+	 * ioctl.  Any other errno, or a failed expansion, is fatal.
+	 */
+	for (;;) {
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) == 0)
+			break;
+		if (errno != ENOMEM || zcmd_expand_dst_nvlist(hdl, &zc) != 0)
+			goto done;
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) == 0)
+		err = 0;
+
+done:
+	zcmd_free_nvlists(&zc);
+	return (err);
+}
+
+/*
+ * Re-read the pool's properties.  The previous property nvlist is freed only
+ * once the new one has been fetched; on failure it is not freed and -1 is
+ * returned.
+ */
+static int
+zpool_props_refresh(zpool_handle_t *zhp)
+{
+	nvlist_t *stale = zhp->zpool_props;
+
+	if (zpool_get_all_props(zhp) == 0) {
+		nvlist_free(stale);
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Look up the string value of 'prop' in zhp->zpool_props.  If the property is
+ * not present, the property's default string is returned, or "-" when there
+ * is no default.  When 'src' is non-NULL it receives the property's source
+ * (ZPROP_SRC_DEFAULT for the fallback case).
+ */
+static char *
+zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
+    zprop_source_t *src)
+{
+	nvlist_t *propnv;
+	uint64_t rawsrc;
+	char *str;
+	zprop_source_t origin;
+
+	if (nvlist_lookup_nvlist(zhp->zpool_props, zpool_prop_to_name(prop),
+	    &propnv) != 0) {
+		origin = ZPROP_SRC_DEFAULT;
+		str = (char *)zpool_prop_default_string(prop);
+		if (str == NULL)
+			str = "-";
+	} else {
+		verify(nvlist_lookup_uint64(propnv, ZPROP_SOURCE,
+		    &rawsrc) == 0);
+		origin = rawsrc;
+		verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &str) == 0);
+	}
+
+	if (src != NULL)
+		*src = origin;
+
+	return (str);
+}
+
+/*
+ * Return the numeric value of 'prop' for the pool, loading the property list
+ * first if it has not been fetched yet.  A property missing from the list
+ * yields its default value with source ZPROP_SRC_DEFAULT.
+ *
+ * If the property list cannot be loaded, the default is returned as well,
+ * except for ZPOOL_PROP_GUID, which is then taken from the vdev tree of the
+ * handle's config nvlist when available.  '*src' is not written on that
+ * path.
+ */
+uint64_t
+zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
+{
+	nvlist_t *nv;
+	uint64_t value;
+	uint64_t rawsrc;
+	zprop_source_t source;
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) != 0) {
+		/*
+		 * zpool_get_all_props() has most likely failed because
+		 * the pool is faulted, but if all we need is the top level
+		 * vdev's guid then get it from the zhp config nvlist.
+		 */
+		if (prop != ZPOOL_PROP_GUID)
+			return (zpool_prop_default_numeric(prop));
+
+		if (nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nv) != 0)
+			return (zpool_prop_default_numeric(prop));
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value) != 0)
+			return (zpool_prop_default_numeric(prop));
+
+		return (value);
+	}
+
+	if (nvlist_lookup_nvlist(zhp->zpool_props, zpool_prop_to_name(prop),
+	    &nv) != 0) {
+		source = ZPROP_SRC_DEFAULT;
+		value = zpool_prop_default_numeric(prop);
+	} else {
+		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &rawsrc) == 0);
+		source = rawsrc;
+		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
+	}
+
+	if (src != NULL)
+		*src = source;
+
+	return (value);
+}
+
+/*
+ * Map a vdev state (and, for VDEV_STATE_CANT_OPEN, its auxiliary reason) to
+ * the localized string that is printed for it.  States not listed below map
+ * to "UNKNOWN".
+ */
+char *
+zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
+{
+	char *name;
+
+	switch (state) {
+	case VDEV_STATE_CLOSED:
+	case VDEV_STATE_OFFLINE:
+		name = gettext("OFFLINE");
+		break;
+	case VDEV_STATE_REMOVED:
+		name = gettext("REMOVED");
+		break;
+	case VDEV_STATE_CANT_OPEN:
+		/* Corrupt data or a bad log is reported as a fault. */
+		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
+			name = gettext("FAULTED");
+		else
+			name = gettext("UNAVAIL");
+		break;
+	case VDEV_STATE_FAULTED:
+		name = gettext("FAULTED");
+		break;
+	case VDEV_STATE_DEGRADED:
+		name = gettext("DEGRADED");
+		break;
+	case VDEV_STATE_HEALTHY:
+		name = gettext("ONLINE");
+		break;
+	default:
+		name = gettext("UNKNOWN");
+		break;
+	}
+
+	return (name);
+}
+
+/*
+ * Format the value of pool property 'prop' into the caller-supplied buffer
+ * 'buf' of size 'len'.  When 'srctype' is non-NULL and the pool is not in the
+ * POOL_STATE_UNAVAIL state, it receives the property's source.
+ *
+ * Returns 0 on success and -1 if the properties cannot be loaded or an index
+ * value has no string form.
+ */
+int
+zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
+    zprop_source_t *srctype)
+{
+	zprop_source_t src = ZPROP_SRC_NONE;
+	uint64_t intval;
+	const char *strval;
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	/*
+	 * An unavailable pool only reports its name and a fixed health
+	 * string; every other property reads as "-".
+	 */
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		const char *text;
+
+		if (prop == ZPOOL_PROP_NAME)
+			text = zpool_get_name(zhp);
+		else if (prop == ZPOOL_PROP_HEALTH)
+			text = "FAULTED";
+		else
+			text = "-";
+
+		(void) strlcpy(buf, text, len);
+		return (0);
+	}
+
+	/* The name can still be produced when the properties fail to load. */
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) != 0 &&
+	    prop != ZPOOL_PROP_NAME)
+		return (-1);
+
+	switch (zpool_prop_get_type(prop)) {
+	case PROP_TYPE_STRING:
+		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
+		    len);
+		break;
+
+	case PROP_TYPE_NUMBER:
+		intval = zpool_get_prop_int(zhp, prop, &src);
+
+		if (prop == ZPOOL_PROP_SIZE || prop == ZPOOL_PROP_USED ||
+		    prop == ZPOOL_PROP_AVAILABLE) {
+			(void) zfs_nicenum(intval, buf, len);
+		} else if (prop == ZPOOL_PROP_CAPACITY) {
+			(void) snprintf(buf, len, "%llu%%",
+			    (u_longlong_t)intval);
+		} else if (prop == ZPOOL_PROP_HEALTH) {
+			/*
+			 * The printed health also depends on the auxiliary
+			 * state held in the root vdev's stats.
+			 */
+			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+			verify(nvlist_lookup_uint64_array(nvroot,
+			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+
+			(void) strlcpy(buf, zpool_state_to_name(intval,
+			    vs->vs_aux), len);
+		} else {
+			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
+		}
+		break;
+
+	case PROP_TYPE_INDEX:
+		intval = zpool_get_prop_int(zhp, prop, &src);
+		if (zpool_prop_index_to_string(prop, intval, &strval) != 0)
+			return (-1);
+		(void) strlcpy(buf, strval, len);
+		break;
+
+	default:
+		abort();
+	}
+
+	if (srctype != NULL)
+		*srctype = src;
+
+	return (0);
+}
+
+/*
+ * Check that 'bootfs' is a valid filesystem or snapshot name, and that it
+ * names either the pool 'pool' itself or something beneath it (that is, the
+ * pool name is followed by '/' or by the end of the string).
+ */
+static boolean_t
+bootfs_name_valid(const char *pool, char *bootfs)
+{
+	int poollen = strlen(pool);
+
+	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
+		return (B_FALSE);
+
+	if (strncmp(pool, bootfs, poollen) != 0)
+		return (B_FALSE);
+
+	if (bootfs[poollen] != '/' && bootfs[poollen] != '\0')
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Walk the vdev configuration recursively and report whether any device in
+ * it has an EFI label.  A node with no ZPOOL_CONFIG_CHILDREN array is tested
+ * directly with read_efi_label().
+ */
+static boolean_t
+pool_uses_efi(nvlist_t *config)
+{
+	nvlist_t **kids;
+	uint_t nkids;
+	uint_t i = 0;
+
+	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (read_efi_label(config, NULL) >= 0);
+
+	while (i < nkids) {
+		if (pool_uses_efi(kids[i]))
+			return (B_TRUE);
+		i++;
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * A pool is treated as bootable when its bootfs property can be read and
+ * holds something other than "-".
+ */
+static boolean_t
+pool_is_bootable(zpool_handle_t *zhp)
+{
+	char bootfs[ZPOOL_MAXNAMELEN];
+
+	if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs, sizeof (bootfs),
+	    NULL) != 0)
+		return (B_FALSE);
+
+	return (strncmp(bootfs, "-", sizeof (bootfs)) != 0);
+}
+
+
+/*
+ * Given an nvlist of zpool properties to be set, validate that they are
+ * correct, and parse any numeric properties (index, boolean, etc) if they are
+ * specified as strings.
+ *
+ * 'version' is the pool version the properties are checked against, and
+ * 'create_or_import' says whether the properties are being supplied at pool
+ * creation or import time (which bootfs forbids and altroot requires).
+ *
+ * Returns a newly allocated nvlist of validated properties, or NULL after
+ * recording an error on 'hdl' (prefixed with 'errbuf').
+ */
+static nvlist_t *
+zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
+    nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
+{
+	nvpair_t *elem;
+	nvlist_t *retprops;
+	zpool_prop_t prop;
+	char *strval;
+	uint64_t intval;
+	char *slash;
+	struct stat64 statbuf;
+	zpool_handle_t *zhp;
+	nvlist_t *nvroot;
+
+	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+		const char *propname = nvpair_name(elem);
+
+		/*
+		 * Make sure this property is valid and applies to this type.
+		 */
+		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property '%s'"), propname);
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+			goto error;
+		}
+
+		if (zpool_prop_readonly(prop)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
+			    "is readonly"), propname);
+			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
+			goto error;
+		}
+
+		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
+		    &strval, &intval, errbuf) != 0)
+			goto error;
+
+		/*
+		 * Perform additional checking for specific properties.
+		 */
+		switch (prop) {
+		case ZPOOL_PROP_VERSION:
+			if (intval < version || intval > SPA_VERSION) {
+				/*
+				 * intval is 64 bits wide, so it is printed
+				 * with %llu and an explicit cast, as
+				 * elsewhere in this file.
+				 */
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' number %llu is invalid."),
+				    propname, (u_longlong_t)intval);
+				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZPOOL_PROP_BOOTFS:
+			if (create_or_import) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' cannot be set at creation "
+				    "or import time"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (version < SPA_VERSION_BOOTFS) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool must be upgraded to support "
+				    "'%s' property"), propname);
+				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+				goto error;
+			}
+
+			/*
+			 * bootfs property value has to be a dataset name and
+			 * the dataset has to be in the same pool as it sets to.
+			 */
+			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
+			    strval)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
+				    "is an invalid name"), strval);
+				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+				goto error;
+			}
+
+			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "could not open pool '%s'"), poolname);
+				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
+				goto error;
+			}
+			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+
+			/*
+			 * bootfs property cannot be set on a disk which has
+			 * been EFI labeled.
+			 */
+			if (pool_uses_efi(nvroot)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' not supported on "
+				    "EFI labeled devices"), propname);
+				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
+				zpool_close(zhp);
+				goto error;
+			}
+			zpool_close(zhp);
+			break;
+
+		case ZPOOL_PROP_ALTROOT:
+			if (!create_or_import) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' can only be set during pool "
+				    "creation or import"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (strval[0] != '/') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "bad alternate root '%s'"), strval);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZPOOL_PROP_CACHEFILE:
+			if (strval[0] == '\0')
+				break;
+
+			if (strcmp(strval, "none") == 0)
+				break;
+
+			if (strval[0] != '/') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' must be empty, an "
+				    "absolute path, or 'none'"), propname);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				goto error;
+			}
+
+			/* strval starts with '/', so this cannot be NULL. */
+			slash = strrchr(strval, '/');
+
+			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
+			    strcmp(slash, "/..") == 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' is not a valid file"), strval);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				goto error;
+			}
+
+			/*
+			 * Temporarily cut the string at the last slash so
+			 * that strval names the containing directory.
+			 */
+			*slash = '\0';
+
+			if (strval[0] != '\0' &&
+			    (stat64(strval, &statbuf) != 0 ||
+			    !S_ISDIR(statbuf.st_mode))) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' is not a valid directory"),
+				    strval);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				/*
+				 * Undo the cut on this path too: the string
+				 * is not owned by this function and must not
+				 * be left truncated.
+				 */
+				*slash = '/';
+				goto error;
+			}
+
+			*slash = '/';
+			break;
+		}
+	}
+
+	return (retprops);
+error:
+	nvlist_free(retprops);
+	return (NULL);
+}
+
+/*
+ * Set a single pool property, propname=propval.  The pair is validated with
+ * zpool_valid_proplist() before being handed to ZFS_IOC_POOL_SET_PROPS; on
+ * success the handle's property list is refreshed.  Returns 0 on success and
+ * nonzero on failure.
+ */
+int
+zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	char errbuf[1024];
+	nvlist_t *request = NULL;
+	nvlist_t *checked;
+	uint64_t version;
+	int ret;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
+	    zhp->zpool_name);
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) != 0)
+		return (zfs_error(hdl, EZFS_POOLPROPS, errbuf));
+
+	/* Wrap the single name/value pair in an nvlist for validation. */
+	if (nvlist_alloc(&request, NV_UNIQUE_NAME, 0) != 0)
+		return (no_memory(hdl));
+
+	if (nvlist_add_string(request, propname, propval) != 0) {
+		nvlist_free(request);
+		return (no_memory(hdl));
+	}
+
+	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
+	checked = zpool_valid_proplist(hdl, zhp->zpool_name, request, version,
+	    B_FALSE, errbuf);
+
+	/* The unvalidated list is no longer needed either way. */
+	nvlist_free(request);
+	if (checked == NULL)
+		return (-1);
+
+	/*
+	 * Execute the corresponding ioctl() to set this property.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, checked) != 0) {
+		nvlist_free(checked);
+		return (-1);
+	}
+
+	ret = zfs_ioctl(hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
+
+	zcmd_free_nvlists(&zc);
+	nvlist_free(checked);
+
+	if (ret == 0)
+		(void) zpool_props_refresh(zhp);
+	else
+		(void) zpool_standard_error(hdl, errno, errbuf);
+
+	return (ret);
+}
+
+/*
+ * Expand the property list '*plp' for pools, then widen each non-fixed
+ * entry's column width so that this pool's value for the property fits.
+ * Returns 0 on success and -1 if the list cannot be expanded.
+ */
+int
+zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zprop_list_t *entry;
+	char buf[ZFS_MAXPROPLEN];
+
+	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
+		return (-1);
+
+	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
+		size_t width;
+
+		if (entry->pl_fixed || entry->pl_prop == ZPROP_INVAL)
+			continue;
+
+		/* A property that cannot be read leaves the width alone. */
+		if (zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
+		    NULL) != 0)
+			continue;
+
+		width = strlen(buf);
+		if (width > entry->pl_width)
+			entry->pl_width = width;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Validate the given pool name.  When 'hdl' is non-NULL, a description of
+ * what is wrong with the name is recorded on it with zfs_error_aux().
+ * 'isopen' is B_TRUE when an existing pool is being opened, which skips the
+ * extended reserved-name check.
+ */
+boolean_t
+zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
+{
+	namecheck_err_t why;
+	char what;
+
+	if (pool_namecheck(pool, &why, &what) != 0) {
+		if (hdl == NULL)
+			return (B_FALSE);
+
+		switch (why) {
+		case NAME_ERR_TOOLONG:
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "name is too long"));
+			break;
+
+		case NAME_ERR_INVALCHAR:
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "invalid character "
+			    "'%c' in pool name"), what);
+			break;
+
+		case NAME_ERR_NOLETTER:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "name must begin with a letter"));
+			break;
+
+		case NAME_ERR_RESERVED:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "name is reserved"));
+			break;
+
+		case NAME_ERR_DISKLIKE:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool name is reserved"));
+			break;
+
+		case NAME_ERR_LEADING_SLASH:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "leading slash in name"));
+			break;
+
+		case NAME_ERR_EMPTY_COMPONENT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty component in name"));
+			break;
+
+		case NAME_ERR_TRAILING_SLASH:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "trailing slash in name"));
+			break;
+
+		case NAME_ERR_MULTIPLE_AT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "multiple '@' delimiters in name"));
+			break;
+
+		default:
+			break;
+		}
+
+		return (B_FALSE);
+	}
+
+	/*
+	 * The rules for reserved pool names were extended at a later point.
+	 * But we need to support users with existing pools that may now be
+	 * invalid.  So we only check for this expanded set of names during a
+	 * create (or import), and only in userland.
+	 */
+	if (isopen)
+		return (B_TRUE);
+
+	if (strncmp(pool, "mirror", 6) == 0 ||
+	    strncmp(pool, "raidz", 5) == 0 ||
+	    strncmp(pool, "spare", 5) == 0 ||
+	    strcmp(pool, "log") == 0) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "name is reserved"));
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Open a handle to the given pool, even if the pool is currently in the FAULTED
+ * state.  Returns NULL, with an error recorded on 'hdl', if the name is
+ * invalid or the pool does not exist, and NULL as well if the handle cannot
+ * be allocated or its stats cannot be refreshed.
+ */
+zpool_handle_t *
+zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
+{
+	zpool_handle_t *zhp;
+	boolean_t missing;
+
+	/*
+	 * Make sure the pool name is valid.
+	 */
+	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
+		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+		    pool);
+		return (NULL);
+	}
+
+	zhp = zfs_alloc(hdl, sizeof (zpool_handle_t));
+	if (zhp == NULL)
+		return (NULL);
+
+	zhp->zpool_hdl = hdl;
+	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
+
+	if (zpool_refresh_stats(zhp, &missing) != 0)
+		goto fail;
+
+	if (missing) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
+		(void) zfs_error_fmt(hdl, EZFS_NOENT,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
+		goto fail;
+	}
+
+	return (zhp);
+
+fail:
+	zpool_close(zhp);
+	return (NULL);
+}
+
+/*
+ * Like zpool_open_canfail(), but silent on error.  Used when iterating over
+ * pools (because the configuration cache may be out of date).
+ *
+ * Returns 0 with '*ret' set to the new handle, or to NULL if the pool is
+ * missing.  Returns -1, leaving '*ret' untouched, if the handle cannot be
+ * allocated or its stats cannot be refreshed.
+ */
+int
+zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
+{
+	zpool_handle_t *zhp;
+	boolean_t missing;
+
+	zhp = zfs_alloc(hdl, sizeof (zpool_handle_t));
+	if (zhp == NULL)
+		return (-1);
+
+	zhp->zpool_hdl = hdl;
+	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
+
+	if (zpool_refresh_stats(zhp, &missing) != 0) {
+		zpool_close(zhp);
+		return (-1);
+	}
+
+	/* A missing pool is not an error here; it just yields no handle. */
+	if (missing) {
+		zpool_close(zhp);
+		zhp = NULL;
+	}
+
+	*ret = zhp;
+	return (0);
+}
+
+/*
+ * Similar to zpool_open_canfail(), but refuses to open pools whose state is
+ * POOL_STATE_UNAVAIL: for those an EZFS_POOLUNAVAIL error is recorded and
+ * NULL is returned.
+ */
+zpool_handle_t *
+zpool_open(libzfs_handle_t *hdl, const char *pool)
+{
+	zpool_handle_t *zhp = zpool_open_canfail(hdl, pool);
+
+	if (zhp != NULL && zhp->zpool_state == POOL_STATE_UNAVAIL) {
+		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
+		zpool_close(zhp);
+		zhp = NULL;
+	}
+
+	return (zhp);
+}
+
+/*
+ * Close the handle: free whichever of the config, old config and property
+ * nvlists it holds, then the handle structure itself.
+ */
+void
+zpool_close(zpool_handle_t *zhp)
+{
+	if (zhp->zpool_config != NULL)
+		nvlist_free(zhp->zpool_config);
+
+	if (zhp->zpool_old_config != NULL)
+		nvlist_free(zhp->zpool_old_config);
+
+	if (zhp->zpool_props != NULL)
+		nvlist_free(zhp->zpool_props);
+
+	free(zhp);
+}
+
+/*
+ * Return the name of the pool.  The returned pointer refers to the name
+ * stored inside the handle itself, so it is not a separate copy.
+ */
+const char *
+zpool_get_name(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_name);
+}
+
+
+/*
+ * Return the state of the pool (ACTIVE or UNAVAILABLE), as recorded in the
+ * handle's zpool_state field.
+ */
+int
+zpool_get_state(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_state);
+}
+
+/*
+ * Create the named pool, using the provided vdev list.  It is assumed
+ * that the consumer has already validated the contents of the nvlist, so we
+ * don't have to worry about error semantics.
+ *
+ * 'props' (pool properties) and 'fsprops' (properties for the root
+ * filesystem) may each be NULL.  Returns 0 on success and nonzero on failure.
+ */
+int
+zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
+    nvlist_t *props, nvlist_t *fsprops)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *zc_fsprops = NULL;
+	nvlist_t *zc_props = NULL;
+	char msg[1024];
+	char *altroot;
+	int ret = -1;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), pool);
+
+	if (!zpool_name_valid(hdl, B_FALSE, pool))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
+		return (-1);
+
+	if (props) {
+		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
+		    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
+			goto create_failed;
+		}
+	}
+
+	if (fsprops) {
+		uint64_t zoned;
+		char *zonestr;
+
+		zoned = ((nvlist_lookup_string(fsprops,
+		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
+		    strcmp(zonestr, "on") == 0);
+
+		if ((zc_fsprops = zfs_valid_proplist(hdl,
+		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
+			goto create_failed;
+		}
+
+		/*
+		 * The validated filesystem properties are nested inside the
+		 * pool property list under ZPOOL_ROOTFS_PROPS.  nvlist
+		 * failures are reported with no_memory(), as in
+		 * zpool_set_prop(), so the caller does not get a bare -1.
+		 */
+		if (!zc_props &&
+		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
+			(void) no_memory(hdl);
+			goto create_failed;
+		}
+		if (nvlist_add_nvlist(zc_props,
+		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
+			(void) no_memory(hdl);
+			goto create_failed;
+		}
+	}
+
+	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
+		goto create_failed;
+
+	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+
+	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
+		/*
+		 * Capture errno before the cleanup calls below get a chance
+		 * to overwrite it.
+		 */
+		int err = errno;
+
+		zcmd_free_nvlists(&zc);
+		nvlist_free(zc_props);
+		nvlist_free(zc_fsprops);
+
+		switch (err) {
+		case EBUSY:
+			/*
+			 * This can happen if the user has specified the same
+			 * device multiple times.  We can't reliably detect this
+			 * until we try to add it and see we already have a
+			 * label.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case EOVERFLOW:
+			/*
+			 * This occurs when one of the devices is below
+			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
+			 * device was the problem device since there's no
+			 * reliable way to determine device size from userland.
+			 */
+			{
+				char buf[64];
+
+				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "one or more devices is less than the "
+				    "minimum size (%s)"), buf);
+			}
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case ENOSPC:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is out of space"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case ENOTBLK:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cache device must be a disk or disk slice"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		default:
+			return (zpool_standard_error(hdl, err, msg));
+		}
+	}
+
+	/*
+	 * If this is an alternate root pool, then we automatically set the
+	 * mountpoint of the root dataset to be '/'.  'props' is optional, so
+	 * only look the property up when a list was supplied.
+	 */
+	if (props != NULL && nvlist_lookup_string(props,
+	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot) == 0) {
+		zfs_handle_t *zhp;
+
+		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
+		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
+		    "/") == 0);
+
+		zfs_close(zhp);
+	}
+
+	/* The success path shares this cleanup; 'ret' is 0 when it gets here. */
+create_failed:
+	zcmd_free_nvlists(&zc);
+	nvlist_free(zc_props);
+	nvlist_free(zc_fsprops);
+	return (ret);
+}
+
+/*
+ * Destroy the given pool.  It is up to the caller to ensure that there are no
+ * datasets left in the pool.
+ *
+ * For an active pool the root filesystem is opened first so that its
+ * mountpoint can be removed once the pool is gone.  Returns 0 on success and
+ * -1 on failure.
+ */
+int
+zpool_destroy(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *zfp = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+
+	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
+	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
+		return (-1);
+
+	if (zpool_remove_zvol_links(zhp) != 0) {
+		/* Do not leak the root filesystem handle opened above. */
+		if (zfp)
+			zfs_close(zfp);
+		return (-1);
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+		/* Save errno before formatting the message can disturb it. */
+		int err = errno;
+
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot destroy '%s'"), zhp->zpool_name);
+
+		if (err == EROFS) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is read only"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		} else {
+			(void) zpool_standard_error(hdl, err, msg);
+		}
+
+		if (zfp)
+			zfs_close(zfp);
+		return (-1);
+	}
+
+	if (zfp) {
+		remove_mountpoint(zfp);
+		zfs_close(zfp);
+	}
+
+	return (0);
+}
+
+/*
+ * Add the given vdevs to the pool.  The caller must have already performed the
+ * necessary verification to ensure that the vdev specification is well-formed.
+ *
+ * Hot spares and cache devices are rejected up front when the pool version is
+ * too old for them, as are EFI-labeled spares on a bootable pool.  Returns 0
+ * on success and nonzero on failure.
+ */
+int
+zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot add to '%s'"), zhp->zpool_name);
+
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_SPARES &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add hot spares"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
+	if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
+	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
+		uint64_t s;
+
+		for (s = 0; s < nspares; s++) {
+			char *path;
+
+			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
+			    &path) == 0 && pool_uses_efi(spares[s])) {
+				/*
+				 * zpool_vdev_name() returns an allocated
+				 * string that the caller has to free, so it
+				 * is kept in a local and released once the
+				 * message has been formatted.  Should the
+				 * lookup fail, the raw path is shown instead.
+				 */
+				char *name = zpool_vdev_name(hdl, NULL,
+				    spares[s]);
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "device '%s' contains an EFI label and "
+				    "cannot be used on root pools."),
+				    name != NULL ? name : path);
+				free(name);
+				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+			}
+		}
+	}
+
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_L2CACHE &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+	    &l2cache, &nl2cache) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add cache devices"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
+		return (-1);
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
+		switch (errno) {
+		case EBUSY:
+			/*
+			 * This can happen if the user has specified the same
+			 * device multiple times.  We can't reliably detect this
+			 * until we try to add it and see we already have a
+			 * label.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case EOVERFLOW:
+			/*
+			 * This occurs when one of the devices is below
+			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
+			 * device was the problem device since there's no
+			 * reliable way to determine device size from userland.
+			 */
+			{
+				char buf[64];
+
+				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "device is less than the minimum "
+				    "size (%s)"), buf);
+			}
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to add these vdevs"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
+			break;
+
+		case EDOM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "root pool can not have multiple vdevs"
+			    " or separate logs"));
+			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
+			break;
+
+		case ENOTBLK:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cache device must be a disk or disk slice"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		default:
+			(void) zpool_standard_error(hdl, errno, msg);
+		}
+
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	return (ret);
+}
+
+/*
+ * Exports the pool from the system.  The caller must ensure that there are no
+ * mounted datasets in the pool.
+ *
+ * The pool's zvol links are removed first; if that fails, -1 is returned
+ * without attempting the export.  'force' is passed to the kernel in the
+ * ioctl cookie.  Returns 0 on success; on ioctl failure an error is reported
+ * through the library handle and the reporting routine's value is returned.
+ */
+int
+zpool_export(zpool_handle_t *zhp, boolean_t force)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+
+	if (zpool_remove_zvol_links(zhp) != 0)
+		return (-1);
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot export '%s'"), zhp->zpool_name);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_cookie = force;
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
+		switch (errno) {
+		case EXDEV:
+			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+			    "use '-f' to override the following errors:\n"
+			    "'%s' has an active shared spare which could be"
+			    " used by other pools once '%s' is exported."),
+			    zhp->zpool_name, zhp->zpool_name);
+			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
+			    msg));
+		default:
+			/*
+			 * 'msg' is already fully formatted text, so hand it
+			 * over as an argument rather than as the format
+			 * string itself.
+			 */
+			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
+			    "%s", msg));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * zpool_import() is a contracted interface. Should be kept the same
+ * if possible.
+ *
+ * Applications should use zpool_import_props() to import a pool with
+ * new properties value to be set.
+ *
+ * When 'altroot' is non-NULL it is wrapped in a temporary property list
+ * (under the altroot property name) which is handed to zpool_import_props()
+ * and freed afterwards.  'newname' may be NULL, in which case the pool keeps
+ * the name recorded in 'config'.  The return value is that of
+ * zpool_import_props(), or of zfs_error_fmt() if the property list cannot be
+ * built.
+ */
+int
+zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+    char *altroot)
+{
+	nvlist_t *props = NULL;
+	int ret;
+
+	if (altroot != NULL) {
+		const char *errname = newname;
+		char *origname;
+
+		/*
+		 * 'newname' is optional, so it cannot be fed to '%s'
+		 * unconditionally.  Name the pool by its configured name
+		 * when no new name was requested.
+		 */
+		if (errname == NULL) {
+			if (nvlist_lookup_string(config,
+			    ZPOOL_CONFIG_POOL_NAME, &origname) == 0)
+				errname = origname;
+			else
+				errname = "";
+		}
+
+		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
+			return (zfs_error_fmt(hdl, EZFS_NOMEM,
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    errname));
+		}
+
+		if (nvlist_add_string(props,
+		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
+			nvlist_free(props);
+			return (zfs_error_fmt(hdl, EZFS_NOMEM,
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    errname));
+		}
+	}
+
+	ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
+	if (props)
+		nvlist_free(props);
+	return (ret);
+}
+
+/*
+ * Import the given pool using the known configuration and a list of
+ * properties to be set. The configuration should have come from
+ * zpool_find_import(). The 'newname' parameters control whether the pool
+ * is imported with a different name.
+ *
+ * 'props' may be NULL.  When it is not, it is validated against the pool
+ * version found in 'config'; the validated copy (not the caller's list) is
+ * what gets sent to the kernel and freed here.  'importfaulted' is passed to
+ * the kernel in the ioctl cookie.
+ *
+ * Returns 0 on success.  After a successful import the pool is reopened and
+ * its zvol links are created; a failure there is also reflected in the
+ * return value.  On failure an error is reported through 'hdl'.
+ */
+int
+zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+    nvlist_t *props, boolean_t importfaulted)
+{
+	zfs_cmd_t zc = { 0 };
+	char *thename;
+	char *origname;
+	int ret;
+	char errbuf[1024];
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &origname) == 0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot import pool '%s'"), origname);
+
+	if (newname != NULL) {
+		if (!zpool_name_valid(hdl, B_FALSE, newname))
+			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    newname));
+		thename = (char *)newname;
+	} else {
+		thename = origname;
+	}
+
+	if (props) {
+		uint64_t version;
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+		    &version) == 0);
+
+		if ((props = zpool_valid_proplist(hdl, origname,
+		    props, version, B_TRUE, errbuf)) == NULL) {
+			return (-1);
+		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+	}
+
+	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
+		/*
+		 * The property list may already have been packed into 'zc'
+		 * above; release it along with the validated list.
+		 */
+		zcmd_free_nvlists(&zc);
+		nvlist_free(props);
+		return (-1);
+	}
+
+	zc.zc_cookie = (uint64_t)importfaulted;
+	ret = 0;
+	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+		/*
+		 * Capture errno before formatting the description, since the
+		 * library calls below are free to overwrite it.
+		 */
+		int err = errno;
+		char desc[1024];
+
+		if (newname == NULL)
+			(void) snprintf(desc, sizeof (desc),
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    thename);
+		else
+			(void) snprintf(desc, sizeof (desc),
+			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
+			    origname, thename);
+
+		switch (err) {
+		case ENOTSUP:
+			/*
+			 * Unsupported version.
+			 */
+			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
+			break;
+
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
+			break;
+
+		default:
+			(void) zpool_standard_error(hdl, err, desc);
+		}
+
+		ret = -1;
+	} else {
+		zpool_handle_t *zhp;
+
+		/*
+		 * This should never fail, but play it safe anyway.
+		 */
+		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
+			ret = -1;
+		} else if (zhp != NULL) {
+			ret = zpool_create_zvol_links(zhp);
+			zpool_close(zhp);
+		}
+
+	}
+
+	zcmd_free_nvlists(&zc);
+	nvlist_free(props);
+
+	return (ret);
+}
+
+/*
+ * Scrub the pool.
+ *
+ * 'type' is passed to the kernel in the ioctl cookie.  Returns 0 on success.
+ * On failure an error is reported through the library handle; EBUSY from the
+ * kernel is reported as EZFS_RESILVERING.
+ */
+int
+zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	int err;
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_cookie = type;
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
+		return (0);
+
+	/*
+	 * Remember why the ioctl failed before building the message; the
+	 * formatting calls may overwrite errno.
+	 */
+	err = errno;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+
+	if (err == EBUSY)
+		return (zfs_error(hdl, EZFS_RESILVERING, msg));
+	else
+		return (zpool_standard_error(hdl, err, msg));
+}
+
+/*
+ * Recursively search the vdev tree rooted at 'nv' and return the matching
+ * vdev nvlist, or NULL if there is none.
+ *
+ * When 'search' is NULL the match is by 'guid', and only vdevs carrying the
+ * "not present" marker are candidates.  Otherwise the match is by path; for
+ * whole disks the stored path is expected to be two characters longer than
+ * 'search' (the slice suffix).
+ *
+ * '*avail_spare' is set to TRUE when the match was found under the spares
+ * array, and '*l2cache' when it was found under the cache array.  'log', if
+ * non-NULL, is set to TRUE when the direct child of 'nv' that contains the
+ * match is flagged as a log device.
+ */
+static nvlist_t *
+vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
+    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+{
+	nvlist_t **kids, **aux;
+	uint_t nkids, naux, i;
+	uint64_t self_guid, not_present, top_is_log;
+	uint64_t whole = 0;
+	char *devpath;
+	nvlist_t *hit;
+
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &self_guid) == 0);
+
+	if (search == NULL) {
+		/*
+		 * If the device has never been present since import, the only
+		 * reliable way to match the vdev is by GUID.
+		 */
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+		    &not_present) == 0 && self_guid == guid)
+			return (nv);
+	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH,
+	    &devpath) == 0) {
+		int same;
+
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &whole);
+		if (whole) {
+			/*
+			 * For whole disks, the internal path has 's0', but the
+			 * path passed in by the user doesn't.
+			 */
+			size_t want = strlen(search);
+
+			same = (want == strlen(devpath) - 2 &&
+			    strncmp(search, devpath, want) == 0);
+		} else {
+			same = (strcmp(search, devpath) == 0);
+		}
+
+		if (same)
+			return (nv);
+	}
+
+	/* Nothing below a vdev without children can match. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (NULL);
+
+	for (i = 0; i < nkids; i++) {
+		hit = vdev_to_nvlist_iter(kids[i], search, guid,
+		    avail_spare, l2cache, NULL);
+		if (hit == NULL)
+			continue;
+
+		/*
+		 * The 'is_log' value is only set for the toplevel vdev, not
+		 * the leaf vdevs.  So we always lookup the log device from
+		 * the root of the vdev tree (where 'log' is non-NULL).
+		 */
+		if (log != NULL &&
+		    nvlist_lookup_uint64(kids[i], ZPOOL_CONFIG_IS_LOG,
+		    &top_is_log) == 0 && top_is_log)
+			*log = B_TRUE;
+
+		return (hit);
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &aux, &naux) == 0) {
+		for (i = 0; i < naux; i++) {
+			hit = vdev_to_nvlist_iter(aux[i], search, guid,
+			    avail_spare, l2cache, NULL);
+			if (hit != NULL) {
+				*avail_spare = B_TRUE;
+				return (hit);
+			}
+		}
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &aux, &naux) == 0) {
+		for (i = 0; i < naux; i++) {
+			hit = vdev_to_nvlist_iter(aux[i], search, guid,
+			    avail_spare, l2cache, NULL);
+			if (hit != NULL) {
+				*l2cache = B_TRUE;
+				return (hit);
+			}
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Locate the vdev named by 'path' in the pool's configuration and return its
+ * nvlist, or NULL if it cannot be found.
+ *
+ * 'path' is interpreted as a GUID when it parses completely as a non-zero
+ * decimal number; otherwise as a device path, with "/dev/dsk/" prepended
+ * unless it already begins with '/'.  '*avail_spare' and '*l2cache' (and
+ * '*log' when 'log' is non-NULL) are reset to FALSE before the search and
+ * set by the search itself.
+ */
+nvlist_t *
+zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
+    boolean_t *l2cache, boolean_t *log)
+{
+	char fullpath[MAXPATHLEN];
+	const char *search = path;
+	char *numend;
+	nvlist_t *root;
+	uint64_t guid;
+
+	guid = strtoull(path, &numend, 10);
+	if (guid != 0 && *numend == '\0') {
+		/* Purely numeric: search by GUID rather than by path. */
+		search = NULL;
+	} else if (path[0] != '/') {
+		(void) snprintf(fullpath, sizeof (fullpath), "%s%s",
+		    "/dev/dsk/", path);
+		search = fullpath;
+	}
+
+	*avail_spare = B_FALSE;
+	*l2cache = B_FALSE;
+	if (log != NULL)
+		*log = B_FALSE;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &root) == 0);
+
+	return (vdev_to_nvlist_iter(root, search, guid, avail_spare,
+	    l2cache, log));
+}
+
+/*
+ * Return 1 if the vdev described by 'nv' carries none of the offline,
+ * faulted or removed markers, and 0 if any one of them is present.
+ */
+static int
+vdev_online(nvlist_t *nv)
+{
+	uint64_t marker;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &marker) == 0)
+		return (0);
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &marker) == 0)
+		return (0);
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &marker) == 0)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Get phys_path for a root pool
+ * Return 0 on success; non-zero on failure.
+ *
+ * For a top-level disk the vdev's phys_path is copied into 'physpath'.  For
+ * a top-level mirror the phys_paths of its children are appended to whatever
+ * string 'physpath' already holds, separated by single spaces, so the caller
+ * must pass in a valid (normally empty) string.
+ *
+ * NOTE(review): the buffer size is not passed in; as in the original length
+ * check, 'physpath' is assumed to hold at least MAXNAMELEN bytes -- confirm
+ * against callers.
+ *
+ * Failure codes:
+ *	-1	the pool is not bootable
+ *	-2	the vdev tree has no children
+ *	-3	a top-level vdev has no type
+ *	-4	a mirror has no children
+ *	-5	a mirror child has no phys_path
+ *	-6	the result would not fit in MAXNAMELEN bytes
+ *	-7	a top-level vdev is neither a disk nor a mirror
+ *	-8	a device is offline, faulted or removed
+ */
+int
+zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
+{
+	nvlist_t *vdev_root;
+	nvlist_t **child;
+	uint_t count;
+	uint_t i;
+
+	/*
+	 * Make sure this is a root pool, as phys_path doesn't mean
+	 * anything to a non-root pool.
+	 */
+	if (!pool_is_bootable(zhp))
+		return (-1);
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config,
+	    ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
+
+	if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
+	    &child, &count) != 0)
+		return (-2);
+
+	for (i = 0; i < count; i++) {
+		nvlist_t **child2;
+		uint_t count2;
+		char *type;
+		char *tmppath;
+		uint_t j;
+
+		if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
+		    != 0)
+			return (-3);
+
+		if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+			if (!vdev_online(child[i]))
+				return (-8);
+			verify(nvlist_lookup_string(child[i],
+			    ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
+
+			/*
+			 * Copy with a terminator; strncpy() bounded by the
+			 * source length would leave the result unterminated.
+			 */
+			if (strlen(tmppath) >= MAXNAMELEN)
+				return (-6);
+			(void) strlcpy(physpath, tmppath, MAXNAMELEN);
+		} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
+			if (nvlist_lookup_nvlist_array(child[i],
+			    ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
+				return (-4);
+
+			for (j = 0; j < count2; j++) {
+				size_t used, extra;
+
+				if (!vdev_online(child2[j]))
+					return (-8);
+				if (nvlist_lookup_string(child2[j],
+				    ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
+					return (-5);
+
+				/*
+				 * Account for the separating space (when
+				 * something is already there) and for the
+				 * terminating NUL.
+				 */
+				used = strlen(physpath);
+				extra = strlen(tmppath) + (used != 0 ? 1 : 0);
+				if (used + extra >= MAXNAMELEN)
+					return (-6);
+
+				if (used != 0)
+					(void) strlcat(physpath, " ",
+					    MAXNAMELEN);
+				(void) strlcat(physpath, tmppath, MAXNAMELEN);
+			}
+		} else {
+			return (-7);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Report whether 'guid' belongs to one of the vdevs listed under the array
+ * named 'type' at the root of the pool's vdev tree.  Callers pass the spares
+ * or cache array name to test for hot spares (INUSE or not) and level 2
+ * cache devices.  Returns B_TRUE on a match, B_FALSE otherwise (including
+ * when the array does not exist).
+ */
+static boolean_t
+is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
+{
+	nvlist_t *root;
+	nvlist_t **members;
+	uint_t nmembers, idx;
+	uint64_t member_guid;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &root) == 0);
+
+	if (nvlist_lookup_nvlist_array(root, type, &members, &nmembers) != 0)
+		return (B_FALSE);
+
+	for (idx = 0; idx < nmembers; idx++) {
+		verify(nvlist_lookup_uint64(members[idx], ZPOOL_CONFIG_GUID,
+		    &member_guid) == 0);
+		if (member_guid == guid)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Bring the vdev named by 'path' online.  'flags' is a set of ZFS_ONLINE_*
+ * flags and is forwarded to the kernel.  On success 0 is returned and the
+ * state reported back by the kernel is stored in '*newstate'.  Unknown
+ * devices and hot spares are rejected with an error reported through the
+ * library handle.
+ */
+int
+zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
+    vdev_state_t *newstate)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
+
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	/* Spares, whether available or in use, cannot be onlined here. */
+	if (is_spare || is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES))
+		return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_obj = flags;
+	zc.zc_cookie = VDEV_STATE_ONLINE;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, errmsg));
+
+	/* The kernel hands the resulting state back in the cookie. */
+	*newstate = zc.zc_cookie;
+	return (0);
+}
+
+/*
+ * Take the vdev named by 'path' offline.  When 'istmp' is set the request
+ * carries ZFS_OFFLINE_TEMPORARY.  Returns 0 on success; otherwise an error
+ * is reported through the library handle (EBUSY from the kernel is reported
+ * as EZFS_NOREPLICAS).
+ */
+int
+zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
+
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (is_spare || is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES))
+		return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_cookie = VDEV_STATE_OFFLINE;
+	/* 'zc' starts out zeroed, so only the temporary case needs a flag. */
+	if (istmp)
+		zc.zc_obj = ZFS_OFFLINE_TEMPORARY;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	/*
+	 * EBUSY: there are no other replicas of this device.
+	 */
+	if (errno == EBUSY)
+		return (zfs_error(hdl, EZFS_NOREPLICAS, errmsg));
+
+	return (zpool_standard_error(hdl, errno, errmsg));
+}
+
+/*
+ * Mark the vdev identified by 'guid' as faulted.  Returns 0 on success;
+ * otherwise an error is reported through the library handle (EBUSY from the
+ * kernel is reported as EZFS_NOREPLICAS).
+ */
+int
+zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
+
+	zc.zc_cookie = VDEV_STATE_FAULTED;
+	zc.zc_guid = guid;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	/*
+	 * EBUSY: there are no other replicas of this device.
+	 */
+	if (errno == EBUSY)
+		return (zfs_error(hdl, EZFS_NOREPLICAS, errmsg));
+
+	return (zpool_standard_error(hdl, errno, errmsg));
+}
+
+/*
+ * Mark the vdev identified by 'guid' as degraded.  Returns 0 on success;
+ * otherwise the failure is reported through zpool_standard_error() and its
+ * value is returned.
+ */
+int
+zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
+
+	zc.zc_cookie = VDEV_STATE_DEGRADED;
+	zc.zc_guid = guid;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, errmsg));
+
+	return (0);
+}
+
+/*
+ * Walk the vdev tree rooted at 'search' and return B_TRUE if 'tgt' is child
+ * number 'which' of some two-way vdev of type "spare", i.e. it takes part in
+ * a hot spare substitution.  Returns B_FALSE otherwise.
+ */
+static boolean_t
+is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
+{
+	nvlist_t **kids;
+	uint_t nkids, i;
+	char *vtype;
+
+	/* Leaves have no children and therefore cannot contain 'tgt'. */
+	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &kids,
+	    &nkids) != 0)
+		return (B_FALSE);
+
+	verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, &vtype) == 0);
+
+	if (strcmp(vtype, VDEV_TYPE_SPARE) == 0 && nkids == 2 &&
+	    kids[which] == tgt)
+		return (B_TRUE);
+
+	for (i = 0; i < nkids; i++) {
+		if (is_replacing_spare(kids[i], tgt, which))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Attach new_disk (fully described by nvroot) to old_disk.
+ * If 'replacing' is specified, the new disk will replace the old one.
+ *
+ * 'nvroot' must describe exactly one child vdev.  EFI-labeled devices are
+ * refused for bootable pools, and available spares and cache devices cannot
+ * be attach targets.  Returns 0 on success; on failure an error is reported
+ * through the library handle and a non-zero value (-1 or the result of
+ * zfs_error()) is returned.
+ */
+int
+zpool_vdev_attach(zpool_handle_t *zhp,
+    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	int ret;
+	int err;
+	nvlist_t *tgt;
+	boolean_t avail_spare, l2cache, islog;
+	uint64_t val;
+	char *path, *newname;
+	nvlist_t **child;
+	uint_t children;
+	nvlist_t *config_root;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	boolean_t rootpool = pool_is_bootable(zhp);
+
+	if (replacing)
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot replace %s with %s"), old_disk, new_disk);
+	else
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot attach %s to %s"), new_disk, old_disk);
+
+	/*
+	 * If this is a root pool, make sure that we're not attaching an
+	 * EFI labeled device.
+	 */
+	if (rootpool && pool_uses_efi(nvroot)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "EFI labeled devices are not supported on root pools."));
+		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
+	    &islog)) == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (avail_spare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	if (l2cache)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+	zc.zc_cookie = replacing;
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0 || children != 1) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
+	}
+
+	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
+
+	/* The returned name is allocated and is freed on every path below. */
+	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
+		return (-1);
+
+	/*
+	 * If the target is a hot spare that has been swapped in, we can only
+	 * replace it with another hot spare.
+	 */
+	if (replacing &&
+	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
+	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
+	    NULL) == NULL || !avail_spare) &&
+	    is_replacing_spare(config_root, tgt, 1)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can only be replaced by another hot spare"));
+		free(newname);
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	/*
+	 * If we are attempting to replace a spare, it cannot be applied to an
+	 * already spared device.
+	 */
+	if (replacing &&
+	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+	    zpool_find_vdev(zhp, newname, &avail_spare,
+	    &l2cache, NULL) != NULL && avail_spare &&
+	    is_replacing_spare(config_root, tgt, 0)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device has already been replaced with a spare"));
+		free(newname);
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	free(newname);
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
+		return (-1);
+
+	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
+
+	/*
+	 * Record the ioctl's errno now: releasing the command buffers below
+	 * may overwrite it before the error is classified.
+	 */
+	err = errno;
+
+	zcmd_free_nvlists(&zc);
+
+	if (ret == 0) {
+		if (rootpool) {
+			/*
+			 * XXX - This should be removed once we can
+			 * automatically install the bootblocks on the
+			 * newly attached disk.
+			 */
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please "
+			    "be sure to invoke %s to make '%s' bootable.\n"),
+			    BOOTCMD, new_disk);
+		}
+		return (0);
+	}
+
+	switch (err) {
+	case ENOTSUP:
+		/*
+		 * Can't attach to or replace this type of vdev.
+		 */
+		if (replacing) {
+			if (islog)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "cannot replace a log with a spare"));
+			else
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "cannot replace a replacing device"));
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "can only attach to mirrors and top-level "
+			    "disks"));
+		}
+		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
+		break;
+
+	case EINVAL:
+		/*
+		 * The new device must be a single disk.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+		    new_disk);
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case EOVERFLOW:
+		/*
+		 * The new device is too small.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device is too small"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case EDOM:
+		/*
+		 * The new device has a different alignment requirement.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "devices have different sector alignment"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case ENAMETOOLONG:
+		/*
+		 * The resulting top-level vdev spec won't fit in the label.
+		 */
+		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
+		break;
+
+	default:
+		(void) zpool_standard_error(hdl, err, msg);
+	}
+
+	return (-1);
+}
+
+/*
+ * Detach the device named by 'path' from the pool.  Available spares and
+ * cache devices are refused.  Returns 0 on success; on failure an error is
+ * reported through the library handle and a non-zero value is returned.
+ */
+int
+zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
+
+	if (is_spare)
+		return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
+
+	if (is_l2cache)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, errmsg));
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
+		return (0);
+
+	switch (errno) {
+	case ENOTSUP:
+		/*
+		 * Can't detach from this type of vdev.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "only applicable to mirror and replacing vdevs"));
+		(void) zfs_error(hdl, EZFS_BADTARGET, errmsg);
+		break;
+
+	case EBUSY:
+		/*
+		 * There are no other replicas of this device.
+		 */
+		(void) zfs_error(hdl, EZFS_NOREPLICAS, errmsg);
+		break;
+
+	default:
+		(void) zpool_standard_error(hdl, errno, errmsg);
+		break;
+	}
+
+	return (-1);
+}
+
+/*
+ * Remove the device named by 'path' from the pool.  Only inactive hot spares
+ * and level 2 cache devices are accepted; anything else is rejected before
+ * the kernel is consulted.  Returns 0 on success; on failure an error is
+ * reported through the library handle.
+ */
+int
+zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
+
+	if (!(is_spare || is_l2cache)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "only inactive hot spares or cache devices "
+		    "can be removed"));
+		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, errmsg));
+
+	return (0);
+}
+
+/*
+ * Clear the error counts for the whole pool, or for just the device named by
+ * 'path' when 'path' is non-NULL.  Returns 0 on success; on failure an error
+ * is reported through the library handle.
+ */
+int
+zpool_clear(zpool_handle_t *zhp, const char *path)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char errmsg[1024];
+
+	/* Name the device if one was given, otherwise the pool itself. */
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
+	    path ? path : zhp->zpool_name);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (path) {
+		vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache,
+		    NULL);
+		if (vdev == NULL)
+			return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
+
+		/*
+		 * Don't allow error clearing for hot spares.  Do allow
+		 * error clearing for l2cache devices.
+		 */
+		if (is_spare)
+			return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
+
+		verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+		    &zc.zc_guid) == 0);
+	}
+
+	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, errmsg));
+
+	return (0);
+}
+
+/*
+ * Similar to zpool_clear(), but the device is identified by GUID rather than
+ * by path (used by fmd).  Returns 0 on success; on failure the error is
+ * reported through zpool_standard_error() and its value is returned.
+ */
+int
+zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	char errmsg[1024];
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
+	    (u_longlong_t)guid);
+
+	zc.zc_guid = guid;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, errmsg));
+
+	return (0);
+}
+
+/*
+ * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
+ * hierarchy.
+ *
+ * 'cb' is invoked once for every non-directory entry found, with the entry's
+ * path relative to /dev/zvol/dsk (so it starts with the pool name) and the
+ * caller's 'data'.  A non-zero return from 'cb' stops the walk and becomes
+ * the return value.  Returns 0 when the walk completes, or when the
+ * hierarchy does not exist at all, and -1 on any other failure.
+ */
+int
+zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
+    void *data)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char (*paths)[MAXPATHLEN];
+	char (*newpaths)[MAXPATHLEN];
+	size_t size = 4;
+	int curr, fd, base, ret = 0;
+	DIR *dirp;
+	struct dirent *dp;
+	struct stat st;
+
+	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
+		return (errno == ENOENT ? 0 : -1);
+
+	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
+		int err = errno;
+		(void) close(base);
+		return (err == ENOENT ? 0 : -1);
+	}
+
+	/*
+	 * Oddly this wasn't a directory -- ignore that failure since we
+	 * know there are no links lower in the (non-existent) hierarchy.
+	 */
+	if (!S_ISDIR(st.st_mode)) {
+		(void) close(base);
+		return (0);
+	}
+
+	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
+		(void) close(base);
+		return (-1);
+	}
+
+	/*
+	 * 'paths' is used as a stack of pending entries; 'curr' indexes its
+	 * top.  The walk starts with just the pool's own directory.
+	 */
+	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
+	curr = 0;
+
+	while (curr >= 0) {
+		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
+			goto err;
+
+		if (S_ISDIR(st.st_mode)) {
+			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
+				goto err;
+
+			if ((dirp = fdopendir(fd)) == NULL) {
+				(void) close(fd);
+				goto err;
+			}
+
+			while ((dp = readdir(dirp)) != NULL) {
+				if (dp->d_name[0] == '.')
+					continue;
+
+				if ((size_t)(curr + 1) == size) {
+					/*
+					 * Grow through a temporary so the old
+					 * buffer can still be freed at 'err'
+					 * if this fails.
+					 * NOTE(review): assumes zfs_realloc(),
+					 * like realloc(), leaves the old
+					 * buffer allocated on failure.
+					 */
+					newpaths = zfs_realloc(hdl, paths,
+					    size * sizeof (paths[0]),
+					    size * 2 * sizeof (paths[0]));
+					if (newpaths == NULL) {
+						/*
+						 * closedir() also closes the
+						 * descriptor handed to
+						 * fdopendir(); it must not be
+						 * closed a second time.
+						 */
+						(void) closedir(dirp);
+						goto err;
+					}
+
+					paths = newpaths;
+					size *= 2;
+				}
+
+				/*
+				 * Keep the directory itself on top of the
+				 * stack and turn the slot beneath it into the
+				 * path of this entry.
+				 */
+				(void) strlcpy(paths[curr + 1], paths[curr],
+				    sizeof (paths[curr + 1]));
+				(void) strlcat(paths[curr], "/",
+				    sizeof (paths[curr]));
+				(void) strlcat(paths[curr], dp->d_name,
+				    sizeof (paths[curr]));
+				curr++;
+			}
+
+			(void) closedir(dirp);
+
+		} else {
+			if ((ret = cb(paths[curr], data)) != 0)
+				break;
+		}
+
+		curr--;
+	}
+
+	free(paths);
+	(void) close(base);
+
+	return (ret);
+
+err:
+	free(paths);
+	(void) close(base);
+	return (-1);
+}
+
+/*
+ * Bundle of a pool handle and a boolean flag.
+ * NOTE(review): no user of this type is visible in this part of the file;
+ * judging by its name it was meant as callback state for the zvol link
+ * iteration -- confirm before relying on it.
+ */
+typedef struct zvol_cb {
+	zpool_handle_t *zcb_pool;	/* pool handle */
+	boolean_t zcb_create;	/* presumably create vs. remove -- confirm */
+} zvol_cb_t;
+
+/*
+ * Dataset iteration callback: create the device link for 'zhp' if it is a
+ * volume, then visit its snapshots (volumes only) and its child filesystems
+ * with this same callback.  'data' is unused.  The handle is always closed
+ * before returning.  Returns the first non-zero result of the nested
+ * iterations, or 0.
+ */
+/*ARGSUSED*/
+static int
+do_zvol_create(zfs_handle_t *zhp, void *data)
+{
+	int rv = 0;
+
+	if (ZFS_IS_VOLUME(zhp)) {
+		/* A failure to create the link is deliberately ignored. */
+		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+		rv = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
+	}
+
+	if (rv != 0) {
+		zfs_close(zhp);
+		return (rv);
+	}
+
+	rv = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
+	zfs_close(zhp);
+
+	return (rv);
+}
+
+/*
+ * Iterate over all zvols in the pool and make any necessary minor nodes.
+ * Returns the result of the filesystem iteration, or 0 when no handle can be
+ * obtained for the pool's root dataset.
+ */
+int
+zpool_create_zvol_links(zpool_handle_t *zhp)
+{
+	zfs_handle_t *root;
+	int rv;
+
+	root = make_dataset_handle(zhp->zpool_hdl, zhp->zpool_name);
+
+	/*
+	 * If the pool is unavailable, just return success.
+	 */
+	if (root == NULL)
+		return (0);
+
+	rv = zfs_iter_filesystems(root, do_zvol_create, NULL);
+	zfs_close(root);
+
+	return (rv);
+}
+
+/*
+ * zpool_iter_zvol() callback: remove the device link for 'dataset'.  'data'
+ * is the zpool_handle_t whose library handle is used for the removal.
+ */
+static int
+do_zvol_remove(const char *dataset, void *data)
+{
+	libzfs_handle_t *hdl = ((zpool_handle_t *)data)->zpool_hdl;
+
+	return (zvol_remove_link(hdl, dataset));
+}
+
+/*
+ * Remove the minor nodes of every zvol in the pool.  The zvols are found by
+ * walking the /dev links rather than the pool's datasets, so that a
+ * corrupted pool doesn't impede this operation.  Returns the result of
+ * zpool_iter_zvol().
+ */
+int
+zpool_remove_zvol_links(zpool_handle_t *zhp)
+{
+	int rv;
+
+	rv = zpool_iter_zvol(zhp, do_zvol_remove, zhp);
+
+	return (rv);
+}
+
+/*
+ * Convert from a devid string to a path.
+ *
+ * Returns a newly allocated copy of the first device name registered under
+ * "/dev" for the decoded devid and minor name; the caller must free() it.
+ * Returns NULL if the string cannot be decoded, no name is found, or memory
+ * is exhausted.
+ */
+static char *
+devid_to_path(char *devid_str)
+{
+	ddi_devid_t devid;
+	char *minor;
+	char *path;
+	devid_nmlist_t *list = NULL;
+	int ret;
+
+	if (devid_str_decode(devid_str, &devid, &minor) != 0)
+		return (NULL);
+
+	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
+
+	devid_str_free(minor);
+	devid_free(devid);
+
+	if (ret != 0)
+		return (NULL);
+
+	/*
+	 * Release the name list whether or not the copy succeeded, so that a
+	 * failed strdup() does not leak it.
+	 */
+	path = strdup(list[0].devname);
+	devid_free_nmlist(list);
+
+	return (path);
+}
+
+/*
+ * Convert from a path to a devid string.
+ *
+ * Opens 'path' read-only and encodes the device's devid together with its
+ * minor name.  Returns the encoded string, or NULL if the device cannot be
+ * opened or either piece of information is unavailable.
+ */
+static char *
+path_to_devid(const char *path)
+{
+	ddi_devid_t devid;
+	char *minor = NULL;
+	char *encoded = NULL;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return (NULL);
+
+	if (devid_get(fd, &devid) != 0) {
+		(void) close(fd);
+		return (NULL);
+	}
+
+	if (devid_get_minor_name(fd, &minor) == 0)
+		encoded = devid_str_encode(devid, minor);
+
+	if (minor != NULL)
+		devid_str_free(minor);
+	devid_free(devid);
+	(void) close(fd);
+
+	return (encoded);
+}
+
+/*
+ * Issue the necessary ioctl() to update the stored path value for the vdev.  We
+ * ignore any failure here, since a common case is for an unprivileged user to
+ * type 'zpool status', and we'll display the correct information anyway.
+ *
+ * 'nv' supplies the GUID of the vdev to update and 'path' the new path.
+ */
+static void
+set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/*
+	 * strlcpy() always terminates the destination, unlike strncpy()
+	 * bounded by the full buffer size.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
+}
+
+/*
+ * Given a vdev, return the name to display in iostat.  If the vdev has a path,
+ * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
+ * We also check if this is a whole disk, in which case we strip off the
+ * trailing 's0' slice name.
+ *
+ * This routine is also responsible for identifying when disks have been
+ * reconfigured in a new location.  The kernel will have opened the device by
+ * devid, but the path will still refer to the old location.  To catch this, we
+ * first do a path -> devid translation (which is fast for the common case).  If
+ * the devid matches, we're done.  If not, we do a reverse devid -> path
+ * translation and issue the appropriate ioctl() to update the path of the vdev.
+ * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
+ * of these checks.
+ *
+ * The result is a fresh copy made with zfs_strdup(); NULL is returned if that
+ * allocation fails.
+ */
+char *
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
+{
+	char *path, *devid;
+	uint64_t value;
+	char buf[64];
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+	    &value) == 0) {
+		/* a device that is not present is shown by its GUID */
+		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+		    &value) == 0);
+		(void) snprintf(buf, sizeof (buf), "%llu",
+		    (u_longlong_t)value);
+		path = buf;
+	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
+
+		/*
+		 * If the device is dead (faulted, offline, etc) then don't
+		 * bother opening it.  Otherwise we may be forcing the user to
+		 * open a misbehaving device, which can have undesirable
+		 * effects.
+		 */
+		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&vs, &vsc) != 0 ||
+		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
+		    zhp != NULL &&
+		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
+			/*
+			 * Determine if the current path is correct.
+			 */
+			char *newdevid = path_to_devid(path);
+
+			if (newdevid == NULL ||
+			    strcmp(devid, newdevid) != 0) {
+				char *newpath;
+
+				if ((newpath = devid_to_path(devid)) != NULL) {
+					/*
+					 * Update the path appropriately.
+					 * Adding the string may replace the
+					 * value 'path' points at, so look it
+					 * up again afterwards.
+					 */
+					set_path(zhp, nv, newpath);
+					if (nvlist_add_string(nv,
+					    ZPOOL_CONFIG_PATH, newpath) == 0)
+						verify(nvlist_lookup_string(nv,
+						    ZPOOL_CONFIG_PATH,
+						    &path) == 0);
+					free(newpath);
+				}
+			}
+
+			if (newdevid)
+				devid_str_free(newdevid);
+		}
+
+		if (strncmp(path, "/dev/dsk/", 9) == 0)
+			path += 9;
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &value) == 0 && value) {
+			size_t len;
+			char *tmp = zfs_strdup(hdl, path);
+
+			if (tmp == NULL)
+				return (NULL);
+
+			/*
+			 * Drop the two-character slice suffix.  Guard the
+			 * subtraction so that an unexpectedly short name
+			 * cannot cause a write before the start of 'tmp'.
+			 */
+			len = strlen(tmp);
+			if (len >= 2)
+				tmp[len - 2] = '\0';
+			return (tmp);
+		}
+	} else {
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
+
+		/*
+		 * If it's a raidz device, we need to stick in the parity level.
+		 */
+		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+			    &value) == 0);
+			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
+			    (u_longlong_t)value);
+			path = buf;
+		}
+	}
+
+	/* 'path' may point at the local 'buf', so always hand back a copy */
+	return (zfs_strdup(hdl, path));
+}
+
+static int
+zbookmark_compare(const void *a, const void *b)
+{
+	const size_t width = sizeof (zbookmark_t);
+
+	/* qsort() comparator: order bookmarks by their raw bytes */
+	return (memcmp(a, b, width));
+}
+
+/*
+ * Retrieve the persistent error log, uniquify the members, and return to the
+ * caller.
+ *
+ * On success 0 is returned.  If the pool reports no errors, '*nverrlistp' is
+ * left untouched; otherwise it receives a newly allocated nvlist holding one
+ * nested nvlist (ZPOOL_ERR_DATASET, ZPOOL_ERR_OBJECT) per distinct
+ * dataset/object pair.  On failure a non-zero value is returned; if the list
+ * had already been allocated it is freed and '*nverrlistp' is set to NULL.
+ */
+int
+zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
+{
+	zfs_cmd_t zc = { 0 };
+	uint64_t count;
+	zbookmark_t *zb = NULL;
+	uint64_t i;
+
+	/*
+	 * Retrieve the raw error list from the kernel.  If the number of errors
+	 * has increased, allocate more space and continue until we get the
+	 * entire list.
+	 */
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
+	    &count) == 0);
+	if (count == 0)
+		return (0);
+	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
+	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
+		return (-1);
+	zc.zc_nvlist_dst_size = count;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	for (;;) {
+		int err;
+
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
+		    &zc) == 0)
+			break;
+
+		/* capture errno before free() has a chance to disturb it */
+		err = errno;
+		free((void *)(uintptr_t)zc.zc_nvlist_dst);
+		if (err != ENOMEM)
+			return (-1);
+
+		/* the buffer was too small; retry with the reported size */
+		count = zc.zc_nvlist_dst_size;
+		if ((zc.zc_nvlist_dst = (uintptr_t)
+		    zfs_alloc(zhp->zpool_hdl, count *
+		    sizeof (zbookmark_t))) == (uintptr_t)NULL)
+			return (-1);
+	}
+
+	/*
+	 * Sort the resulting bookmarks.  This is a little confusing due to the
+	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
+	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
+	 * _not_ copied as part of the process.  So we point the start of our
+	 * array appropriately and decrement the total number of elements.
+	 */
+	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
+	    zc.zc_nvlist_dst_size;
+	count -= zc.zc_nvlist_dst_size;
+
+	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
+
+	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
+
+	/*
+	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
+	 */
+	for (i = 0; i < count; i++) {
+		nvlist_t *nv;
+
+		/* ignoring zb_blkid and zb_level for now */
+		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
+		    zb[i-1].zb_object == zb[i].zb_object)
+			continue;
+
+		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
+			goto nomem;
+		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
+		    zb[i].zb_objset) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
+		    zb[i].zb_object) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		nvlist_free(nv);
+	}
+
+	free((void *)(uintptr_t)zc.zc_nvlist_dst);
+	return (0);
+
+nomem:
+	/* don't hand a half-built list (or a leak) back to the caller */
+	nvlist_free(*nverrlistp);
+	*nverrlistp = NULL;
+	free((void *)(uintptr_t)zc.zc_nvlist_dst);
+	return (no_memory(zhp->zpool_hdl));
+}
+
+/*
+ * Upgrade a ZFS pool to the latest on-disk version.
+ *
+ * 'new_version' is handed to the kernel in the command's cookie field.
+ * Returns 0 on success, or the result of zpool_standard_error_fmt() if the
+ * ZFS_IOC_POOL_UPGRADE ioctl fails.
+ */
+int
+zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_cookie = new_version;
+	(void) strcpy(zc.zc_name, zhp->zpool_name);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error_fmt(hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
+	    zhp->zpool_name));
+}
+
+/*
+ * Build the history record for a command line into 'history_str'.
+ *
+ * The record starts with 'subcommand' and is followed by argv[1] through
+ * argv[argc - 1], each preceded by a single space.  'history_str' is treated
+ * as a buffer of HIS_MAX_RECORD_LEN bytes.  Arguments are appended whole or
+ * not at all: as soon as one would not fit (including the terminating NUL),
+ * it and all remaining arguments are dropped.
+ */
+void
+zpool_set_history_str(const char *subcommand, int argc, char **argv,
+    char *history_str)
+{
+	int i;
+
+	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
+	for (i = 1; i < argc; i++) {
+		/*
+		 * strlcat() keeps at most HIS_MAX_RECORD_LEN - 1 characters,
+		 * so a resulting length of exactly HIS_MAX_RECORD_LEN would
+		 * silently lose the last character of this argument.
+		 */
+		if (strlen(history_str) + 1 + strlen(argv[i]) >=
+		    HIS_MAX_RECORD_LEN)
+			break;
+		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
+		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
+	}
+}
+
+/*
+ * Stage command history for logging.
+ *
+ * A private copy of 'history_str' replaces whatever string was previously
+ * staged on the handle.  Returns EINVAL if the string is NULL or longer than
+ * HIS_MAX_RECORD_LEN, the result of no_memory() if the copy cannot be
+ * allocated, and 0 otherwise.
+ */
+int
+zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
+{
+	char *staged;
+
+	if (history_str == NULL ||
+	    strlen(history_str) > HIS_MAX_RECORD_LEN)
+		return (EINVAL);
+
+	/* free(NULL) is a no-op, so no guard is needed here */
+	free(hdl->libzfs_log_str);
+
+	staged = strdup(history_str);
+	hdl->libzfs_log_str = staged;
+	if (staged == NULL)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+/*
+ * Perform ioctl to get some command history of a pool.
+ *
+ * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
+ * logical offset of the history buffer to start reading from.
+ *
+ * Upon return, 'off' is the next logical offset to read from and
+ * 'len' is the actual amount of bytes read into 'buf'.
+ *
+ * Returns 0 on success.  On failure 'off' and 'len' are left unchanged and
+ * the result of the matching libzfs error routine is returned.
+ */
+static int
+get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_history = (uint64_t)(uintptr_t)buf;
+	zc.zc_history_len = *len;
+	zc.zc_history_offset = *off;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) == 0) {
+		/* report back how much was read and where to resume */
+		*len = zc.zc_history_len;
+		*off = zc.zc_history_offset;
+		return (0);
+	}
+
+	switch (errno) {
+	case EPERM:
+		return (zfs_error_fmt(hdl, EZFS_PERM,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot show history for pool '%s'"),
+		    zhp->zpool_name));
+	case ENOENT:
+		return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
+		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
+		    "'%s'"), zhp->zpool_name));
+	case ENOTSUP:
+		return (zfs_error_fmt(hdl, EZFS_BADVERSION,
+		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
+		    "'%s', pool must be upgraded"), zhp->zpool_name));
+	default:
+		return (zpool_standard_error_fmt(hdl, errno,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot get history for '%s'"), zhp->zpool_name));
+	}
+}
+
+/*
+ * Process the buffer of nvlists, unpacking and storing each nvlist record
+ * into 'records'.  'leftover' is set to the number of bytes that weren't
+ * processed as there wasn't a complete record.
+ *
+ * Each record in 'buf' is a 64-bit little-endian length followed by that many
+ * bytes of packed nvlist.  '*records' is grown as needed and '*numrecords' is
+ * incremented for every record stored.
+ *
+ * Returns 0 on success, or ENOMEM if a record cannot be unpacked or the
+ * array cannot be grown.  On failure '*records' and '*numrecords' still
+ * describe the records stored so far, and '*leftover' is not written.
+ */
+static int
+zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
+    nvlist_t ***records, uint_t *numrecords)
+{
+	uint64_t reclen;
+	nvlist_t *nv;
+	int i;
+
+	while (bytes_read > sizeof (reclen)) {
+		uint_t newcount;
+
+		/* get length of packed record (stored as little endian) */
+		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
+			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
+
+		/*
+		 * Stop at an incomplete record.  Written as a subtraction
+		 * (safe, since bytes_read > sizeof (reclen) here) so that a
+		 * huge length cannot wrap the comparison.
+		 */
+		if (reclen > bytes_read - sizeof (reclen))
+			break;
+
+		/* unpack record */
+		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
+			return (ENOMEM);
+		bytes_read -= sizeof (reclen) + reclen;
+		buf += sizeof (reclen) + reclen;
+
+		/*
+		 * Add record to nvlist array.  The array is resized whenever
+		 * the new count plus one is a power of two.  The count is
+		 * only committed once the record has a slot, and the old
+		 * array stays valid if realloc() fails.
+		 */
+		newcount = *numrecords + 1;
+		if (ISP2(newcount + 1)) {
+			nvlist_t **grown = realloc(*records,
+			    newcount * 2 * sizeof (nvlist_t *));
+
+			if (grown == NULL) {
+				nvlist_free(nv);
+				return (ENOMEM);
+			}
+			*records = grown;
+		}
+		(*records)[newcount - 1] = nv;
+		*numrecords = newcount;
+	}
+
+	*leftover = bytes_read;
+	return (0);
+}
+
+#define	HIS_BUF_LEN	(128*1024)
+
+/*
+ * Retrieve the command history of a pool.
+ *
+ * The history is read in chunks and unpacked record by record.  On success 0
+ * is returned and '*nvhisp' receives a new nvlist whose ZPOOL_HIST_RECORD
+ * entry is the array of records.  On failure the error from get_history() or
+ * zpool_history_unpack() (or ENOMEM if the read buffer cannot be allocated)
+ * is returned and '*nvhisp' is not written.
+ */
+int
+zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
+{
+	uint64_t buflen = HIS_BUF_LEN;
+	char *buf;
+	uint64_t off = 0;
+	nvlist_t **records = NULL;
+	uint_t numrecords = 0;
+	uint_t i;
+	int err;
+
+	/* heap rather than stack: the buffer is large and may need to grow */
+	if ((buf = malloc(buflen)) == NULL)
+		return (ENOMEM);
+
+	for (;;) {
+		uint64_t bytes_read = buflen;
+		uint64_t leftover;
+
+		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
+			break;
+
+		/* if nothing else was read in, we're at EOF, just return */
+		if (!bytes_read)
+			break;
+
+		if ((err = zpool_history_unpack(buf, bytes_read,
+		    &leftover, &records, &numrecords)) != 0)
+			break;
+
+		/* re-read the trailing partial record on the next pass */
+		off -= leftover;
+
+		if (leftover == bytes_read) {
+			/*
+			 * No progress was made: the buffer cannot hold even
+			 * one complete record.  Retrying with the same size
+			 * would loop forever, so double the buffer first.
+			 */
+			free(buf);
+			buflen *= 2;
+			if ((buf = malloc(buflen)) == NULL) {
+				err = ENOMEM;
+				break;
+			}
+		}
+	}
+
+	free(buf);
+
+	if (!err) {
+		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
+		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
+		    records, numrecords) == 0);
+	}
+
+	/* the records themselves are released in both outcomes */
+	for (i = 0; i < numrecords; i++)
+		nvlist_free(records[i]);
+	free(records);
+
+	return (err);
+}
+
+/*
+ * Format a printable location for object 'obj' of dataset object 'dsobj'
+ * into 'pathname' (a buffer of 'len' bytes).  Depending on how much can be
+ * resolved, the result is one of:
+ *
+ *	<metadata>:<0xOBJ>	dsobj is 0 (the MOS)
+ *	<0xDSOBJ>:<0xOBJ>	the dataset name could not be looked up
+ *	MOUNTPOINT/PATH		path resolved and the dataset is mounted
+ *	DATASET:/PATH		path resolved, dataset not mounted
+ *	DATASET:<0xOBJ>		the object's path could not be looked up
+ */
+void
+zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
+    char *pathname, size_t len)
+{
+	zfs_cmd_t zc = { 0 };
+	char dsname[MAXNAMELEN];
+	char *mountpoint = NULL;
+	boolean_t is_mnt = B_FALSE;
+
+	/* special case for the MOS */
+	if (dsobj == 0) {
+		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", (longlong_t)obj);
+		return;
+	}
+
+	/* get the dataset's name */
+	zc.zc_obj = dsobj;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if (ioctl(zhp->zpool_hdl->libzfs_fd,
+	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
+		/* just write out a path of two object numbers */
+		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
+		    (longlong_t)dsobj, (longlong_t)obj);
+		return;
+	}
+	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
+
+	/* find out if the dataset is mounted */
+	is_mnt = is_mounted(zhp->zpool_hdl, dsname, &mountpoint);
+
+	/* get the corrupted object's path */
+	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
+	zc.zc_obj = obj;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
+	    &zc) != 0) {
+		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, (longlong_t)obj);
+	} else if (is_mnt) {
+		(void) snprintf(pathname, len, "%s%s", mountpoint,
+		    zc.zc_value);
+	} else {
+		(void) snprintf(pathname, len, "%s:%s",
+		    dsname, zc.zc_value);
+	}
+
+	free(mountpoint);
+}
+
+#define	RDISK_ROOT	"/dev/rdsk"
+#define	BACKUP_SLICE	"s2"
+/*
+ * Don't start the slice at the default block of 34; many storage
+ * devices will use a stripe width of 128k, so start there instead.
+ */
+#define	NEW_START_BLOCK	256
+
+/*
+ * Read the EFI label from the config, if a label does not exist then
+ * pass back the error to the caller. If the caller has passed a non-NULL
+ * diskaddr argument then we set it to the starting address of the EFI
+ * partition.
+ *
+ * The raw device name is formed from RDISK_ROOT plus the last component of
+ * the vdev's ZPOOL_CONFIG_PATH.  Returns -1 if the config has no path, the
+ * path contains no '/', or the device cannot be opened; otherwise returns
+ * the result of efi_alloc_and_read().
+ */
+static int
+read_efi_label(nvlist_t *config, diskaddr_t *sb)
+{
+	char *path;
+	char *slash;
+	int fd;
+	char diskname[MAXPATHLEN];
+	int err = -1;
+
+	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
+		return (err);
+
+	/*
+	 * Without a '/' there is no final component to append; passing the
+	 * NULL from strrchr() to "%s" would be undefined behavior.
+	 */
+	if ((slash = strrchr(path, '/')) == NULL)
+		return (err);
+
+	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
+	    slash);
+	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
+		struct dk_gpt *vtoc;
+
+		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
+			if (sb != NULL)
+				*sb = vtoc->efi_parts[0].p_start;
+			efi_free(vtoc);
+		}
+		(void) close(fd);
+	}
+	return (err);
+}
+
+/*
+ * determine where a partition starts on a disk in the current
+ * configuration
+ *
+ * The vdev tree is walked depth-first; the answer is the start block of the
+ * first whole-disk leaf whose EFI label can be read.  MAXOFFSET_T is returned
+ * when no such leaf is found.
+ */
+static diskaddr_t
+find_start_block(nvlist_t *config)
+{
+	nvlist_t **kids;
+	uint_t nkids, idx;
+	uint64_t whole;
+	diskaddr_t start = MAXOFFSET_T;
+
+	if (nvlist_lookup_nvlist_array(config,
+	    ZPOOL_CONFIG_CHILDREN, &kids, &nkids) == 0) {
+		/* interior vdev: first child subtree with an answer wins */
+		for (idx = 0; idx < nkids; idx++) {
+			start = find_start_block(kids[idx]);
+			if (start != MAXOFFSET_T)
+				return (start);
+		}
+		return (MAXOFFSET_T);
+	}
+
+	/* leaf vdev: only whole disks are considered */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_WHOLE_DISK,
+	    &whole) != 0 || !whole)
+		return (MAXOFFSET_T);
+
+	if (read_efi_label(config, &start) < 0)
+		return (MAXOFFSET_T);
+
+	return (start);
+}
+
+/*
+ * Label an individual disk.  The name provided is the short name,
+ * stripped of any leading /dev path.
+ *
+ * An EFI label is written through the backup slice of the raw device
+ * (RDISK_ROOT/<name>BACKUP_SLICE).  Partition 0 is tagged V_USR, named "zfs"
+ * and begins at the chosen start block; partition 8 is a V_RESERVED area of
+ * size EFI_MIN_RESV_SIZE placed directly after it.
+ *
+ * If 'zhp' is non-NULL, bootable pools are rejected and the start block is
+ * taken from the handle's zpool_start_block, which is computed with
+ * find_start_block() and cached when it is still 0.  If 'zhp' is NULL this is
+ * a disk for a new pool and NEW_START_BLOCK is used.
+ *
+ * Returns 0 on success, or the result of zfs_error() on failure.
+ */
+int
+zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
+{
+	char path[MAXPATHLEN];
+	struct dk_gpt *vtoc;
+	int fd;
+	size_t resv = EFI_MIN_RESV_SIZE;
+	uint64_t slice_size;
+	diskaddr_t start_block;
+	char errbuf[1024];
+
+	/* prepare an error message just in case */
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
+
+	if (zhp) {
+		nvlist_t *nvroot;
+
+		if (pool_is_bootable(zhp)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "EFI labeled devices are not supported on root "
+			    "pools."));
+			return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
+		}
+
+		verify(nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+
+		if (zhp->zpool_start_block == 0)
+			start_block = find_start_block(nvroot);
+		else
+			start_block = zhp->zpool_start_block;
+		zhp->zpool_start_block = start_block;	/* cache for next call */
+	} else {
+		/* new pool */
+		start_block = NEW_START_BLOCK;
+	}
+
+	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
+	    BACKUP_SLICE);
+
+	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+		/*
+		 * This shouldn't happen.  We've long since verified that this
+		 * is a valid device.
+		 */
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "unable to open device"));
+		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
+	}
+
+	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
+		/*
+		 * The only way this can fail is if we run out of memory, or we
+		 * were unable to read the disk's capacity
+		 *
+		 * NOTE(review): 'name' is passed to zfs_error_aux() below, but
+		 * the message contains no conversion specifier that uses it.
+		 */
+		if (errno == ENOMEM)
+			(void) no_memory(hdl);
+
+		(void) close(fd);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "unable to read disk capacity"), name);
+
+		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	}
+
+	slice_size = vtoc->efi_last_u_lba + 1;
+	slice_size -= EFI_MIN_RESV_SIZE;
+	if (start_block == MAXOFFSET_T)	/* find_start_block() found nothing */
+		start_block = NEW_START_BLOCK;
+	slice_size -= start_block;
+
+	vtoc->efi_parts[0].p_start = start_block;
+	vtoc->efi_parts[0].p_size = slice_size;
+
+	/*
+	 * Why we use V_USR: V_BACKUP confuses users, and is considered
+	 * disposable by some EFI utilities (since EFI doesn't have a backup
+	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
+	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
+	 * etc. were all pretty specific.  V_USR is as close to reality as we
+	 * can get, in the absence of V_OTHER.
+	 */
+	vtoc->efi_parts[0].p_tag = V_USR;
+	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
+
+	vtoc->efi_parts[8].p_start = slice_size + start_block;
+	vtoc->efi_parts[8].p_size = resv;
+	vtoc->efi_parts[8].p_tag = V_RESERVED;
+
+	if (efi_write(fd, vtoc) != 0) {
+		/*
+		 * Some block drivers (like pcata) may not support EFI
+		 * GPT labels.  Print out a helpful error message dir-
+		 * ecting the user to manually label the disk and give
+		 * a specific slice.
+		 */
+		(void) close(fd);
+		efi_free(vtoc);
+
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "try using fdisk(1M) and then provide a specific slice"));
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	}
+
+	(void) close(fd);
+	efi_free(vtoc);
+	return (0);
+}
+
+/*
+ * Recursively check that neither this vdev nor any of its descendants is of a
+ * type that is rejected for dump devices (raidz, file, log or missing).  The
+ * first offending vdev is reported through 'hdl' using 'errbuf' and B_FALSE is
+ * returned; B_TRUE means the whole subtree is acceptable.
+ */
+static boolean_t
+supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
+{
+	nvlist_t **kids;
+	uint_t nkids, idx;
+	char *type;
+	boolean_t rejected;
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
+
+	rejected = (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
+	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
+	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
+	    strcmp(type, VDEV_TYPE_MISSING) == 0);
+	if (rejected) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "vdev type '%s' is not supported"), type);
+		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
+		return (B_FALSE);
+	}
+
+	/* a vdev without children has nothing further to check */
+	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_TRUE);
+
+	for (idx = 0; idx < nkids; idx++) {
+		if (!supported_dump_vdev_type(hdl, kids[idx], errbuf))
+			return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
+/*
+ * check if this zvol is allowable for use as a dump device; zero if
+ * it is, > 0 if it isn't, < 0 if it isn't a zvol
+ *
+ * 'arg' must begin with ZVOL_FULL_DEV_DIR; the pool name is the component
+ * that follows, up to the next '/'.  The pool must have exactly one top-level
+ * vdev, and that vdev's subtree must pass supported_dump_vdev_type().  A
+ * private libzfs handle is created for the check and always released before
+ * returning.
+ */
+int
+zvol_check_dump_config(char *arg)
+{
+	zpool_handle_t *zhp = NULL;
+	nvlist_t *config, *nvroot;
+	char *p, *volname;
+	nvlist_t **top;
+	uint_t toplevels;
+	libzfs_handle_t *hdl;
+	char errbuf[1024];
+	char poolname[ZPOOL_MAXNAMELEN];
+	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
+	int ret = 1;
+
+	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
+		return (-1);
+	}
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "dump is not supported on device '%s'"), arg);
+
+	if ((hdl = libzfs_init()) == NULL)
+		return (1);
+	libzfs_print_on_error(hdl, B_TRUE);
+
+	volname = arg + pathlen;
+
+	/*
+	 * check the configuration of the pool
+	 *
+	 * From here on every exit goes through 'out' so that the library
+	 * handle obtained above is not leaked.
+	 */
+	if ((p = strchr(volname, '/')) == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "malformed dataset name"));
+		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+		goto out;
+	} else if (p - volname >= ZFS_MAXNAMELEN) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset name is too long"));
+		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
+		goto out;
+	} else {
+		(void) strncpy(poolname, volname, p - volname);
+		poolname[p - volname] = '\0';
+	}
+
+	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "could not open pool '%s'"), poolname);
+		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
+		goto out;
+	}
+	config = zpool_get_config(zhp, NULL);
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "could not obtain vdev configuration for  '%s'"), poolname);
+		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
+		goto out;
+	}
+
+	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &top, &toplevels) == 0);
+	if (toplevels != 1) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' has multiple top level vdevs"), poolname);
+		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
+		goto out;
+	}
+
+	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
+		goto out;
+	}
+	ret = 0;
+
+out:
+	if (zhp)
+		zpool_close(zhp);
+	libzfs_fini(hdl);
+	return (ret);
+}
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
new file mode 100644
index 000000000..a7acf3974
--- /dev/null
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -0,0 +1,2102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libdevinfo.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/avl.h>
+#include <stddef.h>
+
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+#include <fletcher.c> /* XXX */
+
+static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
+    int, avl_tree_t *, char **);
+
+/*
+ * Routines for dealing with the AVL tree of fs-nvlists
+ *
+ * Each node maps one snapshot GUID to the nvlist of the filesystem that
+ * contains the snapshot, along with the snapshot's short name.
+ */
+typedef struct fsavl_node {
+	avl_node_t fn_node;	/* AVL linkage */
+	nvlist_t *fn_nvfs;	/* containing fs nvlist (borrowed) */
+	char *fn_snapname;	/* nvpair name in "snaps" (not copied) */
+	uint64_t fn_guid;	/* snapshot GUID; the sort/lookup key */
+} fsavl_node_t;
+
+/*
+ * AVL comparator: order fsavl nodes by snapshot GUID.
+ */
+static int
+fsavl_compare(const void *arg1, const void *arg2)
+{
+	const uint64_t lguid = ((const fsavl_node_t *)arg1)->fn_guid;
+	const uint64_t rguid = ((const fsavl_node_t *)arg2)->fn_guid;
+
+	if (lguid == rguid)
+		return (0);
+
+	return (lguid > rguid ? +1 : -1);
+}
+
+/*
+ * Given the GUID of a snapshot, find its containing filesystem and
+ * (optionally) name.
+ *
+ * Returns the filesystem's nvlist, or NULL if no snapshot with that GUID is
+ * in the tree.  When 'snapname' is non-NULL and a match is found, it is
+ * pointed at the name stored in the node; it is left untouched otherwise.
+ */
+static nvlist_t *
+fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
+{
+	fsavl_node_t key;
+	fsavl_node_t *match;
+
+	/* only the GUID takes part in the comparison */
+	key.fn_guid = snapguid;
+
+	match = avl_find(avl, &key, NULL);
+	if (match == NULL)
+		return (NULL);
+
+	if (snapname != NULL)
+		*snapname = match->fn_snapname;
+	return (match->fn_nvfs);
+}
+
+/*
+ * Free every node of an fsavl tree and then the tree itself.  A NULL tree is
+ * accepted and ignored.
+ */
+static void
+fsavl_destroy(avl_tree_t *avl)
+{
+	void *cookie = NULL;
+	fsavl_node_t *node;
+
+	if (avl == NULL)
+		return;
+
+	for (;;) {
+		node = avl_destroy_nodes(avl, &cookie);
+		if (node == NULL)
+			break;
+		free(node);
+	}
+
+	avl_destroy(avl);
+	free(avl);
+}
+
+/*
+ * Build an AVL tree, keyed by snapshot GUID, over every snapshot listed in
+ * the "snaps" nvlist of every filesystem in 'fss'.  Nodes point into 'fss'
+ * rather than copying from it.  Returns NULL if memory runs out; anything
+ * already built is torn down first.
+ */
+static avl_tree_t *
+fsavl_create(nvlist_t *fss)
+{
+	avl_tree_t *tree;
+	nvpair_t *fspair;
+
+	tree = malloc(sizeof (avl_tree_t));
+	if (tree == NULL)
+		return (NULL);
+
+	avl_create(tree, fsavl_compare, sizeof (fsavl_node_t),
+	    offsetof(fsavl_node_t, fn_node));
+
+	for (fspair = nvlist_next_nvpair(fss, NULL); fspair != NULL;
+	    fspair = nvlist_next_nvpair(fss, fspair)) {
+		nvlist_t *nvfs, *snaps;
+		nvpair_t *snappair;
+
+		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
+		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+
+		for (snappair = nvlist_next_nvpair(snaps, NULL);
+		    snappair != NULL;
+		    snappair = nvlist_next_nvpair(snaps, snappair)) {
+			fsavl_node_t *node;
+			uint64_t guid;
+
+			VERIFY(0 == nvpair_value_uint64(snappair, &guid));
+
+			node = malloc(sizeof (fsavl_node_t));
+			if (node == NULL) {
+				fsavl_destroy(tree);
+				return (NULL);
+			}
+			node->fn_guid = guid;
+			node->fn_snapname = nvpair_name(snappair);
+			node->fn_nvfs = nvfs;
+
+			/*
+			 * Note: if there are multiple snaps with the
+			 * same GUID, we ignore all but one.
+			 */
+			if (avl_find(tree, node, NULL) != NULL)
+				free(node);
+			else
+				avl_add(tree, node);
+		}
+	}
+
+	return (tree);
+}
+
+/*
+ * Routines for dealing with the giant nvlist of fs-nvlists, etc.
+ *
+ * send_data_t is the state threaded through the send_iterate_*() callbacks
+ * while the "fss" nvlist described below is being assembled.
+ */
+typedef struct send_data {
+	uint64_t parent_fromsnap_guid;	/* saved/restored around each fs */
+	nvlist_t *parent_snaps;	/* current fs: snapname -> guid */
+	nvlist_t *fss;		/* result: guid string -> fs nvlist */
+	nvlist_t *snapprops;	/* current fs: snapname -> props nvlist */
+	const char *fromsnap;	/* short name of "from" snapshot, or NULL */
+	const char *tosnap;	/* short name of "to" snapshot, or NULL */
+
+	/*
+	 * The header nvlist is of the following format:
+	 * {
+	 *   "tosnap" -> string
+	 *   "fromsnap" -> string (if incremental)
+	 *   "fss" -> {
+	 *	id -> {
+	 *
+	 *	 "name" -> string (full name; for debugging)
+	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
+	 *
+	 *	 "props" -> { name -> value (only if set here) }
+	 *	 "snaps" -> { name (lastname) -> number (guid) }
+	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
+	 *
+	 *	 "origin" -> number (guid) (if clone)
+	 *	 "sent" -> boolean (not on-disk)
+	 *	}
+	 *   }
+	 * }
+	 *
+	 */
+} send_data_t;
+
+static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
+
+/*
+ * Snapshot iteration callback: record one snapshot of the filesystem being
+ * processed.  Its short name -> GUID mapping goes into sd->parent_snaps and
+ * its locally set properties into sd->snapprops.  The handle is closed
+ * before returning; the return value is always 0.
+ */
+static int
+send_iterate_snap(zfs_handle_t *zhp, void *arg)
+{
+	send_data_t *sd = arg;
+	const uint64_t snapguid = zhp->zfs_dmustats.dds_guid;
+	char *shortname = strrchr(zhp->zfs_name, '@') + 1;
+	nvlist_t *props;
+
+	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, shortname, snapguid));
+
+	if (sd->fromsnap != NULL && strcmp(shortname, sd->fromsnap) == 0) {
+		sd->parent_fromsnap_guid = snapguid;
+	} else if (sd->parent_fromsnap_guid == 0 && sd->tosnap != NULL &&
+	    strcmp(shortname, sd->tosnap) == 0) {
+		/*
+		 * NB: if there is no fromsnap here (it's a newly created fs
+		 * in an incremental replication), we will substitute the
+		 * tosnap.
+		 */
+		sd->parent_fromsnap_guid = snapguid;
+	}
+
+	VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+	send_iterate_prop(zhp, props);
+	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, shortname, props));
+	nvlist_free(props);
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Copy into 'nv' the properties of 'zhp' that are worth sending: user
+ * properties and non-readonly native properties whose source is this very
+ * dataset.  Quota and reservation carry no source, so they are taken
+ * unconditionally, except on snapshots.  Values are stored as strings for
+ * user and string-typed properties and as uint64 otherwise.
+ */
+static void
+send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
+{
+	nvpair_t *pair;
+
+	for (pair = nvlist_next_nvpair(zhp->zfs_props, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(zhp->zfs_props, pair)) {
+		char *name = nvpair_name(pair);
+		zfs_prop_t prop = zfs_name_to_prop(name);
+		nvlist_t *attrs;
+
+		if (!zfs_prop_user(name) && zfs_prop_readonly(prop))
+			continue;
+
+		verify(nvpair_value_nvlist(pair, &attrs) == 0);
+		if (prop != ZFS_PROP_QUOTA && prop != ZFS_PROP_RESERVATION) {
+			char *source;
+
+			/* skip anything not set on this dataset itself */
+			if (nvlist_lookup_string(attrs,
+			    ZPROP_SOURCE, &source) != 0)
+				continue;
+			if (strcmp(source, zhp->zfs_name) != 0)
+				continue;
+		} else {
+			/* these guys are modifiable, but have no source */
+			uint64_t ignored;
+
+			verify(nvlist_lookup_uint64(attrs,
+			    ZPROP_VALUE, &ignored) == 0);
+			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
+				continue;
+		}
+
+		if (!zfs_prop_user(name) &&
+		    zfs_prop_get_type(prop) != PROP_TYPE_STRING) {
+			uint64_t numval;
+
+			verify(nvlist_lookup_uint64(attrs,
+			    ZPROP_VALUE, &numval) == 0);
+			VERIFY(0 == nvlist_add_uint64(nv, name, numval));
+		} else {
+			char *strval;
+
+			verify(nvlist_lookup_string(attrs,
+			    ZPROP_VALUE, &strval) == 0);
+			VERIFY(0 == nvlist_add_string(nv, name, strval));
+		}
+	}
+}
+
+/*
+ * Filesystem iteration callback: describe 'zhp' and, recursively, all of its
+ * child filesystems in sd->fss.
+ *
+ * For each filesystem an nvlist holding its name, the GUID of the parent's
+ * "from" snapshot, its origin GUID (for clones), its properties, its
+ * snapshots and their properties is added to sd->fss under the hex string of
+ * the filesystem's GUID.
+ *
+ * The handle is always closed before returning, on failure as well as on
+ * success.  Returns -1 if a clone's origin cannot be opened, otherwise the
+ * result of iterating over the children.
+ */
+static int
+send_iterate_fs(zfs_handle_t *zhp, void *arg)
+{
+	send_data_t *sd = arg;
+	nvlist_t *nvfs, *nv;
+	int rv;
+	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
+	uint64_t guid = zhp->zfs_dmustats.dds_guid;
+	char guidstring[64];
+
+	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
+	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
+	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
+	    sd->parent_fromsnap_guid));
+
+	if (zhp->zfs_dmustats.dds_origin[0]) {
+		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
+		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+		if (origin == NULL) {
+			/* don't leak the partial nvlist or our handle */
+			nvlist_free(nvfs);
+			zfs_close(zhp);
+			return (-1);
+		}
+		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
+		    origin->zfs_dmustats.dds_guid));
+		/* only the GUID was needed; release the origin handle */
+		zfs_close(origin);
+	}
+
+	/* iterate over props */
+	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
+	send_iterate_prop(zhp, nv);
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
+	nvlist_free(nv);
+
+	/* iterate over snaps, and set sd->parent_fromsnap_guid */
+	sd->parent_fromsnap_guid = 0;
+	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
+	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
+	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
+	nvlist_free(sd->parent_snaps);
+	nvlist_free(sd->snapprops);
+
+	/* add this fs to nvlist */
+	(void) snprintf(guidstring, sizeof (guidstring),
+	    "0x%llx", (longlong_t)guid);
+	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
+	nvlist_free(nvfs);
+
+	/*
+	 * iterate over children; they see the parent_fromsnap_guid that the
+	 * snapshot pass above computed for this filesystem
+	 */
+	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
+
+	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
+
+	zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Collect the description of 'fsname' and all of its descendants into a new
+ * nvlist returned through 'nvlp', and, if 'avlp' is non-NULL, build a
+ * GUID-indexed AVL tree over it as well.
+ *
+ * Returns 0 on success.  On failure '*nvlp' is set to NULL, except when the
+ * dataset itself cannot be opened, in which case EZFS_BADTYPE is returned
+ * and neither output is written.
+ */
+static int
+gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
+    const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
+{
+	send_data_t sd = { 0 };
+	zfs_handle_t *zhp;
+	int error;
+
+	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
+		return (EZFS_BADTYPE);
+
+	sd.fromsnap = fromsnap;
+	sd.tosnap = tosnap;
+	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
+
+	/* send_iterate_fs() consumes the handle */
+	error = send_iterate_fs(zhp, &sd);
+	if (error != 0) {
+		nvlist_free(sd.fss);
+		if (avlp != NULL)
+			*avlp = NULL;
+		*nvlp = NULL;
+		return (error);
+	}
+
+	if (avlp != NULL) {
+		*avlp = fsavl_create(sd.fss);
+		if (*avlp == NULL) {
+			nvlist_free(sd.fss);
+			*nvlp = NULL;
+			return (EZFS_NOMEM);
+		}
+	}
+
+	*nvlp = sd.fss;
+	return (0);
+}
+
+/*
+ * Routines for dealing with the sorted snapshot functionality
+ *
+ * A zfs_node_t wraps one snapshot handle so that it can be linked into the
+ * temporary AVL tree used to order snapshots.
+ */
+typedef struct zfs_node {
+	zfs_handle_t	*zn_handle;	/* the snapshot's handle */
+	avl_node_t	zn_avlnode;	/* AVL linkage */
+} zfs_node_t;
+
+/*
+ * Snapshot iteration callback: wrap 'zhp' in a zfs_node_t and insert it into
+ * the AVL tree passed as 'data'.  The handle stays open for whoever walks the
+ * tree later.
+ *
+ * Returns 0 on success.  If the node cannot be allocated, the handle is
+ * closed (nothing else would own it) and -1 is returned.
+ */
+static int
+zfs_sort_snaps(zfs_handle_t *zhp, void *data)
+{
+	avl_tree_t *avl = data;
+	zfs_node_t *node;
+
+	if ((node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t))) == NULL) {
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	node->zn_handle = zhp;
+	avl_add(avl, node);
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+zfs_snapshot_compare(const void *larg, const void *rarg)
+{
+	zfs_handle_t *lhs = ((zfs_node_t *)larg)->zn_handle;
+	zfs_handle_t *rhs = ((zfs_node_t *)rarg)->zn_handle;
+	uint64_t ltxg, rtxg;
+
+	/*
+	 * Snapshots are ordered by creation.  The hidden CREATETXG property
+	 * is used because it gives an absolute ordering.
+	 */
+	ltxg = zfs_prop_get_int(lhs, ZFS_PROP_CREATETXG);
+	rtxg = zfs_prop_get_int(rhs, ZFS_PROP_CREATETXG);
+
+	if (ltxg == rtxg)
+		return (0);
+
+	return (ltxg < rtxg ? -1 : +1);
+}
+
+/*
+ * Invoke 'callback' on every snapshot of 'zhp' in the order defined by
+ * zfs_snapshot_compare().  The handles are first gathered into a temporary
+ * AVL tree and then visited in order.  The return value is the bitwise OR of
+ * the gathering pass's result and every callback's result.
+ */
+static int
+zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
+{
+	avl_tree_t sorted;
+	zfs_node_t *cur;
+	void *cookie = NULL;
+	int rv;
+
+	avl_create(&sorted, zfs_snapshot_compare,
+	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
+
+	rv = zfs_iter_snapshots(zhp, zfs_sort_snaps, &sorted);
+
+	cur = avl_first(&sorted);
+	while (cur != NULL) {
+		rv |= callback(cur->zn_handle, data);
+		cur = AVL_NEXT(&sorted, cur);
+	}
+
+	/* only the wrapper nodes are freed here, not the handles */
+	for (;;) {
+		cur = avl_destroy_nodes(&sorted, &cookie);
+		if (cur == NULL)
+			break;
+		free(cur);
+	}
+	avl_destroy(&sorted);
+
+	return (rv);
+}
+
+/*
+ * Routines specific to "zfs send"
+ *
+ * send_dump_data_t carries the state of one send operation across the
+ * per-snapshot callbacks.
+ */
+typedef struct send_dump_data {
+	/* these are all just the short snapname (the part after the @) */
+	const char *fromsnap;
+	const char *tosnap;
+	char lastsnap[ZFS_MAXNAMELEN];	/* snapshot most recently handled */
+	boolean_t seenfrom, seento, replicate, doall, fromorigin;
+	boolean_t verbose;	/* report each send on stderr */
+	int outfd;		/* descriptor the stream is written to */
+	boolean_t err;
+	nvlist_t *fss;
+	avl_tree_t *fsavl;
+} send_dump_data_t;
+
+/*
+ * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
+ * NULL) to the file descriptor specified by outfd.
+ *
+ * 'fromorigin' is passed through to the kernel in the command's zc_obj field
+ * and may only be set when no 'fromsnap' name is given.  Returns 0 on
+ * success, or the result of the matching libzfs error routine if the
+ * ZFS_IOC_SEND ioctl fails.
+ */
+static int
+dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
+    int outfd)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	int err;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	if (fromsnap)
+		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
+	zc.zc_cookie = outfd;
+	zc.zc_obj = fromorigin;
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_SEND, &zc) == 0)
+		return (0);
+
+	/*
+	 * Save errno right away: the message formatting below goes through
+	 * library calls that are allowed to change it.
+	 */
+	err = errno;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "warning: cannot send '%s'"), zhp->zfs_name);
+
+	switch (err) {
+
+	case EXDEV:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "not an earlier snapshot from the same fs"));
+		return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+
+	case ENOENT:
+		/*
+		 * If the target snapshot exists, then it must be the
+		 * incremental source that is missing.
+		 */
+		if (zfs_dataset_exists(hdl, zc.zc_name,
+		    ZFS_TYPE_SNAPSHOT)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "incremental source (@%s) does not exist"),
+			    zc.zc_value);
+		}
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+	case EDQUOT:
+	case EFBIG:
+	case EIO:
+	case ENOLINK:
+	case ENOSPC:
+	case ENOSTR:
+	case ENXIO:
+	case EPIPE:
+	case ERANGE:
+	case EFAULT:
+	case EROFS:
+		/* never use the strerror() text itself as a format string */
+		zfs_error_aux(hdl, "%s", strerror(err));
+		return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
+
+	default:
+		return (zfs_standard_error(hdl, err, errbuf));
+	}
+}
+
+/*
+ * Snapshot callback used by dump_filesystem() when sending every
+ * intermediate snapshot.  'arg' is the send_dump_data_t shared by the walk.
+ *
+ * Three cases:
+ *  - the snapshot is the (not yet seen) incremental source: remember it as
+ *    the last snapshot and send nothing;
+ *  - the source has not been seen yet, or the target has already been
+ *    sent: skip the snapshot;
+ *  - otherwise: send it incrementally from the previously visited snapshot
+ *    and remember it as the new "last" snapshot.
+ *
+ * The handle is always closed before returning.  Returns the result of
+ * dump_ioctl() when something was sent, 0 otherwise.
+ */
+static int
+dump_snapshot(zfs_handle_t *zhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	const char *shortname = strchr(zhp->zfs_name, '@') + 1;
+	int rv = 0;
+
+	if (sdd->fromsnap != NULL && !sdd->seenfrom &&
+	    strcmp(sdd->fromsnap, shortname) == 0) {
+		/* this is the incremental source itself */
+		sdd->seenfrom = B_TRUE;
+		(void) strcpy(sdd->lastsnap, shortname);
+	} else if (sdd->seenfrom && !sdd->seento) {
+		boolean_t use_origin;
+
+		/* send it */
+		if (sdd->verbose) {
+			(void) fprintf(stderr, "sending from @%s to %s\n",
+			    sdd->lastsnap, zhp->zfs_name);
+		}
+
+		/*
+		 * With no previous snapshot, fall back to the clone origin
+		 * when either fromorigin or replicate was requested.
+		 */
+		use_origin = sdd->lastsnap[0] == '\0' &&
+		    (sdd->fromorigin || sdd->replicate);
+		rv = dump_ioctl(zhp, sdd->lastsnap, use_origin, sdd->outfd);
+
+		if (!sdd->seento && strcmp(sdd->tosnap, shortname) == 0)
+			sdd->seento = B_TRUE;
+
+		(void) strcpy(sdd->lastsnap, shortname);
+	}
+
+	zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Send the stream(s) for a single filesystem.  'arg' is the shared
+ * send_dump_data_t.
+ *
+ * If <fs>@<tosnap> cannot be looked up, a warning is printed, sdd->err is
+ * set and 0 is returned so the caller can carry on with other filesystems.
+ *
+ * With 'doall', every snapshot between the source and the target is sent
+ * via dump_snapshot(); otherwise one stream for <fs>@<tosnap> is produced.
+ */
+static int
+dump_filesystem(zfs_handle_t *zhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	zfs_cmd_t zc = { 0 };
+	boolean_t missingfrom = B_FALSE;
+	int rv = 0;
+	zfs_handle_t *snapzhp;
+	char snapname[ZFS_MAXNAMELEN];
+
+	/* The target snapshot has to exist on this filesystem. */
+	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
+	    zhp->zfs_name, sdd->tosnap);
+	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+		(void) fprintf(stderr, "WARNING: "
+		    "could not send %s@%s: does not exist\n",
+		    zhp->zfs_name, sdd->tosnap);
+		sdd->err = B_TRUE;
+		return (0);
+	}
+
+	if (sdd->replicate && sdd->fromsnap) {
+		/*
+		 * If this fs does not have fromsnap, and we're doing
+		 * recursive, we need to send a full stream from the
+		 * beginning (or an incremental from the origin if this
+		 * is a clone).  If we're doing non-recursive, then let
+		 * them get the error.
+		 */
+		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
+		    zhp->zfs_name, sdd->fromsnap);
+		if (ioctl(zhp->zfs_hdl->libzfs_fd,
+		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+			missingfrom = B_TRUE;
+		}
+	}
+
+	if (!sdd->doall) {
+		/* Single stream: open the target snapshot and send it. */
+		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
+		    zfs_get_name(zhp), sdd->tosnap);
+		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
+		if (snapzhp == NULL)
+			return (-1);
+
+		rv = dump_ioctl(snapzhp,
+		    missingfrom ? NULL : sdd->fromsnap,
+		    sdd->fromorigin || missingfrom,
+		    sdd->outfd);
+		sdd->seento = B_TRUE;
+		zfs_close(snapzhp);
+		return (rv);
+	}
+
+	/* Reset the per-filesystem walk state before iterating. */
+	sdd->lastsnap[0] = 0;
+	sdd->seento = 0;
+	sdd->seenfrom = 0;
+	if (sdd->fromsnap == NULL || missingfrom)
+		sdd->seenfrom = B_TRUE;
+
+	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
+
+	if (!sdd->seenfrom) {
+		(void) fprintf(stderr,
+		    "WARNING: could not send %s@%s:\n"
+		    "incremental source (%s@%s) does not exist\n",
+		    zhp->zfs_name, sdd->tosnap,
+		    zhp->zfs_name, sdd->fromsnap);
+		sdd->err = B_TRUE;
+	} else if (!sdd->seento) {
+		(void) fprintf(stderr,
+		    "WARNING: could not send %s@%s:\n"
+		    "incremental source (%s@%s) "
+		    "is not earlier than it\n",
+		    zhp->zfs_name, sdd->tosnap,
+		    zhp->zfs_name, sdd->fromsnap);
+		sdd->err = B_TRUE;
+	}
+
+	return (rv);
+}
+
+/*
+ * Send every filesystem that belongs to this send operation.
+ *
+ * Without 'replicate' this is just dump_filesystem() on 'rzhp'.  With
+ * 'replicate', the filesystems listed in sdd->fss are sent one by one and
+ * marked with a "sent" boolean.  A clone whose origin is part of the list
+ * but has not been sent yet is deferred; the list is rescanned until no
+ * filesystem needs deferring.
+ *
+ * Returns 0 on success, -1 if a filesystem could not be opened, or the
+ * first non-zero result from dump_filesystem().
+ */
+static int
+dump_filesystems(zfs_handle_t *rzhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	nvpair_t *pair;
+	boolean_t deferred, advanced;
+
+	if (!sdd->replicate)
+		return (dump_filesystem(rzhp, sdd));
+
+	do {
+		deferred = advanced = B_FALSE;
+
+		pair = nvlist_next_nvpair(sdd->fss, NULL);
+		for (; pair; pair = nvlist_next_nvpair(sdd->fss, pair)) {
+			nvlist_t *fslist;
+			nvlist_t *origin_nv;
+			char *fsname;
+			zfs_handle_t *zhp;
+			uint64_t origin_guid = 0;
+			int err;
+
+			VERIFY(nvpair_value_nvlist(pair, &fslist) == 0);
+
+			/* already handled on an earlier pass */
+			if (nvlist_lookup_boolean(fslist, "sent") == 0)
+				continue;
+
+			VERIFY(nvlist_lookup_string(fslist, "name",
+			    &fsname) == 0);
+			(void) nvlist_lookup_uint64(fslist, "origin",
+			    &origin_guid);
+
+			origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
+			if (origin_nv && nvlist_lookup_boolean(origin_nv,
+			    "sent") == ENOENT) {
+				/*
+				 * origin has not been sent yet;
+				 * skip this clone.
+				 */
+				deferred = B_TRUE;
+				continue;
+			}
+
+			zhp = zfs_open(rzhp->zfs_hdl, fsname,
+			    ZFS_TYPE_DATASET);
+			if (zhp == NULL)
+				return (-1);
+			err = dump_filesystem(zhp, sdd);
+			VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
+			advanced = B_TRUE;
+			zfs_close(zhp);
+			if (err)
+				return (err);
+		}
+
+		/* a deferral only makes sense if this pass sent something */
+		if (deferred)
+			assert(advanced);
+	} while (deferred);
+
+	return (0);
+}
+
+/*
+ * Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL.
+ * If 'doall', dump all intermediate snaps.
+ * If 'replicate', dump special header and do recursively.
+ *
+ * For 'replicate' or 'doall' the output is a package: a BEGIN record
+ * (followed by a packed nvlist describing the filesystems when
+ * replicating), an END record carrying the checksum of that header, the
+ * individual streams, and a final END record.
+ *
+ * Returns 0 on success and non-zero on failure.  Errors detected before
+ * any stream is dumped are reported through the libzfs handle; after that
+ * point the result is simply "did anything go wrong".
+ */
+int
+zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    boolean_t replicate, boolean_t doall, boolean_t fromorigin,
+    boolean_t verbose, int outfd)
+{
+	char errbuf[1024];
+	send_dump_data_t sdd = { 0 };
+	int err;
+	nvlist_t *fss = NULL;
+	avl_tree_t *fsavl = NULL;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot send '%s'"), zhp->zfs_name);
+
+	if (fromsnap && fromsnap[0] == '\0') {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "zero-length incremental source"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
+	}
+
+	if (replicate || doall) {
+		dmu_replay_record_t drr = { 0 };
+		char *packbuf = NULL;
+		size_t buflen = 0;
+		zio_cksum_t zc = { 0 };
+
+		assert(fromsnap || doall);
+
+		if (replicate) {
+			nvlist_t *hdrnv;
+
+			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
+			if (fromsnap) {
+				VERIFY(0 == nvlist_add_string(hdrnv,
+				    "fromsnap", fromsnap));
+			}
+			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
+
+			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
+			    fromsnap, tosnap, &fss, &fsavl);
+			if (err) {
+				/* don't leak the partially built header */
+				nvlist_free(hdrnv);
+				return (err);
+			}
+			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
+			err = nvlist_pack(hdrnv, &packbuf, &buflen,
+			    NV_ENCODE_XDR, 0);
+			nvlist_free(hdrnv);
+			if (err) {
+				fsavl_destroy(fsavl);
+				nvlist_free(fss);
+				return (zfs_standard_error(zhp->zfs_hdl,
+				    err, errbuf));
+			}
+		}
+
+		/* write first begin record */
+		drr.drr_type = DRR_BEGIN;
+		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
+		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
+		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
+		    sizeof (drr.drr_u.drr_begin.drr_toname),
+		    "%s@%s", zhp->zfs_name, tosnap);
+		drr.drr_payloadlen = buflen;
+		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
+		err = write(outfd, &drr, sizeof (drr));
+
+		/* write header nvlist */
+		if (err != -1) {
+			fletcher_4_incremental_native(packbuf, buflen, &zc);
+			err = write(outfd, packbuf, buflen);
+		}
+		free(packbuf);
+		if (err == -1) {
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+
+		/*
+		 * write end record; it carries the checksum accumulated
+		 * over the begin record and the header nvlist.
+		 */
+		bzero(&drr, sizeof (drr));
+		drr.drr_type = DRR_END;
+		drr.drr_u.drr_end.drr_checksum = zc;
+		err = write(outfd, &drr, sizeof (drr));
+		if (err == -1) {
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+	}
+
+	/* dump each stream */
+	sdd.fromsnap = fromsnap;
+	sdd.tosnap = tosnap;
+	sdd.outfd = outfd;
+	sdd.replicate = replicate;
+	sdd.doall = doall;
+	sdd.fromorigin = fromorigin;
+	sdd.fss = fss;
+	sdd.fsavl = fsavl;
+	sdd.verbose = verbose;
+	err = dump_filesystems(zhp, &sdd);
+	fsavl_destroy(fsavl);
+	nvlist_free(fss);
+
+	if (replicate || doall) {
+		/*
+		 * write final end record.  NB: want to do this even if
+		 * there was some error, because it might not be totally
+		 * failed.
+		 */
+		dmu_replay_record_t drr = { 0 };
+		drr.drr_type = DRR_END;
+		if (write(outfd, &drr, sizeof (drr)) == -1) {
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+	}
+
+	return (err || sdd.err);
+}
+
+/*
+ * Routines specific to "zfs recv"
+ */
+
+/*
+ * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short
+ * reads.
+ *
+ * If 'zc' is non-NULL, the bytes read are folded into that running
+ * fletcher-4 checksum, using the byteswapping variant when 'byteswap' is
+ * set.
+ *
+ * Returns 0 on success.  A read error, an early end of file, or a
+ * negative length is reported as EZFS_BADSTREAM on 'hdl' and that error's
+ * return value is passed back.
+ */
+static int
+recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
+    boolean_t byteswap, zio_cksum_t *zc)
+{
+	char *cp = buf;
+	int rv = 0;
+	int len = ilen;
+
+	/*
+	 * Only advance the cursor after a successful read: adjusting it
+	 * by a -1 return could form a pointer before the start of 'buf'.
+	 * Stopping once len reaches zero also avoids a pointless
+	 * zero-length read() at the end.
+	 */
+	while (len > 0) {
+		rv = read(fd, cp, len);
+		if (rv <= 0)
+			break;
+		cp += rv;
+		len -= rv;
+	}
+
+	if (rv < 0 || len != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to read from stream"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
+		    "cannot receive")));
+	}
+
+	if (zc) {
+		if (byteswap)
+			fletcher_4_incremental_byteswap(buf, ilen, zc);
+		else
+			fletcher_4_incremental_native(buf, ilen, zc);
+	}
+	return (0);
+}
+
+/*
+ * Read a packed nvlist of 'len' bytes from 'fd' and unpack it into '*nvp'.
+ * 'byteswap' and 'zc' are passed straight through to recv_read().
+ *
+ * Returns 0 on success, ENOMEM if the temporary buffer cannot be
+ * allocated, the recv_read() error if the read fails, or EINVAL (with an
+ * auxiliary message set on 'hdl') if the data does not unpack.
+ */
+static int
+recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
+    boolean_t byteswap, zio_cksum_t *zc)
+{
+	int rv;
+	char *packed = zfs_alloc(hdl, len);
+
+	if (packed == NULL)
+		return (ENOMEM);
+
+	rv = recv_read(hdl, fd, packed, len, byteswap, zc);
+	if (rv == 0 && nvlist_unpack(packed, len, nvp, 0) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (malformed nvlist)"));
+		rv = EINVAL;
+	}
+
+	/* the packed form is no longer needed, whatever the outcome */
+	free(packed);
+	return (rv);
+}
+
+/*
+ * Rename the local dataset 'name' as part of receiving a replication
+ * stream.
+ *
+ * If 'tryname' is non-NULL, a rename to that name is attempted first.  If
+ * that fails (or no tryname was given) and 'name' is not already a
+ * temporary name, the dataset is renamed to a temporary
+ * "recv-<pid>-<seq>" name built on the first 'baselen' characters of
+ * 'name', so that a later pass can try again.  The name that was
+ * attempted last is left in 'newname', which must hold ZFS_MAXNAMELEN
+ * bytes.
+ *
+ * Returns 0 if the rename to 'tryname' succeeded, EAGAIN if the temporary
+ * name path was taken, -1 if the dataset or its changelist could not be
+ * set up, and otherwise the failing call's result.
+ */
+static int
+recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
+    int baselen, char *newname, recvflags_t flags)
+{
+	static int seq;
+	zfs_cmd_t zc = { 0 };
+	int err;
+	prop_changelist_t *clp;
+	zfs_handle_t *zhp;
+
+	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
+	if (zhp == NULL)
+		return (-1);
+	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+	    flags.force ? MS_FORCE : 0);
+	zfs_close(zhp);
+	if (clp == NULL)
+		return (-1);
+	err = changelist_prefix(clp);
+	if (err) {
+		/* don't leak the changelist on the early exit */
+		changelist_free(clp);
+		return (err);
+	}
+
+	/*
+	 * Fill in the source of the rename up front: the fallback path
+	 * below issues the ioctl even when no tryname was supplied.
+	 */
+	zc.zc_objset_type = DMU_OST_ZFS;
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	if (tryname) {
+		(void) strcpy(newname, tryname);
+
+		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
+
+		if (flags.verbose) {
+			(void) printf("attempting rename %s to %s\n",
+			    zc.zc_name, zc.zc_value);
+		}
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+		if (err == 0)
+			changelist_rename(clp, name, tryname);
+	} else {
+		err = ENOENT;
+	}
+
+	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
+		seq++;
+
+		/* the snprintf() below supplies the NUL terminator */
+		(void) strncpy(newname, name, baselen);
+		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
+		    "recv-%ld-%u", (long) getpid(), seq);
+		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
+
+		if (flags.verbose) {
+			(void) printf("failed - trying rename %s to %s\n",
+			    zc.zc_name, zc.zc_value);
+		}
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+		if (err == 0)
+			changelist_rename(clp, name, newname);
+		if (err && flags.verbose) {
+			(void) printf("failed (%u) - "
+			    "will try again on next pass\n", errno);
+		}
+		/* either way the caller needs another pass */
+		err = EAGAIN;
+	} else if (flags.verbose) {
+		if (err == 0)
+			(void) printf("success\n");
+		else
+			(void) printf("failed (%u)\n", errno);
+	}
+
+	(void) changelist_postfix(clp);
+	changelist_free(clp);
+
+	return (err);
+}
+
+/*
+ * Destroy the local dataset 'name' as part of receiving a replication
+ * stream.  If the destroy ioctl fails, fall back to recv_rename() with no
+ * preferred name, which moves the dataset to a temporary name so that a
+ * later pass can retry.  'baselen' and 'newname' are only used by that
+ * fallback.
+ *
+ * Returns 0 if the dataset was destroyed, -1 if the dataset or its
+ * changelist could not be set up, and otherwise the result of
+ * changelist_prefix() or recv_rename().
+ */
+static int
+recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
+    char *newname, recvflags_t flags)
+{
+	zfs_cmd_t zc = { 0 };
+	int err = 0;
+	prop_changelist_t *clp;
+	zfs_handle_t *zhp;
+
+	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
+	if (zhp == NULL)
+		return (-1);
+	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+	    flags.force ? MS_FORCE : 0);
+	zfs_close(zhp);
+	if (clp == NULL)
+		return (-1);
+	err = changelist_prefix(clp);
+	if (err) {
+		/* don't leak the changelist on the early exit */
+		changelist_free(clp);
+		return (err);
+	}
+
+	zc.zc_objset_type = DMU_OST_ZFS;
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	if (flags.verbose)
+		(void) printf("attempting destroy %s\n", zc.zc_name);
+	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
+
+	if (err == 0) {
+		if (flags.verbose)
+			(void) printf("success\n");
+		changelist_remove(clp, zc.zc_name);
+	}
+
+	(void) changelist_postfix(clp);
+	changelist_free(clp);
+
+	/* could not destroy it: park it under a temporary name instead */
+	if (err != 0)
+		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
+
+	return (err);
+}
+
+/* Search state handed to guid_to_name_cb() through zfs_iter_children(). */
+typedef struct guid_to_name_data {
+	uint64_t guid;	/* guid being searched for */
+	char *name;	/* caller's buffer; receives the matching name */
+} guid_to_name_data_t;
+
+/*
+ * zfs_iter_children() callback for guid_to_name().  'arg' is a
+ * guid_to_name_data_t.
+ *
+ * If the visited dataset has the wanted guid, its name is copied into the
+ * caller's buffer and EEXIST is returned to stop the iteration; otherwise
+ * the search recurses into the dataset's children.  The handle is closed
+ * on both paths.
+ */
+static int
+guid_to_name_cb(zfs_handle_t *zhp, void *arg)
+{
+	guid_to_name_data_t *gtnd = arg;
+	int err;
+
+	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
+		(void) strcpy(gtnd->name, zhp->zfs_name);
+		/* the match path must release the handle too */
+		zfs_close(zhp);
+		return (EEXIST);
+	}
+	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
+	zfs_close(zhp);
+	return (err);
+}
+
+/*
+ * Find the local dataset whose guid is 'guid' and copy its full name into
+ * 'name'.
+ *
+ * Unless 'parent' names a snapshot, the search starts with the children
+ * of 'parent'.  If nothing is found there, the search is repeated from
+ * the first component of 'parent' (everything before the first '/').
+ *
+ * Returns 0 if a match was found, ENOENT otherwise.
+ */
+static int
+guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
+    char *name)
+{
+	/* exhaustive search all local snapshots */
+	guid_to_name_data_t gtnd;
+	int err = 0;
+	zfs_handle_t *zhp;
+	char pool[ZFS_MAXNAMELEN];
+	char *cp;
+
+	gtnd.guid = guid;
+	gtnd.name = name;
+
+	if (strchr(parent, '@') == NULL) {
+		zhp = make_dataset_handle(hdl, parent);
+		if (zhp != NULL) {
+			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
+			zfs_close(zhp);
+			if (err == EEXIST)
+				return (0);
+		}
+	}
+
+	/*
+	 * Truncate a private copy at the first '/' rather than writing
+	 * through the caller's const string.
+	 */
+	(void) strlcpy(pool, parent, sizeof (pool));
+	cp = strchr(pool, '/');
+	if (cp)
+		*cp = '\0';
+	zhp = make_dataset_handle(hdl, pool);
+
+	if (zhp) {
+		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
+		zfs_close(zhp);
+	}
+
+	return (err == EEXIST ? 0 : ENOENT);
+}
+
+/*
+ * Return true if dataset guid1 is created before guid2.
+ *
+ * Both guids are looked up in 'avl' to obtain "<fs>@<snap>" names, which
+ * are opened and compared with zfs_snapshot_compare().  A guid of 0 is
+ * treated specially: if guid2 is 0 the answer is 0, otherwise if guid1 is
+ * 0 the answer is 1.
+ *
+ * Returns 1 or 0 for the comparison, or -1 if either snapshot cannot be
+ * opened.
+ */
+static int
+created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
+    uint64_t guid1, uint64_t guid2)
+{
+	nvlist_t *nvfs;
+	char *fsname, *snapname;
+	char buf[ZFS_MAXNAMELEN];
+	int rv;
+	zfs_node_t zn1, zn2;
+
+	if (guid2 == 0)
+		return (0);
+	if (guid1 == 0)
+		return (1);
+
+	nvfs = fsavl_find(avl, guid1, &snapname);
+	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
+	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
+	if (zn1.zn_handle == NULL)
+		return (-1);
+
+	nvfs = fsavl_find(avl, guid2, &snapname);
+	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
+	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
+	if (zn2.zn_handle == NULL) {
+		/* release the handle that did open, not the NULL one */
+		zfs_close(zn1.zn_handle);
+		return (-1);
+	}
+
+	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
+
+	zfs_close(zn1.zn_handle);
+	zfs_close(zn2.zn_handle);
+
+	return (rv);
+}
+
+/*
+ * Bring the local filesystems under 'tofs' in line with the layout
+ * described by an incremental replication stream ('stream_nv' and its
+ * lookup tree 'stream_avl'): promote clones whose origin changed, destroy
+ * snapshots and filesystems that no longer exist in the stream (only with
+ * flags.force), apply snapshot properties, and rename snapshots and
+ * filesystems whose names differ.
+ *
+ * The local state is re-gathered and the whole procedure repeated for as
+ * long as a pass both made progress and left work undone.
+ *
+ * Returns 0 when nothing is left to do (or on a dry run), a non-zero
+ * "needs another pass" indication if work remains but no progress could
+ * be made, -1 if a promote check failed, or the gather_nvlist() error.
+ */
+static int
+recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
+    recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
+{
+	nvlist_t *local_nv;
+	avl_tree_t *local_avl;
+	nvpair_t *fselem, *nextfselem;
+	char *tosnap, *fromsnap;
+	char newname[ZFS_MAXNAMELEN];
+	int error;
+	boolean_t needagain, progress;
+
+	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
+	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
+
+	if (flags.dryrun)
+		return (0);
+
+again:
+	needagain = progress = B_FALSE;
+
+	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
+	    &local_nv, &local_avl)) != 0)
+		return (error);
+
+	/*
+	 * Process deletes and renames
+	 */
+	for (fselem = nvlist_next_nvpair(local_nv, NULL);
+	    fselem; fselem = nextfselem) {
+		nvlist_t *nvfs, *snaps;
+		nvlist_t *stream_nvfs = NULL;
+		nvpair_t *snapelem, *nextsnapelem;
+		uint64_t fromguid = 0;
+		uint64_t originguid = 0;
+		uint64_t stream_originguid = 0;
+		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
+		char *fsname, *stream_fsname;
+		char *local_leaf, *stream_leaf;
+
+		/* fetch the successor first; the body uses 'continue' */
+		nextfselem = nvlist_next_nvpair(local_nv, fselem);
+
+		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
+		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
+		    &parent_fromsnap_guid));
+		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
+
+		/*
+		 * First find the stream's fs, so we can check for
+		 * a different origin (due to "zfs promote")
+		 */
+		for (snapelem = nvlist_next_nvpair(snaps, NULL);
+		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
+			uint64_t thisguid;
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
+
+			if (stream_nvfs != NULL)
+				break;
+		}
+
+		/* check for promote */
+		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
+		    &stream_originguid);
+		if (stream_nvfs && originguid != stream_originguid) {
+			switch (created_before(hdl, local_avl,
+			    stream_originguid, originguid)) {
+			case 1: {
+				/* promote it! */
+				zfs_cmd_t zc = { 0 };
+				nvlist_t *origin_nvfs;
+				char *origin_fsname;
+
+				if (flags.verbose)
+					(void) printf("promoting %s\n", fsname);
+
+				origin_nvfs = fsavl_find(local_avl, originguid,
+				    NULL);
+				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
+				    "name", &origin_fsname));
+				(void) strlcpy(zc.zc_value, origin_fsname,
+				    sizeof (zc.zc_value));
+				(void) strlcpy(zc.zc_name, fsname,
+				    sizeof (zc.zc_name));
+				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+				if (error == 0)
+					progress = B_TRUE;
+				break;
+			}
+			default:
+				break;
+			case -1:
+				fsavl_destroy(local_avl);
+				nvlist_free(local_nv);
+				return (-1);
+			}
+			/*
+			 * We had/have the wrong origin, therefore our
+			 * list of snapshots is wrong.  Need to handle
+			 * them on the next pass.
+			 */
+			needagain = B_TRUE;
+			continue;
+		}
+
+		for (snapelem = nvlist_next_nvpair(snaps, NULL);
+		    snapelem; snapelem = nextsnapelem) {
+			uint64_t thisguid;
+			char *stream_snapname;
+			nvlist_t *found, *props;
+
+			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			found = fsavl_find(stream_avl, thisguid,
+			    &stream_snapname);
+
+			/* check for delete */
+			if (found == NULL) {
+				char name[ZFS_MAXNAMELEN];
+
+				if (!flags.force)
+					continue;
+
+				(void) snprintf(name, sizeof (name), "%s@%s",
+				    fsname, nvpair_name(snapelem));
+
+				error = recv_destroy(hdl, name,
+				    strlen(fsname)+1, newname, flags);
+				if (error)
+					needagain = B_TRUE;
+				else
+					progress = B_TRUE;
+				continue;
+			}
+
+			stream_nvfs = found;
+
+			/* apply the snapshot's properties from the stream */
+			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
+			    &props) && 0 == nvlist_lookup_nvlist(props,
+			    stream_snapname, &props)) {
+				zfs_cmd_t zc = { 0 };
+
+				zc.zc_cookie = B_TRUE; /* clear current props */
+				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
+				    "%s@%s", fsname, nvpair_name(snapelem));
+				if (zcmd_write_src_nvlist(hdl, &zc,
+				    props) == 0) {
+					(void) zfs_ioctl(hdl,
+					    ZFS_IOC_SET_PROP, &zc);
+					zcmd_free_nvlists(&zc);
+				}
+			}
+
+			/* check for different snapname */
+			if (strcmp(nvpair_name(snapelem),
+			    stream_snapname) != 0) {
+				char name[ZFS_MAXNAMELEN];
+				char tryname[ZFS_MAXNAMELEN];
+
+				(void) snprintf(name, sizeof (name), "%s@%s",
+				    fsname, nvpair_name(snapelem));
+				(void) snprintf(tryname, sizeof (tryname),
+				    "%s@%s", fsname, stream_snapname);
+
+				error = recv_rename(hdl, name, tryname,
+				    strlen(fsname)+1, newname, flags);
+				if (error)
+					needagain = B_TRUE;
+				else
+					progress = B_TRUE;
+			}
+
+			if (strcmp(stream_snapname, fromsnap) == 0)
+				fromguid = thisguid;
+		}
+
+		/* check for delete */
+		if (stream_nvfs == NULL) {
+			if (!flags.force)
+				continue;
+
+			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
+			    newname, flags);
+			if (error)
+				needagain = B_TRUE;
+			else
+				progress = B_TRUE;
+			continue;
+		}
+
+		/*
+		 * Skip filesystems without the fromsnap whether or not
+		 * we are verbose; verbosity only controls the message.
+		 */
+		if (fromguid == 0) {
+			if (flags.verbose) {
+				(void) printf("local fs %s does not have "
+				    "fromsnap (%s in stream); must have "
+				    "been deleted locally; ignoring\n",
+				    fsname, fromsnap);
+			}
+			continue;
+		}
+
+		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
+		    "name", &stream_fsname));
+		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
+		    "parentfromsnap", &stream_parent_fromsnap_guid));
+
+		/*
+		 * Last path components, including the leading '/'.
+		 * Either may be NULL for a name without any '/', in
+		 * which case the name comparison is skipped.
+		 */
+		local_leaf = strrchr(fsname, '/');
+		stream_leaf = strrchr(stream_fsname, '/');
+
+		/* check for rename */
+		if ((stream_parent_fromsnap_guid != 0 &&
+		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
+		    (local_leaf != NULL && stream_leaf != NULL &&
+		    strcmp(local_leaf, stream_leaf) != 0)) {
+			nvlist_t *parent;
+			char tryname[ZFS_MAXNAMELEN];
+
+			parent = fsavl_find(local_avl,
+			    stream_parent_fromsnap_guid, NULL);
+			/*
+			 * NB: parent might not be found if we used the
+			 * tosnap for stream_parent_fromsnap_guid,
+			 * because the parent is a newly-created fs;
+			 * we'll be able to rename it after we recv the
+			 * new fs.
+			 */
+			if (parent != NULL && stream_leaf != NULL) {
+				char *pname;
+
+				VERIFY(0 == nvlist_lookup_string(parent, "name",
+				    &pname));
+				(void) snprintf(tryname, sizeof (tryname),
+				    "%s%s", pname, stream_leaf);
+			} else {
+				tryname[0] = '\0';
+				if (flags.verbose) {
+					(void) printf("local fs %s new parent "
+					    "not found\n", fsname);
+				}
+			}
+
+			error = recv_rename(hdl, fsname, tryname,
+			    strlen(tofs)+1, newname, flags);
+			if (error)
+				needagain = B_TRUE;
+			else
+				progress = B_TRUE;
+		}
+	}
+
+	fsavl_destroy(local_avl);
+	nvlist_free(local_nv);
+
+	if (needagain && progress) {
+		/* do another pass to fix up temporary names */
+		if (flags.verbose)
+			(void) printf("another pass:\n");
+		goto again;
+	}
+
+	return (needagain);
+}
+
+/*
+ * Receive a stream package: a BEGIN record ('drr', already read by the
+ * caller), an optional packed nvlist header, an END record carrying the
+ * header checksum, and then a sequence of individual streams.
+ *
+ * 'zc' is the running checksum of the header; it is extended with the
+ * nvlist payload here and compared against the END record.
+ *
+ * Returns 0 on success, -1 if receiving any contained stream failed, -2
+ * if only the rename/destroy fix-up reported a problem, or the libzfs
+ * error from validating the package header.
+ */
+static int
+zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
+    recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
+    char **top_zfs)
+{
+	nvlist_t *stream_nv = NULL;
+	avl_tree_t *stream_avl = NULL;
+	char *fromsnap = NULL;
+	char tofs[ZFS_MAXNAMELEN];
+	char errbuf[1024];
+	dmu_replay_record_t drre;
+	int error;
+	boolean_t anyerr = B_FALSE;
+	boolean_t softerr = B_FALSE;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (strchr(destname, '@')) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can not specify snapshot name for multi-snapshot stream"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	assert(drr->drr_type == DRR_BEGIN);
+	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
+	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
+
+	/*
+	 * Read in the nvlist from the stream.
+	 */
+	if (drr->drr_payloadlen != 0) {
+		if (!flags.isprefix) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "must use -d to receive replication "
+			    "(send -R) stream"));
+			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+		}
+
+		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
+		    &stream_nv, flags.byteswap, zc);
+		if (error) {
+			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			goto out;
+		}
+	}
+
+	/*
+	 * Read in the end record and verify checksum.
+	 */
+	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
+	    flags.byteswap, NULL)))
+		goto out;
+	if (flags.byteswap) {
+		/* convert the fields examined below to native byte order */
+		drre.drr_type = BSWAP_32(drre.drr_type);
+		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
+	}
+	if (drre.drr_type != DRR_END) {
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "incorrect header checksum"));
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+
+	/* fromsnap stays NULL when the header has no "fromsnap" entry */
+	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
+
+	if (drr->drr_payloadlen != 0) {
+		nvlist_t *stream_fss;
+
+		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
+		    &stream_fss));
+		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "couldn't allocate avl tree"));
+			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
+			goto out;
+		}
+
+		if (fromsnap != NULL) {
+			/*
+			 * Work out the local name of the top filesystem:
+			 * destname, plus (with -d) the stream's name
+			 * from its first '/' or '@' on, minus the
+			 * snapshot part.
+			 */
+			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
+			if (flags.isprefix) {
+				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
+				    "/@");
+				/* zfs_receive_one() will create_parents() */
+				(void) strlcat(tofs,
+				    &drr->drr_u.drr_begin.drr_toname[i],
+				    ZFS_MAXNAMELEN);
+				*strchr(tofs, '@') = '\0';
+			}
+			/* first fix-up pass, before any stream is received */
+			softerr = recv_incremental_replication(hdl, tofs,
+			    flags, stream_nv, stream_avl);
+		}
+	}
+
+
+	/* Finally, receive each contained stream */
+	do {
+		/*
+		 * we should figure out if it has a recoverable
+		 * error, in which case do a recv_skip() and drive on.
+		 * Note, if we fail due to already having this guid,
+		 * zfs_receive_one() will take care of it (ie,
+		 * recv_skip() and return 0).
+		 */
+		error = zfs_receive_impl(hdl, destname, flags, fd,
+		    stream_avl, top_zfs);
+		/* ENODATA ends the loop without counting as a failure */
+		if (error == ENODATA) {
+			error = 0;
+			break;
+		}
+		anyerr |= error;
+	} while (error == 0);
+
+	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
+		/*
+		 * Now that we have the fs's they sent us, try the
+		 * renames again.
+		 */
+		softerr = recv_incremental_replication(hdl, tofs, flags,
+		    stream_nv, stream_avl);
+	}
+
+out:
+	fsavl_destroy(stream_avl);
+	if (stream_nv)
+		nvlist_free(stream_nv);
+	/* a hard stream error (-1) takes precedence over a soft one (-2) */
+	if (softerr)
+		error = -2;
+	if (anyerr)
+		error = -1;
+	return (error);
+}
+
+/*
+ * Read and discard the records of one stream from 'fd', up to and
+ * including its END record.
+ *
+ * Returns 0 once the END record has been consumed, -1 if the scratch
+ * buffer cannot be allocated, a record cannot be read, a record's payload
+ * does not fit the scratch buffer, or an unknown record type is seen.
+ */
+static int
+recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
+{
+	const uint64_t buflen = 1 << 20;
+	dmu_replay_record_t *drr;
+	uint64_t paylen;
+	void *buf = malloc(buflen);
+
+	if (buf == NULL)
+		return (-1);
+
+	/* XXX would be great to use lseek if possible... */
+	drr = buf;
+
+	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
+	    byteswap, NULL) == 0) {
+		if (byteswap)
+			drr->drr_type = BSWAP_32(drr->drr_type);
+
+		/* payload bytes that follow this record header, if any */
+		paylen = 0;
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+			/* NB: not to be used on v2 stream packages */
+			assert(drr->drr_payloadlen == 0);
+			break;
+
+		case DRR_END:
+			free(buf);
+			return (0);
+
+		case DRR_OBJECT:
+			if (byteswap) {
+				drr->drr_u.drr_object.drr_bonuslen =
+				    BSWAP_32(drr->drr_u.drr_object.
+				    drr_bonuslen);
+			}
+			paylen = P2ROUNDUP(
+			    (uint64_t)drr->drr_u.drr_object.drr_bonuslen, 8);
+			break;
+
+		case DRR_WRITE:
+			if (byteswap) {
+				drr->drr_u.drr_write.drr_length =
+				    BSWAP_64(drr->drr_u.drr_write.drr_length);
+			}
+			paylen = drr->drr_u.drr_write.drr_length;
+			break;
+
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+			break;
+
+		default:
+			assert(!"invalid record type");
+			free(buf);
+			return (-1);
+		}
+
+		if (paylen == 0)
+			continue;
+
+		/*
+		 * The length comes from the stream; never read more than
+		 * the scratch buffer can hold.
+		 */
+		if (paylen > buflen) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid stream (record payload too large)"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM,
+			    dgettext(TEXT_DOMAIN, "cannot receive"));
+			free(buf);
+			return (-1);
+		}
+		/* the payload overwrites the header; it is done with */
+		if (recv_read(hdl, fd, buf, (int)paylen, B_FALSE,
+		    NULL) != 0) {
+			free(buf);
+			return (-1);
+		}
+	}
+
+	free(buf);
+	return (-1);
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd.
+ */
+static int
+zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
+    recvflags_t flags, dmu_replay_record_t *drr,
+    dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
+    char **top_zfs)
+{
+	zfs_cmd_t zc = { 0 };
+	time_t begin_time;
+	int ioctl_err, ioctl_errno, err, choplen;
+	char *cp;
+	struct drr_begin *drrb = &drr->drr_u.drr_begin;
+	char errbuf[1024];
+	char chopprefix[ZFS_MAXNAMELEN];
+	boolean_t newfs = B_FALSE;
+	boolean_t stream_wantsnewfs;
+	uint64_t parent_snapguid = 0;
+	prop_changelist_t *clp = NULL;
+	nvlist_t *snapprops_nvlist = NULL;
+
+	begin_time = time(NULL);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (stream_avl != NULL) {
+		char *snapname;
+		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
+		    &snapname);
+		nvlist_t *props;
+		int ret;
+
+		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
+		    &parent_snapguid);
+		err = nvlist_lookup_nvlist(fs, "props", &props);
+		if (err)
+			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+
+		if (flags.canmountoff) {
+			VERIFY(0 == nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
+		}
+		ret = zcmd_write_src_nvlist(hdl, &zc, props);
+		if (err)
+			nvlist_free(props);
+
+		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
+			VERIFY(0 == nvlist_lookup_nvlist(props,
+			    snapname, &snapprops_nvlist));
+		}
+
+		if (ret != 0)
+			return (-1);
+	}
+
+	/*
+	 * Determine how much of the snapshot name stored in the stream
+	 * we are going to tack on to the name they specified on the
+	 * command line, and how much we are going to chop off.
+	 *
+	 * If they specified a snapshot, chop the entire name stored in
+	 * the stream.
+	 */
+	(void) strcpy(chopprefix, drrb->drr_toname);
+	if (flags.isprefix) {
+		/*
+		 * They specified a fs with -d, we want to tack on
+		 * everything but the pool name stored in the stream
+		 */
+		if (strchr(tosnap, '@')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "argument - snapshot not allowed with -d"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+		cp = strchr(chopprefix, '/');
+		if (cp == NULL)
+			cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	} else if (strchr(tosnap, '@') == NULL) {
+		/*
+		 * If they specified a filesystem without -d, we want to
+		 * tack on everything after the fs specified in the
+		 * first name from the stream.
+		 */
+		cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	}
+	choplen = strlen(chopprefix);
+
+	/*
+	 * Determine name of destination snapshot, store in zc_value.
+	 */
+	(void) strcpy(zc.zc_value, tosnap);
+	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
+	    sizeof (zc.zc_value));
+	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
+		zcmd_free_nvlists(&zc);
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	}
+
+	/*
+	 * Determine the name of the origin snapshot, store in zc_string.
+	 */
+	if (drrb->drr_flags & DRR_FLAG_CLONE) {
+		if (guid_to_name(hdl, tosnap,
+		    drrb->drr_fromguid, zc.zc_string) != 0) {
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "local origin for clone %s does not exist"),
+			    zc.zc_value);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+		if (flags.verbose)
+			(void) printf("found clone origin %s\n", zc.zc_string);
+	}
+
+	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
+	    (drrb->drr_flags & DRR_FLAG_CLONE));
+
+	if (stream_wantsnewfs) {
+		/*
+		 * if the parent fs does not exist, look for it based on
+		 * the parent snap GUID
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot receive new filesystem stream"));
+
+		(void) strcpy(zc.zc_name, zc.zc_value);
+		cp = strrchr(zc.zc_name, '/');
+		if (cp)
+			*cp = '\0';
+		if (cp &&
+		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+			char suffix[ZFS_MAXNAMELEN];
+			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
+			if (guid_to_name(hdl, tosnap, parent_snapguid,
+			    zc.zc_value) == 0) {
+				*strchr(zc.zc_value, '@') = '\0';
+				(void) strcat(zc.zc_value, suffix);
+			}
+		}
+	} else {
+		/*
+		 * if the fs does not exist, look for it based on the
+		 * fromsnap GUID
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot receive incremental stream"));
+
+		(void) strcpy(zc.zc_name, zc.zc_value);
+		*strchr(zc.zc_name, '@') = '\0';
+
+		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+			char snap[ZFS_MAXNAMELEN];
+			(void) strcpy(snap, strchr(zc.zc_value, '@'));
+			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
+			    zc.zc_value) == 0) {
+				*strchr(zc.zc_value, '@') = '\0';
+				(void) strcat(zc.zc_value, snap);
+			}
+		}
+	}
+
+	(void) strcpy(zc.zc_name, zc.zc_value);
+	*strchr(zc.zc_name, '@') = '\0';
+
+	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+		zfs_handle_t *zhp;
+		/*
+		 * Destination fs exists.  Therefore this should either
+		 * be an incremental, or the stream specifies a new fs
+		 * (full stream or clone) and they want us to blow it
+		 * away (and have therefore specified -F and removed any
+		 * snapshots).
+		 */
+
+		if (stream_wantsnewfs) {
+			if (!flags.force) {
+				zcmd_free_nvlists(&zc);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "destination '%s' exists\n"
+				    "must specify -F to overwrite it"),
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+			}
+			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+			    &zc) == 0) {
+				zcmd_free_nvlists(&zc);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "destination has snapshots (eg. %s)\n"
+				    "must destroy them to overwrite it"),
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+			}
+		}
+
+		if ((zhp = zfs_open(hdl, zc.zc_name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+
+		if (stream_wantsnewfs &&
+		    zhp->zfs_dmustats.dds_origin[0]) {
+			zcmd_free_nvlists(&zc);
+			zfs_close(zhp);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' is a clone\n"
+			    "must destroy it to overwrite it"),
+			    zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+		}
+
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
+		    stream_wantsnewfs) {
+			/* We can't do online recv in this case */
+			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
+			if (clp == NULL) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+			if (changelist_prefix(clp) != 0) {
+				changelist_free(clp);
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		}
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
+		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
+			zfs_close(zhp);
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+		zfs_close(zhp);
+	} else {
+		/*
+		 * Destination filesystem does not exist.  Therefore we better
+		 * be creating a new filesystem (either from a full backup, or
+		 * a clone).  It would therefore be invalid if the user
+		 * specified only the pool name (i.e. if the destination name
+		 * contained no slash character).
+		 */
+		if (!stream_wantsnewfs ||
+		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' does not exist"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+
+		/*
+		 * Trim off the final dataset component so we perform the
+		 * recvbackup ioctl to the filesystems's parent.
+		 */
+		*cp = '\0';
+
+		if (flags.isprefix && !flags.dryrun &&
+		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
+			zcmd_free_nvlists(&zc);
+			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
+		}
+
+		newfs = B_TRUE;
+	}
+
+	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
+	zc.zc_cookie = infd;
+	zc.zc_guid = flags.force;
+	if (flags.verbose) {
+		(void) printf("%s %s stream of %s into %s\n",
+		    flags.dryrun ? "would receive" : "receiving",
+		    drrb->drr_fromguid ? "incremental" : "full",
+		    drrb->drr_toname, zc.zc_value);
+		(void) fflush(stdout);
+	}
+
+	if (flags.dryrun) {
+		zcmd_free_nvlists(&zc);
+		return (recv_skip(hdl, infd, flags.byteswap));
+	}
+
+	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
+	ioctl_errno = errno;
+	zcmd_free_nvlists(&zc);
+
+	if (err == 0 && snapprops_nvlist) {
+		zfs_cmd_t zc2 = { 0 };
+
+		(void) strcpy(zc2.zc_name, zc.zc_value);
+		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
+			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
+			zcmd_free_nvlists(&zc2);
+		}
+	}
+
+	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
+		/*
+		 * It may be that this snapshot already exists,
+		 * in which case we want to consume & ignore it
+		 * rather than failing.
+		 */
+		avl_tree_t *local_avl;
+		nvlist_t *local_nv, *fs;
+		char *cp = strchr(zc.zc_value, '@');
+
+		/*
+		 * XXX Do this faster by just iterating over snaps in
+		 * this fs.  Also if zc_value does not exist, we will
+		 * get a strange "does not exist" error message.
+		 */
+		*cp = '\0';
+		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
+		    &local_nv, &local_avl) == 0) {
+			*cp = '@';
+			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
+			fsavl_destroy(local_avl);
+			nvlist_free(local_nv);
+
+			if (fs != NULL) {
+				if (flags.verbose) {
+					(void) printf("snap %s already exists; "
+					    "ignoring\n", zc.zc_value);
+				}
+				ioctl_err = recv_skip(hdl, infd,
+				    flags.byteswap);
+			}
+		}
+		*cp = '@';
+	}
+
+
+	if (ioctl_err != 0) {
+		switch (ioctl_errno) {
+		case ENODEV:
+			cp = strchr(zc.zc_value, '@');
+			*cp = '\0';
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "most recent snapshot of %s does not\n"
+			    "match incremental source"), zc.zc_value);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			*cp = '@';
+			break;
+		case ETXTBSY:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination %s has been modified\n"
+			    "since most recent snapshot"), zc.zc_name);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			break;
+		case EEXIST:
+			cp = strchr(zc.zc_value, '@');
+			if (newfs) {
+				/* it's the containing fs that exists */
+				*cp = '\0';
+			}
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination already exists"));
+			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
+			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
+			    zc.zc_value);
+			*cp = '@';
+			break;
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		case ECKSUM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid stream (checksum mismatch)"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		default:
+			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
+		}
+	}
+
+	/*
+	 * Mount or recreate the /dev links for the target filesystem
+	 * (if created, or if we tore them down to do an incremental
+	 * restore), and the /dev links for the new snapshot (if
+	 * created). Also mount any children of the target filesystem
+	 * if we did an incremental receive.
+	 */
+	cp = strchr(zc.zc_value, '@');
+	if (cp && (ioctl_err == 0 || !newfs)) {
+		zfs_handle_t *h;
+
+		*cp = '\0';
+		h = zfs_open(hdl, zc.zc_value,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (h != NULL) {
+			if (h->zfs_type == ZFS_TYPE_VOLUME) {
+				*cp = '@';
+				err = zvol_create_link(hdl, h->zfs_name);
+				if (err == 0 && ioctl_err == 0)
+					err = zvol_create_link(hdl,
+					    zc.zc_value);
+			} else if (newfs) {
+				/*
+				 * Track the first/top of hierarchy fs,
+				 * for mounting and sharing later.
+				 */
+				if (top_zfs && *top_zfs == NULL)
+					*top_zfs = zfs_strdup(hdl, zc.zc_value);
+			}
+			zfs_close(h);
+		}
+		*cp = '@';
+	}
+
+	if (clp) {
+		err |= changelist_postfix(clp);
+		changelist_free(clp);
+	}
+
+	if (err || ioctl_err)
+		return (-1);
+
+	if (flags.verbose) {
+		char buf1[64];
+		char buf2[64];
+		uint64_t bytes = zc.zc_cookie;
+		time_t delta = time(NULL) - begin_time;
+		if (delta == 0)
+			delta = 1;
+		zfs_nicenum(bytes, buf1, sizeof (buf1));
+		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
+
+		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
+		    buf1, delta, buf2);
+	}
+
+	return (0);
+}
+
+/*
+ * Read the BEGIN record of a send stream from infd, validate it, and hand
+ * the remainder of the stream to the receive routine matching its version.
+ *
+ * If flags.isprefix is set, tosnap must name an existing dataset.  A stream
+ * whose magic number is byteswapped has its checksum recomputed and its
+ * header fields swapped before validation; flags.byteswap records which
+ * case applied and is passed on to the per-version routine.
+ *
+ * Returns the recv_read() error if the record cannot be read, ENODATA if
+ * the first record is a DRR_END record, the zfs_error() result if the
+ * record is rejected, and otherwise the result of zfs_receive_one() or
+ * zfs_receive_package().
+ */
+static int
+zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
+    int infd, avl_tree_t *stream_avl, char **top_zfs)
+{
+	int err;
+	dmu_replay_record_t drr, drr_noswap;
+	struct drr_begin *drrb = &drr.drr_u.drr_begin;
+	char errbuf[1024];
+	zio_cksum_t zcksum = { 0 };
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (flags.isprefix &&
+	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
+		    "(%s) does not exist"), tosnap);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	/* read in the BEGIN record */
+	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
+	    &zcksum)))
+		return (err);
+
+	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
+		/* It's the double end record at the end of a package */
+		return (ENODATA);
+	}
+
+	/* the kernel needs the non-byteswapped begin record */
+	drr_noswap = drr;
+
+	flags.byteswap = B_FALSE;
+	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+		/*
+		 * We computed the checksum in the wrong byteorder in
+		 * recv_read() above; do it again correctly.
+		 */
+		bzero(&zcksum, sizeof (zio_cksum_t));
+		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
+		flags.byteswap = B_TRUE;
+
+		drr.drr_type = BSWAP_32(drr.drr_type);
+		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
+		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+		drrb->drr_version = BSWAP_64(drrb->drr_version);
+		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
+		drrb->drr_type = BSWAP_32(drrb->drr_type);
+		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
+		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
+	}
+
+	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad magic number)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	/* the stream must name a snapshot, i.e. contain an '@' */
+	if (strchr(drrb->drr_toname, '@') == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad snapshot name)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
+		return (zfs_receive_one(hdl, infd, tosnap, flags,
+		    &drr, &drr_noswap, stream_avl, top_zfs));
+	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
+		return (zfs_receive_package(hdl, infd, tosnap, flags,
+		    &drr, &zcksum, top_zfs));
+	} else {
+		/*
+		 * drr_version is a 64-bit integer whose underlying type
+		 * varies by platform; cast so it always matches %llu.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "stream is unsupported version %llu"),
+		    (unsigned long long)drrb->drr_version);
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd,
+ * then mounts and shares the top-level filesystem recorded by the receive
+ * (if any).
+ *
+ * Return 0 on total success, -2 if some things couldn't be
+ * destroyed/renamed/promoted, -1 if some things couldn't be received
+ * or the mount/share step failed.  (-1 will override -2).
+ */
+int
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
+    int infd, avl_tree_t *stream_avl)
+{
+	char *top_zfs = NULL;
+	int err;
+
+	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
+
+	if (err == 0 && top_zfs != NULL) {
+		prop_changelist_t *clp = NULL;
+		zfs_handle_t *zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
+
+		if (zhp != NULL) {
+			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
+			    CL_GATHER_MOUNT_ALWAYS, 0);
+			zfs_close(zhp);
+		}
+
+		if (clp == NULL) {
+			/* could not open the fs or gather its changelist */
+			err = -1;
+		} else {
+			/* mount and share received datasets */
+			if (changelist_postfix(clp) != 0)
+				err = -1;
+			changelist_free(clp);
+		}
+	}
+
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(top_zfs);
+
+	return (err);
+}
diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
new file mode 100644
index 000000000..c7eb04e74
--- /dev/null
+++ b/lib/libzfs/libzfs_status.c
@@ -0,0 +1,317 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * This file contains the functions which analyze the status of a pool.  This
+ * includes both the status of an active pool and the status of exported
+ * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
+ * the pool.  This status is independent (to a certain degree) from the state of
+ * the pool.  A pool's state describes only whether or not it is capable of
+ * providing the necessary fault tolerance for data.  The status describes the
+ * overall status of devices.  A pool that is online can still have a device
+ * that is experiencing errors.
+ *
+ * Only a subset of the possible faults can be detected using 'zpool status',
+ * and not all possible errors correspond to a FMA message ID.  The explanation
+ * is left up to the caller, depending on whether it is a live pool or an
+ * import.
+ */
+
+#include <libzfs.h>
+#include <string.h>
+#include <unistd.h>
+#include "libzfs_impl.h"
+
+/*
+ * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
+ * in libzfs.h.  Note that there are some status results which go past the end
+ * of this table, and hence have no associated message ID.
+ *
+ * The table is indexed directly by the zpool_status_t value returned from
+ * check_status(), so the order of the entries is significant.
+ */
+static char *zfs_msgid_table[] = {
+	"ZFS-8000-14",
+	"ZFS-8000-2Q",
+	"ZFS-8000-3C",
+	"ZFS-8000-4J",
+	"ZFS-8000-5E",
+	"ZFS-8000-6X",
+	"ZFS-8000-72",
+	"ZFS-8000-8A",
+	"ZFS-8000-9P",
+	"ZFS-8000-A5",
+	"ZFS-8000-EY",
+	"ZFS-8000-HC",
+	"ZFS-8000-JQ",
+	"ZFS-8000-K4",
+};
+
+/* Number of entries in zfs_msgid_table; statuses >= NMSGID have no ID. */
+#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
+
+/*
+ * find_vdev_problem() predicate: nonzero when the vdev is in the CANT_OPEN
+ * state and the auxiliary state says the open itself failed.
+ */
+/* ARGSUSED */
+static int
+vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (state != VDEV_STATE_CANT_OPEN)
+		return (0);
+
+	return (aux == VDEV_AUX_OPEN_FAILED);
+}
+
+/*
+ * find_vdev_problem() predicate: nonzero when the vdev is in the FAULTED
+ * state.
+ */
+/* ARGSUSED */
+static int
+vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	int faulted = (state == VDEV_STATE_FAULTED);
+
+	return (faulted);
+}
+
+/*
+ * find_vdev_problem() predicate: nonzero when the vdev is DEGRADED or its
+ * combined error count is nonzero.
+ */
+/* ARGSUSED */
+static int
+vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (errs != 0)
+		return (1);
+
+	return (state == VDEV_STATE_DEGRADED);
+}
+
+/*
+ * find_vdev_problem() predicate: nonzero when the vdev is in the CANT_OPEN
+ * state, whatever the auxiliary reason.
+ */
+/* ARGSUSED */
+static int
+vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	int broken = (state == VDEV_STATE_CANT_OPEN);
+
+	return (broken);
+}
+
+/*
+ * find_vdev_problem() predicate: nonzero when the vdev is in the OFFLINE
+ * state.
+ */
+/* ARGSUSED */
+static int
+vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	int offlined = (state == VDEV_STATE_OFFLINE);
+
+	return (offlined);
+}
+
+/*
+ * Walk the vdev tree rooted at 'vdev' and report whether 'func' flags any
+ * vdev that has no children.  'func' is called with the vdev's state, its
+ * auxiliary state, and the sum of its read, write and checksum error
+ * counts.  Returns B_TRUE on the first match, B_FALSE if there is none.
+ */
+static boolean_t
+find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
+{
+	nvlist_t **kids;
+	vdev_stat_t *stats;
+	uint_t nkids, nstats, i;
+	uint64_t nerrs;
+	char *vtype;
+
+	/*
+	 * Problems inside a 'replacing' vdev are skipped: they are presumably
+	 * already being repaired, and the resilver itself gets reported
+	 * separately.
+	 */
+	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &vtype) == 0);
+	if (strcmp(vtype, VDEV_TYPE_REPLACING) == 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &kids,
+	    &nkids) != 0) {
+		/* no children: judge this vdev by its own stats */
+		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&stats, &nstats) == 0);
+
+		nerrs = stats->vs_read_errors +
+		    stats->vs_write_errors +
+		    stats->vs_checksum_errors;
+
+		if (func(stats->vs_state, stats->vs_aux, nerrs))
+			return (B_TRUE);
+		return (B_FALSE);
+	}
+
+	/* interior vdev: a problem in any child is a problem here */
+	for (i = 0; i < nkids; i++) {
+		if (find_vdev_problem(kids[i], func))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Active pool health status.
+ *
+ * A pool can have several problems at once; this routine reports only one
+ * of them, namely the first match in the following fixed order of tests:
+ *
+ *	- Pool last accessed by a system with a different hostid
+ *	- Newer on-disk version
+ *	- Incomplete configuration (bad GUID sum)
+ *	- Pool suspended due to failed I/O
+ *	- Unreadable log
+ *	- Faulted, missing or broken devices in a non-replicated config
+ *	  (root vdev in the CANT_OPEN state)
+ *	- Corrupted pool metadata
+ *	- Persistent data errors (skipped when 'isimport' is set)
+ *	- Faulted, missing or broken devices in a replicated config
+ *	- Devices showing errors (skipped when 'isimport' is set)
+ *	- Offlined devices
+ *	- Resilver in progress
+ *	- Outdated, but usable, on-disk version
+ *
+ * ZPOOL_STATUS_OK is returned when none of the tests match.
+ */
+static zpool_status_t
+check_status(nvlist_t *config, boolean_t isimport)
+{
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+	uint64_t nerr;
+	uint64_t version;
+	uint64_t stateval;
+	uint64_t suspended;
+	uint64_t hostid = 0;
+	boolean_t cant_open;
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &stateval) == 0);
+	/* the hostid is optional; it stays 0 when the config has none */
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+	/* most of the tests below only apply when the root vdev can't open */
+	cant_open = (vs->vs_state == VDEV_STATE_CANT_OPEN) ? B_TRUE : B_FALSE;
+
+	/*
+	 * Pool last accessed by another system.
+	 */
+	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
+	    stateval == POOL_STATE_ACTIVE)
+		return (ZPOOL_STATUS_HOSTID_MISMATCH);
+
+	if (cant_open) {
+		/*
+		 * Newer on-disk version.
+		 */
+		if (vs->vs_aux == VDEV_AUX_VERSION_NEWER)
+			return (ZPOOL_STATUS_VERSION_NEWER);
+
+		/*
+		 * Check that the config is complete.
+		 */
+		if (vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
+			return (ZPOOL_STATUS_BAD_GUID_SUM);
+	}
+
+	/*
+	 * Check whether the pool has suspended due to failed I/O.  The
+	 * presence of the key alone means suspended; its value selects
+	 * which of the two statuses to report.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
+	    &suspended) == 0) {
+		return (suspended == ZIO_FAILURE_MODE_CONTINUE ?
+		    ZPOOL_STATUS_IO_FAILURE_CONTINUE :
+		    ZPOOL_STATUS_IO_FAILURE_WAIT);
+	}
+
+	if (cant_open) {
+		/*
+		 * Could not read a log.
+		 */
+		if (vs->vs_aux == VDEV_AUX_BAD_LOG)
+			return (ZPOOL_STATUS_BAD_LOG);
+
+		/*
+		 * Bad devices in non-replicated config.
+		 */
+		if (find_vdev_problem(nvroot, vdev_faulted))
+			return (ZPOOL_STATUS_FAULTED_DEV_NR);
+		if (find_vdev_problem(nvroot, vdev_missing))
+			return (ZPOOL_STATUS_MISSING_DEV_NR);
+		if (find_vdev_problem(nvroot, vdev_broken))
+			return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
+
+		/*
+		 * Corrupted pool metadata
+		 */
+		if (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
+			return (ZPOOL_STATUS_CORRUPT_POOL);
+	}
+
+	/*
+	 * Persistent data errors.
+	 */
+	if (!isimport &&
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0 &&
+	    nerr != 0)
+		return (ZPOOL_STATUS_CORRUPT_DATA);
+
+	/*
+	 * Missing devices in a replicated config.
+	 */
+	if (find_vdev_problem(nvroot, vdev_faulted))
+		return (ZPOOL_STATUS_FAULTED_DEV_R);
+	if (find_vdev_problem(nvroot, vdev_missing))
+		return (ZPOOL_STATUS_MISSING_DEV_R);
+	if (find_vdev_problem(nvroot, vdev_broken))
+		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
+
+	/*
+	 * Devices with errors
+	 */
+	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
+		return (ZPOOL_STATUS_FAILING_DEV);
+
+	/*
+	 * Offlined devices
+	 */
+	if (find_vdev_problem(nvroot, vdev_offlined))
+		return (ZPOOL_STATUS_OFFLINE_DEV);
+
+	/*
+	 * Currently resilvering
+	 */
+	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
+		return (ZPOOL_STATUS_RESILVERING);
+
+	/*
+	 * Outdated, but usable, version
+	 */
+	if (version < SPA_VERSION)
+		return (ZPOOL_STATUS_VERSION_OLDER);
+
+	return (ZPOOL_STATUS_OK);
+}
+
+/*
+ * Return the status of the pool behind zhp, computed from its cached
+ * config.  *msgid is set to the matching message ID string, or to NULL
+ * when the status lies past the end of zfs_msgid_table.
+ */
+zpool_status_t
+zpool_get_status(zpool_handle_t *zhp, char **msgid)
+{
+	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
+
+	*msgid = (ret < NMSGID) ? zfs_msgid_table[ret] : NULL;
+
+	return (ret);
+}
+
+/*
+ * Return the status described by 'config', evaluated as an import (the
+ * checks that check_status() skips for imports are not made).  *msgid is
+ * set to the matching message ID string, or to NULL when the status lies
+ * past the end of zfs_msgid_table.
+ */
+zpool_status_t
+zpool_import_status(nvlist_t *config, char **msgid)
+{
+	zpool_status_t ret = check_status(config, B_TRUE);
+
+	*msgid = (ret < NMSGID) ? zfs_msgid_table[ret] : NULL;
+
+	return (ret);
+}
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
new file mode 100644
index 000000000..116521cb4
--- /dev/null
+++ b/lib/libzfs/libzfs_util.c
@@ -0,0 +1,1402 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Internal utility routines for the ZFS library.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <math.h>
+#include <sys/mnttab.h>
+#include <sys/mntent.h>
+#include <sys/types.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+#include "zfs_prop.h"
+
+/*
+ * Return the error code currently stored in the library handle.
+ */
+int
+libzfs_errno(libzfs_handle_t *hdl)
+{
+	int code = hdl->libzfs_error;
+
+	return (code);
+}
+
+/*
+ * Return the action message currently stored in the library handle.  The
+ * string lives inside the handle and is not copied.
+ */
+const char *
+libzfs_error_action(libzfs_handle_t *hdl)
+{
+	const char *action = hdl->libzfs_action;
+
+	return (action);
+}
+
+/*
+ * Return a human-readable description of the error stored in the handle.
+ *
+ * If a specific description has been recorded in hdl->libzfs_desc it is
+ * returned as is.  Otherwise the stored EZFS_* code is mapped to a generic,
+ * localized message.  A code with no case below is expected to be 0 (this
+ * is asserted) and yields "no error".
+ */
+const char *
+libzfs_error_description(libzfs_handle_t *hdl)
+{
+	/* a specific description takes precedence over the generic text */
+	if (hdl->libzfs_desc[0] != '\0')
+		return (hdl->libzfs_desc);
+
+	switch (hdl->libzfs_error) {
+	case EZFS_NOMEM:
+		return (dgettext(TEXT_DOMAIN, "out of memory"));
+	case EZFS_BADPROP:
+		return (dgettext(TEXT_DOMAIN, "invalid property value"));
+	case EZFS_PROPREADONLY:
+		return (dgettext(TEXT_DOMAIN, "read only property"));
+	case EZFS_PROPTYPE:
+		return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
+		    "datasets of this type"));
+	case EZFS_PROPNONINHERIT:
+		return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
+	case EZFS_PROPSPACE:
+		return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+	case EZFS_BADTYPE:
+		return (dgettext(TEXT_DOMAIN, "operation not applicable to "
+		    "datasets of this type"));
+	case EZFS_BUSY:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
+	case EZFS_EXISTS:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
+	case EZFS_NOENT:
+		return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+	case EZFS_BADSTREAM:
+		return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
+	case EZFS_DSREADONLY:
+		return (dgettext(TEXT_DOMAIN, "dataset is read only"));
+	case EZFS_VOLTOOBIG:
+		return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
+		    "this system"));
+	case EZFS_VOLHASDATA:
+		return (dgettext(TEXT_DOMAIN, "volume has data"));
+	case EZFS_INVALIDNAME:
+		return (dgettext(TEXT_DOMAIN, "invalid name"));
+	case EZFS_BADRESTORE:
+		return (dgettext(TEXT_DOMAIN, "unable to restore to "
+		    "destination"));
+	case EZFS_BADBACKUP:
+		return (dgettext(TEXT_DOMAIN, "backup failed"));
+	case EZFS_BADTARGET:
+		return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
+	case EZFS_NODEVICE:
+		return (dgettext(TEXT_DOMAIN, "no such device in pool"));
+	case EZFS_BADDEV:
+		return (dgettext(TEXT_DOMAIN, "invalid device"));
+	case EZFS_NOREPLICAS:
+		return (dgettext(TEXT_DOMAIN, "no valid replicas"));
+	case EZFS_RESILVERING:
+		return (dgettext(TEXT_DOMAIN, "currently resilvering"));
+	case EZFS_BADVERSION:
+		return (dgettext(TEXT_DOMAIN, "unsupported version"));
+	case EZFS_POOLUNAVAIL:
+		return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
+	case EZFS_DEVOVERFLOW:
+		return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
+	case EZFS_BADPATH:
+		return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
+	case EZFS_CROSSTARGET:
+		return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
+		    "pools"));
+	case EZFS_ZONED:
+		return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
+	case EZFS_MOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "mount failed"));
+	case EZFS_UMOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "umount failed"));
+	case EZFS_UNSHARENFSFAILED:
+		return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
+	case EZFS_SHARENFSFAILED:
+		return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+	case EZFS_UNSHARESMBFAILED:
+		return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
+	case EZFS_SHARESMBFAILED:
+		return (dgettext(TEXT_DOMAIN, "smb add share failed"));
+	case EZFS_ISCSISVCUNAVAIL:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgt service need to be enabled by "
+		    "a privileged user"));
+	case EZFS_DEVLINKS:
+		return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
+	case EZFS_PERM:
+		return (dgettext(TEXT_DOMAIN, "permission denied"));
+	case EZFS_NOSPC:
+		return (dgettext(TEXT_DOMAIN, "out of space"));
+	case EZFS_IO:
+		return (dgettext(TEXT_DOMAIN, "I/O error"));
+	case EZFS_INTR:
+		return (dgettext(TEXT_DOMAIN, "signal received"));
+	case EZFS_ISSPARE:
+		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
+		    "spare"));
+	case EZFS_INVALCONFIG:
+		return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
+	case EZFS_RECURSIVE:
+		return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
+	case EZFS_NOHISTORY:
+		return (dgettext(TEXT_DOMAIN, "no history available"));
+	case EZFS_UNSHAREISCSIFAILED:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgtd failed request to unshare"));
+	case EZFS_SHAREISCSIFAILED:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgtd failed request to share"));
+	case EZFS_POOLPROPS:
+		return (dgettext(TEXT_DOMAIN, "failed to retrieve "
+		    "pool properties"));
+	case EZFS_POOL_NOTSUP:
+		return (dgettext(TEXT_DOMAIN, "operation not supported "
+		    "on this type of pool"));
+	case EZFS_POOL_INVALARG:
+		return (dgettext(TEXT_DOMAIN, "invalid argument for "
+		    "this pool operation"));
+	case EZFS_NAMETOOLONG:
+		return (dgettext(TEXT_DOMAIN, "dataset name is too long"));
+	case EZFS_OPENFAILED:
+		return (dgettext(TEXT_DOMAIN, "open failed"));
+	case EZFS_NOCAP:
+		return (dgettext(TEXT_DOMAIN,
+		    "disk capacity information could not be retrieved"));
+	case EZFS_LABELFAILED:
+		return (dgettext(TEXT_DOMAIN, "write of label failed"));
+	case EZFS_BADWHO:
+		return (dgettext(TEXT_DOMAIN, "invalid user/group"));
+	case EZFS_BADPERM:
+		return (dgettext(TEXT_DOMAIN, "invalid permission"));
+	case EZFS_BADPERMSET:
+		return (dgettext(TEXT_DOMAIN, "invalid permission set name"));
+	case EZFS_NODELEGATION:
+		return (dgettext(TEXT_DOMAIN, "delegated administration is "
+		    "disabled on pool"));
+	case EZFS_PERMRDONLY:
+		return (dgettext(TEXT_DOMAIN, "snapshot permissions cannot be"
+		    " modified"));
+	case EZFS_BADCACHE:
+		return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
+	case EZFS_ISL2CACHE:
+		return (dgettext(TEXT_DOMAIN, "device is in use as a cache"));
+	case EZFS_VDEVNOTSUP:
+		return (dgettext(TEXT_DOMAIN, "vdev specification is not "
+		    "supported"));
+	case EZFS_NOTSUP:
+		return (dgettext(TEXT_DOMAIN, "operation not supported "
+		    "on this dataset"));
+	case EZFS_ACTIVE_SPARE:
+		return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
+		    "device"));
+	case EZFS_UNKNOWN:
+		return (dgettext(TEXT_DOMAIN, "unknown error"));
+	default:
+		/* any code without a case above must be 0, i.e. no error */
+		assert(hdl->libzfs_error == 0);
+		return (dgettext(TEXT_DOMAIN, "no error"));
+	}
+}
+
+/*
+ * Format a specific error description into the handle and mark it active,
+ * so that the next zfs_verror() call keeps it instead of clearing it.
+ * Output longer than the description buffer is truncated.
+ */
+/*PRINTFLIKE2*/
+void
+zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
+{
+	va_list args;
+
+	hdl->libzfs_desc_active = 1;
+
+	va_start(args, fmt);
+	(void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc),
+	    fmt, args);
+	va_end(args);
+}
+
+/*
+ * Common back end of the error-reporting routines.  Stores the error code
+ * and the formatted action message in the handle and decides the fate of
+ * the description: one set by zfs_error_aux() since the last report is
+ * kept, anything else is cleared.
+ *
+ * When hdl->libzfs_printerr is set the error is also written to stderr;
+ * in that mode EZFS_UNKNOWN aborts the program and EZFS_NOMEM exits with
+ * status 1.
+ */
+static void
+zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
+{
+	hdl->libzfs_error = error;
+	(void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
+	    fmt, ap);
+
+	/* drop a stale description, then consume the "active" marker */
+	if (!hdl->libzfs_desc_active)
+		hdl->libzfs_desc[0] = '\0';
+	hdl->libzfs_desc_active = 0;
+
+	if (!hdl->libzfs_printerr)
+		return;
+
+	if (error == EZFS_UNKNOWN) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "internal error: %s\n"), libzfs_error_description(hdl));
+		abort();
+	}
+
+	(void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
+	    libzfs_error_description(hdl));
+
+	if (error == EZFS_NOMEM)
+		exit(1);
+}
+
+/*
+ * Report 'error' with a fixed action message.  The message is passed as
+ * a "%s" argument, so '%' characters in it are not interpreted.  Returns
+ * the result of zfs_error_fmt().
+ */
+int
+zfs_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	int ret = zfs_error_fmt(hdl, error, "%s", msg);
+
+	return (ret);
+}
+
+/*
+ * Report 'error' on the handle with a printf-style action message.
+ * Always returns -1 so callers can write "return (zfs_error_fmt(...))".
+ */
+/*PRINTFLIKE3*/
+int
+zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	zfs_verror(hdl, error, fmt, args);
+	va_end(args);
+
+	return (-1);
+}
+
+/*
+ * Handle the errno values that are reported identically for datasets and
+ * pools.  If 'error' is one of them, report the matching EZFS_* code via
+ * zfs_verror() and return -1; otherwise do nothing and return 0 so the
+ * caller can apply its own mapping.
+ */
+static int
+zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
+    va_list ap)
+{
+	int ezfs;
+
+	switch (error) {
+	case EPERM:
+	case EACCES:
+		ezfs = EZFS_PERM;
+		break;
+
+	case ECANCELED:
+		ezfs = EZFS_NODELEGATION;
+		break;
+
+	case EIO:
+		ezfs = EZFS_IO;
+		break;
+
+	case EINTR:
+		ezfs = EZFS_INTR;
+		break;
+
+	default:
+		/* not a common error; leave it to the caller */
+		return (0);
+	}
+
+	zfs_verror(hdl, ezfs, fmt, ap);
+	return (-1);
+}
+
+/*
+ * Report the errno value 'error' for a dataset operation with a fixed
+ * action message.  The message is passed as a "%s" argument, so '%'
+ * characters in it are not interpreted.  Returns the result of
+ * zfs_standard_error_fmt().
+ */
+int
+zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	int ret = zfs_standard_error_fmt(hdl, error, "%s", msg);
+
+	return (ret);
+}
+
+/*
+ * Report the errno value 'error' for a dataset operation, translating it
+ * into the matching EZFS_* code.  'fmt' and its arguments form the action
+ * message.
+ *
+ * Errors recognized by zfs_common_error() are reported there.  The rest
+ * are mapped by the switch below; an unrecognized value is reported as
+ * EZFS_UNKNOWN with strerror(error) as its description.
+ *
+ * Always returns -1.
+ */
+/*PRINTFLIKE3*/
+int
+zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENXIO:
+	case ENODEV:
+		zfs_verror(hdl, EZFS_IO, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset does not exist"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case ENOSPC:
+	case EDQUOT:
+		/* break (not return) so that va_end() below always runs */
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+	case EROFS:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot permissions cannot be modified"));
+		zfs_verror(hdl, EZFS_PERMRDONLY, fmt, ap);
+		break;
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
+		break;
+	case ENOTSUP:
+		zfs_verror(hdl, EZFS_BADVERSION, fmt, ap);
+		break;
+	default:
+		/*
+		 * Describe the error we were handed, not the global errno,
+		 * which may have been changed since the failure.  The text
+		 * is passed as an argument so that a '%' in it can never be
+		 * taken as a conversion specifier.
+		 */
+		zfs_error_aux(hdl, "%s", strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+		break;
+	}
+
+	va_end(ap);
+	return (-1);
+}
+
+/*
+ * Report the errno value 'error' for a pool operation with a fixed action
+ * message.  The message is passed as a "%s" argument, so '%' characters
+ * in it are not interpreted.  Returns the result of
+ * zpool_standard_error_fmt().
+ */
+int
+zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	int ret = zpool_standard_error_fmt(hdl, error, "%s", msg);
+
+	return (ret);
+}
+
+/*
+ * Translate an errno value from a pool operation into a libzfs error and
+ * report it with a printf-style description.
+ *
+ *	hdl	library handle the error is reported against
+ *	error	errno value to translate
+ *	fmt	printf-style format for the description, followed by its
+ *		arguments
+ *
+ * Errors shared with datasets are handled by zfs_common_error(); the
+ * remaining values are mapped here, with anything unrecognized reported as
+ * EZFS_UNKNOWN.  Always returns -1.
+ */
+/*PRINTFLIKE3*/
+int
+zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENODEV:
+		zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+
+	case ENXIO:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "one or more devices is currently unavailable"));
+		zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
+		break;
+
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
+		break;
+
+	case ENOTSUP:
+		zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap);
+		break;
+
+	case EINVAL:
+		zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap);
+		break;
+
+	case ENOSPC:
+	case EDQUOT:
+		/* Fall out through the common exit so va_end() is called. */
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		break;
+
+	default:
+		zfs_error_aux(hdl, strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+		break;
+	}
+
+	va_end(ap);
+	return (-1);
+}
+
+/*
+ * Report an out of memory condition against 'hdl' as EZFS_NOMEM and return
+ * the result of zfs_error().  This function itself does not terminate the
+ * program.
+ */
+int
+no_memory(libzfs_handle_t *hdl)
+{
+	int rc;
+
+	rc = zfs_error(hdl, EZFS_NOMEM, "internal error");
+	return (rc);
+}
+
+/*
+ * Allocate 'size' bytes of zero-filled memory.  If the allocation fails the
+ * condition is reported through no_memory() and NULL is returned, so callers
+ * must still check the result.
+ */
+void *
+zfs_alloc(libzfs_handle_t *hdl, size_t size)
+{
+	void *data = calloc(1, size);
+
+	if (data == NULL)
+		(void) no_memory(hdl);
+
+	return (data);
+}
+
+/*
+ * A safe form of realloc(), which also zeroes newly allocated space.
+ *
+ *	hdl	library handle used to report an allocation failure
+ *	ptr	buffer to resize
+ *	oldsize	current size of 'ptr' in bytes
+ *	newsize	requested size in bytes
+ *
+ * Returns the resized buffer.  On failure the error is reported through
+ * no_memory(), the original buffer is freed, and NULL is returned; the
+ * caller must not use 'ptr' afterwards in either case.
+ */
+void *
+zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize)
+{
+	void *ret;
+
+	if ((ret = realloc(ptr, newsize)) == NULL) {
+		(void) no_memory(hdl);
+		free(ptr);
+		return (NULL);
+	}
+
+	/*
+	 * Only a growing buffer has a new tail to clear.  When shrinking,
+	 * 'newsize - oldsize' would wrap around to a huge unsigned length.
+	 */
+	if (newsize > oldsize)
+		bzero((char *)ret + oldsize, (newsize - oldsize));
+
+	return (ret);
+}
+
+/*
+ * Duplicate 'str' with strdup().  If the allocation fails the condition is
+ * reported through no_memory() and NULL is returned, so callers must still
+ * check the result.
+ */
+char *
+zfs_strdup(libzfs_handle_t *hdl, const char *str)
+{
+	char *copy = strdup(str);
+
+	if (copy == NULL)
+		(void) no_memory(hdl);
+
+	return (copy);
+}
+
+/*
+ * Format 'num' into 'buf' (at most 'buflen' bytes) as a short human-readable
+ * string using power-of-1024 units: no suffix, then K, M, G, T, P and E.
+ */
+void
+zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+	static const char suffixes[] = " KMGTPE";
+	uint64_t scaled;
+	uint64_t unit;
+	int mag = 0;
+	int prec;
+	char u;
+
+	/* Count how many times the value can be divided by 1024. */
+	for (scaled = num; scaled >= 1024; scaled /= 1024)
+		mag++;
+
+	u = suffixes[mag];
+
+	if (mag == 0) {
+		(void) snprintf(buf, buflen, "%llu", (u_longlong_t) scaled);
+		return;
+	}
+
+	unit = 1ULL << (10 * mag);
+
+	if ((num & (unit - 1)) == 0) {
+		/*
+		 * An exact multiple of the unit is always shown without any
+		 * decimal places.
+		 */
+		(void) snprintf(buf, buflen, "%llu%c", (u_longlong_t) scaled,
+		    u);
+		return;
+	}
+
+	/*
+	 * Pick the largest precision whose output fits in 5 characters.
+	 * Values just below a power of ten are awkward (10239 is 9.999K and
+	 * should print as 10.0K), so rather than predict the width we simply
+	 * try two, one and then zero decimal places in turn.
+	 */
+	for (prec = 2; prec >= 0; prec--) {
+		if (snprintf(buf, buflen, "%.*f%c", prec,
+		    (double)num / unit, u) <= 5)
+			break;
+	}
+}
+
+/*
+ * Store 'printerr' in the handle's libzfs_printerr field.
+ * NOTE(review): presumably this controls whether the error-reporting
+ * routines print messages themselves; confirm against where the field is
+ * read.
+ */
+void
+libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
+{
+	hdl->libzfs_printerr = printerr;
+}
+
+/*
+ * Allocate and initialize a library handle: open ZFS_DEV read/write, open
+ * MNTTAB for reading, try to open the sharetab, and run the property table
+ * initializers.  Returns NULL if the handle cannot be allocated or if
+ * ZFS_DEV or MNTTAB cannot be opened.
+ */
+libzfs_handle_t *
+libzfs_init(void)
+{
+	libzfs_handle_t *hdl = calloc(sizeof (libzfs_handle_t), 1);
+
+	if (hdl == NULL)
+		return (NULL);
+
+	hdl->libzfs_fd = open(ZFS_DEV, O_RDWR);
+	if (hdl->libzfs_fd < 0)
+		goto fail_free;
+
+	hdl->libzfs_mnttab = fopen(MNTTAB, "r");
+	if (hdl->libzfs_mnttab == NULL)
+		goto fail_close;
+
+	/* A missing sharetab is tolerated; the field is simply left NULL. */
+	hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r");
+
+	zfs_prop_init();
+	zpool_prop_init();
+
+	return (hdl);
+
+fail_close:
+	(void) close(hdl->libzfs_fd);
+fail_free:
+	free(hdl);
+	return (NULL);
+}
+
+/*
+ * Release everything owned by a library handle: the device descriptor, the
+ * mnttab and sharetab streams (when open), libshare state, any pending log
+ * string, cached pool handles and the namespace cache, and finally the
+ * handle itself.  'hdl' must not be used afterwards.
+ */
+void
+libzfs_fini(libzfs_handle_t *hdl)
+{
+	(void) close(hdl->libzfs_fd);
+
+	if (hdl->libzfs_mnttab != NULL)
+		(void) fclose(hdl->libzfs_mnttab);
+
+	if (hdl->libzfs_sharetab != NULL)
+		(void) fclose(hdl->libzfs_sharetab);
+
+	zfs_uninit_libshare(hdl);
+
+	/* free() accepts NULL, so no guard is needed here. */
+	free(hdl->libzfs_log_str);
+
+	zpool_free_handles(hdl);
+	namespace_clear(hdl);
+	free(hdl);
+}
+
+/*
+ * Return the library handle stored in a pool handle.
+ */
+libzfs_handle_t *
+zpool_get_handle(zpool_handle_t *zhp)
+{
+	libzfs_handle_t *owner = zhp->zpool_hdl;
+
+	return (owner);
+}
+
+/*
+ * Return the library handle stored in a dataset handle.
+ */
+libzfs_handle_t *
+zfs_get_handle(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *owner = zhp->zfs_hdl;
+
+	return (owner);
+}
+
+/*
+ * Return the pool handle stored in a dataset handle.
+ */
+zpool_handle_t *
+zfs_get_pool_handle(const zfs_handle_t *zhp)
+{
+	zpool_handle_t *pool = zhp->zpool_hdl;
+
+	return (pool);
+}
+
+/*
+ * Open a dataset given either a filesystem path or a dataset name.
+ *
+ * A 'path' beginning with '/' or "./" is treated as a path: it is stat'ed
+ * and the mnttab is searched for the entry whose device number matches.  If
+ * that entry is a ZFS filesystem, the dataset it names is opened as a
+ * filesystem.  Anything else is opened directly as a name of type 'argtype'.
+ *
+ * Returns NULL on failure.  Failure to stat the path, or a path that is not
+ * on ZFS, is reported on stderr; a path with no matching mnttab entry fails
+ * silently.
+ */
+zfs_handle_t *
+zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
+{
+	struct extmnttab mnt;
+	struct stat64 sb;
+	int rc;
+
+	/* Not a path at all: assume it's a name of type 'argtype'. */
+	if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0)
+		return (zfs_open(hdl, path, argtype));
+
+	if (stat64(path, &sb) != 0) {
+		(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
+		return (NULL);
+	}
+
+	/* Scan the mnttab from the top for the matching device. */
+	rewind(hdl->libzfs_mnttab);
+	for (;;) {
+		rc = getextmntent(hdl->libzfs_mnttab, &mnt, 0);
+		if (rc != 0)
+			return (NULL);
+		if (makedevice(mnt.mnt_major, mnt.mnt_minor) == sb.st_dev)
+			break;
+	}
+
+	if (strcmp(mnt.mnt_fstype, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
+		    path);
+		return (NULL);
+	}
+
+	return (zfs_open(hdl, mnt.mnt_special, ZFS_TYPE_FILESYSTEM));
+}
+
+/*
+ * Prepare the zc_nvlist_dst member of 'zc' to receive an nvlist from an
+ * ioctl() by allocating a destination buffer of 'len' bytes (2048 when 'len'
+ * is 0).  Returns 0 on success or -1 if the allocation fails.
+ */
+int
+zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
+{
+	void *dst;
+
+	zc->zc_nvlist_dst_size = (len != 0) ? len : 2048;
+
+	dst = zfs_alloc(hdl, zc->zc_nvlist_dst_size);
+	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)dst;
+
+	return ((dst == NULL) ? -1 : 0);
+}
+
+/*
+ * Called when an ioctl() which returns an nvlist fails with ENOMEM.  The old
+ * destination buffer is discarded and a new one is allocated with the size
+ * currently in 'zc_nvlist_dst_size', which the kernel filled in to indicate
+ * the size actually required.  Returns 0 on success or -1 if the allocation
+ * fails.
+ */
+int
+zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+	void *dst;
+
+	free((void *)(uintptr_t)zc->zc_nvlist_dst);
+
+	dst = zfs_alloc(hdl, zc->zc_nvlist_dst_size);
+	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)dst;
+
+	return ((dst == NULL) ? -1 : 0);
+}
+
+/*
+ * Free the conf, src and dst nvlist buffers referenced by the command
+ * structure.  The fields themselves are left unchanged.
+ */
+void
+zcmd_free_nvlists(zfs_cmd_t *zc)
+{
+	void *conf = (void *)(uintptr_t)zc->zc_nvlist_conf;
+	void *src = (void *)(uintptr_t)zc->zc_nvlist_src;
+	void *dst = (void *)(uintptr_t)zc->zc_nvlist_dst;
+
+	free(conf);
+	free(src);
+	free(dst);
+}
+
+/*
+ * Pack 'nvl' in native encoding into a newly allocated buffer and store the
+ * buffer address in '*outnv' and its length in '*outlen'.  Returns 0 on
+ * success or -1 if the buffer cannot be allocated, in which case the outputs
+ * are left untouched.
+ */
+static int
+zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen,
+    nvlist_t *nvl)
+{
+	size_t len;
+	char *packed;
+
+	verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0);
+
+	packed = zfs_alloc(hdl, len);
+	if (packed == NULL)
+		return (-1);
+
+	verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
+
+	*outlen = len;
+	*outnv = (uint64_t)(uintptr_t)packed;
+
+	return (0);
+}
+
+/*
+ * Pack 'nvl' into the zc_nvlist_conf / zc_nvlist_conf_size members of 'zc'.
+ * Returns 0 on success or -1 on allocation failure.
+ */
+int
+zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	uint64_t *bufp = &zc->zc_nvlist_conf;
+	uint64_t *lenp = &zc->zc_nvlist_conf_size;
+
+	return (zcmd_write_nvlist_com(hdl, bufp, lenp, nvl));
+}
+
+/*
+ * Pack 'nvl' into the zc_nvlist_src / zc_nvlist_src_size members of 'zc'.
+ * Returns 0 on success or -1 on allocation failure.
+ */
+int
+zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	uint64_t *bufp = &zc->zc_nvlist_src;
+	uint64_t *lenp = &zc->zc_nvlist_src_size;
+
+	return (zcmd_write_nvlist_com(hdl, bufp, lenp, nvl));
+}
+
+/*
+ * Unpack the nvlist held in the destination buffer of the ZFS ioctl command
+ * structure into '*nvlp'.  Returns 0 on success; any unpack failure is
+ * reported through no_memory() and its result is returned.
+ */
+int
+zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
+{
+	void *packed = (void *)(uintptr_t)zc->zc_nvlist_dst;
+
+	if (nvlist_unpack(packed, zc->zc_nvlist_dst_size, nvlp, 0) == 0)
+		return (0);
+
+	return (no_memory(hdl));
+}
+
+/*
+ * Issue 'request' on the library's ZFS device descriptor.  The handle's
+ * pending log string, if any, is attached to the command through zc_history
+ * for the duration of the call and is freed afterwards.  Returns the result
+ * of ioctl().
+ */
+int
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
+{
+	int rc;
+
+	zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str;
+	rc = ioctl(hdl->libzfs_fd, request, zc);
+
+	/* The log string is used for a single ioctl only. */
+	if (hdl->libzfs_log_str != NULL) {
+		free(hdl->libzfs_log_str);
+		hdl->libzfs_log_str = NULL;
+	}
+	zc->zc_history = 0;
+
+	return (rc);
+}
+
+/*
+ * ================================================================
+ * API shared by zfs and zpool property management
+ * ================================================================
+ */
+
+/*
+ * Compute the column widths for 'get' style output from the property list
+ * in 'cbp' and print the header line.  cb_first is cleared so this runs only
+ * once; in scripted mode nothing else is done.
+ */
+static void
+zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
+{
+	zprop_list_t *pl;
+	const char *colname;
+	char *title;
+	size_t len;
+	int nameprop;
+	int i;
+
+	cbp->cb_first = B_FALSE;
+	if (cbp->cb_scripted)
+		return;
+
+	/*
+	 * Every column is at least as wide as its own header.
+	 */
+	cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME"));
+	cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN,
+	    "PROPERTY"));
+	cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
+	    "VALUE"));
+	cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
+	    "SOURCE"));
+
+	nameprop = (type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME;
+
+	/*
+	 * Widen each column to fit the properties we will display.  The
+	 * 'SOURCE' width is a worst-case guess: "inherited from" plus the
+	 * longest name.  That is acceptable because 'SOURCE' is usually the
+	 * last column, where the width is not used.  The 'VALUE' column may
+	 * end up oversized when a property's declared width is much larger
+	 * than any value actually found.
+	 */
+	for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
+		/*
+		 * 'PROPERTY' column: native properties use their table
+		 * name, user properties their literal name.
+		 */
+		if (pl->pl_prop == ZPROP_INVAL)
+			colname = pl->pl_user_prop;
+		else if (type == ZFS_TYPE_POOL)
+			colname = zpool_prop_to_name(pl->pl_prop);
+		else
+			colname = zfs_prop_to_name(pl->pl_prop);
+
+		len = strlen(colname);
+		if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+			cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+
+		/*
+		 * 'VALUE' column
+		 */
+		if ((pl->pl_prop != ZFS_PROP_NAME || !pl->pl_all) &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
+			cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
+
+		/*
+		 * 'NAME' and 'SOURCE' columns
+		 */
+		if (pl->pl_prop == nameprop &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) {
+			cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width;
+			cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width +
+			    strlen(dgettext(TEXT_DOMAIN, "inherited from"));
+		}
+	}
+
+	/*
+	 * Emit the header for each selected column.  The last column is
+	 * printed without padding.
+	 */
+	for (i = 0; i < 4; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			title = dgettext(TEXT_DOMAIN, "NAME");
+			break;
+		case GET_COL_PROPERTY:
+			title = dgettext(TEXT_DOMAIN, "PROPERTY");
+			break;
+		case GET_COL_VALUE:
+			title = dgettext(TEXT_DOMAIN, "VALUE");
+			break;
+		case GET_COL_SOURCE:
+			title = dgettext(TEXT_DOMAIN, "SOURCE");
+			break;
+		default:
+			title = NULL;
+			break;
+		}
+
+		if (title == NULL)
+			continue;
+
+		if (i == 3 || cbp->cb_columns[i + 1] == 0) {
+			(void) printf("%s", title);
+		} else {
+			(void) printf("%-*s  ",
+			    cbp->cb_colwidths[cbp->cb_columns[i]],
+			    title);
+		}
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Display a single line of output, according to the settings in the callback
+ * structure.
+ *
+ *	name		dataset or pool name for the 'NAME' column
+ *	cbp		output settings: selected columns, widths, source
+ *			filter and scripted mode
+ *	propname	text for the 'PROPERTY' column
+ *	value		text for the 'VALUE' column
+ *	sourcetype	where the value came from; the line is skipped when
+ *			this type is not selected in cb_sources
+ *	source		name shown after "inherited from" for inherited values
+ *
+ * The header line is printed first if it has not been printed yet.
+ */
+void
+zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
+    const char *propname, const char *value, zprop_source_t sourcetype,
+    const char *source)
+{
+	int i;
+	const char *str;
+	char buf[128];
+
+	/*
+	 * Ignore those source types that the user has chosen to ignore.
+	 */
+	if ((sourcetype & cbp->cb_sources) == 0)
+		return;
+
+	if (cbp->cb_first)
+		zprop_print_headers(cbp, cbp->cb_type);
+
+	for (i = 0; i < 4; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			str = name;
+			break;
+
+		case GET_COL_PROPERTY:
+			str = propname;
+			break;
+
+		case GET_COL_VALUE:
+			str = value;
+			break;
+
+		case GET_COL_SOURCE:
+			switch (sourcetype) {
+			case ZPROP_SRC_NONE:
+				str = "-";
+				break;
+
+			case ZPROP_SRC_DEFAULT:
+				str = "default";
+				break;
+
+			case ZPROP_SRC_LOCAL:
+				str = "local";
+				break;
+
+			case ZPROP_SRC_TEMPORARY:
+				str = "temporary";
+				break;
+
+			case ZPROP_SRC_INHERITED:
+				(void) snprintf(buf, sizeof (buf),
+				    "inherited from %s", source);
+				str = buf;
+				break;
+
+			default:
+				/*
+				 * Never print through an unset pointer if
+				 * an unexpected source type slips through.
+				 */
+				str = "-";
+				break;
+			}
+			break;
+
+		default:
+			continue;
+		}
+
+		/*
+		 * The last column is printed without padding.  Test the
+		 * index first so we never look past the fourth column
+		 * slot, matching zprop_print_headers().
+		 */
+		if (i == 3 || cbp->cb_columns[i + 1] == 0)
+			(void) printf("%s", str);
+		else if (cbp->cb_scripted)
+			(void) printf("%s\t", str);
+		else
+			(void) printf("%-*s  ",
+			    cbp->cb_colwidths[cbp->cb_columns[i]],
+			    str);
+
+	}
+
+	(void) printf("\n");
+}
+
+/*
+ * Given a numeric suffix, convert the value into a number of bits that the
+ * resulting value must be shifted.
+ *
+ * 'buf' points at the text following the digits.  An empty suffix yields 0.
+ * A single letter from "BKMGTPEZ" (either case) yields 0, 10, 20, ... 70,
+ * and may be followed by one 'b' or 'B' (e.g. "GB", "Mb"), except that "BB"
+ * is rejected.  Anything else returns -1; in that case an auxiliary error
+ * message is recorded only when 'hdl' is non-NULL.
+ */
+static int
+str2shift(libzfs_handle_t *hdl, const char *buf)
+{
+	const char *ends = "BKMGTPEZ";
+	size_t nends = strlen(ends);
+	size_t i;
+	int first;
+
+	if (buf[0] == '\0')
+		return (0);
+
+	/* <ctype.h> functions require a value in the unsigned char range. */
+	first = toupper((unsigned char)buf[0]);
+
+	for (i = 0; i < nends; i++) {
+		if (first == ends[i])
+			break;
+	}
+
+	/*
+	 * We want to allow trailing 'b' characters for 'GB' or 'Mb'.  But don't
+	 * allow 'BB' - that's just weird.
+	 */
+	if (i < nends && (buf[1] == '\0' ||
+	    (toupper((unsigned char)buf[1]) == 'B' && buf[2] == '\0' &&
+	    first != 'B')))
+		return ((int)(10 * i));
+
+	/* Callers may pass a NULL handle when they want no error text. */
+	if (hdl)
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid numeric suffix '%s'"), buf);
+	return (-1);
+}
+
+/*
+ * Convert a string of the form '100G' into a real number.  Used when setting
+ * properties or creating a volume.
+ *
+ *	hdl	library handle, or NULL; the errors detected in this
+ *		function record an auxiliary message only when it is non-NULL
+ *	value	text to convert: digits, an optional fraction, and an
+ *		optional suffix as accepted by str2shift()
+ *	num	receives the converted value
+ *
+ * Returns 0 on success or -1 if the text is not numeric, has a bad suffix,
+ * or does not fit in an unsigned 64-bit value.  The contents of '*num'
+ * should not be relied upon after a failure.
+ */
+int
+zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
+{
+	char *end;
+	int shift;
+
+	*num = 0;
+
+	/* Check to see if this looks like a number.  */
+	if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "bad numeric value '%s'"), value);
+		return (-1);
+	}
+
+	/*
+	 * Rely on strtoull() to process the numeric portion.  The result is
+	 * unsigned, so use the unsigned conversion to cover the full 64-bit
+	 * range; a leading '-' has already been rejected above.
+	 */
+	errno = 0;
+	*num = strtoull(value, &end, 10);
+
+	/*
+	 * Check for ERANGE, which indicates that the value is too large to fit
+	 * in a 64-bit value.
+	 */
+	if (errno == ERANGE) {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "numeric value is too large"));
+		return (-1);
+	}
+
+	/*
+	 * If we have a decimal value, then do the computation with floating
+	 * point arithmetic.  Otherwise, use standard arithmetic.
+	 */
+	if (*end == '.') {
+		double fval = strtod(value, &end);
+
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		fval *= pow(2, shift);
+
+		/*
+		 * UINT64_MAX is not representable as a double and rounds up
+		 * to 2^64, so a value equal to it is already out of range
+		 * for the cast below.
+		 */
+		if (fval >= (double)UINT64_MAX) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num = (uint64_t)fval;
+	} else {
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		/* Check for overflow */
+		if (shift >= 64 || (*num << shift) >> shift != *num) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num <<= shift;
+	}
+
+	return (0);
+}
+
+/*
+ * Given a propname=value nvpair to set, parse any numeric properties
+ * (index, boolean, etc) if they are specified as strings and add the
+ * resulting nvpair to the returned nvlist.
+ *
+ * At the DSL layer, all properties are either 64-bit numbers or strings.
+ * We want the user to be able to ignore this fact and specify properties
+ * as native values (numbers, for example) or as strings (to simplify
+ * command line utilities).  This also handles converting index types
+ * (compression, checksum, etc) from strings to their on-disk index.
+ *
+ *	hdl	library handle used for error reporting
+ *	elem	the nvpair supplied by the caller
+ *	prop	property number; interpreted as a pool property when 'type'
+ *		is ZFS_TYPE_POOL and as a dataset property otherwise
+ *	type	kind of object the property belongs to
+ *	ret	nvlist that receives the converted pair, keyed by the
+ *		property's table name
+ *	svalp	set to the string value for string properties, else NULL
+ *	ivalp	set to the numeric value for number and index properties,
+ *		else 0
+ *	errbuf	description passed to zfs_error() when the value is rejected
+ *
+ * Returns 0 on success.  Returns -1 after reporting EZFS_BADPROP when the
+ * value is unacceptable, or after calling no_memory() when the pair cannot
+ * be added to 'ret'.  A property type other than string, number or index
+ * aborts the program.
+ */
+int
+zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
+    zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp,
+    const char *errbuf)
+{
+	data_type_t datatype = nvpair_type(elem);
+	zprop_type_t proptype;
+	const char *propname;
+	char *value;
+	boolean_t isnone = B_FALSE;
+
+	if (type == ZFS_TYPE_POOL) {
+		proptype = zpool_prop_get_type(prop);
+		propname = zpool_prop_to_name(prop);
+	} else {
+		proptype = zfs_prop_get_type(prop);
+		propname = zfs_prop_to_name(prop);
+	}
+
+	/*
+	 * Convert any properties to the internal DSL value types.  Both
+	 * outputs start out cleared; exactly one of them is filled in below.
+	 */
+	*svalp = NULL;
+	*ivalp = 0;
+
+	switch (proptype) {
+	case PROP_TYPE_STRING:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+		(void) nvpair_value_string(elem, svalp);
+		if (strlen(*svalp) >= ZFS_MAXPROPLEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' is too long"), nvpair_name(elem));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_NUMBER:
+		if (datatype == DATA_TYPE_STRING) {
+			(void) nvpair_value_string(elem, &value);
+			if (strcmp(value, "none") == 0) {
+				isnone = B_TRUE;	/* *ivalp stays 0 */
+			} else if (zfs_nicestrtonum(hdl, value, ivalp)
+			    != 0) {
+				goto error;
+			}
+		} else if (datatype == DATA_TYPE_UINT64) {
+			(void) nvpair_value_uint64(elem, ivalp);
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a number"), nvpair_name(elem));
+			goto error;
+		}
+
+		/*
+		 * Quota special: force 'none' and don't allow 0.  A literal
+		 * zero for quota or refquota on a dataset is rejected; only
+		 * the string "none" may produce the value 0.
+		 */
+		if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone &&
+		    (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "use 'none' to disable quota/refquota"));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_INDEX:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+
+		(void) nvpair_value_string(elem, &value);
+
+		if (zprop_string_to_index(prop, value, ivalp, type) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be one of '%s'"), propname,
+			    zprop_values(prop, type));
+			goto error;
+		}
+		break;
+
+	default:
+		abort();
+	}
+
+	/*
+	 * Add the result to our return set of properties: as a string when
+	 * a string value was produced, otherwise as a 64-bit number.
+	 */
+	if (*svalp != NULL) {
+		if (nvlist_add_string(ret, propname, *svalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	} else {
+		if (nvlist_add_uint64(ret, propname, *ivalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	}
+
+	return (0);
+error:
+	(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+	return (-1);
+}
+
+/*
+ * Allocate a property list entry for 'propname' and store it in '*listp'.
+ *
+ * A name that is not a native property valid for 'type' is accepted only
+ * when it is a user-defined dataset property; otherwise EZFS_BADPROP is
+ * reported.  Returns 0 on success and non-zero on failure, in which case
+ * '*listp' is left untouched.
+ */
+static int
+addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	zprop_list_t *node;
+	int prop = zprop_name_to_prop(propname, type);
+
+	/* A native property that does not apply to this type is invalid. */
+	if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type))
+		prop = ZPROP_INVAL;
+
+	/*
+	 * With no property table entry, the name is only acceptable as a
+	 * user-defined dataset property; pools have no such properties.
+	 */
+	if (prop == ZPROP_INVAL &&
+	    (type == ZFS_TYPE_POOL || !zfs_prop_user(propname))) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid property '%s'"), propname);
+		return (zfs_error(hdl, EZFS_BADPROP,
+		    dgettext(TEXT_DOMAIN, "bad property list")));
+	}
+
+	node = zfs_alloc(hdl, sizeof (zprop_list_t));
+	if (node == NULL)
+		return (-1);
+
+	node->pl_prop = prop;
+	if (prop != ZPROP_INVAL) {
+		node->pl_width = zprop_width(prop, &node->pl_fixed,
+		    type);
+	} else {
+		node->pl_user_prop = zfs_strdup(hdl, propname);
+		if (node->pl_user_prop == NULL) {
+			free(node);
+			return (-1);
+		}
+		node->pl_width = strlen(propname);
+	}
+
+	*listp = node;
+
+	return (0);
+}
+
+/*
+ * Given a comma-separated list of properties, construct a property list
+ * containing both user-defined and native properties.  This function will
+ * return a NULL list if 'all' is specified, which can later be expanded
+ * by zprop_expand_list().  The alias 'space' expands to a fixed set of
+ * space-accounting properties.
+ *
+ * Note that 'props' is modified in place: the character ending each name is
+ * overwritten with a NUL and is not restored.
+ *
+ * Returns 0 on success.  On failure a non-zero value is returned and
+ * '*listp' holds whatever entries were built before the error.
+ */
+int
+zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	static char *spaceprops[] = {
+		"name", "avail", "used", "usedbysnapshots",
+		"usedbydataset", "usedbyrefreservation",
+		"usedbychildren", NULL
+	};
+
+	*listp = NULL;
+
+	/*
+	 * 'all' is represented by a NULL list.
+	 */
+	if (strcmp(props, "all") == 0)
+		return (0);
+
+	/*
+	 * An empty specification is an error.
+	 */
+	if (props[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no properties specified"));
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad property list")));
+	}
+
+	/*
+	 * It would be nice to use getsubopt() here, but the inclusion of column
+	 * aliases makes this more effort than it's worth.
+	 */
+	while (*props != '\0') {
+		char *tokend = strchr(props, ',');
+		char *next;
+		int i;
+
+		if (tokend == NULL) {
+			/* Last name: it runs to the end of the string. */
+			tokend = props + strlen(props);
+			next = tokend;
+		} else {
+			next = tokend + 1;
+		}
+
+		/*
+		 * Reject empty names such as "a,,b".
+		 */
+		if (tokend == props) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty property name"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+
+		/*
+		 * Terminate the current name so it can be looked up.
+		 */
+		*tokend = '\0';
+
+		if (strcmp(props, "space") != 0) {
+			if (addlist(hdl, props, listp, type))
+				return (-1);
+			listp = &(*listp)->pl_next;
+		} else {
+			for (i = 0; spaceprops[i]; i++) {
+				if (addlist(hdl, spaceprops[i], listp, type))
+					return (-1);
+				listp = &(*listp)->pl_next;
+			}
+		}
+
+		props = next;
+	}
+
+	return (0);
+}
+
+/*
+ * Free every entry of a property list, including the user property name
+ * attached to each entry.  A NULL list is accepted.
+ */
+void
+zprop_free_list(zprop_list_t *pl)
+{
+	zprop_list_t *following;
+
+	for (; pl != NULL; pl = following) {
+		/* Save the link before the entry is released. */
+		following = pl->pl_next;
+		free(pl->pl_user_prop);
+		free(pl);
+	}
+}
+
+/*
+ * State handed to zprop_expand_list_cb() while a property list is expanded.
+ */
+typedef struct expand_data {
+	zprop_list_t	**last;	/* link where the next entry is stored */
+	libzfs_handle_t	*hdl;	/* handle used when allocating entries */
+	zfs_type_t type;	/* type passed to zprop_width() */
+} expand_data_t;
+
+/*
+ * Property iteration callback used by zprop_expand_list(): append an entry
+ * for 'prop', marked as coming from an 'all' expansion, to the list tracked
+ * by the expand_data_t passed in 'cb'.  Returns ZPROP_CONT to keep iterating
+ * or ZPROP_INVAL if the entry cannot be allocated.
+ */
+int
+zprop_expand_list_cb(int prop, void *cb)
+{
+	expand_data_t *edp = cb;
+	zprop_list_t *node;
+
+	node = zfs_alloc(edp->hdl, sizeof (zprop_list_t));
+	if (node == NULL)
+		return (ZPROP_INVAL);
+
+	node->pl_prop = prop;
+	node->pl_width = zprop_width(prop, &node->pl_fixed, edp->type);
+	node->pl_all = B_TRUE;
+
+	/* Link the entry in and advance the tail to its next pointer. */
+	*(edp->last) = node;
+	edp->last = &node->pl_next;
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Expand an 'all' property specification.  When '*plp' is NULL the list is
+ * filled with an entry for each property visited by zprop_iter_common() for
+ * 'type', with the name property placed at the front.  A non-NULL list is
+ * left untouched.  Returns 0 on success or -1 on allocation failure.
+ */
+int
+zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type)
+{
+	zprop_list_t *head;
+	expand_data_t exp;
+
+	/* Nothing to do unless this is a not-yet-expanded 'all' list. */
+	if (*plp != NULL)
+		return (0);
+
+	exp.last = plp;
+	exp.hdl = hdl;
+	exp.type = type;
+
+	if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE,
+	    B_FALSE, type) == ZPROP_INVAL)
+		return (-1);
+
+	/*
+	 * 'name' is handled specially, so it is added separately at the
+	 * beginning of the list.
+	 */
+	head = zfs_alloc(hdl, sizeof (zprop_list_t));
+	if (head == NULL)
+		return (-1);
+
+	head->pl_prop = (type == ZFS_TYPE_POOL) ?  ZPOOL_PROP_NAME :
+	    ZFS_PROP_NAME;
+	head->pl_width = zprop_width(head->pl_prop,
+	    &head->pl_fixed, type);
+	head->pl_all = B_TRUE;
+	head->pl_next = *plp;
+	*plp = head;
+
+	return (0);
+}
+
+/*
+ * Public entry point for property iteration; forwards all arguments to
+ * zprop_iter_common() and returns its result.
+ */
+int
+zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered,
+    zfs_type_t type)
+{
+	int rc = zprop_iter_common(func, cb, show_all, ordered, type);
+
+	return (rc);
+}
author	Brian Behlendorf <[email protected]>	2008-12-11 11:25:42 -0800
committer	Brian Behlendorf <[email protected]>	2008-12-11 11:25:42 -0800
commit	756a122d375586b0fd2d2ac34e7a6d1e7404d977 (patch)
tree	278cb353aadac7f40cd6b51d5690fb777bfcabdf /lib/libzfs
parent	764d9b1916581d3223ae504278e0d660323577c2 (diff)
parent	11cf0f3f8c85345c26f029626bc8eaff518cb9f4 (diff)