Move the world out of /zfs/ and seperate out module build tree

author: Brian Behlendorf <[email protected]> 2008-12-11 11:08:09 -0800
committer: Brian Behlendorf <[email protected]> 2008-12-11 11:08:09 -0800
commit: 172bb4bd5e4afef721dd4d2972d8680d983f144b (patch)
tree: 18ab1e97e5e409150066c529b5a981ecf600ef80 /lib
parent: 9e8b1e836caa454586797f771a7ad1817ebae315 (diff)
35 files changed, 23576 insertions, 0 deletions
diff --git a/lib/libspl/libspl/include/sys/list.h b/lib/libspl/libspl/include/sys/list.h
new file mode 100644
index 000000000..8339b6226
--- /dev/null
+++ b/lib/libspl/libspl/include/sys/list.h
@@ -0,0 +1,67 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_LIST_H
+#define	_SYS_LIST_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/list_impl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+typedef struct list_node list_node_t;	/* link embedded in each object */
+typedef struct list list_t;		/* list header with sentinel node */
+
+void list_create(list_t *, size_t, size_t);	/* object size, node offset */
+void list_destroy(list_t *);			/* list must be empty */
+
+void list_insert_after(list_t *, void *, void *);   /* NULL ref: at head */
+void list_insert_before(list_t *, void *, void *);  /* NULL ref: at tail */
+void list_insert_head(list_t *, void *);
+void list_insert_tail(list_t *, void *);
+void list_remove(list_t *, void *);
+void *list_remove_head(list_t *);		/* NULL if list is empty */
+void *list_remove_tail(list_t *);		/* NULL if list is empty */
+void list_move_tail(list_t *, list_t *);	/* (dst, src); src left empty */
+
+void *list_head(list_t *);			/* NULL if list is empty */
+void *list_tail(list_t *);			/* NULL if list is empty */
+void *list_next(list_t *, void *);		/* NULL after last object */
+void *list_prev(list_t *, void *);		/* NULL before first object */
+int list_is_empty(list_t *);
+
+void list_link_init(list_node_t *);
+void list_link_replace(list_node_t *, list_node_t *);	/* (old, new) */
+
+int list_link_active(list_node_t *);		/* nonzero if node is linked */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LIST_H */
diff --git a/lib/libspl/libspl/include/sys/list_impl.h b/lib/libspl/libspl/include/sys/list_impl.h
new file mode 100644
index 000000000..9c42f8832
--- /dev/null
+++ b/lib/libspl/libspl/include/sys/list_impl.h
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_LIST_IMPL_H
+#define	_SYS_LIST_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+struct list_node {
+	struct list_node *list_next;	/* successor; NULL when unlinked */
+	struct list_node *list_prev;	/* predecessor; NULL when unlinked */
+};
+
+struct list {
+	size_t	list_size;	/* object size given to list_create() */
+	size_t	list_offset;	/* byte offset of the list_node in an object */
+	struct list_node list_head;	/* links to itself when list is empty */
+};
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LIST_IMPL_H */
diff --git a/lib/libspl/libspl/list.c b/lib/libspl/libspl/list.c
new file mode 100644
index 000000000..e8db13a5c
--- /dev/null
+++ b/lib/libspl/libspl/list.c
@@ -0,0 +1,245 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Generic doubly-linked list implementation
+ */
+
+#include <sys/list.h>
+#include <sys/list_impl.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+/* Link helpers; each expands to one statement, arguments are re-evaluated. */
+#define	list_d2l(a, obj) ((list_node_t *)(((char *)(obj)) + (a)->list_offset))
+#define	list_object(a, node) ((void *)(((char *)(node)) - (a)->list_offset))
+#define	list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+
+#define	list_insert_after_node(list, node, object) do {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_prev = (node);			\
+	lnew->list_next = (node)->list_next;		\
+	(node)->list_next->list_prev = lnew;		\
+	(node)->list_next = lnew;			\
+} while (0)
+
+#define	list_insert_before_node(list, node, object) do {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_next = (node);			\
+	lnew->list_prev = (node)->list_prev;		\
+	(node)->list_prev->list_next = lnew;		\
+	(node)->list_prev = lnew;			\
+} while (0)
+
+#define	list_remove_node(node) ((void)(				\
+	(node)->list_prev->list_next = (node)->list_next,	\
+	(node)->list_next->list_prev = (node)->list_prev,	\
+	(node)->list_next = (node)->list_prev = NULL))
+
+void
+list_create(list_t *list, size_t size, size_t offset)
+{
+	/* 'offset' locates the list_node_t inside each 'size'-byte object. */
+	ASSERT(list);
+	ASSERT(size > 0);
+	ASSERT(size >= offset + sizeof (list_node_t));
+	list->list_size = size;
+	list->list_offset = offset;
+	list->list_head.list_prev = &list->list_head;
+	list->list_head.list_next = &list->list_head;	/* self-linked: empty */
+}
+
+void
+list_destroy(list_t *list)
+{
+	list_node_t *node;
+	/* Only an empty list may be destroyed; its head is then unlinked. */
+	ASSERT(list);
+	node = &list->list_head;	/* derived only after the NULL check */
+	ASSERT(node->list_next == node);
+	ASSERT(node->list_prev == node);
+	node->list_next = node->list_prev = NULL;
+}
+
+void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+	/* A NULL 'object' means insert at the head of the list. */
+	if (object == NULL) {
+		list_insert_head(list, nobject);
+		return;
+	}
+	list_insert_after_node(list, list_d2l(list, object), nobject);
+}
+
+void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+	/* A NULL 'object' means insert at the tail of the list. */
+	if (object == NULL) {
+		list_insert_tail(list, nobject);
+		return;
+	}
+	list_insert_before_node(list, list_d2l(list, object), nobject);
+}
+
+void
+list_insert_head(list_t *list, void *object)
+{
+	/* Linking right after the list head makes 'object' the first one. */
+	list_insert_after_node(list, &list->list_head, object);
+}
+
+void
+list_insert_tail(list_t *list, void *object)
+{
+	/* Linking right before the list head makes 'object' the last one. */
+	list_insert_before_node(list, &list->list_head, object);
+}
+
+void
+list_remove(list_t *list, void *object)
+{
+	list_node_t *victim = list_d2l(list, object);
+	ASSERT(!list_empty(list));
+	ASSERT(victim->list_next != NULL);	/* must currently be linked */
+	list_remove_node(victim);		/* also NULLs victim's links */
+}
+
+void *
+list_remove_head(list_t *list)
+{
+	list_node_t *first = list->list_head.list_next;
+	if (first == &list->list_head)	/* head links to itself: empty */
+		return (NULL);
+	list_remove_node(first);
+	return (list_object(list, first));
+}
+
+void *
+list_remove_tail(list_t *list)
+{
+	list_node_t *last = list->list_head.list_prev;
+	if (last == &list->list_head)	/* head links to itself: empty */
+		return (NULL);
+	list_remove_node(last);
+	return (list_object(list, last));
+}
+
+void *
+list_head(list_t *list)
+{
+	list_node_t *first = list->list_head.list_next;
+	/* NULL for an empty list, otherwise the object holding 'first'. */
+	return (first == &list->list_head ? NULL : list_object(list, first));
+}
+
+void *
+list_tail(list_t *list)
+{
+	list_node_t *last = list->list_head.list_prev;
+	/* NULL for an empty list, otherwise the object holding 'last'. */
+	return (list_empty(list) ? NULL : list_object(list, last));
+}
+
+void *
+list_next(list_t *list, void *object)
+{
+	list_node_t *succ = list_d2l(list, object)->list_next;
+
+	/* Reaching the list head means 'object' was the last element. */
+	if (succ == &list->list_head)
+		return (NULL);
+	return (list_object(list, succ));
+}
+
+void *
+list_prev(list_t *list, void *object)
+{
+	list_node_t *pred = list_d2l(list, object)->list_prev;
+
+	/* Reaching the list head means 'object' was the first element. */
+	if (pred == &list->list_head)
+		return (NULL);
+	return (list_object(list, pred));
+}
+
+/*
+ * Append every element of src to the end of dst, leaving src empty.
+ * Both lists must have been created with the same size and offset.
+ */
+void
+list_move_tail(list_t *dst, list_t *src)
+{
+	list_node_t *dhead = &dst->list_head;
+	list_node_t *shead = &src->list_head;
+	list_node_t *first = shead->list_next;
+	list_node_t *last = shead->list_prev;
+
+	ASSERT(dst->list_size == src->list_size);
+	ASSERT(dst->list_offset == src->list_offset);
+	if (first == shead)
+		return;
+	/* Splice first..last between dst's old tail and dst's head. */
+	first->list_prev = dhead->list_prev;
+	dhead->list_prev->list_next = first;
+	last->list_next = dhead;
+	dhead->list_prev = last;
+	shead->list_next = shead->list_prev = shead;
+}
+
+void
+list_link_replace(list_node_t *lold, list_node_t *lnew)
+{
+	ASSERT(list_link_active(lold));
+	ASSERT(!list_link_active(lnew));
+
+	/* lnew inherits both links, then the neighbours are re-pointed. */
+	*lnew = *lold;
+	lold->list_prev->list_next = lnew;
+	lold->list_next->list_prev = lnew;
+	lold->list_next = lold->list_prev = NULL;
+}
+
+void
+list_link_init(list_node_t *link)
+{
+	/* NULL links are what list_link_active() reports as inactive. */
+	link->list_next = link->list_prev = NULL;
+}
+
+int
+list_link_active(list_node_t *link)
+{
+	return (link->list_next != NULL);	/* NULL next: not linked */
+}
+
+int
+list_is_empty(list_t *list)
+{
+	return (list->list_head.list_next == &list->list_head);
+}
diff --git a/lib/libspl/libspl/mkdirp.c b/lib/libspl/libspl/mkdirp.c
new file mode 100644
index 000000000..9c81f2a0b
--- /dev/null
+++ b/lib/libspl/libspl/mkdirp.c
@@ -0,0 +1,212 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1988 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Creates directory and its parents if the parents do not
+ * exist yet.
+ *
+ * Returns -1 if fails for reasons other than non-existing
+ * parents.
+ * Does NOT simplify pathnames with . or .. in them.
+ */
+
+#include <sys/types.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+
+static char *simplify(const char *str);
+
+int
+mkdirp(const char *d, mode_t mode)
+{
+	char *endptr, *ptr, *slash, *str;
+
+	/*
+	 * Work on a private copy with runs of '/' collapsed; simplify()
+	 * returns NULL on failure, and the copy is freed on every path.
+	 */
+	str = simplify(d);
+	if (str == NULL)
+		return (-1);
+
+	/* Common case: every parent already exists. */
+	if (mkdir(str, mode) == 0) {
+		free(str);
+		return (0);
+	}
+	/* Only a missing path component (ENOENT) can be repaired below. */
+	if (errno != ENOENT) {
+		free(str);
+		return (-1);
+	}
+
+	endptr = strrchr(str, '\0');
+	slash = strrchr(str, '/');
+	/*
+	 * ENOENT on a path without any '/' (including the empty string)
+	 * leaves no parent that could be created.  Report the failure
+	 * rather than skipping both loops below and returning success.
+	 */
+	if (slash == NULL) {
+		free(str);
+		errno = ENOENT;
+		return (-1);
+	}
+
+	/*
+	 * Search upward for the nearest existing parent, truncating the
+	 * copy at its last '/' on each pass.
+	 */
+	while (slash != NULL) {
+		ptr = slash;
+		*ptr = '\0';
+		/* If reached an existing parent, break */
+		if (access(str, F_OK) == 0)
+			break;
+		/* Non-existing parent: if under / or current dir, make it. */
+		slash = strrchr(str, '/');
+		if (slash == NULL || slash == str) {
+			if (mkdir(str, mode) != 0 && errno != EEXIST) {
+				free(str);
+				return (-1);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Create directories starting from the upmost non-existing parent,
+	 * restoring one '/' per pass until the full path is back.  EEXIST
+	 * is tolerated: it arises when str has the form "existing-dir/..".
+	 */
+	while ((ptr = strchr(str, '\0')) != endptr) {
+		*ptr = '/';
+		if (mkdir(str, mode) != 0 && errno != EEXIST) {
+			free(str);
+			return (-1);
+		}
+	}
+	free(str);
+	return (0);
+}
+
+/*
+ *	simplify - given a pathname, simplify that path by removing
+ *		   duplicate contiguous slashes.
+ *
+ *		   A simplified copy of the argument is returned to the
+ *		   caller, or NULL is returned on error.
+ *
+ *		   The caller should handle error reporting based upon the
+ *		   returned value, and should free the returned value,
+ *		   when appropriate.
+ */
+
+static char *
+simplify(const char *str)
+{
+	size_t i;		/* read index into wcPath */
+	size_t mbPathlen;	/* length of multi-byte path */
+	size_t wcPathlen;	/* length of wide-character path */
+	wchar_t *wptr;		/* scratch pointer */
+	wchar_t *wcPath;	/* wide-character version of the path */
+	char *mbPath;		/* The copy of the path to be returned */
+
+	/*
+	 *  bail out if there is nothing there.
+	 */
+
+	if (!str)
+		return (NULL);
+
+	/*
+	 *  Get a copy of the argument.
+	 */
+
+	if ((mbPath = strdup(str)) == NULL) {
+		return (NULL);
+	}
+
+	/*
+	 *  convert the multi-byte version of the path to a
+	 *  wide-character rendering, for doing our figuring.
+	 */
+
+	mbPathlen = strlen(mbPath);
+
+	if ((wcPath = calloc(mbPathlen + 1, sizeof (wchar_t))) == NULL) {
+		free(mbPath);
+		return (NULL);
+	}
+
+	if ((wcPathlen = mbstowcs(wcPath, mbPath, mbPathlen)) == (size_t)-1) {
+		free(mbPath);
+		free(wcPath);
+		return (NULL);
+	}
+
+	/*
+	 *  Remove duplicate slashes ("a//b///c" -> "a/b/c") by compacting
+	 *  the wide string in place; wptr is the write position.  Peeking
+	 *  at wcPath[i + 1] is safe even for the last character: the buffer
+	 *  was allocated zero-filled with one element more than mbstowcs()
+	 *  may store.
+	 */
+
+	for (wptr = wcPath, i = 0; i < wcPathlen; i++) {
+		*wptr++ = wcPath[i];
+
+		/* Having kept one slash, skip the rest of the run. */
+		if (wcPath[i] == L'/') {
+			while (wcPath[i + 1] == L'/')
+				i++;
+		}
+	}
+
+	*wptr = L'\0';
+
+	/*
+	 *  now convert back to the multi-byte format.
+	 */
+
+	if (wcstombs(mbPath, wcPath, mbPathlen) == (size_t)-1) {
+		free(mbPath);
+		free(wcPath);
+		return (NULL);
+	}
+
+	free(wcPath);
+	return (mbPath);
+}
diff --git a/lib/libspl/libspl/strlcat.c b/lib/libspl/libspl/strlcat.c
new file mode 100644
index 000000000..07d1403dd
--- /dev/null
+++ b/lib/libspl/libspl/strlcat.c
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "lint.h"
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Appends src to the string in the dstsize-byte buffer at dst, truncating
+ * so that dst is never overflowed and stays null terminated.  The scan of
+ * dst stops at &dst[dstsize-1]; with no terminator found, dst is left as
+ * is.  Returns the scanned length of dst plus strlen(src).
+ */
+
+size_t
+strlcat(char *dst, const char *src, size_t dstsize)
+{
+	const size_t srclen = strlen(src);
+	size_t dstlen = 0;
+	size_t room;
+
+	/* Bounded scan for the end of the string already in dst. */
+	while (dstlen < dstsize && dst[dstlen] != '\0')
+		dstlen++;
+	if (dstlen == dstsize)
+		return (dstlen + srclen);
+	/* Keep one byte of the remaining space for the terminator. */
+	room = dstsize - dstlen - 1;
+	if (srclen < room)
+		room = srclen;
+	(void) memcpy(dst + dstlen, src, room);
+	dst[dstlen + room] = '\0';
+	return (dstlen + srclen);
+}
diff --git a/lib/libspl/libspl/strlcpy.c b/lib/libspl/libspl/strlcpy.c
new file mode 100644
index 000000000..7a8009b89
--- /dev/null
+++ b/lib/libspl/libspl/strlcpy.c
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "lint.h"
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Copies src into the len-byte buffer at dst, truncating if necessary so
+ * that at most len - 1 bytes are copied and dst is null terminated (when
+ * len is non-zero).  Returns strlen(src).
+ */
+
+size_t
+strlcpy(char *dst, const char *src, size_t len)
+{
+	const size_t srclen = strlen(src);
+	size_t n;
+
+	/* A zero-length buffer cannot even hold the terminator. */
+	if (len == 0)
+		return (srclen);
+
+	/* Copy what fits, leaving room for the terminating null. */
+	n = (srclen < len) ? srclen : len - 1;
+	(void) memcpy(dst, src, n);
+	dst[n] = '\0';
+
+	return (srclen);
+}
diff --git a/lib/libspl/libspl/strnlen.c b/lib/libspl/libspl/strnlen.c
new file mode 100644
index 000000000..605245b6b
--- /dev/null
+++ b/lib/libspl/libspl/strnlen.c
@@ -0,0 +1,47 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.
+ * All rights reserved.  Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "lint.h"
+#include <string.h>
+#include <sys/types.h>
+
+/*
+ * Returns the number of bytes in str that precede the first null byte,
+ * but not more than maxlen.  Does not look past str + maxlen.
+ */
+size_t
+strnlen(const char *str, size_t maxlen)
+{
+	const char *nul = memchr(str, '\0', maxlen);
+	size_t len = maxlen;
+
+	/* No terminator within maxlen bytes leaves len at maxlen. */
+	if (nul != NULL)
+		len = (size_t)(nul - str);
+	return (len);
+}
diff --git a/lib/libspl/libspl/u8_textprep.c b/lib/libspl/libspl/u8_textprep.c
new file mode 100644
index 000000000..8faf1a97e
--- /dev/null
+++ b/lib/libspl/libspl/u8_textprep.c
@@ -0,0 +1,2132 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+/*
+ * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458).
+ *
+ * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F),
+ * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also
+ * the section 3C man pages.
+ * Interface stability: Committed.
+ */
+
+#include <sys/types.h>
+#ifdef	_KERNEL
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#else
+#include <sys/u8_textprep.h>
+#include <strings.h>
+#endif	/* _KERNEL */
+#include <sys/byteorder.h>
+#include <sys/errno.h>
+#include <sys/u8_textprep_data.h>
+
+
+/* The maximum number of bytes in a single UTF-8 character. */
+#define	U8_MB_CUR_MAX			(4)
+
+/*
+ * The maximum number of bytes a UTF-8 character needs in order to cover
+ * U+0000 - U+FFFF, i.e., the coding space of the now deprecated UCS-2.
+ */
+#define	U8_MAX_BYTES_UCS2		(3)
+
+/* The maximum possible number of bytes in a Stream-Safe Text. */
+#define	U8_STREAM_SAFE_TEXT_MAX		(128)
+
+/*
+ * The maximum number of characters in a combining/conjoining sequence, and
+ * the actual upper bound applied to one, which is one less than that.
+ */
+#define	U8_MAX_CHARS_A_SEQ		(32)
+#define	U8_UPPER_LIMIT_IN_A_SEQ		(31)
+
+/* The combining class value for a Starter. */
+#define	U8_COMBINING_CLASS_STARTER	(0)
+
+/*
+ * Hangul related macros.
+ *
+ * The first and last Unicode scalar values of the Hangul syllables and of
+ * the Hangul Jamo leading consonants, vowels, and optional trailing
+ * consonants.
+ *
+ * Note that U8_HANGUL_JAMO_T_FIRST is 0x11A7 rather than the actual first
+ * trailing consonant, U+11A8.  The trailing consonant is optional, so one
+ * is subtracted in advance; U8_HANGUL_JAMO_T() therefore tests with '>'.
+ *
+ * Each of the 19 modern leading consonants has 588 possible syllables:
+ * Hangul has 21 modern vowels and 27 modern trailing consonants plus 1 for
+ * the no-trailing-consonant case, i.e., 21 x 28 = 588.
+ *
+ * U8_HANGUL_JAMO_1ST_BYTE can be used as a quick check for a Hangul Jamo,
+ * but a match does not guarantee one; it only makes it most likely.
+ * U8_SAVE_HANGUL_AS_UTF8() expands to three separate statements.
+ */
+#define	U8_HANGUL_SYL_FIRST		(0xAC00U)
+#define	U8_HANGUL_SYL_LAST		(0xD7A3U)
+
+#define	U8_HANGUL_JAMO_L_FIRST		(0x1100U)
+#define	U8_HANGUL_JAMO_L_LAST		(0x1112U)
+#define	U8_HANGUL_JAMO_V_FIRST		(0x1161U)
+#define	U8_HANGUL_JAMO_V_LAST		(0x1175U)
+#define	U8_HANGUL_JAMO_T_FIRST		(0x11A7U)
+#define	U8_HANGUL_JAMO_T_LAST		(0x11C2U)
+
+#define	U8_HANGUL_V_COUNT		(21)
+#define	U8_HANGUL_VT_COUNT		(588)
+#define	U8_HANGUL_T_COUNT		(28)
+
+#define	U8_HANGUL_JAMO_1ST_BYTE		(0xE1U)
+
+#define	U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \
+	(s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \
+	(s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \
+	(s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU));
+
+#define	U8_HANGUL_JAMO_L(u) \
+	((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST)
+
+#define	U8_HANGUL_JAMO_V(u) \
+	((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST)
+
+#define	U8_HANGUL_JAMO_T(u) \
+	((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
+
+#define	U8_HANGUL_JAMO(u) \
+	((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
+
+#define	U8_HANGUL_SYLLABLE(u) \
+	((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST)
+
+#define	U8_HANGUL_COMPOSABLE_L_V(s, u) \
+	((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u)))
+
+#define	U8_HANGUL_COMPOSABLE_LV_T(s, u) \
+	((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u)))
+
+/* The types of decomposition mappings. */
+#define	U8_DECOMP_BOTH			(0xF5U)
+#define	U8_DECOMP_CANONICAL		(0xF6U)
+
+/* The indicator for a 16-bit table. */
+#define	U8_16BIT_TABLE_INDICATOR	(0x8000U)
+
+/* Convenience macros; the multi-statement ones supply their own ';'. */
+#define	U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \
+	(u) = ((uint32_t)(b1) & 0x0F) << 12 | ((uint32_t)(b2) & 0x3F) << 6 | \
+		(uint32_t)(b3) & 0x3F;
+
+#define	U8_SIMPLE_SWAP(a, b, t) \
+	(t) = (a); \
+	(a) = (b); \
+	(b) = (t);
+
+#define	U8_ASCII_TOUPPER(c) \
+	(((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c))
+
+#define	U8_ASCII_TOLOWER(c) \
+	(((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c))
+
+#define	U8_ISASCII(c)			(((uchar_t)(c)) < 0x80U)
+/*
+ * Swaps two combining marks.  The two characters must be adjacent, with
+ * 'a' before 'b'; otherwise the macro will fail.  It expands in place and
+ * uses k, tc, u8s, u8t, start, disp, and comb_class from the scope it is
+ * expanded in.
+ */
+#define	U8_SWAP_COMB_MARKS(a, b) \
+	for (k = 0; k < disp[(a)]; k++) \
+		u8t[k] = u8s[start[(a)] + k]; \
+	for (k = 0; k < disp[(b)]; k++) \
+		u8s[start[(a)] + k] = u8s[start[(b)] + k]; \
+	start[(b)] = start[(a)] + disp[(b)]; \
+	for (k = 0; k < disp[(a)]; k++) \
+		u8s[start[(b)] + k] = u8t[k]; \
+	U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \
+	U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc);
+
+/* The possible states during normalization. */
+typedef enum {
+	U8_STATE_START = 0,
+	U8_STATE_HANGUL_L = 1,	/* tested by U8_HANGUL_COMPOSABLE_L_V() */
+	U8_STATE_HANGUL_LV = 2,	/* tested by U8_HANGUL_COMPOSABLE_LV_T() */
+	U8_STATE_HANGUL_LVT = 3,
+	U8_STATE_HANGUL_V = 4,
+	U8_STATE_HANGUL_T = 5,
+	U8_STATE_COMBINING_MARK = 6
+} u8_normalization_states_t;
+
+/*
+ * The three vectors below are used to check that the bytes of a given
+ * UTF-8 character are valid and contain no malformed byte values.
+ *
+ * UTF-8 used to have a quite relaxed binary representation, but there
+ * were some security related issues, so the Unicode Consortium defined
+ * and announced the UTF-8 Corrigendum at Unicode 3.1 and then refined it
+ * one more time at Unicode 3.2. The following three tables are based on
+ * that.
+ */
+
+#define	U8_ILLEGAL_NEXT_BYTE_COMMON(c)	((c) < 0x80 || (c) > 0xBF)
+
+#define	I_				U8_ILLEGAL_CHAR	/* table shorthand */
+#define	O_				U8_OUT_OF_RANGE_CHAR	/* table shorthand */
+
+const int8_t u8_number_of_bytes[0x100] = {	/* indexed by first byte */
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+
+/*	80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF  */
+	I_, I_, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+
+/*	D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF  */
+	2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+
+/*	E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF  */
+	3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+
+/*	F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF  */
+	4,  4,  4,  4,  4,  O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_,
+};
+
+#undef	I_
+#undef	O_
+
+const uint8_t u8_valid_min_2nd_byte[0x100] = {	/* indexed by first byte */
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+/*	C0    C1    C2    C3    C4    C5    C6    C7    */
+	0,    0,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	C8    C9    CA    CB    CC    CD    CE    CF    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	D0    D1    D2    D3    D4    D5    D6    D7    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	D8    D9    DA    DB    DC    DD    DE    DF    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	E0    E1    E2    E3    E4    E5    E6    E7    */
+	0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	E8    E9    EA    EB    EC    ED    EE    EF    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	F0    F1    F2    F3    F4    F5    F6    F7    */
+	0x90, 0x80, 0x80, 0x80, 0x80, 0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,	/* F8 - FF */
+};
+
+const uint8_t u8_valid_max_2nd_byte[0x100] = {	/* indexed by first byte */
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+/*	C0    C1    C2    C3    C4    C5    C6    C7    */
+	0,    0,    0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	C8    C9    CA    CB    CC    CD    CE    CF    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	D0    D1    D2    D3    D4    D5    D6    D7    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	D8    D9    DA    DB    DC    DD    DE    DF    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	E0    E1    E2    E3    E4    E5    E6    E7    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	E8    E9    EA    EB    EC    ED    EE    EF    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
+/*	F0    F1    F2    F3    F4    F5    F6    F7    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,	/* F8 - FF */
+};
+
+
+/*
+ * The u8_validate() validates the given UTF-8 character string and
+ * calculates its byte length. It is quite similar to mblen(3C) except that
+ * it can also validate against a list of characters if required and it is
+ * specific to UTF-8 and Unicode.
+ *
+ * Arguments:
+ *	u8str	The bytes to validate. A NULL pointer yields 0 and leaves
+ *		*errnum untouched.
+ *	n	The number of bytes available at u8str.
+ *	list	A NULL-terminated array of null-terminated byte strings.
+ *		It is consulted only if U8_VALIDATE_CHECK_ADDITIONAL is set
+ *		in flag; a NULL list is then treated as an empty list.
+ *	flag	Any combination of U8_VALIDATE_ENTIRE,
+ *		U8_VALIDATE_CHECK_ADDITIONAL, and U8_VALIDATE_UCS2_RANGE.
+ *	errnum	Receives the error number whenever -1 is returned.
+ *
+ * Return value:
+ *	The number of bytes validated, which covers only the first character
+ *	unless U8_VALIDATE_ENTIRE is set, or -1 with *errnum set to:
+ *
+ *	EILSEQ	an illegal first byte or an illegal following byte,
+ *	ERANGE	an out of range first byte, or a character longer than
+ *		U8_MAX_BYTES_UCS2 bytes while U8_VALIDATE_UCS2_RANGE is set,
+ *	EINVAL	fewer bytes left than the first byte calls for,
+ *	EBADF	a character that is identical to an entry of list.
+ */
+int
+u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum)
+{
+	uchar_t *ib;
+	uchar_t *ibtail;
+	uchar_t **p;
+	uchar_t *s1;
+	uchar_t *s2;
+	uchar_t f;
+	int sz;
+	size_t i;
+	int ret_val;
+	boolean_t second;
+	boolean_t no_need_to_validate_entire;
+	boolean_t check_additional;
+	boolean_t validate_ucs2_range_only;
+
+	if (! u8str)
+		return (0);
+
+	ib = (uchar_t *)u8str;
+	ibtail = ib + n;
+
+	ret_val = 0;
+
+	no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE);
+	/*
+	 * Without a list there is nothing to check against, and walking
+	 * a NULL list would dereference a NULL pointer.
+	 */
+	check_additional = (flag & U8_VALIDATE_CHECK_ADDITIONAL) &&
+	    list != NULL;
+	validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE;
+
+	while (ib < ibtail) {
+		/*
+		 * The first byte of a UTF-8 character tells how many
+		 * bytes will follow for the character. If the first byte
+		 * is an illegal byte value or out of range value, we just
+		 * return -1 with an appropriate error number.
+		 */
+		sz = u8_number_of_bytes[*ib];
+		if (sz == U8_ILLEGAL_CHAR) {
+			*errnum = EILSEQ;
+			return (-1);
+		}
+
+		if (sz == U8_OUT_OF_RANGE_CHAR ||
+		    (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) {
+			*errnum = ERANGE;
+			return (-1);
+		}
+
+		/*
+		 * If we don't have enough bytes to check on, that's also
+		 * an error. As you can see, we give illegal byte sequence
+		 * checking higher priority than EINVAL cases.
+		 */
+		if ((ibtail - ib) < sz) {
+			*errnum = EINVAL;
+			return (-1);
+		}
+
+		if (sz == 1) {
+			ib++;
+			ret_val++;
+		} else {
+			/*
+			 * Check on the multi-byte UTF-8 character. For more
+			 * details on this, see comment added for the used
+			 * data structures at the beginning of the file.
+			 *
+			 * The second byte has bounds that depend on the
+			 * first byte; every later byte only has to pass
+			 * the common following-byte test.
+			 */
+			f = *ib++;
+			ret_val++;
+			second = B_TRUE;
+			for (i = 1; i < sz; i++) {
+				if (second) {
+					if (*ib < u8_valid_min_2nd_byte[f] ||
+					    *ib > u8_valid_max_2nd_byte[f]) {
+						*errnum = EILSEQ;
+						return (-1);
+					}
+					second = B_FALSE;
+				} else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) {
+					*errnum = EILSEQ;
+					return (-1);
+				}
+				ib++;
+				ret_val++;
+			}
+		}
+
+		/*
+		 * Compare the character just validated, i.e., the sz bytes
+		 * that end at ib, with each entry of the list. An entry
+		 * matches only if it has exactly the same bytes and ends
+		 * right after them.
+		 */
+		if (check_additional) {
+			for (p = (uchar_t **)list, i = 0; p[i]; i++) {
+				s1 = ib - sz;
+				s2 = p[i];
+				while (s1 < ib) {
+					if (*s1 != *s2 || *s2 == '\0')
+						break;
+					s1++;
+					s2++;
+				}
+
+				if (s1 >= ib && *s2 == '\0') {
+					*errnum = EBADF;
+					return (-1);
+				}
+			}
+		}
+
+		if (no_need_to_validate_entire)
+			break;
+	}
+
+	return (ret_val);
+}
+
+/*
+ * The do_case_conv() converts one multi-byte UTF-8 character, the sz bytes
+ * at s, into its uppercase or lowercase counterpart by walking the case
+ * mapping tables. The result is stored at u8s; if the tables have no
+ * mapping, u8s simply receives a copy of the input bytes. In either case
+ * the bytes at u8s are terminated with a null character on the assumption
+ * that the caller supplied enough room for that.
+ *
+ * These are simple case conversions, one character to one character, as
+ * specified in the Unicode data, and the mapped character may have
+ * a different byte size than the input character.
+ *
+ * The return value is the byte length of the character stored at u8s,
+ * not counting the terminating null byte.
+ */
+static size_t
+do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper)
+{
+	uint16_t k1 = 0;
+	uint16_t k2 = 0;
+	uint16_t k3 = 0;
+	uint16_t k4 = 0;
+	uint16_t tbl;
+	uint16_t base;
+	size_t from;
+	size_t to;
+	size_t n;
+
+	/*
+	 * Only sizes of 2, 3, and 4 are expected here. The bytes are
+	 * right-aligned into the four table keys, i.e., the last byte of
+	 * the character is always k4, and copied to u8s on the way.
+	 */
+	switch (sz) {
+	case 2:
+		k3 = u8s[0] = s[0];
+		k4 = u8s[1] = s[1];
+		break;
+	case 3:
+		k2 = u8s[0] = s[0];
+		k3 = u8s[1] = s[1];
+		k4 = u8s[2] = s[2];
+		break;
+	case 4:
+		k1 = u8s[0] = s[0];
+		k2 = u8s[1] = s[1];
+		k3 = u8s[2] = s[2];
+		k4 = u8s[3] = s[3];
+		break;
+	default:
+		/* Not expected; fall back to an ASCII-only conversion. */
+		if (is_it_toupper)
+			*u8s = U8_ASCII_TOUPPER(*s);
+		else
+			*u8s = U8_ASCII_TOLOWER(*s);
+		u8s[1] = '\0';
+
+		return (1);
+	}
+	u8s[sz] = '\0';
+
+	/* The first two levels are shared by both directions. */
+	k1 = u8_common_b1_tbl[uv][k1];
+	if (k1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	k2 = u8_case_common_b2_tbl[uv][k1][k2];
+	if (k2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	/* Fetch the [from, to) range of the mapping in the final table. */
+	if (is_it_toupper) {
+		tbl = u8_toupper_b3_tbl[uv][k2][k3].tbl_id;
+		if (tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		from = u8_toupper_b4_tbl[uv][tbl][k4];
+		to = u8_toupper_b4_tbl[uv][tbl][k4 + 1];
+	} else {
+		tbl = u8_tolower_b3_tbl[uv][k2][k3].tbl_id;
+		if (tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		from = u8_tolower_b4_tbl[uv][tbl][k4];
+		to = u8_tolower_b4_tbl[uv][tbl][k4 + 1];
+	}
+
+	/* An empty or oversized range: no match or a broken table. */
+	if (from >= to || (to - from) > U8_MB_CUR_MAX)
+		return ((size_t)sz);
+
+	/* Overwrite the copied input with the mapped bytes. */
+	n = 0;
+	if (is_it_toupper) {
+		base = u8_toupper_b3_tbl[uv][k2][k3].base;
+
+		while (from < to) {
+			u8s[n] = u8_toupper_final_tbl[uv][base + from];
+			n++;
+			from++;
+		}
+	} else {
+		base = u8_tolower_b3_tbl[uv][k2][k3].base;
+
+		while (from < to) {
+			u8s[n] = u8_tolower_final_tbl[uv][base + from];
+			n++;
+			from++;
+		}
+	}
+
+	/* Nothing copied means there was no corresponding character. */
+	if (n == 0)
+		return ((size_t)sz);
+
+	u8s[n] = '\0';
+
+	return (n);
+}
+
+/*
+ * The do_case_compare() function walks the two input strings, s1 of n1
+ * bytes and s2 of n2 bytes, one character at a time, case-converts each
+ * character if applicable, and returns the comparison result in the same
+ * manner as strcmp().
+ *
+ * Illegal start bytes and incomplete trailing characters do not stop the
+ * comparison; they set *errnum to EILSEQ or EINVAL, respectively, and the
+ * bytes that are available are compared as they are.
+ *
+ * Since most text data are, empirically, 7-bit ASCII characters, single
+ * byte characters are converted and compared right here without going
+ * through the mapping tables.
+ */
+static int
+do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1,
+	size_t n2, boolean_t is_it_toupper, int *errnum)
+{
+	uchar_t ch1[U8_MB_CUR_MAX + 1];
+	uchar_t ch2[U8_MB_CUR_MAX + 1];
+	size_t pos1 = 0;
+	size_t pos2 = 0;
+	size_t k;
+	int len1;
+	int len2;
+	int diff;
+
+	while (pos1 < n1 && pos2 < n2) {
+		/*
+		 * Fetch the next character of s1 into ch1. An illegal
+		 * start byte is reported and then treated as a single
+		 * byte character.
+		 */
+		len1 = u8_number_of_bytes[*s1];
+		if (len1 < 0) {
+			*errnum = EILSEQ;
+			len1 = 1;
+		}
+
+		if (len1 == 1) {
+			/* The quick path for single byte characters. */
+			if (is_it_toupper)
+				ch1[0] = U8_ASCII_TOUPPER(*s1);
+			else
+				ch1[0] = U8_ASCII_TOLOWER(*s1);
+			s1++;
+			ch1[1] = '\0';
+		} else if ((pos1 + len1) > n1) {
+			/* Truncated character: use the bytes we do have. */
+			*errnum = EINVAL;
+			k = 0;
+			while ((pos1 + k) < n1)
+				ch1[k++] = *s1++;
+			ch1[k] = '\0';
+		} else {
+			/* A complete character: map it, or copy it as is. */
+			(void) do_case_conv(uv, ch1, s1, len1, is_it_toupper);
+			s1 += len1;
+		}
+
+		/* And the next character of s2 into ch2, likewise. */
+		len2 = u8_number_of_bytes[*s2];
+		if (len2 < 0) {
+			*errnum = EILSEQ;
+			len2 = 1;
+		}
+
+		if (len2 == 1) {
+			if (is_it_toupper)
+				ch2[0] = U8_ASCII_TOUPPER(*s2);
+			else
+				ch2[0] = U8_ASCII_TOLOWER(*s2);
+			s2++;
+			ch2[1] = '\0';
+		} else if ((pos2 + len2) > n2) {
+			*errnum = EINVAL;
+			k = 0;
+			while ((pos2 + k) < n2)
+				ch2[k++] = *s2++;
+			ch2[k] = '\0';
+		} else {
+			(void) do_case_conv(uv, ch2, s2, len2, is_it_toupper);
+			s2 += len2;
+		}
+
+		/*
+		 * Two single byte characters are compared directly;
+		 * anything else is compared as null-terminated bytes.
+		 */
+		if (len1 == 1 && len2 == 1) {
+			if (*ch1 != *ch2)
+				return ((*ch1 > *ch2) ? 1 : -1);
+		} else {
+			diff = strcmp((const char *)ch1, (const char *)ch2);
+			if (diff != 0)
+				return (diff);
+		}
+
+		/* Equal so far; advance by the original byte lengths. */
+		pos1 += len1;
+		pos2 += len2;
+	}
+
+	/*
+	 * At least one of the strings is exhausted. A string that still
+	 * has bytes left is the greater one; if neither has, they are
+	 * equal.
+	 */
+	if (pos1 < n1)
+		return (1);
+
+	return ((pos2 < n2) ? -1 : 0);
+}
+
+/*
+ * The combining_class() function looks up the Unicode combining class
+ * value of the character made of the sz bytes at s, using the tables
+ * selected by uv. A return value of 0 means the character is a Starter.
+ * Single byte characters, sizes bigger than four, and characters with no
+ * table entry, which is where any illegal UTF-8 character ends up, are
+ * all reported as Starters.
+ */
+static uchar_t
+combining_class(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t k1 = 0;
+	uint16_t k2 = 0;
+	uint16_t k3 = 0;
+	uint16_t k4 = 0;
+
+	/* Right-align the bytes so that the last byte is always k4. */
+	switch (sz) {
+	case 0:
+		/* No byte to load; the lookup runs with all keys zero. */
+		break;
+	case 2:
+		k3 = s[0];
+		k4 = s[1];
+		break;
+	case 3:
+		k2 = s[0];
+		k3 = s[1];
+		k4 = s[2];
+		break;
+	case 4:
+		k1 = s[0];
+		k2 = s[1];
+		k3 = s[2];
+		k4 = s[3];
+		break;
+	default:
+		/* A single byte, or more than four bytes. */
+		return (0);
+	}
+
+	/* Each level supplies the table id for the next level. */
+	k1 = u8_common_b1_tbl[uv][k1];
+	if (k1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	k2 = u8_combining_class_b2_tbl[uv][k1][k2];
+	if (k2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	k3 = u8_combining_class_b3_tbl[uv][k2][k3];
+	if (k3 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	return (u8_combining_class_b4_tbl[uv][k3][k4]);
+}
+
+/*
+ * The do_decomp() function finds a matching decomposition, if any, for
+ * the character made of the sz bytes at s and stores it at u8s. If there
+ * is no match, the input bytes are copied to u8s unchanged. The function
+ * also recognizes a Hangul syllable and decomposes it arithmetically into
+ * its Jamos instead of using the tables.
+ *
+ * To save time, a single byte 7-bit ASCII character should be handled by
+ * the caller.
+ *
+ * The uv selects the set of tables to use. If canonical_decomposition is
+ * true, only canonical mappings are used; otherwise the compatibility
+ * mappings are used. The u8s must have room for the longest mapping plus
+ * a null byte; the Hangul path alone stores up to ten bytes. The
+ * collect_a_seq() passes the same buffer as both u8s and s.
+ *
+ * The function returns the number of bytes stored at u8s, not counting
+ * the terminating null byte that is always appended. It also updates
+ * *state: a Hangul syllable or Jamo yields one of the U8_STATE_HANGUL_*
+ * values, where a vowel or trailing consonant Jamo also takes the incoming
+ * *state into account, and anything else yields U8_STATE_START.
+ */
+static size_t
+do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
+	boolean_t canonical_decomposition, u8_normalization_states_t *state)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+	size_t i;
+	uint32_t u1;
+
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+		u8s[2] = '\0';
+	} else if (sz == 3) {
+		/* Convert it to a Unicode scalar value. */
+		U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]);
+
+		/*
+		 * If this is a Hangul syllable, we decompose it into
+		 * a leading consonant, a vowel, and an optional trailing
+		 * consonant and then return. Here b1, b2, and b3 hold
+		 * the Jamo values, not table indices.
+		 */
+		if (U8_HANGUL_SYLLABLE(u1)) {
+			u1 -= U8_HANGUL_SYL_FIRST;
+
+			b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT;
+			b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT)
+			    / U8_HANGUL_T_COUNT;
+			b3 = u1 % U8_HANGUL_T_COUNT;
+
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1);
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2);
+			if (b3) {
+				b3 += U8_HANGUL_JAMO_T_FIRST;
+				U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3);
+
+				u8s[9] = '\0';
+				*state = U8_STATE_HANGUL_LVT;
+				return (9);
+			}
+
+			u8s[6] = '\0';
+			*state = U8_STATE_HANGUL_LV;
+			return (6);
+		}
+
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+		u8s[3] = '\0';
+
+		/*
+		 * If this is a Hangul Jamo, we know there is nothing
+		 * further that we can decompose. The bytes were copied
+		 * already; only the state needs to be updated.
+		 */
+		if (U8_HANGUL_JAMO_L(u1)) {
+			*state = U8_STATE_HANGUL_L;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_V(u1)) {
+			if (*state == U8_STATE_HANGUL_L)
+				*state = U8_STATE_HANGUL_LV;
+			else
+				*state = U8_STATE_HANGUL_V;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_T(u1)) {
+			if (*state == U8_STATE_HANGUL_LV)
+				*state = U8_STATE_HANGUL_LVT;
+			else
+				*state = U8_STATE_HANGUL_T;
+			return (3);
+		}
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+		u8s[4] = '\0';
+	} else {
+		/*
+		 * This is a fallback and should not happen if the function
+		 * was called properly.
+		 */
+		u8s[0] = s[0];
+		u8s[1] = '\0';
+		*state = U8_STATE_START;
+		return (1);
+	}
+
+	/*
+	 * At this point, this routine does not know what it would get.
+	 * The caller should sort it out if the state isn't a Hangul one.
+	 */
+	*state = U8_STATE_START;
+
+	/*
+	 * Try to find matching decomposition mapping byte sequence. The
+	 * input bytes are in u8s already, so returning sz at any level
+	 * that has no entry hands back the character unchanged.
+	 */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_decomp_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id;
+	if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	/*
+	 * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR
+	 * which is 0x8000, this means we couldn't fit the mappings into
+	 * the cardinality of a unsigned byte. The entries at b4 and b4 + 1
+	 * bound the mapping as [start_id, end_id) in the final table.
+	 */
+	if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
+		b3_tbl -= U8_16BIT_TABLE_INDICATOR;
+		start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
+	} else {
+		start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1];
+	}
+
+	/* This also means there wasn't any matching decomposition. */
+	if (start_id >= end_id)
+		return ((size_t)sz);
+
+	/*
+	 * The final table for decomposition mappings has three types of
+	 * byte sequences depending on whether a mapping is for compatibility
+	 * decomposition, canonical decomposition, or both like the following:
+	 *
+	 * (1) Compatibility decomposition mappings:
+	 *
+	 *	+---+---+-...-+---+
+	 *	| B0| B1| ... | Bm|
+	 *	+---+---+-...-+---+
+	 *
+	 *	The first byte, B0, is always less than 0xF5 (U8_DECOMP_BOTH).
+	 *
+	 * (2) Canonical decomposition mappings:
+	 *
+	 *	+---+---+---+-...-+---+
+	 *	| T | b0| b1| ... | bn|
+	 *	+---+---+---+-...-+---+
+	 *
+	 *	where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL).
+	 *
+	 * (3) Both mappings:
+	 *
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *	| T | D | b0| b1| ... | bn| B0| B1| ... | Bm|
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *
+	 *	where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement
+	 *	byte, b0 to bn are canonical mapping bytes and B0 to Bm are
+	 *	compatibility mapping bytes.
+	 *
+	 * Note that compatibility decomposition means doing recursive
+	 * decompositions using both compatibility decomposition mappings and
+	 * canonical decomposition mappings. On the other hand, canonical
+	 * decomposition means doing recursive decompositions using only
+	 * canonical decomposition mappings. Since the table we have has gone
+	 * through the recursions already, we do not need to do so during
+	 * runtime, i.e., the table has been completely flattened out
+	 * already.
+	 */
+
+	b3_base = u8_decomp_b3_tbl[uv][b2][b3].base;
+
+	/* Get the type, T, of the byte sequence; b1 is reused to hold it. */
+	b1 = u8_decomp_final_tbl[uv][b3_base + start_id];
+
+	/*
+	 * If necessary, adjust start_id, end_id, or both. Note that if
+	 * this is compatibility decomposition mapping, there is no
+	 * adjustment.
+	 *
+	 * For a mapping of the type (3), D is counted from its own
+	 * position: the position of D plus D is where the canonical bytes
+	 * end and the compatibility bytes begin.
+	 */
+	if (canonical_decomposition) {
+		/* Is the mapping only for compatibility decomposition? */
+		if (b1 < U8_DECOMP_BOTH)
+			return ((size_t)sz);
+
+		start_id++;
+
+		if (b1 == U8_DECOMP_BOTH) {
+			end_id = start_id +
+			    u8_decomp_final_tbl[uv][b3_base + start_id];
+			start_id++;
+		}
+	} else {
+		/*
+		 * Unless this is a compatibility decomposition mapping,
+		 * we adjust the start_id.
+		 */
+		if (b1 == U8_DECOMP_BOTH) {
+			start_id++;
+			start_id += u8_decomp_final_tbl[uv][b3_base + start_id];
+		} else if (b1 == U8_DECOMP_CANONICAL) {
+			start_id++;
+		}
+	}
+
+	for (i = 0; start_id < end_id; start_id++)
+		u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id];
+	u8s[i] = '\0';	/* NOTE(review): no bound on i here; verify u8s size */
+
+	return (i);
+}
+
+/*
+ * The find_composition_start() function takes the sz bytes of a character
+ * at s and looks for its composition mappings in the tables selected by
+ * uv. It returns the address of the mappings in the composition final
+ * table, the layout of which is explained in the do_composition(), or
+ * NULL if the character has no composition mapping.
+ */
+static uchar_t *
+find_composition_start(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t k1 = 0;
+	uint16_t k2 = 0;
+	uint16_t k3 = 0;
+	uint16_t k4 = 0;
+	uint16_t tbl;
+	uint16_t base;
+	size_t lo;
+	size_t hi;
+
+	/* Right-align the bytes so that the last byte is always k4. */
+	switch (sz) {
+	case 1:
+		k4 = s[0];
+		break;
+	case 2:
+		k3 = s[0];
+		k4 = s[1];
+		break;
+	case 3:
+		k2 = s[0];
+		k3 = s[1];
+		k4 = s[2];
+		break;
+	case 4:
+		k1 = s[0];
+		k2 = s[1];
+		k3 = s[2];
+		k4 = s[3];
+		break;
+	default:
+		/* Not expected if the function was called properly. */
+		return (NULL);
+	}
+
+	k1 = u8_composition_b1_tbl[uv][k1];
+	if (k1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	k2 = u8_composition_b2_tbl[uv][k1][k2];
+	if (k2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	tbl = u8_composition_b3_tbl[uv][k2][k3].tbl_id;
+	if (tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	/*
+	 * A table id at or above U8_16BIT_TABLE_INDICATOR refers to the
+	 * 16-bit variant of the fourth level table. Either way, the
+	 * entries at k4 and k4 + 1 bound the mappings as [lo, hi).
+	 */
+	if (tbl < U8_16BIT_TABLE_INDICATOR) {
+		lo = u8_composition_b4_tbl[uv][tbl][k4];
+		hi = u8_composition_b4_tbl[uv][tbl][k4 + 1];
+	} else {
+		tbl -= U8_16BIT_TABLE_INDICATOR;
+		lo = u8_composition_b4_16bit_tbl[uv][tbl][k4];
+		hi = u8_composition_b4_16bit_tbl[uv][tbl][k4 + 1];
+	}
+
+	/* An empty range means there is no mapping. */
+	if (lo >= hi)
+		return (NULL);
+
+	base = u8_composition_b3_tbl[uv][k2][k3].base;
+
+	return ((uchar_t *)&(u8_composition_final_tbl[uv][base + lo]));
+}
+
+/*
+ * The blocked() function tells whether the character at the index 'last'
+ * is blocked by the characters in front of it in this sequence. It is
+ * blocked if any character at the indices 1 to last - 1 is a Starter or
+ * has a combining class value that is bigger than or equal to that of
+ * the character at 'last'. The character at the index 0 is not examined.
+ */
+static boolean_t
+blocked(uchar_t *comb_class, size_t last)
+{
+	uchar_t mine;
+	uchar_t theirs;
+	size_t prev;
+
+	mine = comb_class[last];
+	prev = 1;
+	while (prev < last) {
+		theirs = comb_class[prev++];
+		if (theirs >= mine || theirs == U8_COMBINING_CLASS_STARTER)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * The do_composition() reads the character string pointed by 's' and
+ * do necessary canonical composition and then copy over the result back to
+ * the 's'.
+ *
+ * The input argument 's' cannot contain more than 32 characters.
+ *
+ * For the character at the index i of the sequence, comb_class[i] is its
+ * combining class value, start[i] is its byte offset from 's', and disp[i]
+ * is its byte length. The 'last' is the index of the last character. The
+ * uv selects the set of tables to use.
+ *
+ * The '*os' points at source bytes that have not been collected into 's'
+ * yet and 'oslast' at the end of them. If the last character is a Starter,
+ * characters at '*os' that compose with the tail of the result are consumed
+ * as well and '*os' is moved past them.
+ *
+ * The return value is the byte length of the result at 's' excluding the
+ * terminating null byte.
+ */
+static size_t
+do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
+	uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
+{
+	uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t tc[U8_MB_CUR_MAX];
+	uint8_t saved_marks[U8_MAX_CHARS_A_SEQ];
+	size_t saved_marks_count;
+	uchar_t *p;
+	uchar_t *saved_p;
+	uchar_t *q;
+	size_t i;
+	size_t saved_i;
+	size_t j;
+	size_t k;
+	size_t l;
+	size_t C;
+	size_t saved_l;
+	size_t size;
+	uint32_t u1;
+	uint32_t u2;
+	boolean_t match_not_found = B_TRUE;
+
+	/*
+	 * This should never happen unless the callers are doing some strange
+	 * and unexpected things.
+	 *
+	 * The "last" is the index pointing to the last character not last + 1.
+	 * An index that is too big is clamped to the upper limit.
+	 */
+	if (last >= U8_MAX_CHARS_A_SEQ)
+		last = U8_UPPER_LIMIT_IN_A_SEQ;
+
+	for (i = l = 0; i <= last; i++) {
+		/*
+		 * The last or any non-Starters at the beginning, we don't
+		 * have any chance to do composition and so we just copy them
+		 * to the temporary buffer. The 'l' is the number of bytes
+		 * stored at the temporary buffer so far.
+		 */
+		if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) {
+SAVE_THE_CHAR:
+			p = s + start[i];
+			size = disp[i];
+			for (k = 0; k < size; k++)
+				t[l++] = *p++;
+			continue;
+		}
+
+		/*
+		 * If this could be a start of Hangul Jamos, then, we try to
+		 * conjoin them. We get here only if i < last and thus there
+		 * is at least one more character after this one.
+		 */
+		if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) {
+			U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]],
+			    s[start[i] + 1], s[start[i] + 2]);
+			U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3],
+			    s[start[i] + 4], s[start[i] + 5]);
+
+			if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) {
+				u1 -= U8_HANGUL_JAMO_L_FIRST;
+				u2 -= U8_HANGUL_JAMO_V_FIRST;
+				u1 = U8_HANGUL_SYL_FIRST +
+				    (u1 * U8_HANGUL_V_COUNT + u2) *
+				    U8_HANGUL_T_COUNT;
+
+				i += 2;
+				if (i <= last) {
+					U8_PUT_3BYTES_INTO_UTF32(u2,
+					    s[start[i]], s[start[i] + 1],
+					    s[start[i] + 2]);
+
+					if (U8_HANGUL_JAMO_T(u2)) {
+						u1 += u2 -
+						    U8_HANGUL_JAMO_T_FIRST;
+						i++;
+					}
+				}
+
+				U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1);
+				i--;	/* the for() will do i++ again */
+				l += 3;
+				continue;
+			}
+		}
+
+		/*
+		 * Let's then find out if this Starter has composition
+		 * mapping.
+		 */
+		p = find_composition_start(uv, s + start[i], disp[i]);
+		if (p == NULL)
+			goto SAVE_THE_CHAR;
+
+		/*
+		 * We have a Starter with composition mapping and the next
+		 * character is a non-Starter. Let's try to find out if
+		 * we can do composition.
+		 *
+		 * The saved_i and saved_l remember where this Starter is in
+		 * the sequence and where its output begins at the temporary
+		 * buffer, respectively.
+		 */
+
+		saved_p = p;
+		saved_i = i;
+		saved_l = l;
+		saved_marks_count = 0;
+
+TRY_THE_NEXT_MARK:
+		q = s + start[++i];
+		size = disp[i];
+
+		/*
+		 * The next for() loop compares the non-Starter pointed by
+		 * 'q' with the possible (joinable) characters pointed by 'p'.
+		 *
+		 * The composition final table entry pointed by the 'p'
+		 * looks like the following:
+		 *
+		 * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+		 * | C | b0| b1| ... | bn| F | B0| B1| ... | Bm| F |
+		 * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+		 *
+		 * where C is the count byte indicating the number of
+		 * mapping pairs where each pair would be look like
+		 * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second
+		 * character of a canonical decomposition and the B0-Bm are
+		 * the bytes of a matching composite character. The F is
+		 * a filler byte after each character as the separator.
+		 */
+
+		match_not_found = B_TRUE;
+
+		for (C = *p++; C > 0; C--) {
+			for (k = 0; k < size; p++, k++)
+				if (*p != q[k])
+					break;
+
+			/*
+			 * Have we found it? If so, the composite replaces
+			 * whatever was stored from saved_l on.
+			 */
+			if (k >= size && *p == U8_TBL_ELEMENT_FILLER) {
+				match_not_found = B_FALSE;
+
+				l = saved_l;
+
+				while (*++p != U8_TBL_ELEMENT_FILLER)
+					t[l++] = *p;
+
+				break;
+			}
+
+			/*
+			 * We didn't find; skip to the next pair. That is,
+			 * move to the filler of b0-bn unless we are at it
+			 * already, move to the filler of B0-Bm, and then
+			 * step over that filler.
+			 */
+			if (*p != U8_TBL_ELEMENT_FILLER)
+				while (*++p != U8_TBL_ELEMENT_FILLER)
+					;
+			while (*++p != U8_TBL_ELEMENT_FILLER)
+				;
+			p++;
+		}
+
+		/*
+		 * If there was no match, we will need to save the combining
+		 * mark for later appending. After that, if the next one
+		 * is a non-Starter and not blocked, then, we try once
+		 * again to do composition with the next non-Starter.
+		 *
+		 * If there was no match and this was a Starter, then,
+		 * this is a new start.
+		 *
+		 * If there was a match and a composition done and we have
+		 * more to check on, then, we retrieve a new composition final
+		 * table entry for the composite and then try to do the
+		 * composition again.
+		 */
+
+		if (match_not_found) {
+			if (comb_class[i] == U8_COMBINING_CLASS_STARTER) {
+				i--;
+				goto SAVE_THE_CHAR;
+			}
+
+			saved_marks[saved_marks_count++] = i;
+		}
+
+		if (saved_l == l) {
+			while (i < last) {
+				if (blocked(comb_class, i + 1))
+					saved_marks[saved_marks_count++] = ++i;
+				else
+					break;
+			}
+			if (i < last) {
+				p = saved_p;
+				goto TRY_THE_NEXT_MARK;
+			}
+		} else if (i < last) {
+			p = find_composition_start(uv, t + saved_l,
+			    l - saved_l);
+			if (p != NULL) {
+				saved_p = p;
+				goto TRY_THE_NEXT_MARK;
+			}
+		}
+
+		/*
+		 * There is no more composition possible.
+		 *
+		 * If there was no composition what so ever then we copy
+		 * over the original Starter and then append any non-Starters
+		 * remaining at the target string sequentially after that.
+		 */
+
+		if (saved_l == l) {
+			p = s + start[saved_i];
+			size = disp[saved_i];
+			for (j = 0; j < size; j++)
+				t[l++] = *p++;
+		}
+
+		for (k = 0; k < saved_marks_count; k++) {
+			p = s + start[saved_marks[k]];
+			size = disp[saved_marks[k]];
+			for (j = 0; j < size; j++)
+				t[l++] = *p++;
+		}
+	}
+
+	/*
+	 * If the last character is a Starter and if we have a character
+	 * (possibly another Starter) that can be turned into a composite,
+	 * we do so and we do so until there is no more of composition
+	 * possible.
+	 *
+	 * The candidates are read from '*os' one character at a time and
+	 * the '*os' is left at the first byte that was not consumed.
+	 */
+	if (comb_class[last] == U8_COMBINING_CLASS_STARTER) {
+		p = *os;
+		saved_l = l - disp[last];
+
+		while (p < oslast) {
+			size = u8_number_of_bytes[*p];
+			if (size <= 1 || (p + size) > oslast)
+				break;
+
+			saved_p = p;
+
+			for (i = 0; i < size; i++)
+				tc[i] = *p++;
+
+			q = find_composition_start(uv, t + saved_l,
+			    l - saved_l);
+			if (q == NULL) {
+				p = saved_p;
+				break;
+			}
+
+			match_not_found = B_TRUE;
+
+			for (C = *q++; C > 0; C--) {
+				for (k = 0; k < size; q++, k++)
+					if (*q != tc[k])
+						break;
+
+				if (k >= size && *q == U8_TBL_ELEMENT_FILLER) {
+					match_not_found = B_FALSE;
+
+					l = saved_l;
+
+					while (*++q != U8_TBL_ELEMENT_FILLER) {
+						/*
+						 * This is practically
+						 * impossible but we don't
+						 * want to take any chances.
+						 */
+						if (l >=
+						    U8_STREAM_SAFE_TEXT_MAX) {
+							p = saved_p;
+							goto SAFE_RETURN;
+						}
+						t[l++] = *q;
+					}
+
+					break;
+				}
+
+				if (*q != U8_TBL_ELEMENT_FILLER)
+					while (*++q != U8_TBL_ELEMENT_FILLER)
+						;
+				while (*++q != U8_TBL_ELEMENT_FILLER)
+					;
+				q++;
+			}
+
+			if (match_not_found) {
+				p = saved_p;
+				break;
+			}
+		}
+SAFE_RETURN:
+		*os = p;
+	}
+
+	/*
+	 * Now we copy over the temporary string to the target string.
+	 * Since composition always reduces the number of characters or
+	 * the number of characters stay, we don't need to worry about
+	 * the buffer overflow here.
+	 */
+	for (i = 0; i < l; i++)
+		s[i] = t[i];
+	s[l] = '\0';
+
+	return (l);
+}
+
+/*
+ * The collect_a_seq() function checks on the given string s, collect
+ * a sequence of characters at u8s, and return the sequence. While it collects
+ * a sequence, it also applies case conversion, canonical or compatibility
+ * decomposition, canonical decomposition, or some or all of them and
+ * in that order.
+ *
+ * The collected sequence cannot be bigger than 32 characters since if
+ * it is having more than 31 characters, the sequence will be terminated
+ * with a U+034F COMBINING GRAPHEME JOINER (CGJ) character and turned into
+ * a Stream-Safe Text. The collected sequence is always terminated with
+ * a null byte and the return value is the byte length of the sequence
+ * including 0. The return value does not include the terminating
+ * null byte.
+ */
+static size_t
+collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
+	boolean_t is_it_toupper,
+	boolean_t is_it_tolower,
+	boolean_t canonical_decomposition,
+	boolean_t compatibility_decomposition,
+	boolean_t canonical_composition,
+	int *errnum, u8_normalization_states_t *state)
+{
+	uchar_t *s;
+	int sz;
+	int saved_sz;
+	size_t i;
+	size_t j;
+	size_t k;
+	size_t l;
+	uchar_t comb_class[U8_MAX_CHARS_A_SEQ];
+	uchar_t disp[U8_MAX_CHARS_A_SEQ];
+	uchar_t start[U8_MAX_CHARS_A_SEQ];
+	uchar_t u8t[U8_MB_CUR_MAX];
+	uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t tc;
+	size_t last;
+	size_t saved_last;
+	uint32_t u1;
+
+	/*
+	 * Save the source string pointer which we will return a changed
+	 * pointer if we do processing.
+	 */
+	s = *source;
+
+	/*
+	 * The following is a fallback for just in case callers are not
+	 * checking the string boundaries before the calling.
+	 */
+	if (s >= slast) {
+		u8s[0] = '\0';
+
+		return (0);
+	}
+
+	/*
+	 * As the first thing, let's collect a character and do case
+	 * conversion if necessary.
+	 */
+
+	sz = u8_number_of_bytes[*s];
+
+	if (sz < 0) {
+		*errnum = EILSEQ;
+
+		u8s[0] = *s++;
+		u8s[1] = '\0';
+
+		*source = s;
+
+		return (1);
+	}
+
+	if (sz == 1) {
+		if (is_it_toupper)
+			u8s[0] = U8_ASCII_TOUPPER(*s);
+		else if (is_it_tolower)
+			u8s[0] = U8_ASCII_TOLOWER(*s);
+		else
+			u8s[0] = *s;
+		s++;
+		u8s[1] = '\0';
+	} else if ((s + sz) > slast) {
+		*errnum = EINVAL;
+
+		for (i = 0; s < slast; )
+			u8s[i++] = *s++;
+		u8s[i] = '\0';
+
+		*source = s;
+
+		return (i);
+	} else {
+		if (is_it_toupper || is_it_tolower) {
+			i = do_case_conv(uv, u8s, s, sz, is_it_toupper);
+			s += sz;
+			sz = i;
+		} else {
+			for (i = 0; i < sz; )
+				u8s[i++] = *s++;
+			u8s[i] = '\0';
+		}
+	}
+
+	/*
+	 * And then canonical/compatibility decomposition followed by
+	 * an optional canonical composition. Please be noted that
+	 * canonical composition is done only when a decomposition is
+	 * done.
+	 */
+	if (canonical_decomposition || compatibility_decomposition) {
+		if (sz == 1) {
+			*state = U8_STATE_START;
+
+			saved_sz = 1;
+
+			comb_class[0] = 0;
+			start[0] = 0;
+			disp[0] = 1;
+
+			last = 1;
+		} else {
+			saved_sz = do_decomp(uv, u8s, u8s, sz,
+			    canonical_decomposition, state);
+
+			last = 0;
+
+			for (i = 0; i < saved_sz; ) {
+				sz = u8_number_of_bytes[u8s[i]];
+
+				comb_class[last] = combining_class(uv,
+				    u8s + i, sz);
+				start[last] = i;
+				disp[last] = sz;
+
+				last++;
+				i += sz;
+			}
+
+			/*
+			 * Decomposition yields various Hangul related
+			 * states but not on combining marks. We need to
+			 * find out at here by checking on the last
+			 * character.
+			 */
+			if (*state == U8_STATE_START) {
+				if (comb_class[last - 1])
+					*state = U8_STATE_COMBINING_MARK;
+			}
+		}
+
+		saved_last = last;
+
+		while (s < slast) {
+			sz = u8_number_of_bytes[*s];
+
+			/*
+			 * If this is an illegal character, an incomplete
+			 * character, or an 7-bit ASCII Starter character,
+			 * then we have collected a sequence; break and let
+			 * the next call deal with the two cases.
+			 *
+			 * Note that this is okay only if you are using this
+			 * function with a fixed length string, not on
+			 * a buffer with multiple calls of one chunk at a time.
+			 */
+			if (sz <= 1) {
+				break;
+			} else if ((s + sz) > slast) {
+				break;
+			} else {
+				/*
+				 * If the previous character was a Hangul Jamo
+				 * and this character is a Hangul Jamo that
+				 * can be conjoined, we collect the Jamo.
+				 */
+				if (*s == U8_HANGUL_JAMO_1ST_BYTE) {
+					U8_PUT_3BYTES_INTO_UTF32(u1,
+					    *s, *(s + 1), *(s + 2));
+
+					if (U8_HANGUL_COMPOSABLE_L_V(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LV;
+						goto COLLECT_A_HANGUL;
+					}
+
+					if (U8_HANGUL_COMPOSABLE_LV_T(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LVT;
+						goto COLLECT_A_HANGUL;
+					}
+				}
+
+				/*
+				 * Regardless of whatever it was, if this is
+				 * a Starter, we don't collect the character
+				 * since that's a new start and we will deal
+				 * with it at the next time.
+				 */
+				i = combining_class(uv, s, sz);
+				if (i == U8_COMBINING_CLASS_STARTER)
+					break;
+
+				/*
+				 * We know the current character is a combining
+				 * mark. If the previous character wasn't
+				 * a Starter (not Hangul) or a combining mark,
+				 * then, we don't collect this combining mark.
+				 */
+				if (*state != U8_STATE_START &&
+				    *state != U8_STATE_COMBINING_MARK)
+					break;
+
+				*state = U8_STATE_COMBINING_MARK;
+COLLECT_A_HANGUL:
+				/*
+				 * If we collected a Starter and combining
+				 * marks up to 30, i.e., total 31 characters,
+				 * then, we terminate this degenerately long
+				 * combining sequence with a U+034F COMBINING
+				 * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in
+				 * UTF-8 and turn this into a Stream-Safe
+				 * Text. This will be extremely rare but
+				 * possible.
+				 *
+				 * The following will also guarantee that
+				 * we are not writing more than 32 characters
+				 * plus a NULL at u8s[].
+				 */
+				if (last >= U8_UPPER_LIMIT_IN_A_SEQ) {
+TURN_STREAM_SAFE:
+					*state = U8_STATE_START;
+					comb_class[last] = 0;
+					start[last] = saved_sz;
+					disp[last] = 2;
+					last++;
+
+					u8s[saved_sz++] = 0xCD;
+					u8s[saved_sz++] = 0x8F;
+
+					break;
+				}
+
+				/*
+				 * Some combining marks also do decompose into
+				 * another combining mark or marks.
+				 */
+				if (*state == U8_STATE_COMBINING_MARK) {
+					k = last;
+					l = sz;
+					i = do_decomp(uv, uts, s, sz,
+					    canonical_decomposition, state);
+					for (j = 0; j < i; ) {
+						sz = u8_number_of_bytes[uts[j]];
+
+						comb_class[last] =
+						    combining_class(uv,
+						    uts + j, sz);
+						start[last] = saved_sz + j;
+						disp[last] = sz;
+
+						last++;
+						if (last >=
+						    U8_UPPER_LIMIT_IN_A_SEQ) {
+							last = k;
+							goto TURN_STREAM_SAFE;
+						}
+						j += sz;
+					}
+
+					*state = U8_STATE_COMBINING_MARK;
+					sz = i;
+					s += l;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = uts[i];
+				} else {
+					comb_class[last] = i;
+					start[last] = saved_sz;
+					disp[last] = sz;
+					last++;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = *s++;
+				}
+
+				/*
+				 * If this is U+0345 COMBINING GREEK
+				 * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a.,
+				 * iota subscript, and need to be converted to
+				 * uppercase letter, convert it to U+0399 GREEK
+				 * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8),
+				 * i.e., convert to capital adscript form as
+				 * specified in the Unicode standard.
+				 *
+				 * This is the only special case of (ambiguous)
+				 * case conversion at combining marks and
+				 * probably the standard will never have
+				 * anything similar like this in future.
+				 */
+				if (is_it_toupper && sz >= 2 &&
+				    u8s[saved_sz - 2] == 0xCD &&
+				    u8s[saved_sz - 1] == 0x85) {
+					u8s[saved_sz - 2] = 0xCE;
+					u8s[saved_sz - 1] = 0x99;
+				}
+			}
+		}
+
+		/*
+		 * Let's try to ensure a canonical ordering for the collected
+		 * combining marks. We do this only if we have collected
+		 * at least one more non-Starter. (The decomposition mapping
+		 * data tables have fully (and recursively) expanded and
+		 * canonically ordered decompositions.)
+		 *
+		 * The U8_SWAP_COMB_MARKS() convenience macro has some
+		 * assumptions and we are meeting the assumptions.
+		 */
+		last--;
+		if (last >= saved_last) {
+			for (i = 0; i < last; i++)
+				for (j = last; j > i; j--)
+					if (comb_class[j] &&
+					    comb_class[j - 1] > comb_class[j]) {
+						U8_SWAP_COMB_MARKS(j - 1, j);
+					}
+		}
+
+		*source = s;
+
+		if (! canonical_composition) {
+			u8s[saved_sz] = '\0';
+			return (saved_sz);
+		}
+
+		/*
+		 * Now do the canonical composition. Note that we do this
+		 * only after a canonical or compatibility decomposition to
+		 * finish up NFC or NFKC.
+		 */
+		sz = do_composition(uv, u8s, comb_class, start, disp, last,
+		    &s, slast);
+	}
+
+	*source = s;
+
+	return ((size_t)sz);
+}
+
+/*
+ * Helper for do_norm_compare(): fetch the next unit to compare from *source
+ * into u8s[], advance *source past the bytes that were consumed, and return
+ * the number of bytes placed in u8s[].
+ *
+ * A 7-bit ASCII character that is the last character, or that is followed
+ * by another 7-bit ASCII character, forms a unit on its own; only the
+ * requested ASCII case folding is applied to it. Anything else is handed
+ * to collect_a_seq() together with the options encoded in flag.
+ */
+static size_t
+collect_compare_unit(size_t uv, uchar_t *u8s, uchar_t **source,
+	uchar_t *slast, int flag, int *errnum)
+{
+	uchar_t *p = *source;
+	u8_normalization_states_t state;
+
+	if (!U8_ISASCII(*p) ||
+	    ((p + 1) < slast && !U8_ISASCII(*(p + 1)))) {
+		/* Every collected sequence starts from a clean state. */
+		state = U8_STATE_START;
+		return (collect_a_seq(uv, u8s, source, slast,
+		    flag & U8_TEXTPREP_TOUPPER,
+		    flag & U8_TEXTPREP_TOLOWER,
+		    flag & U8_CANON_DECOMP,
+		    flag & U8_COMPAT_DECOMP,
+		    flag & U8_CANON_COMP, errnum, &state));
+	}
+
+	if (flag & U8_TEXTPREP_TOUPPER)
+		u8s[0] = U8_ASCII_TOUPPER(*p);
+	else if (flag & U8_TEXTPREP_TOLOWER)
+		u8s[0] = U8_ASCII_TOLOWER(*p);
+	else
+		u8s[0] = *p;
+	u8s[1] = '\0';
+
+	*source = p + 1;
+
+	return (1);
+}
+
+/*
+ * The do_norm_compare() function does string comparison based on Unicode
+ * simple case mappings and Unicode Normalization definitions.
+ *
+ * It walks both strings in lock step, collecting one sequence of characters
+ * from each at a time and comparing the two collected sequences. The first
+ * pair of sequences that differs decides the result.
+ *
+ * The meanings of the return values are the same as for the usual strcmp().
+ */
+static int
+do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2,
+	int flag, int *errnum)
+{
+	uchar_t seq1[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t seq2[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t *end1 = s1 + n1;
+	uchar_t *end2 = s2 + n2;
+	size_t len1;
+	size_t len2;
+	int diff;
+
+	while (s1 < end1 && s2 < end2) {
+		len1 = collect_compare_unit(uv, seq1, &s1, end1, flag, errnum);
+		len2 = collect_compare_unit(uv, seq2, &s2, end2, flag, errnum);
+
+		/*
+		 * Two single byte units are compared directly; anything
+		 * longer is compared as a string.
+		 */
+		if (len1 == 1 && len2 == 1) {
+			if (*seq1 != *seq2)
+				return (*seq1 > *seq2 ? 1 : -1);
+			continue;
+		}
+
+		diff = strcmp((const char *)seq1, (const char *)seq2);
+		if (diff != 0)
+			return (diff);
+	}
+
+	/*
+	 * At least one of the strings has been used up without finding
+	 * a difference. Whichever string still has something left is the
+	 * greater one; if neither has, they are the same.
+	 */
+	if (s1 < end1)
+		return (1);
+
+	return (s2 < end2 ? -1 : 0);
+}
+
+/*
+ * The u8_strcmp() function compares two UTF-8 strings much like strcmp()
+ * does, or like strncmp() when n is not zero. On top of the plain byte
+ * comparison, the caller can request through flag that strings which are
+ * equivalent under Unicode Normalization and/or Unicode simple case
+ * conversion mappings be treated as such.
+ *
+ * *errnum is cleared on entry. It is set to ERANGE when uv is newer than
+ * U8_UNICODE_LATEST (the latest version is then used), and to EBADF when
+ * flag holds a conflicting combination (a case sensitive comparison is
+ * then done).
+ */
+int
+u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv,
+		int *errnum)
+{
+	int case_bits;
+	int norm_bits;
+	size_t len1;
+	size_t len2;
+	boolean_t to_upper;
+
+	*errnum = 0;
+
+	/* Validate the requested Unicode version. */
+	if (uv > U8_UNICODE_LATEST) {
+		*errnum = ERANGE;
+		uv = U8_UNICODE_LATEST;
+	}
+
+	/* Validate the case conversion and the normalization selections. */
+	if (flag != 0) {
+		case_bits = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER |
+		    U8_STRCMP_CI_LOWER);
+		if (case_bits == 0) {
+			flag |= U8_STRCMP_CS;
+		} else if (!(case_bits == U8_STRCMP_CS ||
+		    case_bits == U8_STRCMP_CI_UPPER ||
+		    case_bits == U8_STRCMP_CI_LOWER)) {
+			*errnum = EBADF;
+			flag = U8_STRCMP_CS;
+		}
+
+		norm_bits = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP |
+		    U8_CANON_COMP);
+		if (norm_bits != 0 && norm_bits != U8_STRCMP_NFD &&
+		    norm_bits != U8_STRCMP_NFC &&
+		    norm_bits != U8_STRCMP_NFKD &&
+		    norm_bits != U8_STRCMP_NFKC) {
+			*errnum = EBADF;
+			flag = U8_STRCMP_CS;
+		}
+	} else {
+		flag = U8_STRCMP_CS;
+	}
+
+	/* Nothing but case sensitivity asked for: a byte comparison. */
+	if (flag == U8_STRCMP_CS) {
+		if (n == 0)
+			return (strcmp(s1, s2));
+		return (strncmp(s1, s2, n));
+	}
+
+	/* A non-zero n caps how much of each string is looked at. */
+	len1 = strlen(s1);
+	len2 = strlen(s2);
+	if (n != 0) {
+		len1 = (n < len1) ? n : len1;
+		len2 = (n < len2) ? n : len2;
+	}
+
+	/*
+	 * A comparison that needs only simple case conversion is much
+	 * cheaper than one involving normalization, so it gets its own
+	 * routine.
+	 */
+	if (flag == U8_STRCMP_CI_UPPER || flag == U8_STRCMP_CI_LOWER) {
+		to_upper = (flag == U8_STRCMP_CI_UPPER) ? B_TRUE : B_FALSE;
+		return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2,
+		    len1, len2, to_upper, errnum));
+	}
+
+	return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, len1, len2,
+	    flag, errnum));
+}
+
+/*
+ * The u8_textprep_str() function copies the UTF-8 text at inarray into
+ * outarray while applying the preparations selected in flag: simple case
+ * conversion (U8_TEXTPREP_TOUPPER or U8_TEXTPREP_TOLOWER) and/or one of
+ * the normalization forms (U8_TEXTPREP_NFD, NFC, NFKD, NFKC).
+ *
+ * On entry *inlen and *outlen hold the sizes of the two buffers in bytes.
+ * Once the conversion loop has run they are updated to the number of input
+ * bytes not yet consumed and the number of output bytes still free. The
+ * early returns on argument errors leave them untouched.
+ *
+ * Processing stops at a NUL byte unless U8_TEXTPREP_IGNORE_NULL is set.
+ *
+ * The return value is (size_t)-1 on failure, with *errnum set to:
+ *	ERANGE	unicode_version is newer than U8_UNICODE_LATEST
+ *	EBADF	flag holds a conflicting combination
+ *	E2BIG	outarray is NULL or has no room left
+ *	EILSEQ	an illegal byte was found (case conversion only path)
+ *	EINVAL	the input ends with an incomplete character (case
+ *		conversion only path)
+ * On the normalization path the error number is whatever collect_a_seq()
+ * reported. With U8_TEXTPREP_IGNORE_INVALID set, illegal and incomplete
+ * input is copied through instead; the case conversion only path then
+ * counts each such occurrence in the return value.
+ */
+size_t
+u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
+	int flag, size_t unicode_version, int *errnum)
+{
+	int f;
+	int sz;
+	uchar_t *ib;
+	uchar_t *ibtail;
+	uchar_t *ob;
+	uchar_t *obtail;
+	boolean_t do_not_ignore_null;
+	boolean_t do_not_ignore_invalid;
+	boolean_t is_it_toupper;
+	boolean_t is_it_tolower;
+	boolean_t canonical_decomposition;
+	boolean_t compatibility_decomposition;
+	boolean_t canonical_composition;
+	size_t ret_val;
+	size_t i;
+	size_t j;
+	uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1];
+	u8_normalization_states_t state;
+
+	if (unicode_version > U8_UNICODE_LATEST) {
+		*errnum = ERANGE;
+		return ((size_t)-1);
+	}
+
+	/* Converting to uppercase and to lowercase at once makes no sense. */
+	f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER);
+	if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) {
+		*errnum = EBADF;
+		return ((size_t)-1);
+	}
+
+	/*
+	 * The normalization bits must either be all clear or form exactly
+	 * one of the supported forms. Note that f keeps the normalization
+	 * bits from here on and selects the processing path below.
+	 */
+	f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP);
+	if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC &&
+	    f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) {
+		*errnum = EBADF;
+		return ((size_t)-1);
+	}
+
+	/* No input at all is not an error; there is just nothing to do. */
+	if (inarray == NULL || *inlen == 0)
+		return (0);
+
+	if (outarray == NULL) {
+		*errnum = E2BIG;
+		return ((size_t)-1);
+	}
+
+	ib = (uchar_t *)inarray;
+	ob = (uchar_t *)outarray;
+	ibtail = ib + *inlen;
+	obtail = ob + *outlen;
+
+	do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL);
+	do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID);
+	is_it_toupper = flag & U8_TEXTPREP_TOUPPER;
+	is_it_tolower = flag & U8_TEXTPREP_TOLOWER;
+
+	ret_val = 0;
+
+	/*
+	 * If we don't have a normalization flag set, we do the simple case
+	 * conversion based text preparation separately below. Text
+	 * preparation involving Normalization will be done in the else
+	 * block, again, separately since it will take much more time and
+	 * resource than doing simple case conversions.
+	 */
+	if (f == 0) {
+		while (ib < ibtail) {
+			if (*ib == '\0' && do_not_ignore_null)
+				break;
+
+			sz = u8_number_of_bytes[*ib];
+
+			/*
+			 * A negative size marks a byte that cannot start
+			 * a character. Unless told to ignore such bytes we
+			 * give up; otherwise the byte is counted and then
+			 * copied as if it were a single byte character.
+			 */
+			if (sz < 0) {
+				if (do_not_ignore_invalid) {
+					*errnum = EILSEQ;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				sz = 1;
+				ret_val++;
+			}
+
+			if (sz == 1) {
+				if (ob >= obtail) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if (is_it_toupper)
+					*ob = U8_ASCII_TOUPPER(*ib);
+				else if (is_it_tolower)
+					*ob = U8_ASCII_TOLOWER(*ib);
+				else
+					*ob = *ib;
+				ib++;
+				ob++;
+			} else if ((ib + sz) > ibtail) {
+				/* The last character is cut short. */
+				if (do_not_ignore_invalid) {
+					*errnum = EINVAL;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if ((obtail - ob) < (ibtail - ib)) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				/*
+				 * We treat the remaining incomplete character
+				 * bytes as a character.
+				 */
+				ret_val++;
+
+				while (ib < ibtail)
+					*ob++ = *ib++;
+			} else {
+				if (is_it_toupper || is_it_tolower) {
+					/*
+					 * The converted character is built
+					 * in u8s[] first since it can differ
+					 * in length from the original; i is
+					 * the converted length in bytes.
+					 */
+					i = do_case_conv(unicode_version, u8s,
+					    ib, sz, is_it_toupper);
+
+					if ((obtail - ob) < i) {
+						*errnum = E2BIG;
+						ret_val = (size_t)-1;
+						break;
+					}
+
+					ib += sz;
+
+					/* sz is reused as the copy index. */
+					for (sz = 0; sz < i; sz++)
+						*ob++ = u8s[sz];
+				} else {
+					if ((obtail - ob) < sz) {
+						*errnum = E2BIG;
+						ret_val = (size_t)-1;
+						break;
+					}
+
+					for (i = 0; i < sz; i++)
+						*ob++ = *ib++;
+				}
+			}
+		}
+	} else {
+		canonical_decomposition = flag & U8_CANON_DECOMP;
+		compatibility_decomposition = flag & U8_COMPAT_DECOMP;
+		canonical_composition = flag & U8_CANON_COMP;
+
+		while (ib < ibtail) {
+			if (*ib == '\0' && do_not_ignore_null)
+				break;
+
+			/*
+			 * If the current character is a 7-bit ASCII
+			 * character and it is the last character, or,
+			 * if the current character is a 7-bit ASCII
+			 * character and the next character is also a 7-bit
+			 * ASCII character, then, we copy over this
+			 * character without going through collect_a_seq().
+			 *
+			 * In any other cases, we need to look further with
+			 * the collect_a_seq() function.
+			 */
+			if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail ||
+			    ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) {
+				if (ob >= obtail) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if (is_it_toupper)
+					*ob = U8_ASCII_TOUPPER(*ib);
+				else if (is_it_tolower)
+					*ob = U8_ASCII_TOLOWER(*ib);
+				else
+					*ob = *ib;
+				ib++;
+				ob++;
+			} else {
+				/*
+				 * The error number is cleared before each
+				 * call so that a non-zero value afterwards
+				 * can only have come from this sequence.
+				 */
+				*errnum = 0;
+				state = U8_STATE_START;
+
+				/*
+				 * collect_a_seq() advances ib itself and
+				 * returns the number of bytes it has put
+				 * into u8s[].
+				 */
+				j = collect_a_seq(unicode_version, u8s,
+				    &ib, ibtail,
+				    is_it_toupper,
+				    is_it_tolower,
+				    canonical_decomposition,
+				    compatibility_decomposition,
+				    canonical_composition,
+				    errnum, &state);
+
+				if (*errnum && do_not_ignore_invalid) {
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if ((obtail - ob) < j) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				for (i = 0; i < j; i++)
+					*ob++ = u8s[i];
+			}
+		}
+	}
+
+	/* Report what is left over in each buffer. */
+	*inlen = ibtail - ib;
+	*outlen = obtail - ob;
+
+	return (ret_val);
+}
diff --git a/lib/libuutil/include/libuutil.h b/lib/libuutil/include/libuutil.h
new file mode 100644
index 000000000..ccd46b977
--- /dev/null
+++ b/lib/libuutil/include/libuutil.h
@@ -0,0 +1,381 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBUUTIL_H
+#define	_LIBUUTIL_H
+
+#include <sys/types.h>
+#include <stdarg.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Standard flags codes.
+ */
+#define	UU_DEFAULT		0
+
+/*
+ * Standard error codes.
+ */
+#define	UU_ERROR_NONE		0	/* no error */
+#define	UU_ERROR_INVALID_ARGUMENT 1	/* invalid argument */
+#define	UU_ERROR_UNKNOWN_FLAG	2	/* passed flag invalid */
+#define	UU_ERROR_NO_MEMORY	3	/* out of memory */
+#define	UU_ERROR_CALLBACK_FAILED 4	/* callback-initiated error */
+#define	UU_ERROR_NOT_SUPPORTED	5	/* operation not supported */
+#define	UU_ERROR_EMPTY		6	/* no value provided */
+#define	UU_ERROR_UNDERFLOW	7	/* value is too small */
+#define	UU_ERROR_OVERFLOW	8	/* value is too large */
+#define	UU_ERROR_INVALID_CHAR	9	/* value contains unexpected char */
+#define	UU_ERROR_INVALID_DIGIT	10	/* value contains digit not in base */
+
+#define	UU_ERROR_SYSTEM		99	/* underlying system error */
+#define	UU_ERROR_UNKNOWN	100	/* error status not known */
+
+/*
+ * Standard program exit codes.
+ */
+#define	UU_EXIT_OK	(*(uu_exit_ok()))
+#define	UU_EXIT_FATAL	(*(uu_exit_fatal()))
+#define	UU_EXIT_USAGE	(*(uu_exit_usage()))
+
+/*
+ * Exit status profiles.
+ */
+#define	UU_PROFILE_DEFAULT	0
+#define	UU_PROFILE_LAUNCHER	1
+
+/*
+ * Error reporting functions.
+ */
+uint32_t uu_error(void);
+const char *uu_strerror(uint32_t);
+
+/*
+ * Program notification functions.
+ */
+extern void uu_alt_exit(int);
+extern const char *uu_setpname(char *);
+extern const char *uu_getpname(void);
+/*PRINTFLIKE1*/
+extern void uu_warn(const char *, ...);
+extern void uu_vwarn(const char *, va_list);
+/*PRINTFLIKE1*/
+extern void uu_die(const char *, ...) __NORETURN;
+extern void uu_vdie(const char *, va_list) __NORETURN;
+/*PRINTFLIKE2*/
+extern void uu_xdie(int, const char *, ...) __NORETURN;
+extern void uu_vxdie(int, const char *, va_list) __NORETURN;
+
+/*
+ * Exit status functions (not to be used directly)
+ */
+extern int *uu_exit_ok(void);
+extern int *uu_exit_fatal(void);
+extern int *uu_exit_usage(void);
+
+/*
+ * string->number conversions
+ */
+extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t);
+extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t);
+
+/*
+ * Debug print facility functions.
+ */
+typedef struct uu_dprintf uu_dprintf_t;
+
+/*
+ * Severity levels accepted by uu_dprintf_create() and uu_dprintf().
+ * NOTE(review): the enumerators presumably run from least to most verbose
+ * and are compared numerically -- confirm against the implementation.
+ */
+typedef enum {
+	UU_DPRINTF_SILENT,
+	UU_DPRINTF_FATAL,
+	UU_DPRINTF_WARNING,
+	UU_DPRINTF_NOTICE,
+	UU_DPRINTF_INFO,
+	UU_DPRINTF_DEBUG
+} uu_dprintf_severity_t;
+
+extern uu_dprintf_t *uu_dprintf_create(const char *, uu_dprintf_severity_t,
+    uint_t);
+/*PRINTFLIKE3*/
+extern void uu_dprintf(uu_dprintf_t *, uu_dprintf_severity_t,
+    const char *, ...);
+extern void uu_dprintf_destroy(uu_dprintf_t *);
+extern const char *uu_dprintf_getname(uu_dprintf_t *);
+
+/*
+ * Identifier test flags and function.
+ */
+#define	UU_NAME_DOMAIN		0x1	/* allow SUNW, or com.sun, prefix */
+#define	UU_NAME_PATH		0x2	/* allow '/'-delimited paths */
+
+int uu_check_name(const char *, uint_t);
+
+/*
+ * File creation functions.
+ */
+extern int uu_open_tmp(const char *dir, uint_t uflags);
+
+/*
+ * Convenience functions.
+ */
+/*PRINTFLIKE1*/
+extern char *uu_msprintf(const char *format, ...);
+extern void *uu_zalloc(size_t);
+extern char *uu_strdup(const char *);
+extern void uu_free(void *);
+
+/*
+ * Comparison function type definition.
+ *   Developers should be careful in their use of the _private argument. If you
+ *   break interface guarantees, you get undefined behavior.
+ */
+typedef int uu_compare_fn_t(const void *__left, const void *__right,
+    void *__private);
+
+/*
+ * Walk variant flags.
+ *   A data structure need not provide support for all variants and
+ *   combinations.  Refer to the appropriate documentation.
+ */
+#define	UU_WALK_ROBUST		0x00000001	/* walk can survive removes */
+#define	UU_WALK_REVERSE		0x00000002	/* reverse walk order */
+
+#define	UU_WALK_PREORDER	0x00000010	/* walk tree in pre-order */
+#define	UU_WALK_POSTORDER	0x00000020	/* walk tree in post-order */
+
+/*
+ * Walk callback function return codes.
+ */
+#define	UU_WALK_ERROR		-1
+#define	UU_WALK_NEXT		0
+#define	UU_WALK_DONE		1
+
+/*
+ * Walk callback function type definition.
+ */
+typedef int uu_walk_fn_t(void *_elem, void *_private);
+
+/*
+ * lists: opaque structures
+ */
+typedef struct uu_list_pool uu_list_pool_t;
+typedef struct uu_list uu_list_t;
+
+/*
+ * List linkage to be embedded in each object that is kept on a uu_list.
+ * The contents are opaque to consumers: two pointer-sized words.
+ * NOTE(review): presumably holds the next/previous links used by the
+ * implementation -- confirm against libuutil_impl.h.
+ */
+typedef struct uu_list_node {
+	uintptr_t uln_opaque[2];
+} uu_list_node_t;
+
+typedef struct uu_list_walk uu_list_walk_t;
+
+typedef uintptr_t uu_list_index_t;
+
+/*
+ * lists: interface
+ *
+ * basic usage:
+ *	typedef struct foo {
+ *		...
+ *		uu_list_node_t foo_node;
+ *		...
+ *	} foo_t;
+ *
+ *	static int
+ *	foo_compare(const void *l_arg, const void *r_arg, void *private)
+ *	{
+ *		const foo_t *l = l_arg;
+ *		const foo_t *r = r_arg;
+ *
+ *		if (... l greater than r ...)
+ *			return (1);
+ *		if (... l less than r ...)
+ *			return (-1);
+ *		return (0);
+ *	}
+ *
+ *	...
+ *		// at initialization time
+ *		foo_pool = uu_list_pool_create("foo_pool",
+ *		    sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare,
+ *		    debugging ? UU_LIST_POOL_DEBUG : 0);
+ *	...
+ */
+uu_list_pool_t *uu_list_pool_create(const char *, size_t, size_t,
+    uu_compare_fn_t *, uint32_t);
+#define	UU_LIST_POOL_DEBUG	0x00000001
+
+void uu_list_pool_destroy(uu_list_pool_t *);
+
+/*
+ * usage:
+ *
+ *	foo_t *a;
+ *	a = malloc(sizeof(*a));
+ *	uu_list_node_init(a, &a->foo_node, pool);
+ *	...
+ *	uu_list_node_fini(a, &a->foo_node, pool);
+ *	free(a);
+ */
+void uu_list_node_init(void *, uu_list_node_t *, uu_list_pool_t *);
+void uu_list_node_fini(void *, uu_list_node_t *, uu_list_pool_t *);
+
+uu_list_t *uu_list_create(uu_list_pool_t *, void *_parent, uint32_t);
+#define	UU_LIST_DEBUG	0x00000001
+#define	UU_LIST_SORTED	0x00000002	/* list is sorted */
+
+void uu_list_destroy(uu_list_t *);	/* list must be empty */
+
+size_t uu_list_numnodes(uu_list_t *);
+
+void *uu_list_first(uu_list_t *);
+void *uu_list_last(uu_list_t *);
+
+void *uu_list_next(uu_list_t *, void *);
+void *uu_list_prev(uu_list_t *, void *);
+
+int uu_list_walk(uu_list_t *, uu_walk_fn_t *, void *, uint32_t);
+
+uu_list_walk_t *uu_list_walk_start(uu_list_t *, uint32_t);
+void *uu_list_walk_next(uu_list_walk_t *);
+void uu_list_walk_end(uu_list_walk_t *);
+
+void *uu_list_find(uu_list_t *, void *, void *, uu_list_index_t *);
+void uu_list_insert(uu_list_t *, void *, uu_list_index_t);
+
+void *uu_list_nearest_next(uu_list_t *, uu_list_index_t);
+void *uu_list_nearest_prev(uu_list_t *, uu_list_index_t);
+
+void *uu_list_teardown(uu_list_t *, void **);
+
+void uu_list_remove(uu_list_t *, void *);
+
+/*
+ * lists: interfaces for non-sorted lists only
+ */
+int uu_list_insert_before(uu_list_t *, void *_target, void *_elem);
+int uu_list_insert_after(uu_list_t *, void *_target, void *_elem);
+
+/*
+ * avl trees: opaque structures
+ */
+typedef struct uu_avl_pool uu_avl_pool_t;
+typedef struct uu_avl uu_avl_t;
+
+/*
+ * Tree linkage to be embedded in each object that is kept in a uu_avl.
+ * The contents are opaque to consumers: three pointer-sized words when
+ * _LP64 is defined and four otherwise.
+ * NOTE(review): presumably sized to match struct avl_node on the
+ * respective data model -- confirm against sys/avl_impl.h.
+ */
+typedef struct uu_avl_node {
+#ifdef _LP64
+	uintptr_t uan_opaque[3];
+#else
+	uintptr_t uan_opaque[4];
+#endif
+} uu_avl_node_t;
+
+typedef struct uu_avl_walk uu_avl_walk_t;
+
+typedef uintptr_t uu_avl_index_t;
+
+/*
+ * avl trees: interface
+ *
+ * basic usage:
+ *	typedef struct foo {
+ *		...
+ *		uu_avl_node_t foo_node;
+ *		...
+ *	} foo_t;
+ *
+ *	static int
+ *	foo_compare(const void *l_arg, const void *r_arg, void *private)
+ *	{
+ *		const foo_t *l = l_arg;
+ *		const foo_t *r = r_arg;
+ *
+ *		if (... l greater than r ...)
+ *			return (1);
+ *		if (... l less than r ...)
+ *			return (-1);
+ *		return (0);
+ *	}
+ *
+ *	...
+ *		// at initialization time
+ *		foo_pool = uu_avl_pool_create("foo_pool",
+ *		    sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare,
+ *		    debugging ? UU_AVL_POOL_DEBUG : 0);
+ *	...
+ */
+uu_avl_pool_t *uu_avl_pool_create(const char *, size_t, size_t,
+    uu_compare_fn_t *, uint32_t);
+#define	UU_AVL_POOL_DEBUG	0x00000001
+
+void uu_avl_pool_destroy(uu_avl_pool_t *);
+
+/*
+ * usage:
+ *
+ *	foo_t *a;
+ *	a = malloc(sizeof(*a));
+ *	uu_avl_node_init(a, &a->foo_node, pool);
+ *	...
+ *	uu_avl_node_fini(a, &a->foo_node, pool);
+ *	free(a);
+ */
+void uu_avl_node_init(void *, uu_avl_node_t *, uu_avl_pool_t *);
+void uu_avl_node_fini(void *, uu_avl_node_t *, uu_avl_pool_t *);
+
+uu_avl_t *uu_avl_create(uu_avl_pool_t *, void *_parent, uint32_t);
+#define	UU_AVL_DEBUG	0x00000001
+
+void uu_avl_destroy(uu_avl_t *);	/* tree must be empty */
+
+size_t uu_avl_numnodes(uu_avl_t *);
+
+void *uu_avl_first(uu_avl_t *);
+void *uu_avl_last(uu_avl_t *);
+
+void *uu_avl_next(uu_avl_t *, void *);
+void *uu_avl_prev(uu_avl_t *, void *);
+
+int uu_avl_walk(uu_avl_t *, uu_walk_fn_t *, void *, uint32_t);
+
+uu_avl_walk_t *uu_avl_walk_start(uu_avl_t *, uint32_t);
+void *uu_avl_walk_next(uu_avl_walk_t *);
+void uu_avl_walk_end(uu_avl_walk_t *);
+
+void *uu_avl_find(uu_avl_t *, void *, void *, uu_avl_index_t *);
+void uu_avl_insert(uu_avl_t *, void *, uu_avl_index_t);
+
+void *uu_avl_nearest_next(uu_avl_t *, uu_avl_index_t);
+void *uu_avl_nearest_prev(uu_avl_t *, uu_avl_index_t);
+
+void *uu_avl_teardown(uu_avl_t *, void **);
+
+void uu_avl_remove(uu_avl_t *, void *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBUUTIL_H */
diff --git a/lib/libuutil/include/libuutil_common.h b/lib/libuutil/include/libuutil_common.h
new file mode 100644
index 000000000..9ebaaedfd
--- /dev/null
+++ b/lib/libuutil/include/libuutil_common.h
@@ -0,0 +1,35 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBUUTIL_COMMON_H
+#define	_LIBUUTIL_COMMON_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libuutil.h>
+#include <libuutil_impl.h>
+
+#endif	/* _LIBUUTIL_COMMON_H */
diff --git a/lib/libuutil/include/libuutil_impl.h b/lib/libuutil/include/libuutil_impl.h
new file mode 100644
index 000000000..9466e5974
--- /dev/null
+++ b/lib/libuutil/include/libuutil_impl.h
@@ -0,0 +1,181 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBUUTIL_IMPL_H
+#define	_LIBUUTIL_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <libuutil.h>
+#include <pthread.h>
+
+#include <sys/avl_impl.h>
+#include <sys/byteorder.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+void uu_set_error(uint_t);
+#pragma rarely_called(uu_set_error)
+
+/*PRINTFLIKE1*/
+void uu_panic(const char *format, ...);
+#pragma rarely_called(uu_panic)
+
+/*
+ * Body of the opaque uu_dprintf_t handle declared in libuutil.h.
+ * NOTE(review): the three members line up with the three arguments of
+ * uu_dprintf_create() (name, severity, flags) and presumably record them
+ * -- confirm against the implementation.
+ */
+struct uu_dprintf {
+	char	*uud_name;
+	uu_dprintf_severity_t uud_severity;
+	uint_t	uud_flags;
+};
+
+/*
+ * For debugging purposes, libuutil keeps around linked lists of all uu_lists
+ * and uu_avls, along with pointers to their parents.  These can cause false
+ * negatives when looking for memory leaks, so we encode the pointers by
+ * storing them with swapped endianness;  this is not perfect, but it's about
+ * the best we can do without wasting a lot of space.
+ *
+ * UU_PTR_ENCODE() byte swaps the pointer value, using the 64-bit swap when
+ * _LP64 is defined and the 32-bit swap otherwise.  UU_PTR_DECODE() applies
+ * the very same swap a second time and casts the result back to a pointer.
+ */
+#ifdef _LP64
+#define	UU_PTR_ENCODE(ptr)		BSWAP_64((uintptr_t)(void *)(ptr))
+#else
+#define	UU_PTR_ENCODE(ptr)		BSWAP_32((uintptr_t)(void *)(ptr))
+#endif
+
+#define	UU_PTR_DECODE(ptr)		((void *)UU_PTR_ENCODE(ptr))
+
+/*
+ * uu_list structures
+ */
+/*
+ * Doubly linked list linkage: a pointer to the next and to the previous
+ * node.
+ * NOTE(review): presumably this is what the two opaque words of the public
+ * uu_list_node_t really hold -- confirm against uu_list.c.
+ */
+typedef struct uu_list_node_impl {
+	struct uu_list_node_impl *uln_next;
+	struct uu_list_node_impl *uln_prev;
+} uu_list_node_impl_t;
+
+/*
+ * State of one walk over a uu_list.
+ * NOTE(review): ulw_next/ulw_prev presumably chain the walk onto the
+ * list's ul_null_walk, and ulw_dir/ulw_robust presumably reflect the
+ * UU_WALK_REVERSE and UU_WALK_ROBUST flags -- confirm against uu_list.c.
+ */
+struct uu_list_walk {
+	uu_list_walk_t	*ulw_next;
+	uu_list_walk_t	*ulw_prev;
+
+	uu_list_t	*ulw_list;	/* the list being walked */
+	int8_t		ulw_dir;
+	uint8_t		ulw_robust;
+	uu_list_node_impl_t *ulw_next_result;
+};
+
+/*
+ * Body of the opaque uu_list_t handle.  The members ending in _enc hold
+ * pointers stored in UU_PTR_ENCODE() form; use UU_LIST_PTR() or
+ * UU_PTR_DECODE() to get the real pointer back.
+ */
+struct uu_list {
+	uintptr_t	ul_next_enc;	/* encoded link to another uu_list */
+	uintptr_t	ul_prev_enc;	/* encoded link to another uu_list */
+
+	uu_list_pool_t	*ul_pool;
+	uintptr_t	ul_parent_enc;	/* encoded parent pointer */
+	size_t		ul_offset;
+	size_t		ul_numnodes;
+	uint8_t		ul_debug;
+	uint8_t		ul_sorted;
+	uint8_t		ul_index;	/* mark for uu_list_index_ts */
+
+	uu_list_node_impl_t ul_null_node;
+	uu_list_walk_t	ul_null_walk;	/* for robust walkers */
+};
+
+#define	UU_LIST_PTR(ptr)		((uu_list_t *)UU_PTR_DECODE(ptr))
+
+#define	UU_LIST_POOL_MAXNAME	64
+
+/*
+ * Body of the opaque uu_list_pool_t handle.
+ * NOTE(review): ulp_name, ulp_objsize, ulp_nodeoffset and ulp_cmp presumably
+ * record the corresponding arguments of uu_list_pool_create(), and
+ * ulp_next/ulp_prev presumably link all pools together -- confirm against
+ * uu_list.c.
+ */
+struct uu_list_pool {
+	uu_list_pool_t	*ulp_next;
+	uu_list_pool_t	*ulp_prev;
+
+	char		ulp_name[UU_LIST_POOL_MAXNAME];
+	size_t		ulp_nodeoffset;
+	size_t		ulp_objsize;
+	uu_compare_fn_t	*ulp_cmp;
+	uint8_t		ulp_debug;
+	uint8_t		ulp_last_index;
+	pthread_mutex_t	ulp_lock;		/* protects null_list */
+	uu_list_t	ulp_null_list;
+};
+
+/*
+ * uu_avl structures
+ */
+typedef struct avl_node		uu_avl_node_impl_t;
+
+/*
+ * State of one walk over a uu_avl.
+ * NOTE(review): uaw_next/uaw_prev presumably chain the walk onto the
+ * tree's ua_null_walk, and uaw_dir/uaw_robust presumably reflect the
+ * UU_WALK_REVERSE and UU_WALK_ROBUST flags -- confirm against uu_avl.c.
+ */
+struct uu_avl_walk {
+	uu_avl_walk_t	*uaw_next;
+	uu_avl_walk_t	*uaw_prev;
+
+	uu_avl_t	*uaw_avl;	/* the tree being walked */
+	void		*uaw_next_result;
+	int8_t		uaw_dir;
+	uint8_t		uaw_robust;
+};
+
+/*
+ * Body of the opaque uu_avl_t handle.  The members ending in _enc hold
+ * pointers stored in UU_PTR_ENCODE() form; use UU_AVL_PTR() or
+ * UU_PTR_DECODE() to get the real pointer back.
+ */
+struct uu_avl {
+	uintptr_t	ua_next_enc;	/* encoded link to another uu_avl */
+	uintptr_t	ua_prev_enc;	/* encoded link to another uu_avl */
+
+	uu_avl_pool_t	*ua_pool;
+	uintptr_t	ua_parent_enc;	/* encoded parent pointer */
+	uint8_t		ua_debug;
+	uint8_t		ua_index;	/* mark for uu_avl_index_ts */
+
+	struct avl_tree	ua_tree;
+	uu_avl_walk_t	ua_null_walk;
+};
+
+#define	UU_AVL_PTR(x)		((uu_avl_t *)UU_PTR_DECODE(x))
+
+#define	UU_AVL_POOL_MAXNAME	64
+
+/*
+ * Body of the opaque uu_avl_pool_t handle.
+ * NOTE(review): uap_name, uap_objsize, uap_nodeoffset and uap_cmp presumably
+ * record the corresponding arguments of uu_avl_pool_create(), and
+ * uap_next/uap_prev presumably link all pools together -- confirm against
+ * uu_avl.c.
+ */
+struct uu_avl_pool {
+	uu_avl_pool_t	*uap_next;
+	uu_avl_pool_t	*uap_prev;
+
+	char		uap_name[UU_AVL_POOL_MAXNAME];
+	size_t		uap_nodeoffset;
+	size_t		uap_objsize;
+	uu_compare_fn_t	*uap_cmp;
+	uint8_t		uap_debug;
+	uint8_t		uap_last_index;
+	pthread_mutex_t	uap_lock;		/* protects null_avl */
+	uu_avl_t	uap_null_avl;
+};
+
+/*
+ * atfork() handlers
+ */
+void uu_avl_lockup(void);
+void uu_avl_release(void);
+
+void uu_list_lockup(void);
+void uu_list_release(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBUUTIL_IMPL_H */
diff --git a/lib/libuutil/uu_alloc.c b/lib/libuutil/uu_alloc.c
new file mode 100644
index 000000000..05d862287
--- /dev/null
+++ b/lib/libuutil/uu_alloc.c
@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "libuutil_common.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Allocate n bytes of zero-filled memory with malloc().
+ *
+ * Returns the buffer, or NULL after uu_set_error(UU_ERROR_SYSTEM) when the
+ * allocation fails.
+ */
+void *
+uu_zalloc(size_t n)
+{
+	void *buf;
+
+	buf = malloc(n);
+	if (buf != NULL)
+		return (memset(buf, 0, n));
+
+	uu_set_error(UU_ERROR_SYSTEM);
+	return (NULL);
+}
+
+/*
+ * Release memory by handing p straight to free().
+ */
+void
+uu_free(void *p)
+{
+	free(p);
+}
+
+/*
+ * Duplicate a NUL-terminated string into memory from uu_zalloc().
+ *
+ * A NULL str yields NULL.  NULL is also returned when the allocation
+ * fails, in which case uu_zalloc() has already recorded the error.
+ */
+char *
+uu_strdup(const char *str)
+{
+	size_t len;
+	char *copy;
+
+	if (str == NULL)
+		return (NULL);
+
+	len = strlen(str) + 1;		/* include the terminator */
+	copy = uu_zalloc(len);
+	if (copy == NULL)
+		return (NULL);
+
+	(void) memcpy(copy, str, len);
+	return (copy);
+}
+
+/*
+ * printf-style formatting into a freshly allocated string.
+ *
+ * The output is formatted once to learn its length and then again into a
+ * buffer of that size; if the length differs between passes the buffer is
+ * discarded and the attempt repeated with the new length.
+ *
+ * Returns the string (release with uu_free()), or NULL if allocation
+ * fails or vsnprintf() reports an error; the latter is recorded as
+ * UU_ERROR_SYSTEM.
+ */
+char *
+uu_msprintf(const char *format, ...)
+{
+	va_list args;
+	char attic[1];
+	int want, got;
+	char *b;
+
+	/* Dry run into a one-byte buffer just to measure the output. */
+	va_start(args, format);
+	want = vsnprintf(attic, sizeof (attic), format, args);
+	va_end(args);
+
+	for (;;) {
+		/*
+		 * vsnprintf() signals failure with a negative value; it must
+		 * not be turned into an (enormous) unsigned length.
+		 */
+		if (want < 0) {
+			uu_set_error(UU_ERROR_SYSTEM);
+			return (NULL);
+		}
+
+		if ((b = uu_zalloc((size_t)want + 1)) == NULL)
+			return (NULL);
+
+		va_start(args, format);
+		got = vsnprintf(b, (size_t)want + 1, format, args);
+		va_end(args);
+
+		if (got == want)
+			break;		/* sizes match */
+
+		/* Length changed between passes; retry with the new one. */
+		uu_free(b);
+		want = got;
+	}
+
+	return (b);
+}
diff --git a/lib/libuutil/uu_avl.c b/lib/libuutil/uu_avl.c
new file mode 100644
index 000000000..308e9208f
--- /dev/null
+++ b/lib/libuutil/uu_avl.c
@@ -0,0 +1,569 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/avl.h>
+
+/* Sentinel of the global ring of avl pools; initially linked to itself. */
+static uu_avl_pool_t	uu_null_apool = { &uu_null_apool, &uu_null_apool };
+/* Held while the pool ring is modified or traversed. */
+static pthread_mutex_t	uu_apool_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * The index mark changes on every insert and delete, to catch stale
+ * references.
+ *
+ * We leave the low bit alone, since the avl code uses it.
+ *
+ * INDEX_MAX is the mask of mark bits (0x6 when uintptr_t is 8 bytes wide,
+ * 0x2 when it is 4).  INDEX_NEXT steps a mark by 2 and wraps from
+ * INDEX_MAX back to 2, so a mark produced by it is never 0.
+ */
+#define	INDEX_MAX		(sizeof (uintptr_t) - 2)
+#define	INDEX_NEXT(m)		(((m) == INDEX_MAX)? 2 : ((m) + 2) & INDEX_MAX)
+
+/* Strip the mark from index i, leaving the value handed to the avl code. */
+#define	INDEX_DECODE(i)		((i) & ~INDEX_MAX)
+/* Stamp raw index n with tree p's current mark. */
+#define	INDEX_ENCODE(p, n)	(((n) & ~INDEX_MAX) | (p)->ua_index)
+/* True if index i carries tree p's current mark. */
+#define	INDEX_VALID(p, i)	(((i) & INDEX_MAX) == (p)->ua_index)
+/* True if index i carries any mark at all (stale vs. never valid). */
+#define	INDEX_CHECK(i)		(((i) & INDEX_MAX) != 0)
+
+/*
+ * When an element is inactive (not in a tree), we keep a marked pointer to
+ * its containing pool in its first word, and a NULL pointer in its second.
+ *
+ * On insert, we use these to verify that it comes from the correct pool.
+ */
+/*
+ * View the node embedded in element n as an array of uintptr_t words,
+ * using the node offset recorded in pool p.  (The macro now uses its own
+ * parameter rather than a variable named "pp" in the caller's scope.)
+ */
+#define	NODE_ARRAY(p, n)	((uintptr_t *)((uintptr_t)(n) + \
+				    (p)->uap_nodeoffset))
+
+/* Pool pointer with the low bit set; kept in word 0 of an inactive node. */
+#define	POOL_TO_MARKER(pp) (((uintptr_t)(pp) | 1))
+
+/* Value uu_avl_node_fini() writes over the words of a finished node. */
+#define	DEAD_MARKER		0xc4
+
+/*
+ * Create a pool from which uu_avl trees can be made.
+ *
+ * name must be non-NULL and pass uu_check_name() with UU_NAME_DOMAIN, a
+ * node placed at nodeoffset must fit within objsize bytes, and
+ * compare_func is mandatory.  UU_AVL_POOL_DEBUG is the only flag accepted.
+ *
+ * Returns the new pool, linked at the tail of the global pool ring, or
+ * NULL after uu_set_error() with UU_ERROR_INVALID_ARGUMENT,
+ * UU_ERROR_UNKNOWN_FLAG or UU_ERROR_NO_MEMORY.
+ */
+uu_avl_pool_t *
+uu_avl_pool_create(const char *name, size_t objsize, size_t nodeoffset,
+    uu_compare_fn_t *compare_func, uint32_t flags)
+{
+	uu_avl_pool_t *pool, *tail;
+	int bad_args;
+
+	bad_args = (name == NULL ||
+	    uu_check_name(name, UU_NAME_DOMAIN) == -1 ||
+	    nodeoffset + sizeof (uu_avl_node_t) > objsize ||
+	    compare_func == NULL);
+	if (bad_args) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (NULL);
+	}
+
+	if ((flags & ~UU_AVL_POOL_DEBUG) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	if ((pool = uu_zalloc(sizeof (*pool))) == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	(void) strlcpy(pool->uap_name, name, sizeof (pool->uap_name));
+	pool->uap_cmp = compare_func;
+	pool->uap_objsize = objsize;
+	pool->uap_nodeoffset = nodeoffset;
+	pool->uap_debug = (flags & UU_AVL_POOL_DEBUG) ? 1 : 0;
+	pool->uap_last_index = 0;
+
+	(void) pthread_mutex_init(&pool->uap_lock, NULL);
+
+	/* An empty ring of trees: the sentinel points at itself. */
+	pool->uap_null_avl.ua_prev_enc = UU_PTR_ENCODE(&pool->uap_null_avl);
+	pool->uap_null_avl.ua_next_enc = UU_PTR_ENCODE(&pool->uap_null_avl);
+
+	/* Append to the global ring, just before its sentinel. */
+	(void) pthread_mutex_lock(&uu_apool_list_lock);
+	tail = uu_null_apool.uap_prev;
+	pool->uap_next = &uu_null_apool;
+	pool->uap_prev = tail;
+	uu_null_apool.uap_prev = pool;
+	tail->uap_next = pool;
+	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+
+	return (pool);
+}
+
+/*
+ * Unlink pp from the global pool ring and free it.
+ *
+ * For a debug pool, panics first if the pool's ring of trees is not empty
+ * (i.e. its sentinel no longer points at itself).
+ */
+void
+uu_avl_pool_destroy(uu_avl_pool_t *pp)
+{
+	uu_avl_t *nil = &pp->uap_null_avl;
+
+	if (pp->uap_debug &&
+	    (nil->ua_next_enc != UU_PTR_ENCODE(nil) ||
+	    nil->ua_prev_enc != UU_PTR_ENCODE(nil))) {
+		uu_panic("uu_avl_pool_destroy: Pool \"%.*s\" (%p) has "
+		    "outstanding avls, or is corrupt.\n",
+		    (int)sizeof (pp->uap_name), pp->uap_name,
+		    (void *)pp);
+	}
+
+	(void) pthread_mutex_lock(&uu_apool_list_lock);
+	pp->uap_next->uap_prev = pp->uap_prev;
+	pp->uap_prev->uap_next = pp->uap_next;
+	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+
+	pp->uap_next = NULL;
+	pp->uap_prev = NULL;
+	uu_free(pp);
+}
+
+/*
+ * Prepare node np, embedded in the object at base, for use with pool pp.
+ *
+ * For a debug pool, panics if the node would extend past an object of the
+ * pool's size or is not located at the pool's node offset.  On return the
+ * node is marked inactive: word 0 holds the pool marker and word 1 is 0.
+ */
+void
+uu_avl_node_init(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
+{
+	uintptr_t *na = (uintptr_t *)np;
+
+	if (pp->uap_debug) {
+		uintptr_t offset = (uintptr_t)np - (uintptr_t)base;
+		if (offset + sizeof (*np) > pp->uap_objsize) {
+			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't fit in object (size %ld)\n",
+			    base, (void *)np, (void *)pp, pp->uap_name,
+			    (long)offset, (long)pp->uap_objsize);
+		}
+		if (offset != pp->uap_nodeoffset) {
+			/* Report the pool's node offset, not its objsize. */
+			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't match pool's offset (%ld)\n",
+			    base, (void *)np, (void *)pp, pp->uap_name,
+			    (long)offset, (long)pp->uap_nodeoffset);
+		}
+	}
+
+	na[0] = POOL_TO_MARKER(pp);
+	na[1] = 0;
+}
+
+/*
+ * Retire node np of pool pp by overwriting its first three words with
+ * DEAD_MARKER.
+ *
+ * For a debug pool, panics if the node already looks retired, or if it is
+ * not in the inactive state (pool marker in word 0, zero in word 1).
+ */
+void
+uu_avl_node_fini(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
+{
+	uintptr_t *words = (uintptr_t *)np;
+	int i;
+
+	if (pp->uap_debug) {
+		if (words[0] == DEAD_MARKER && words[1] == DEAD_MARKER) {
+			uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
+			    "node already finied\n",
+			    base, (void *)np, (void *)pp, pp->uap_name);
+		}
+		if (!(words[0] == POOL_TO_MARKER(pp) && words[1] == 0)) {
+			uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
+			    "node corrupt, in tree, or in different pool\n",
+			    base, (void *)np, (void *)pp, pp->uap_name);
+		}
+	}
+
+	for (i = 0; i < 3; i++)
+		words[i] = DEAD_MARKER;
+}
+
+/*
+ * Search context passed to avl_find() in place of a real element; see
+ * uu_avl_node_compare() and uu_avl_find().
+ */
+struct uu_avl_node_compare_info {
+	uu_compare_fn_t	*ac_compare;	/* the pool's comparison function */
+	void		*ac_private;	/* caller's private argument */
+	void		*ac_right;	/* element being searched for */
+	void		*ac_found;	/* first equal element seen, or NULL */
+};
+
+/*
+ * Comparison callback handed to avl_create().
+ *
+ * l is really a struct uu_avl_node_compare_info and r is a tree element.
+ * The pool's comparison is invoked as (r, search key), so its sense is
+ * flipped for the return value.  Equality is never reported: the first
+ * equal element seen is remembered in ac_found and -1 is returned.
+ */
+static int
+uu_avl_node_compare(const void *l, const void *r)
+{
+	struct uu_avl_node_compare_info *ctx =
+	    (struct uu_avl_node_compare_info *)l;
+	int order;
+
+	order = ctx->ac_compare(r, ctx->ac_right, ctx->ac_private);
+
+	if (order == 0 && ctx->ac_found == NULL)
+		ctx->ac_found = (void *)r;
+
+	return ((order < 0) ? 1 : -1);
+}
+
+/*
+ * Create an empty tree belonging to pool pp.
+ *
+ * parent is stored in encoded form; UU_AVL_DEBUG is the only flag
+ * accepted, and debugging is also enabled if the pool has it enabled.
+ * The tree receives the pool's next index mark and is appended to the
+ * pool's ring of trees under uap_lock.
+ *
+ * Returns the tree, or NULL after uu_set_error() with
+ * UU_ERROR_UNKNOWN_FLAG or UU_ERROR_NO_MEMORY.
+ */
+uu_avl_t *
+uu_avl_create(uu_avl_pool_t *pp, void *parent, uint32_t flags)
+{
+	uu_avl_t *tree, *head, *tail;
+
+	if ((flags & ~UU_AVL_DEBUG) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	if ((tree = uu_zalloc(sizeof (uu_avl_t))) == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	tree->ua_pool = pp;
+	tree->ua_parent_enc = UU_PTR_ENCODE(parent);
+	tree->ua_debug = pp->uap_debug || (flags & UU_AVL_DEBUG);
+
+	pp->uap_last_index = INDEX_NEXT(pp->uap_last_index);
+	tree->ua_index = pp->uap_last_index;
+
+	avl_create(&tree->ua_tree, &uu_avl_node_compare, pp->uap_objsize,
+	    pp->uap_nodeoffset);
+
+	/* No walkers yet: the walker sentinel points at itself. */
+	tree->ua_null_walk.uaw_prev = &tree->ua_null_walk;
+	tree->ua_null_walk.uaw_next = &tree->ua_null_walk;
+
+	(void) pthread_mutex_lock(&pp->uap_lock);
+	head = &pp->uap_null_avl;
+	tail = UU_PTR_DECODE(head->ua_prev_enc);
+	tree->ua_next_enc = UU_PTR_ENCODE(head);
+	tree->ua_prev_enc = UU_PTR_ENCODE(tail);
+	head->ua_prev_enc = UU_PTR_ENCODE(tree);
+	tail->ua_next_enc = UU_PTR_ENCODE(tree);
+	(void) pthread_mutex_unlock(&pp->uap_lock);
+
+	return (tree);
+}
+
+/*
+ * Unlink tree ap from its pool's ring, destroy the underlying avl tree and
+ * free ap.
+ *
+ * In debug mode, panics if the tree still has nodes or if any walker is
+ * still linked onto it.
+ */
+void
+uu_avl_destroy(uu_avl_t *ap)
+{
+	uu_avl_pool_t *pool = ap->ua_pool;
+	uu_avl_walk_t *nil = &ap->ua_null_walk;
+
+	if (ap->ua_debug) {
+		if (avl_numnodes(&ap->ua_tree) != 0) {
+			uu_panic("uu_avl_destroy(%p): tree not empty\n",
+			    (void *)ap);
+		}
+		if (nil->uaw_next != nil || nil->uaw_prev != nil) {
+			uu_panic("uu_avl_destroy(%p):  outstanding walkers\n",
+			    (void *)ap);
+		}
+	}
+
+	(void) pthread_mutex_lock(&pool->uap_lock);
+	UU_AVL_PTR(ap->ua_next_enc)->ua_prev_enc = ap->ua_prev_enc;
+	UU_AVL_PTR(ap->ua_prev_enc)->ua_next_enc = ap->ua_next_enc;
+	(void) pthread_mutex_unlock(&pool->uap_lock);
+
+	ap->ua_next_enc = UU_PTR_ENCODE(NULL);
+	ap->ua_prev_enc = UU_PTR_ENCODE(NULL);
+	ap->ua_pool = NULL;
+
+	avl_destroy(&ap->ua_tree);
+	uu_free(ap);
+}
+
+/*
+ * Return avl_numnodes() of the underlying tree.
+ */
+size_t
+uu_avl_numnodes(uu_avl_t *ap)
+{
+	return (avl_numnodes(&ap->ua_tree));
+}
+
+/*
+ * Return avl_first() of the underlying tree.
+ */
+void *
+uu_avl_first(uu_avl_t *ap)
+{
+	return (avl_first(&ap->ua_tree));
+}
+
+/*
+ * Return avl_last() of the underlying tree.
+ */
+void *
+uu_avl_last(uu_avl_t *ap)
+{
+	return (avl_last(&ap->ua_tree));
+}
+
+/*
+ * Return AVL_NEXT() of node within the underlying tree.
+ */
+void *
+uu_avl_next(uu_avl_t *ap, void *node)
+{
+	return (AVL_NEXT(&ap->ua_tree, node));
+}
+
+/*
+ * Return AVL_PREV() of node within the underlying tree.
+ */
+void *
+uu_avl_prev(uu_avl_t *ap, void *node)
+{
+	return (AVL_PREV(&ap->ua_tree, node));
+}
+
+/*
+ * Initialize walker wp for a walk over ap.
+ *
+ * The walker is zeroed, pointed at the first element (or the last one for
+ * UU_WALK_REVERSE), and, if the tree is in debug mode or the walk is
+ * robust, appended to the tree's walker ring.
+ */
+static void
+_avl_walk_init(uu_avl_walk_t *wp, uu_avl_t *ap, uint32_t flags)
+{
+	uu_avl_walk_t *head, *tail;
+	int robust = (flags & UU_WALK_ROBUST);
+	int reverse = (flags & UU_WALK_REVERSE) != 0;
+
+	(void) memset(wp, 0, sizeof (*wp));
+	wp->uaw_avl = ap;
+	wp->uaw_robust = robust;
+	wp->uaw_dir = reverse ? -1 : 1;
+	wp->uaw_next_result = reverse ?
+	    avl_last(&ap->ua_tree) : avl_first(&ap->ua_tree);
+
+	/* Plain walkers on non-debug trees are not tracked. */
+	if (!ap->ua_debug && !robust)
+		return;
+
+	head = &ap->ua_null_walk;
+	tail = head->uaw_prev;
+	wp->uaw_next = head;
+	wp->uaw_prev = tail;
+	head->uaw_prev = wp;
+	tail->uaw_next = wp;
+}
+
+/*
+ * Return the walker's pending element and step the walker one element
+ * further in its direction.  Returns NULL, without moving, once the walk
+ * is exhausted.
+ */
+static void *
+_avl_walk_advance(uu_avl_walk_t *wp, uu_avl_t *ap)
+{
+	avl_tree_t *tree = &ap->ua_tree;
+	void *cur = wp->uaw_next_result;
+
+	if (cur != NULL) {
+		if (wp->uaw_dir > 0)
+			wp->uaw_next_result = AVL_NEXT(tree, cur);
+		else
+			wp->uaw_next_result = AVL_PREV(tree, cur);
+	}
+
+	return (cur);
+}
+
+/*
+ * Detach walker wp from its tree: unlink it from the walker ring if it
+ * was linked, and clear its tree and pending-element pointers.
+ */
+static void
+_avl_walk_fini(uu_avl_walk_t *wp)
+{
+	uu_avl_walk_t *after = wp->uaw_next;
+
+	if (after != NULL) {
+		uu_avl_walk_t *before = wp->uaw_prev;
+
+		after->uaw_prev = before;
+		before->uaw_next = after;
+		wp->uaw_prev = NULL;
+		wp->uaw_next = NULL;
+	}
+
+	wp->uaw_next_result = NULL;
+	wp->uaw_avl = NULL;
+}
+
+/*
+ * Allocate and initialize a walker over ap.
+ *
+ * flags may combine UU_WALK_ROBUST and UU_WALK_REVERSE.  Returns the
+ * walker, or NULL after uu_set_error() with UU_ERROR_UNKNOWN_FLAG or
+ * UU_ERROR_NO_MEMORY.
+ */
+uu_avl_walk_t *
+uu_avl_walk_start(uu_avl_t *ap, uint32_t flags)
+{
+	uu_avl_walk_t *walker;
+
+	if ((flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	if ((walker = uu_zalloc(sizeof (uu_avl_walk_t))) == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	_avl_walk_init(walker, ap, flags);
+	return (walker);
+}
+
+/*
+ * Return the next element of the walk, or NULL when it is exhausted.
+ */
+void *
+uu_avl_walk_next(uu_avl_walk_t *wp)
+{
+	return (_avl_walk_advance(wp, wp->uaw_avl));
+}
+
+/*
+ * Finish a walk begun with uu_avl_walk_start(): detach the walker from
+ * its tree and free it.
+ */
+void
+uu_avl_walk_end(uu_avl_walk_t *wp)
+{
+	_avl_walk_fini(wp);
+	uu_free(wp);
+}
+
+/*
+ * Call func(element, private) on each element of ap, using a walker on
+ * the stack, for as long as func returns UU_WALK_NEXT.
+ *
+ * flags may combine UU_WALK_ROBUST and UU_WALK_REVERSE.  Returns 0 if the
+ * last status was non-negative.  Returns -1 after uu_set_error() with
+ * UU_ERROR_UNKNOWN_FLAG for bad flags, or UU_ERROR_CALLBACK_FAILED if the
+ * callback returned a negative value.
+ */
+int
+uu_avl_walk(uu_avl_t *ap, uu_walk_fn_t *func, void *private, uint32_t flags)
+{
+	uu_avl_walk_t walker;
+	int status = UU_WALK_NEXT;
+	void *elem;
+
+	if ((flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (-1);
+	}
+
+	_avl_walk_init(&walker, ap, flags);
+	for (;;) {
+		if (status != UU_WALK_NEXT)
+			break;
+		elem = _avl_walk_advance(&walker, ap);
+		if (elem == NULL)
+			break;
+		status = (*func)(elem, private);
+	}
+	_avl_walk_fini(&walker);
+
+	if (status < 0) {
+		uu_set_error(UU_ERROR_CALLBACK_FAILED);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Remove elem from tree ap and return its node to the inactive state
+ * (pool marker in word 0, zero in word 1).
+ *
+ * In debug mode the tree's index mark is advanced so that outstanding
+ * uu_avl_index_ts become invalid.
+ */
+void
+uu_avl_remove(uu_avl_t *ap, void *elem)
+{
+	uu_avl_pool_t *pp = ap->ua_pool;
+	uintptr_t *words = NODE_ARRAY(pp, elem);
+	uu_avl_walk_t *nil = &ap->ua_null_walk;
+	uu_avl_walk_t *w;
+
+	/* invalidate outstanding uu_avl_index_ts. */
+	if (ap->ua_debug)
+		ap->ua_index = INDEX_NEXT(ap->ua_index);
+
+	/*
+	 * Robust walkers must be advanced if the node being removed is the
+	 * one they would return next.  In debug mode non-robust walkers are
+	 * on the ring too, and an active one is a fatal error.
+	 */
+	w = nil->uaw_next;
+	while (w != nil) {
+		if (!w->uaw_robust) {
+			if (w->uaw_next_result != NULL) {
+				uu_panic("uu_avl_remove(%p, %p): active "
+				    "non-robust walker\n", (void *)ap, elem);
+			}
+		} else if (w->uaw_next_result == elem) {
+			(void) _avl_walk_advance(w, ap);
+		}
+		w = w->uaw_next;
+	}
+
+	avl_remove(&ap->ua_tree, elem);
+
+	words[0] = POOL_TO_MARKER(pp);
+	words[1] = 0;
+}
+
+/*
+ * Fetch the next element from avl_destroy_nodes() for tree ap, using the
+ * caller's cookie.  A returned element has its node reset to the inactive
+ * state (pool marker in word 0, zero in word 1).  Returns whatever
+ * avl_destroy_nodes() returned, NULL included.
+ */
+void *
+uu_avl_teardown(uu_avl_t *ap, void **cookie)
+{
+	uu_avl_pool_t *pp;
+	uintptr_t *words;
+	void *elem;
+
+	elem = avl_destroy_nodes(&ap->ua_tree, cookie);
+	if (elem == NULL)
+		return (NULL);
+
+	pp = ap->ua_pool;
+	words = NODE_ARRAY(pp, elem);
+	words[0] = POOL_TO_MARKER(pp);
+	words[1] = 0;
+
+	return (elem);
+}
+
+/*
+ * Look up an element equal to elem, using the pool's comparison function
+ * with the given private argument.
+ *
+ * avl_find() is run with a compare_info context in place of an element;
+ * the comparison callback never reports equality, so the match (if any)
+ * comes back through ac_found.  When out is non-NULL, the index written
+ * by avl_find() is stamped with the tree's current mark.
+ *
+ * Returns the first equal element seen, or NULL.
+ */
+void *
+uu_avl_find(uu_avl_t *ap, void *elem, void *private, uu_avl_index_t *out)
+{
+	struct uu_avl_node_compare_info ctx;
+	void *direct;
+
+	ctx.ac_found = NULL;
+	ctx.ac_right = elem;
+	ctx.ac_private = private;
+	ctx.ac_compare = ap->ua_pool->uap_cmp;
+
+	direct = avl_find(&ap->ua_tree, &ctx, out);
+	if (out != NULL)
+		*out = INDEX_ENCODE(ap, *out);
+
+	if (direct != NULL && ap->ua_debug)
+		uu_panic("uu_avl_find: internal error: avl_find succeeded\n");
+
+	return (ctx.ac_found);
+}
+
+/*
+ * Insert elem into ap at the position described by idx.
+ *
+ * In debug mode the node must be inactive and belong to this tree's pool,
+ * and idx must carry the tree's current mark; any violation panics.  The
+ * mark is then advanced to invalidate outstanding uu_avl_index_ts.
+ */
+void
+uu_avl_insert(uu_avl_t *ap, void *elem, uu_avl_index_t idx)
+{
+	if (ap->ua_debug) {
+		uu_avl_pool_t *pp = ap->ua_pool;
+		uintptr_t *words = NODE_ARRAY(pp, elem);
+
+		if (words[1] != 0) {
+			uu_panic("uu_avl_insert(%p, %p, %p): node already "
+			    "in tree, or corrupt\n",
+			    (void *)ap, elem, (void *)idx);
+		}
+		if (words[0] == 0) {
+			uu_panic("uu_avl_insert(%p, %p, %p): node not "
+			    "initialized\n",
+			    (void *)ap, elem, (void *)idx);
+		}
+		if (words[0] != POOL_TO_MARKER(pp)) {
+			uu_panic("uu_avl_insert(%p, %p, %p): node from "
+			    "other pool, or corrupt\n",
+			    (void *)ap, elem, (void *)idx);
+		}
+
+		if (!INDEX_VALID(ap, idx)) {
+			const char *why = INDEX_CHECK(idx) ?
+			    "outdated index" : "invalid index";
+
+			uu_panic("uu_avl_insert(%p, %p, %p): %s\n",
+			    (void *)ap, elem, (void *)idx, why);
+		}
+
+		/* invalidate outstanding uu_avl_index_ts. */
+		ap->ua_index = INDEX_NEXT(ap->ua_index);
+	}
+
+	avl_insert(&ap->ua_tree, elem, INDEX_DECODE(idx));
+}
+
+/*
+ * Return avl_nearest(..., AVL_AFTER) for the position described by idx.
+ * In debug mode, panics if idx does not carry the tree's current mark.
+ */
+void *
+uu_avl_nearest_next(uu_avl_t *ap, uu_avl_index_t idx)
+{
+	if (ap->ua_debug && !INDEX_VALID(ap, idx))
+		uu_panic("uu_avl_nearest_next(%p, %p): %s\n",
+		    (void *)ap, (void *)idx, INDEX_CHECK(idx)?
+		    "outdated index" : "invalid index");
+	return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_AFTER));
+}
+
+/*
+ * Return avl_nearest(..., AVL_BEFORE) for the position described by idx.
+ * In debug mode, panics if idx does not carry the tree's current mark.
+ */
+void *
+uu_avl_nearest_prev(uu_avl_t *ap, uu_avl_index_t idx)
+{
+	if (ap->ua_debug && !INDEX_VALID(ap, idx))
+		uu_panic("uu_avl_nearest_prev(%p, %p): %s\n",
+		    (void *)ap, (void *)idx, INDEX_CHECK(idx)?
+		    "outdated index" : "invalid index");
+	return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_BEFORE));
+}
+
+/*
+ * called from uu_lockup() and uu_release(), as part of our fork1()-safety.
+ */
+/*
+ * Take the pool-list lock and then the lock of every pool on the list.
+ * All of them stay held on return; uu_avl_release() drops them.
+ */
+void
+uu_avl_lockup(void)
+{
+	uu_avl_pool_t *pp;
+
+	(void) pthread_mutex_lock(&uu_apool_list_lock);
+	for (pp = uu_null_apool.uap_next; pp != &uu_null_apool;
+	    pp = pp->uap_next)
+		(void) pthread_mutex_lock(&pp->uap_lock);
+}
+
+/*
+ * Undo uu_avl_lockup(): unlock every pool on the list, then the
+ * pool-list lock itself (which is still held while the list is walked).
+ */
+void
+uu_avl_release(void)
+{
+	uu_avl_pool_t *pp;
+
+	for (pp = uu_null_apool.uap_next; pp != &uu_null_apool;
+	    pp = pp->uap_next)
+		(void) pthread_mutex_unlock(&pp->uap_lock);
+	(void) pthread_mutex_unlock(&uu_apool_list_lock);
+}
diff --git a/lib/libuutil/uu_dprintf.c b/lib/libuutil/uu_dprintf.c
new file mode 100644
index 000000000..5b990a52b
--- /dev/null
+++ b/lib/libuutil/uu_dprintf.c
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <errno.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+/* Message prefix: "<name> (<severity>): ". */
+#define	FACILITY_FMT	"%s (%s): "
+
+/* Fallback message domain for dgettext() when the build supplies none. */
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * Map a severity to its label, looked up through dgettext() in
+ * TEXT_DOMAIN.  Values not listed yield the "unspecified" label.
+ */
+static const char *
+strseverity(uu_dprintf_severity_t severity)
+{
+	switch (severity) {
+	case UU_DPRINTF_SILENT:
+		return (dgettext(TEXT_DOMAIN, "silent"));
+	case UU_DPRINTF_FATAL:
+		return (dgettext(TEXT_DOMAIN, "FATAL"));
+	case UU_DPRINTF_WARNING:
+		return (dgettext(TEXT_DOMAIN, "WARNING"));
+	case UU_DPRINTF_NOTICE:
+		return (dgettext(TEXT_DOMAIN, "note"));
+	case UU_DPRINTF_INFO:
+		return (dgettext(TEXT_DOMAIN, "info"));
+	case UU_DPRINTF_DEBUG:
+		return (dgettext(TEXT_DOMAIN, "debug"));
+	default:
+		return (dgettext(TEXT_DOMAIN, "unspecified"));
+	}
+}
+
+/*
+ * Create a debug-print handle.
+ *
+ * name may be NULL; otherwise it must pass uu_check_name() with
+ * UU_NAME_DOMAIN and is copied.  severity is the threshold later compared
+ * against by uu_dprintf(); flags are stored as given.
+ *
+ * Returns the handle, or NULL on failure: UU_ERROR_INVALID_ARGUMENT for a
+ * bad name, UU_ERROR_NO_MEMORY if the name cannot be copied, or whatever
+ * uu_zalloc() recorded if the handle itself cannot be allocated.
+ */
+uu_dprintf_t *
+uu_dprintf_create(const char *name, uu_dprintf_severity_t severity,
+    uint_t flags)
+{
+	uu_dprintf_t *D;
+
+	/* Only validate a name that was supplied; NULL means "no name". */
+	if (name != NULL && uu_check_name(name, UU_NAME_DOMAIN) == -1) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (NULL);
+	}
+
+	if ((D = uu_zalloc(sizeof (uu_dprintf_t))) == NULL)
+		return (NULL);
+
+	if (name != NULL) {
+		D->uud_name = strdup(name);
+		if (D->uud_name == NULL) {
+			uu_free(D);
+			uu_set_error(UU_ERROR_NO_MEMORY);
+			return (NULL);
+		}
+	} else {
+		D->uud_name = NULL;
+	}
+
+	D->uud_severity = severity;
+	D->uud_flags = flags;
+
+	return (D);
+}
+
+/*
+ * Print a message on stderr, prefixed with the handle's name and the
+ * severity label, unless severity is above the handle's threshold.
+ *
+ * A handle without a name prints "(null)" in the prefix instead of
+ * passing a NULL pointer to "%s".
+ */
+/*PRINTFLIKE3*/
+void
+uu_dprintf(uu_dprintf_t *D, uu_dprintf_severity_t severity,
+    const char *format, ...)
+{
+	va_list alist;
+
+	/* XXX Assert that severity is not UU_DPRINTF_SILENT. */
+
+	if (severity > D->uud_severity)
+		return;
+
+	(void) fprintf(stderr, FACILITY_FMT,
+	    (D->uud_name != NULL) ? D->uud_name : "(null)",
+	    strseverity(severity));
+
+	va_start(alist, format);
+	(void) vfprintf(stderr, format, alist);
+	va_end(alist);
+}
+
+/*
+ * Free a handle made by uu_dprintf_create(), including its copied name.
+ */
+void
+uu_dprintf_destroy(uu_dprintf_t *D)
+{
+	/* free() accepts NULL, so an unnamed handle needs no special case. */
+	free(D->uud_name);
+	uu_free(D);
+}
+
+/*
+ * Return the handle's stored name pointer (the handle keeps ownership).
+ */
+const char *
+uu_dprintf_getname(uu_dprintf_t *D)
+{
+	return (D->uud_name);
+}
diff --git a/lib/libuutil/uu_ident.c b/lib/libuutil/uu_ident.c
new file mode 100644
index 000000000..9a643845f
--- /dev/null
+++ b/lib/libuutil/uu_ident.c
@@ -0,0 +1,122 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <string.h>
+
+/*
+ * We require names of the form:
+ *	[provider,]identifier[/[provider,]identifier]...
+ *
+ * Where provider is either a stock symbol (SUNW) or a java-style reversed
+ * domain name (com.sun).
+ *
+ * Both providers and identifiers must start with a letter, and may
+ * only contain alphanumerics, dashes, and underlines.  Providers
+ * may also contain periods.
+ *
+ * Note that we do _not_ use the macros in <ctype.h>, since they are affected
+ * by the current locale settings.
+ */
+
+/* True for 'a'-'z' and 'A'-'Z' only.  Evaluates c several times. */
+#define	IS_ALPHA(c) \
+	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
+
+/* True for '0'-'9' only.  Evaluates c twice. */
+#define	IS_DIGIT(c) \
+	((c) >= '0' && (c) <= '9')
+
+/*
+ * Check one identifier occupying [s, e); e is exclusive.
+ *
+ * The identifier must be non-empty, start with a letter, and otherwise
+ * contain only letters, digits, '-' and '_', plus '.' when allowdot is
+ * set.  Scanning also stops, successfully, at a NUL byte.
+ *
+ * Returns 1 if valid, 0 if not.
+ */
+static int
+is_valid_ident(const char *s, const char *e, int allowdot)
+{
+	const char *cur;
+	char c;
+
+	if (s >= e)
+		return (0);		/* name is empty */
+
+	c = *s;
+	if (!IS_ALPHA(c))
+		return (0);		/* does not start with letter */
+
+	for (cur = s + 1; cur < e; cur++) {
+		c = *cur;
+		if (c == 0)
+			break;
+		if (IS_ALPHA(c) || IS_DIGIT(c))
+			continue;
+		if (c == '-' || c == '_')
+			continue;
+		if (allowdot && c == '.')
+			continue;
+		return (0);		/* invalid character */
+	}
+	return (1);
+}
+
+/*
+ * Check one name component occupying [b, e); e is exclusive.
+ *
+ * With UU_NAME_DOMAIN, a comma inside the component splits off a leading
+ * provider, which is checked with periods allowed; the rest (or the whole
+ * component when there is no comma) is checked as a plain identifier.
+ *
+ * Returns 1 if valid, 0 if not.
+ */
+static int
+is_valid_component(const char *b, const char *e, uint_t flags)
+{
+	char *comma;
+
+	if ((flags & UU_NAME_DOMAIN) == 0)
+		return (is_valid_ident(b, e, 0));
+
+	comma = strchr(b, ',');
+	if (comma == NULL || comma >= e)
+		return (is_valid_ident(b, e, 0));
+
+	if (!is_valid_ident(b, comma, 1))
+		return (0);
+
+	return (is_valid_ident(comma + 1, e, 0));
+}
+
+/*
+ * Validate name against the naming rules described above.
+ *
+ * flags may combine UU_NAME_DOMAIN (allow a "provider," prefix on each
+ * component) and UU_NAME_PATH (allow several components separated by
+ * '/').  Without UU_NAME_PATH the whole name is a single component.
+ *
+ * Returns 0 if the name is valid.  Returns -1 after uu_set_error() with
+ * UU_ERROR_UNKNOWN_FLAG for unrecognized flags, or with
+ * UU_ERROR_INVALID_ARGUMENT for a NULL or malformed name.
+ */
+int
+uu_check_name(const char *name, uint_t flags)
+{
+	const char *end;
+	const char *p;
+
+	if (flags & ~(UU_NAME_DOMAIN | UU_NAME_PATH)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (-1);
+	}
+
+	/* strlen(NULL) is undefined; treat a missing name as invalid. */
+	if (name == NULL)
+		goto bad;
+
+	end = name + strlen(name);
+
+	if (!(flags & UU_NAME_PATH)) {
+		if (!is_valid_component(name, end, flags))
+			goto bad;
+		return (0);
+	}
+
+	while ((p = strchr(name, '/')) != NULL) {
+		/*
+		 * The helpers take an exclusive end pointer, so the slash
+		 * itself marks the end of this component.
+		 */
+		if (!is_valid_component(name, p, flags))
+			goto bad;
+		name = p + 1;
+	}
+	if (!is_valid_component(name, end, flags))
+		goto bad;
+
+	return (0);
+
+bad:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
diff --git a/lib/libuutil/uu_list.c b/lib/libuutil/uu_list.c
new file mode 100644
index 000000000..35c7ba800
--- /dev/null
+++ b/lib/libuutil/uu_list.c
@@ -0,0 +1,718 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+/* Address of the list node embedded in element e of list lp. */
+#define	ELEM_TO_NODE(lp, e) \
+	((uu_list_node_impl_t *)((uintptr_t)(e) + (lp)->ul_offset))
+
+/* Inverse of ELEM_TO_NODE: the element that contains node n. */
+#define	NODE_TO_ELEM(lp, n) \
+	((void *)((uintptr_t)(n) - (lp)->ul_offset))
+
+/*
+ * uu_list_index_ts define a location for insertion.  They are simply a
+ * pointer to the object after the insertion point.  We store a mark
+ * in the low-bits of the index, to help prevent mistakes.
+ *
+ * When debugging, the index mark changes on every insert and delete, to
+ * catch stale references.
+ */
+/* Mask of the low bits of an index that hold the mark. */
+#define	INDEX_MAX		(sizeof (uintptr_t) - 1)
+/* Next mark after m; wraps from INDEX_MAX back to 1, so never yields 0. */
+#define	INDEX_NEXT(m)		(((m) == INDEX_MAX)? 1 : ((m) + 1) & INDEX_MAX)
+
+/* Node pointer carried by index i (NULL when the pointer part is 0). */
+#define	INDEX_TO_NODE(i)	((uu_list_node_impl_t *)((i) & ~INDEX_MAX))
+/* Build an index from node n, stamped with list p's current mark. */
+#define	NODE_TO_INDEX(p, n)	(((uintptr_t)(n) & ~INDEX_MAX) | (p)->ul_index)
+/* True if index i carries list p's current mark. */
+#define	INDEX_VALID(p, i)	(((i) & INDEX_MAX) == (p)->ul_index)
+/* True if index i carries any mark at all (stale vs. never valid). */
+#define	INDEX_CHECK(i)		(((i) & INDEX_MAX) != 0)
+
+/* Pool pointer with the low bit set; kept in uln_next of an idle node. */
+#define	POOL_TO_MARKER(pp) ((void *)((uintptr_t)(pp) | 1))
+
+/* Sentinel of the global ring of list pools; initially linked to itself. */
+static uu_list_pool_t	uu_null_lpool = { &uu_null_lpool, &uu_null_lpool };
+/* Held while the pool ring is modified. */
+static pthread_mutex_t	uu_lpool_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Create a pool from which uu_list lists can be made.
+ *
+ * name must be non-NULL and pass uu_check_name() with UU_NAME_DOMAIN, and
+ * a node placed at nodeoffset must fit within objsize bytes.  compare_func
+ * may be NULL.  UU_LIST_POOL_DEBUG is the only flag accepted.
+ *
+ * Returns the new pool, linked at the tail of the global pool ring, or
+ * NULL after uu_set_error() with UU_ERROR_INVALID_ARGUMENT,
+ * UU_ERROR_UNKNOWN_FLAG or UU_ERROR_NO_MEMORY.
+ */
+uu_list_pool_t *
+uu_list_pool_create(const char *name, size_t objsize,
+    size_t nodeoffset, uu_compare_fn_t *compare_func, uint32_t flags)
+{
+	uu_list_pool_t *pool, *tail;
+	int bad_args;
+
+	bad_args = (name == NULL ||
+	    uu_check_name(name, UU_NAME_DOMAIN) == -1 ||
+	    nodeoffset + sizeof (uu_list_node_t) > objsize);
+	if (bad_args) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (NULL);
+	}
+
+	if ((flags & ~UU_LIST_POOL_DEBUG) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	if ((pool = uu_zalloc(sizeof (*pool))) == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	(void) strlcpy(pool->ulp_name, name, sizeof (pool->ulp_name));
+	pool->ulp_cmp = compare_func;
+	pool->ulp_objsize = objsize;
+	pool->ulp_nodeoffset = nodeoffset;
+	pool->ulp_debug = (flags & UU_LIST_POOL_DEBUG) ? 1 : 0;
+	pool->ulp_last_index = 0;
+
+	(void) pthread_mutex_init(&pool->ulp_lock, NULL);
+
+	/* An empty ring of lists: the sentinel points at itself. */
+	pool->ulp_null_list.ul_prev_enc = UU_PTR_ENCODE(&pool->ulp_null_list);
+	pool->ulp_null_list.ul_next_enc = UU_PTR_ENCODE(&pool->ulp_null_list);
+
+	/* Append to the global ring, just before its sentinel. */
+	(void) pthread_mutex_lock(&uu_lpool_list_lock);
+	tail = uu_null_lpool.ulp_prev;
+	pool->ulp_next = &uu_null_lpool;
+	pool->ulp_prev = tail;
+	uu_null_lpool.ulp_prev = pool;
+	tail->ulp_next = pool;
+	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
+
+	return (pool);
+}
+
+/*
+ * Unlink pp from the global pool ring and free it.
+ *
+ * For a debug pool, panics first if the pool's ring of lists is not empty
+ * (i.e. its sentinel no longer points at itself).
+ */
+void
+uu_list_pool_destroy(uu_list_pool_t *pp)
+{
+	uu_list_t *nil = &pp->ulp_null_list;
+
+	if (pp->ulp_debug &&
+	    (nil->ul_next_enc != UU_PTR_ENCODE(nil) ||
+	    nil->ul_prev_enc != UU_PTR_ENCODE(nil))) {
+		uu_panic("uu_list_pool_destroy: Pool \"%.*s\" (%p) has "
+		    "outstanding lists, or is corrupt.\n",
+		    (int)sizeof (pp->ulp_name), pp->ulp_name,
+		    (void *)pp);
+	}
+
+	(void) pthread_mutex_lock(&uu_lpool_list_lock);
+	pp->ulp_next->ulp_prev = pp->ulp_prev;
+	pp->ulp_prev->ulp_next = pp->ulp_next;
+	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
+
+	pp->ulp_next = NULL;
+	pp->ulp_prev = NULL;
+	uu_free(pp);
+}
+
+/*
+ * Prepare node np_arg, embedded in the object at base, for use with pool
+ * pp.
+ *
+ * For a debug pool, panics if the node would extend past an object of the
+ * pool's size or is not located at the pool's node offset.  On return the
+ * node is marked idle: uln_next holds the pool marker, uln_prev is NULL.
+ */
+void
+uu_list_node_init(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
+{
+	uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg;
+
+	if (pp->ulp_debug) {
+		uintptr_t offset = (uintptr_t)np - (uintptr_t)base;
+		if (offset + sizeof (*np) > pp->ulp_objsize) {
+			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't fit in object (size %ld)\n",
+			    base, (void *)np, (void *)pp, pp->ulp_name,
+			    (long)offset, (long)pp->ulp_objsize);
+		}
+		if (offset != pp->ulp_nodeoffset) {
+			/* Report the pool's node offset, not its objsize. */
+			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
+			    "offset %ld doesn't match pool's offset (%ld)\n",
+			    base, (void *)np, (void *)pp, pp->ulp_name,
+			    (long)offset, (long)pp->ulp_nodeoffset);
+		}
+	}
+	np->uln_next = POOL_TO_MARKER(pp);
+	np->uln_prev = NULL;
+}
+
+/*
+ * Retire node np_arg of pool pp by clearing both of its links.
+ *
+ * For a debug pool, panics if both links are already NULL, or if the node
+ * is not in the idle state (pool marker in uln_next, NULL in uln_prev).
+ */
+void
+uu_list_node_fini(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
+{
+	uu_list_node_impl_t *node = (uu_list_node_impl_t *)np_arg;
+
+	if (pp->ulp_debug) {
+		if (node->uln_next == NULL && node->uln_prev == NULL) {
+			uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
+			    "node already finied\n",
+			    base, (void *)np_arg, (void *)pp, pp->ulp_name);
+		}
+		if (!(node->uln_next == POOL_TO_MARKER(pp) &&
+		    node->uln_prev == NULL)) {
+			uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
+			    "node corrupt or on list\n",
+			    base, (void *)np_arg, (void *)pp, pp->ulp_name);
+		}
+	}
+
+	node->uln_prev = NULL;
+	node->uln_next = NULL;
+}
+
+/*
+ * Create an empty list belonging to pool pp.
+ *
+ * flags may combine UU_LIST_DEBUG and UU_LIST_SORTED; a sorted list needs
+ * a pool with a comparison function.  parent is stored in encoded form.
+ * The list receives the pool's next index mark and is appended to the
+ * pool's ring of lists under ulp_lock.
+ *
+ * Returns the list, or NULL after uu_set_error() with
+ * UU_ERROR_UNKNOWN_FLAG, UU_ERROR_NOT_SUPPORTED or UU_ERROR_NO_MEMORY.
+ */
+uu_list_t *
+uu_list_create(uu_list_pool_t *pp, void *parent, uint32_t flags)
+{
+	uu_list_t *list, *head, *tail;
+
+	if ((flags & ~(UU_LIST_DEBUG | UU_LIST_SORTED)) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	if (pp->ulp_cmp == NULL && (flags & UU_LIST_SORTED)) {
+		if (pp->ulp_debug) {
+			uu_panic("uu_list_create(%p, ...): requested "
+			    "UU_LIST_SORTED, but pool has no comparison func\n",
+			    (void *)pp);
+		}
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (NULL);
+	}
+
+	if ((list = uu_zalloc(sizeof (uu_list_t))) == NULL) {
+		uu_set_error(UU_ERROR_NO_MEMORY);
+		return (NULL);
+	}
+
+	list->ul_pool = pp;
+	list->ul_parent_enc = UU_PTR_ENCODE(parent);
+	list->ul_offset = pp->ulp_nodeoffset;
+	list->ul_numnodes = 0;
+	list->ul_sorted = (flags & UU_LIST_SORTED);
+	list->ul_debug = pp->ulp_debug || (flags & UU_LIST_DEBUG);
+
+	pp->ulp_last_index = INDEX_NEXT(pp->ulp_last_index);
+	list->ul_index = pp->ulp_last_index;
+
+	/* Empty node ring and empty walker ring. */
+	list->ul_null_node.uln_prev = &list->ul_null_node;
+	list->ul_null_node.uln_next = &list->ul_null_node;
+	list->ul_null_walk.ulw_prev = &list->ul_null_walk;
+	list->ul_null_walk.ulw_next = &list->ul_null_walk;
+
+	(void) pthread_mutex_lock(&pp->ulp_lock);
+	head = &pp->ulp_null_list;
+	tail = UU_PTR_DECODE(head->ul_prev_enc);
+	list->ul_next_enc = UU_PTR_ENCODE(head);
+	list->ul_prev_enc = UU_PTR_ENCODE(tail);
+	head->ul_prev_enc = UU_PTR_ENCODE(list);
+	tail->ul_next_enc = UU_PTR_ENCODE(list);
+	(void) pthread_mutex_unlock(&pp->ulp_lock);
+
+	return (list);
+}
+
+/*
+ * Unlink list lp from its pool's ring and free it.
+ *
+ * In debug mode, panics if the list still has nodes, if its node count is
+ * non-zero, or if any walker is still linked onto it.
+ */
+void
+uu_list_destroy(uu_list_t *lp)
+{
+	uu_list_pool_t *pool = lp->ul_pool;
+
+	if (lp->ul_debug) {
+		uu_list_node_impl_t *nil_node = &lp->ul_null_node;
+		uu_list_walk_t *nil_walk = &lp->ul_null_walk;
+
+		if (nil_node->uln_next != nil_node ||
+		    nil_node->uln_prev != nil_node) {
+			uu_panic("uu_list_destroy(%p):  list not empty\n",
+			    (void *)lp);
+		}
+		if (lp->ul_numnodes != 0) {
+			uu_panic("uu_list_destroy(%p):  numnodes is nonzero, "
+			    "but list is empty\n", (void *)lp);
+		}
+		if (nil_walk->ulw_next != nil_walk ||
+		    nil_walk->ulw_prev != nil_walk) {
+			uu_panic("uu_list_destroy(%p):  outstanding walkers\n",
+			    (void *)lp);
+		}
+	}
+
+	(void) pthread_mutex_lock(&pool->ulp_lock);
+	UU_LIST_PTR(lp->ul_next_enc)->ul_prev_enc = lp->ul_prev_enc;
+	UU_LIST_PTR(lp->ul_prev_enc)->ul_next_enc = lp->ul_next_enc;
+	(void) pthread_mutex_unlock(&pool->ulp_lock);
+
+	lp->ul_next_enc = UU_PTR_ENCODE(NULL);
+	lp->ul_prev_enc = UU_PTR_ENCODE(NULL);
+	lp->ul_pool = NULL;
+	uu_free(lp);
+}
+
+/*
+ * Link node np into lp between prev and next and bump the node count.
+ *
+ * In debug mode, panics unless prev and next are adjacent and np is in
+ * the idle state for this list's pool, then advances the list's index
+ * mark to invalidate outstanding uu_list_index_ts.
+ */
+static void
+list_insert(uu_list_t *lp, uu_list_node_impl_t *np, uu_list_node_impl_t *prev,
+    uu_list_node_impl_t *next)
+{
+	if (lp->ul_debug) {
+		int adjacent = (next->uln_prev == prev &&
+		    prev->uln_next == next);
+
+		if (!adjacent) {
+			uu_panic("insert(%p): internal error: %p and %p not "
+			    "neighbors\n", (void *)lp, (void *)next,
+			    (void *)prev);
+		}
+
+		if (!(np->uln_next == POOL_TO_MARKER(lp->ul_pool) &&
+		    np->uln_prev == NULL)) {
+			uu_panic("insert(%p): elem %p node %p corrupt, "
+			    "not initialized, or already in a list.\n",
+			    (void *)lp, NODE_TO_ELEM(lp, np), (void *)np);
+		}
+
+		/* invalidate outstanding uu_list_index_ts. */
+		lp->ul_index = INDEX_NEXT(lp->ul_index);
+	}
+
+	np->uln_prev = prev;
+	np->uln_next = next;
+	prev->uln_next = np;
+	next->uln_prev = np;
+
+	lp->ul_numnodes++;
+}
+
+/*
+ * Insert elem immediately before the position named by idx.  An index that
+ * decodes to no node means "before the sentinel", i.e. at the tail.  Debug
+ * lists panic on an invalid or out-of-date index.
+ */
+void
+uu_list_insert(uu_list_t *lp, void *elem, uu_list_index_t idx)
+{
+	uu_list_node_impl_t *succ = INDEX_TO_NODE(idx);
+
+	if (succ == NULL)
+		succ = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (!INDEX_VALID(lp, idx)) {
+			const char *reason = INDEX_CHECK(idx) ?
+			    "outdated index" : "invalid index";
+
+			uu_panic("uu_list_insert(%p, %p, %p): %s\n",
+			    (void *)lp, elem, (void *)idx, reason);
+		}
+		if (succ->uln_prev == NULL)
+			uu_panic("uu_list_insert(%p, %p, %p): out-of-date "
+			    "index\n", (void *)lp, elem, (void *)idx);
+	}
+
+	list_insert(lp, ELEM_TO_NODE(lp, elem), succ->uln_prev, succ);
+}
+
+/*
+ * Linear search of lp using the pool's comparison function.
+ *
+ * Returns the first element for which the comparator yields 0, or NULL.
+ * If out is non-NULL it receives: the index of the match; on a sorted list
+ * with no match, the index of the first element comparing greater than
+ * elem; otherwise NODE_TO_INDEX(lp, 0).  When the pool has no comparator,
+ * *out is set to 0, UU_ERROR_NOT_SUPPORTED is raised and NULL is returned.
+ */
+void *
+uu_list_find(uu_list_t *lp, void *elem, void *private, uu_list_index_t *out)
+{
+	uu_compare_fn_t *compare = lp->ul_pool->ulp_cmp;
+	int is_sorted = lp->ul_sorted;
+	uu_list_node_impl_t *cur;
+
+	if (compare == NULL) {
+		if (out != NULL)
+			*out = 0;
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (NULL);
+	}
+
+	cur = lp->ul_null_node.uln_next;
+	while (cur != &lp->ul_null_node) {
+		void *candidate = NODE_TO_ELEM(lp, cur);
+		int order = compare(candidate, elem, private);
+
+		/* Stop on a hit, or once a sorted list has passed elem. */
+		if (order == 0 || (is_sorted && order > 0)) {
+			if (out != NULL)
+				*out = NODE_TO_INDEX(lp, cur);
+			return (order == 0 ? candidate : NULL);
+		}
+		cur = cur->uln_next;
+	}
+
+	if (out != NULL)
+		*out = NODE_TO_INDEX(lp, 0);
+	return (NULL);
+}
+
+/*
+ * Return the element at the position named by idx, or NULL when idx refers
+ * to the end of the list (the sentinel).  Debug lists panic on an invalid
+ * or out-of-date index.
+ */
+void *
+uu_list_nearest_next(uu_list_t *lp, uu_list_index_t idx)
+{
+	uu_list_node_impl_t *target = INDEX_TO_NODE(idx);
+
+	if (target == NULL)
+		target = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (!INDEX_VALID(lp, idx)) {
+			const char *reason = INDEX_CHECK(idx) ?
+			    "outdated index" : "invalid index";
+
+			uu_panic("uu_list_nearest_next(%p, %p): %s\n",
+			    (void *)lp, (void *)idx, reason);
+		}
+		if (target->uln_prev == NULL)
+			uu_panic("uu_list_nearest_next(%p, %p): out-of-date "
+			    "index\n", (void *)lp, (void *)idx);
+	}
+
+	if (target != &lp->ul_null_node)
+		return (NODE_TO_ELEM(lp, target));
+	return (NULL);
+}
+
+/*
+ * Return the element just before the position named by idx, or NULL when
+ * that predecessor is the sentinel.  Debug lists panic on an invalid or
+ * out-of-date index.
+ */
+void *
+uu_list_nearest_prev(uu_list_t *lp, uu_list_index_t idx)
+{
+	uu_list_node_impl_t *target = INDEX_TO_NODE(idx);
+
+	if (target == NULL)
+		target = &lp->ul_null_node;
+
+	if (lp->ul_debug) {
+		if (!INDEX_VALID(lp, idx)) {
+			const char *reason = INDEX_CHECK(idx) ?
+			    "outdated index" : "invalid index";
+
+			uu_panic("uu_list_nearest_prev(%p, %p): %s\n",
+			    (void *)lp, (void *)idx, reason);
+		}
+		if (target->uln_prev == NULL)
+			uu_panic("uu_list_nearest_prev(%p, %p): out-of-date "
+			    "index\n", (void *)lp, (void *)idx);
+	}
+
+	target = target->uln_prev;
+	if (target != &lp->ul_null_node)
+		return (NODE_TO_ELEM(lp, target));
+	return (NULL);
+}
+
+/*
+ * Initialize walker wp over lp.  UU_WALK_REVERSE starts at the tail and
+ * walks backwards; otherwise the walk starts at the head.  Robust walkers,
+ * and every walker on a debug list, are also appended to the list's chain
+ * of walkers so that uu_list_remove() can see them.
+ */
+static void
+list_walk_init(uu_list_walk_t *wp, uu_list_t *lp, uint32_t flags)
+{
+	uu_list_walk_t *head, *tail;
+	int robust = (flags & UU_WALK_ROBUST);
+	int backward = (flags & UU_WALK_REVERSE) != 0;
+
+	(void) memset(wp, 0, sizeof (*wp));
+	wp->ulw_list = lp;
+	wp->ulw_robust = robust;
+	wp->ulw_dir = backward ? -1 : 1;
+	wp->ulw_next_result = backward ?
+	    lp->ul_null_node.uln_prev : lp->ul_null_node.uln_next;
+
+	/* Plain walkers on non-debug lists are not tracked. */
+	if (!lp->ul_debug && !robust)
+		return;
+
+	/*
+	 * Register with the list so uu_list_remove() can advance us if
+	 * somebody removes ulw_next_result.
+	 */
+	head = &lp->ul_null_walk;
+	tail = head->ulw_prev;
+	wp->ulw_next = head;
+	wp->ulw_prev = tail;
+	head->ulw_prev = wp;
+	tail->ulw_next = wp;
+}
+
+/*
+ * Hand back the node the walker is currently parked on and step the walker
+ * one node further in its direction.  Returns NULL, leaving the walker
+ * untouched, once it has reached the sentinel.
+ */
+static uu_list_node_impl_t *
+list_walk_advance(uu_list_walk_t *wp, uu_list_t *lp)
+{
+	uu_list_node_impl_t *current = wp->ulw_next_result;
+
+	if (current == &lp->ul_null_node)
+		return (NULL);
+
+	if (wp->ulw_dir > 0)
+		wp->ulw_next_result = current->uln_next;
+	else
+		wp->ulw_next_result = current->uln_prev;
+
+	return (current);
+}
+
+/*
+ * Retire a walker: take it off the list's chain of walkers if it was
+ * registered there, and clear its list and position pointers.
+ */
+static void
+list_walk_fini(uu_list_walk_t *wp)
+{
+	uu_list_walk_t *succ = wp->ulw_next;
+
+	/* GLXXX debugging? */
+	if (succ != NULL) {
+		uu_list_walk_t *pred = wp->ulw_prev;
+
+		succ->ulw_prev = pred;
+		pred->ulw_next = succ;
+		wp->ulw_next = NULL;
+		wp->ulw_prev = NULL;
+	}
+	wp->ulw_next_result = NULL;
+	wp->ulw_list = NULL;
+}
+
+/*
+ * Allocate and initialize a walker over lp.  Returns NULL with
+ * UU_ERROR_UNKNOWN_FLAG if flags has bits other than UU_WALK_ROBUST and
+ * UU_WALK_REVERSE, or with UU_ERROR_NO_MEMORY if allocation fails.
+ */
+uu_list_walk_t *
+uu_list_walk_start(uu_list_t *lp, uint32_t flags)
+{
+	uu_list_walk_t *walker;
+
+	if ((flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) != 0) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (NULL);
+	}
+
+	walker = uu_zalloc(sizeof (*walker));
+	if (walker != NULL) {
+		list_walk_init(walker, lp, flags);
+		return (walker);
+	}
+
+	uu_set_error(UU_ERROR_NO_MEMORY);
+	return (NULL);
+}
+
+/*
+ * Return the next element of the walk and advance the walker, or NULL once
+ * the walk is exhausted.
+ */
+void *
+uu_list_walk_next(uu_list_walk_t *wp)
+{
+	uu_list_t *list = wp->ulw_list;
+	uu_list_node_impl_t *node = list_walk_advance(wp, list);
+
+	if (node != NULL)
+		return (NODE_TO_ELEM(list, node));
+	return (NULL);
+}
+
+/*
+ * Finish a walk begun with uu_list_walk_start(): deregister the walker via
+ * list_walk_fini() and free it.  wp must not be used afterwards.
+ */
+void
+uu_list_walk_end(uu_list_walk_t *wp)
+{
+	list_walk_fini(wp);
+	uu_free(wp);
+}
+
+/*
+ * Call func(elem, private) on each element of lp, forwards or (with
+ * UU_WALK_REVERSE) backwards, for as long as func returns UU_WALK_NEXT.
+ *
+ * Robust walks, and every walk over a debug list, go through a registered
+ * walker; all other walks follow the node links directly.
+ *
+ * Returns 0 if the last callback status was non-negative.  Returns -1 with
+ * UU_ERROR_CALLBACK_FAILED if a callback returned a negative status, or
+ * with UU_ERROR_UNKNOWN_FLAG if flags has unknown bits.
+ */
+int
+uu_list_walk(uu_list_t *lp, uu_walk_fn_t *func, void *private, uint32_t flags)
+{
+	int rc = UU_WALK_NEXT;
+
+	if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
+		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
+		return (-1);
+	}
+
+	if (lp->ul_debug || (flags & UU_WALK_ROBUST)) {
+		uu_list_walk_t walker;
+		void *item;
+
+		list_walk_init(&walker, lp, flags);
+		while (rc == UU_WALK_NEXT) {
+			item = uu_list_walk_next(&walker);
+			if (item == NULL)
+				break;
+			rc = (*func)(item, private);
+		}
+		list_walk_fini(&walker);
+	} else {
+		int backward = (flags & UU_WALK_REVERSE) != 0;
+		uu_list_node_impl_t *stop = &lp->ul_null_node;
+		uu_list_node_impl_t *cur;
+
+		cur = backward ? stop->uln_prev : stop->uln_next;
+		while (rc == UU_WALK_NEXT && cur != stop) {
+			rc = (*func)(NODE_TO_ELEM(lp, cur), private);
+			cur = backward ? cur->uln_prev : cur->uln_next;
+		}
+	}
+
+	if (rc < 0) {
+		uu_set_error(UU_ERROR_CALLBACK_FAILED);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Unlink elem from lp and mark its node as off-list (uln_next = pool
+ * marker, uln_prev = NULL).  Debug lists panic if elem is not on a list and
+ * bump ul_index so outstanding uu_list_index_ts go stale.
+ */
+void
+uu_list_remove(uu_list_t *lp, void *elem)
+{
+	uu_list_node_impl_t *victim = ELEM_TO_NODE(lp, elem);
+	uu_list_walk_t *walker;
+
+	if (lp->ul_debug) {
+		if (victim->uln_prev == NULL)
+			uu_panic("uu_list_remove(%p, %p): elem not on list\n",
+			    (void *)lp, elem);
+		/*
+		 * invalidate outstanding uu_list_index_ts.
+		 */
+		lp->ul_index = INDEX_NEXT(lp->ul_index);
+	}
+
+	/*
+	 * Step any robust walker that is parked on the victim.  A
+	 * non-robust walker can only be on this chain in debug mode, and
+	 * removing under an active one is an error.
+	 */
+	walker = lp->ul_null_walk.ulw_next;
+	while (walker != &lp->ul_null_walk) {
+		if (!walker->ulw_robust) {
+			if (walker->ulw_next_result != NULL) {
+				uu_panic("uu_list_remove(%p, %p): active "
+				    "non-robust walker\n", (void *)lp, elem);
+			}
+		} else if (victim == walker->ulw_next_result) {
+			(void) list_walk_advance(walker, lp);
+		}
+		walker = walker->ulw_next;
+	}
+
+	victim->uln_prev->uln_next = victim->uln_next;
+	victim->uln_next->uln_prev = victim->uln_prev;
+
+	lp->ul_numnodes--;
+
+	victim->uln_prev = NULL;
+	victim->uln_next = POOL_TO_MARKER(lp->ul_pool);
+}
+
+/*
+ * Remove and return the first element of lp, or NULL once the list is
+ * empty.  On a debug list, *cookie must be NULL.
+ */
+void *
+uu_list_teardown(uu_list_t *lp, void **cookie)
+{
+	void *first;
+
+	/*
+	 * XXX: disable list modification until list is empty
+	 */
+	if (lp->ul_debug && *cookie != NULL)
+		uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n",
+		    (void *)lp, (void *)cookie);
+
+	first = uu_list_first(lp);
+	if (first != NULL)
+		uu_list_remove(lp, first);
+
+	return (first);
+}
+
+/*
+ * Insert elem directly before target; a NULL target means "before the
+ * sentinel", i.e. at the tail.  Sorted lists refuse: they panic in debug
+ * mode, otherwise return -1 with UU_ERROR_NOT_SUPPORTED.  Returns 0 on
+ * success.
+ */
+int
+uu_list_insert_before(uu_list_t *lp, void *target, void *elem)
+{
+	uu_list_node_impl_t *anchor;
+
+	if (target != NULL)
+		anchor = ELEM_TO_NODE(lp, target);
+	else
+		anchor = &lp->ul_null_node;
+
+	if (lp->ul_debug && anchor->uln_prev == NULL) {
+		uu_panic("uu_list_insert_before(%p, %p, %p): %p is "
+		    "not currently on a list\n",
+		    (void *)lp, target, elem, target);
+	}
+
+	if (lp->ul_sorted) {
+		if (lp->ul_debug)
+			uu_panic("uu_list_insert_before(%p, ...): list is "
+			    "UU_LIST_SORTED\n", (void *)lp);
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (-1);
+	}
+
+	list_insert(lp, ELEM_TO_NODE(lp, elem), anchor->uln_prev, anchor);
+	return (0);
+}
+
+/*
+ * Insert elem directly after target; a NULL target means "after the
+ * sentinel", i.e. at the head.  Sorted lists refuse: they panic in debug
+ * mode, otherwise return -1 with UU_ERROR_NOT_SUPPORTED.  Returns 0 on
+ * success.
+ */
+int
+uu_list_insert_after(uu_list_t *lp, void *target, void *elem)
+{
+	uu_list_node_impl_t *anchor;
+
+	if (target != NULL)
+		anchor = ELEM_TO_NODE(lp, target);
+	else
+		anchor = &lp->ul_null_node;
+
+	if (lp->ul_debug && anchor->uln_prev == NULL) {
+		uu_panic("uu_list_insert_after(%p, %p, %p): %p is "
+		    "not currently on a list\n",
+		    (void *)lp, target, elem, target);
+	}
+
+	if (lp->ul_sorted) {
+		if (lp->ul_debug)
+			uu_panic("uu_list_insert_after(%p, ...): list is "
+			    "UU_LIST_SORTED\n", (void *)lp);
+		uu_set_error(UU_ERROR_NOT_SUPPORTED);
+		return (-1);
+	}
+
+	list_insert(lp, ELEM_TO_NODE(lp, elem), anchor, anchor->uln_next);
+	return (0);
+}
+
+/*
+ * Return the list's element count, as maintained by list_insert() and
+ * uu_list_remove().
+ */
+size_t
+uu_list_numnodes(uu_list_t *lp)
+{
+	return (lp->ul_numnodes);
+}
+
+/*
+ * Return the first element of lp, or NULL if the list is empty.
+ */
+void *
+uu_list_first(uu_list_t *lp)
+{
+	uu_list_node_impl_t *head = lp->ul_null_node.uln_next;
+
+	if (head != &lp->ul_null_node)
+		return (NODE_TO_ELEM(lp, head));
+	return (NULL);
+}
+
+/*
+ * Return the last element of lp, or NULL if the list is empty.
+ */
+void *
+uu_list_last(uu_list_t *lp)
+{
+	uu_list_node_impl_t *tail = lp->ul_null_node.uln_prev;
+
+	if (tail != &lp->ul_null_node)
+		return (NODE_TO_ELEM(lp, tail));
+	return (NULL);
+}
+
+/*
+ * Return the element following elem in lp, or NULL if elem is the last one.
+ */
+void *
+uu_list_next(uu_list_t *lp, void *elem)
+{
+	uu_list_node_impl_t *succ = ELEM_TO_NODE(lp, elem)->uln_next;
+
+	if (succ != &lp->ul_null_node)
+		return (NODE_TO_ELEM(lp, succ));
+	return (NULL);
+}
+
+/*
+ * Return the element preceding elem in lp, or NULL if elem is the first one.
+ */
+void *
+uu_list_prev(uu_list_t *lp, void *elem)
+{
+	uu_list_node_impl_t *pred = ELEM_TO_NODE(lp, elem)->uln_prev;
+
+	if (pred != &lp->ul_null_node)
+		return (NODE_TO_ELEM(lp, pred));
+	return (NULL);
+}
+
+/*
+ * Fork support, called from uu_lockup(): take the global pool-list lock and
+ * then every list pool's own lock.  The global lock is left held; it and
+ * the per-pool locks are dropped again by uu_list_release().
+ */
+void
+uu_list_lockup(void)
+{
+	uu_list_pool_t *pool;
+
+	(void) pthread_mutex_lock(&uu_lpool_list_lock);
+
+	pool = uu_null_lpool.ulp_next;
+	while (pool != &uu_null_lpool) {
+		(void) pthread_mutex_lock(&pool->ulp_lock);
+		pool = pool->ulp_next;
+	}
+}
+
+/*
+ * Undo uu_list_lockup(): drop every list pool's lock, then the global
+ * pool-list lock.
+ */
+void
+uu_list_release(void)
+{
+	uu_list_pool_t *pool = uu_null_lpool.ulp_next;
+
+	while (pool != &uu_null_lpool) {
+		(void) pthread_mutex_unlock(&pool->ulp_lock);
+		pool = pool->ulp_next;
+	}
+
+	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
+}
diff --git a/lib/libuutil/uu_misc.c b/lib/libuutil/uu_misc.c
new file mode 100644
index 000000000..74ec177c1
--- /dev/null
+++ b/lib/libuutil/uu_misc.c
@@ -0,0 +1,255 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/debug.h>
+#include <thread.h>
+#include <unistd.h>
+
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * All of the old code under !defined(PTHREAD_ONCE_KEY_NP)
+ * is here to enable the building of a native version of
+ * libuutil.so when the build machine has not yet been upgraded
+ * to a version of libc that provides pthread_key_create_once_np().
+ * It should all be deleted when solaris_nevada ships.
+ * The code is not MT-safe in a relaxed memory model.
+ */
+
+/*
+ * Thread-specific-data key under which uu_set_error() stores the error code
+ * of threads for which thr_main() returns 0.  Without PTHREAD_ONCE_KEY_NP
+ * the key is created lazily under uu_key_lock.
+ */
+#if defined(PTHREAD_ONCE_KEY_NP)
+static pthread_key_t	uu_error_key = PTHREAD_ONCE_KEY_NP;
+#else	/* PTHREAD_ONCE_KEY_NP */
+static pthread_key_t	uu_error_key = 0;
+static pthread_mutex_t	uu_key_lock = PTHREAD_MUTEX_INITIALIZER;
+#endif	/* PTHREAD_ONCE_KEY_NP */
+
+/* 0: key creation not yet attempted; 1: key usable; -1: creation failed. */
+static int		uu_error_key_setup = 0;
+
+/*
+ * State recorded by the first thread to call uu_panic(); uu_panic_lock
+ * guards that recording.  The format and args are only stored, never read
+ * back in this file.
+ */
+static pthread_mutex_t	uu_panic_lock = PTHREAD_MUTEX_INITIALIZER;
+/* LINTED static unused */
+static const char	*uu_panic_format;
+/* LINTED static unused */
+static va_list		uu_panic_args;
+static pthread_t	uu_panic_thread;
+
+/* Error code for the thread for which thr_main() returns nonzero. */
+static uint32_t		_uu_main_error;
+
+/*
+ * Record code as the calling thread's libuutil error, to be read back with
+ * uu_error().  When thr_main() returns nonzero the value goes into the
+ * file-scope _uu_main_error; otherwise it is stored as thread-specific data
+ * under uu_error_key, creating the key on first use.  If key creation fails
+ * the code is silently dropped.
+ */
+void
+uu_set_error(uint_t code)
+{
+	if (thr_main() != 0) {
+		_uu_main_error = code;
+		return;
+	}
+#if defined(PTHREAD_ONCE_KEY_NP)
+	/* Runs on every call; records whether the key is usable. */
+	if (pthread_key_create_once_np(&uu_error_key, NULL) != 0)
+		uu_error_key_setup = -1;
+	else
+		uu_error_key_setup = 1;
+#else	/* PTHREAD_ONCE_KEY_NP */
+	/*
+	 * Create the key once: unlocked test first, then re-test under
+	 * uu_key_lock so that only one thread calls pthread_key_create().
+	 */
+	if (uu_error_key_setup == 0) {
+		(void) pthread_mutex_lock(&uu_key_lock);
+		if (uu_error_key_setup == 0) {
+			if (pthread_key_create(&uu_error_key, NULL) != 0)
+				uu_error_key_setup = -1;
+			else
+				uu_error_key_setup = 1;
+		}
+		(void) pthread_mutex_unlock(&uu_key_lock);
+	}
+#endif	/* PTHREAD_ONCE_KEY_NP */
+	/* The code itself is stored in the TSD pointer slot. */
+	if (uu_error_key_setup > 0)
+		(void) pthread_setspecific(uu_error_key,
+		    (void *)(uintptr_t)code);
+}
+
+/*
+ * Return the error code most recently recorded by uu_set_error() for the
+ * calling thread.
+ *
+ * When thr_main() returns nonzero the value comes from _uu_main_error.
+ * Otherwise it is read from thread-specific data: UU_ERROR_UNKNOWN if the
+ * TSD key could not be created, UU_ERROR_NONE if this thread has not
+ * recorded an error.
+ */
+uint32_t
+uu_error(void)
+{
+	if (thr_main() != 0)
+		return (_uu_main_error);
+
+	if (uu_error_key_setup < 0)	/* can't happen? */
+		return (UU_ERROR_UNKNOWN);
+
+	/*
+	 * If no thread has been through the key setup in uu_set_error()
+	 * yet, uu_error_key may not name a key we created; reading it
+	 * could return some other component's thread-specific data.  No
+	 * error can have been recorded in that state, so report none.
+	 */
+	if (uu_error_key_setup == 0)
+		return (UU_ERROR_NONE);
+
+	/*
+	 * Because UU_ERROR_NONE == 0, a thread that never called
+	 * uu_set_error() reads back NULL here and gets UU_ERROR_NONE:
+	 */
+	return ((uint32_t)(uintptr_t)pthread_getspecific(uu_error_key));
+}
+
+/*
+ * Translate a UU_ERROR_* code into a message localized through
+ * dgettext(TEXT_DOMAIN, ...).  For an unrecognized code, errno is set to
+ * ESRCH and NULL is returned.
+ */
+const char *
+uu_strerror(uint32_t code)
+{
+	switch (code) {
+	case UU_ERROR_NONE:
+		return (dgettext(TEXT_DOMAIN, "No error"));
+
+	case UU_ERROR_INVALID_ARGUMENT:
+		return (dgettext(TEXT_DOMAIN, "Invalid argument"));
+
+	case UU_ERROR_UNKNOWN_FLAG:
+		return (dgettext(TEXT_DOMAIN, "Unknown flag passed"));
+
+	case UU_ERROR_NO_MEMORY:
+		return (dgettext(TEXT_DOMAIN, "Out of memory"));
+
+	case UU_ERROR_CALLBACK_FAILED:
+		return (dgettext(TEXT_DOMAIN, "Callback-initiated failure"));
+
+	case UU_ERROR_NOT_SUPPORTED:
+		return (dgettext(TEXT_DOMAIN, "Operation not supported"));
+
+	case UU_ERROR_EMPTY:
+		return (dgettext(TEXT_DOMAIN, "No value provided"));
+
+	case UU_ERROR_UNDERFLOW:
+		return (dgettext(TEXT_DOMAIN, "Value too small"));
+
+	case UU_ERROR_OVERFLOW:
+		return (dgettext(TEXT_DOMAIN, "Value too large"));
+
+	case UU_ERROR_INVALID_CHAR:
+		return (dgettext(TEXT_DOMAIN,
+		    "Value contains unexpected character"));
+
+	case UU_ERROR_INVALID_DIGIT:
+		return (dgettext(TEXT_DOMAIN,
+		    "Value contains digit not in base"));
+
+	case UU_ERROR_SYSTEM:
+		return (dgettext(TEXT_DOMAIN, "Underlying system error"));
+
+	case UU_ERROR_UNKNOWN:
+		return (dgettext(TEXT_DOMAIN, "Error status not known"));
+
+	default:
+		break;
+	}
+
+	errno = ESRCH;
+	return (NULL);
+}
+
+/*
+ * Print a printf-style message to stderr and never return.
+ *
+ * The first thread to arrive records itself, the format and a copy of the
+ * arguments in the uu_panic_* statics (under uu_panic_lock) and then calls
+ * abort().  Any thread that panics later still prints its message but then
+ * blocks in pause() forever.
+ */
+void
+uu_panic(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+
+	/* Only the first panicking thread records its details. */
+	(void) pthread_mutex_lock(&uu_panic_lock);
+	if (uu_panic_thread == 0) {
+		uu_panic_thread = pthread_self();
+		uu_panic_format = format;
+		va_copy(uu_panic_args, args);
+	}
+	(void) pthread_mutex_unlock(&uu_panic_lock);
+
+	(void) vfprintf(stderr, format, args);
+
+	/* No va_end(): control never leaves this function. */
+	if (uu_panic_thread == pthread_self())
+		abort();
+	else
+		for (;;)
+			(void) pause();
+}
+
+/*
+ * Assertion-failure hook: forwards the failed expression text, file name
+ * and line number to __assert().  The trailing return is marked
+ * NOTREACHED, i.e. __assert() is expected not to come back.
+ */
+int
+assfail(const char *astring, const char *file, int line)
+{
+	__assert(astring, file, line);
+	/*NOTREACHED*/
+	return (0);
+}
+
+/*
+ * pthread_atfork() prepare handler (registered in uu_init()): acquire
+ * every libuutil lock -- the panic lock, the error-key lock when it exists,
+ * then the AVL and list pool locks -- so they are all held across fork().
+ */
+static void
+uu_lockup(void)
+{
+	(void) pthread_mutex_lock(&uu_panic_lock);
+#if !defined(PTHREAD_ONCE_KEY_NP)
+	(void) pthread_mutex_lock(&uu_key_lock);
+#endif
+	uu_avl_lockup();
+	uu_list_lockup();
+}
+
+/*
+ * Release every lock taken by uu_lockup().  Registered in uu_init() as the
+ * pthread_atfork() parent handler, and also called by uu_release_child()
+ * in the child.
+ */
+static void
+uu_release(void)
+{
+	(void) pthread_mutex_unlock(&uu_panic_lock);
+#if !defined(PTHREAD_ONCE_KEY_NP)
+	(void) pthread_mutex_unlock(&uu_key_lock);
+#endif
+	uu_avl_release();
+	uu_list_release();
+}
+
+/*
+ * pthread_atfork() child handler (registered in uu_init()): clear the
+ * recorded panic format and panicking thread, then release the locks taken
+ * by uu_lockup().
+ */
+static void
+uu_release_child(void)
+{
+	uu_panic_format = NULL;
+	uu_panic_thread = 0;
+
+	uu_release();
+}
+
+/*
+ * Library initializer, named in the #pragma init below: registers the fork
+ * handlers uu_lockup() (prepare), uu_release() (parent) and
+ * uu_release_child() (child).  The pthread_atfork() result is ignored.
+ */
+#pragma init(uu_init)
+static void
+uu_init(void)
+{
+	(void) pthread_atfork(uu_lockup, uu_release, uu_release_child);
+}
diff --git a/lib/libuutil/uu_open.c b/lib/libuutil/uu_open.c
new file mode 100644
index 000000000..7256662e3
--- /dev/null
+++ b/lib/libuutil/uu_open.c
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <sys/time.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/*
+ * printf format for a "<dir>/uu<number>" temporary path, with the integer
+ * conversion chosen by data model.
+ * NOTE(review): uu_open_tmp() below hard-codes "%s/uu%lld" rather than
+ * using this macro -- confirm whether it is still needed.
+ */
+#ifdef _LP64
+#define	TMPPATHFMT	"%s/uu%ld"
+#else /* _LP64 */
+#define	TMPPATHFMT	"%s/uu%lld"
+#endif /* _LP64 */
+
+/*
+ * Create and open an anonymous temporary file in dir.
+ *
+ * The file is named "<dir>/uu<gethrtime()>", created exclusively with mode
+ * 0600, opened read/write and unlinked straight away, so only the returned
+ * descriptor refers to it.  Name collisions (EEXIST) are retried with a
+ * fresh timestamp.  uflags is unused.
+ *
+ * Returns the open descriptor, or -1 if the path buffer cannot be allocated
+ * or open() fails; in the latter case errno is the value left by open().
+ */
+/*ARGSUSED*/
+int
+uu_open_tmp(const char *dir, uint_t uflags)
+{
+	int f;
+	int saved_errno;
+	char *fname = uu_zalloc(PATH_MAX);
+
+	if (fname == NULL)
+		return (-1);
+
+	for (;;) {
+		/* Cast so the argument matches %lld whatever hrtime_t is. */
+		(void) snprintf(fname, PATH_MAX, "%s/uu%lld", dir,
+		    (long long)gethrtime());
+
+		f = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600);
+
+		if (f >= 0 || errno != EEXIST)
+			break;
+	}
+
+	if (f >= 0) {
+		(void) unlink(fname);
+		uu_free(fname);
+	} else {
+		/* Keep open()'s errno intact across the free. */
+		saved_errno = errno;
+		uu_free(fname);
+		errno = saved_errno;
+	}
+
+	return (f);
+}
diff --git a/lib/libuutil/uu_pname.c b/lib/libuutil/uu_pname.c
new file mode 100644
index 000000000..3307a26dc
--- /dev/null
+++ b/lib/libuutil/uu_pname.c
@@ -0,0 +1,207 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <libintl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <errno.h>
+#include <wchar.h>
+#include <unistd.h>
+
+/* Message prefix ("<pname>: ") and errno suffix (": <strerror>\n"). */
+static const char PNAME_FMT[] = "%s: ";
+static const char ERRNO_FMT[] = ": %s\n";
+
+/* Program name set by uu_setpname(); NULL until then. */
+static const char *pname;
+
+static void
+uu_die_internal(int status, const char *format, va_list alist) __NORETURN;
+
+/*
+ * Exit statuses exposed through uu_exit_ok(), uu_exit_fatal() and
+ * uu_exit_usage(); uu_alt_exit() switches them between profiles.
+ */
+int uu_exit_ok_value = EXIT_SUCCESS;
+int uu_exit_fatal_value = EXIT_FAILURE;
+int uu_exit_usage_value = 2;
+
+/*
+ * Return the address of the "success" exit status, so callers can read or
+ * change it.
+ */
+int *
+uu_exit_ok(void)
+{
+	return (&uu_exit_ok_value);
+}
+
+/*
+ * Return the address of the "fatal error" exit status, so callers can read
+ * or change it.
+ */
+int *
+uu_exit_fatal(void)
+{
+	return (&uu_exit_fatal_value);
+}
+
+/*
+ * Return the address of the "usage error" exit status, so callers can read
+ * or change it.
+ */
+int *
+uu_exit_usage(void)
+{
+	return (&uu_exit_usage_value);
+}
+
+/*
+ * Select a set of exit statuses.  UU_PROFILE_DEFAULT gives EXIT_SUCCESS /
+ * EXIT_FAILURE / 2; UU_PROFILE_LAUNCHER gives EXIT_SUCCESS / 124 / 125.
+ * Any other profile leaves the current values alone.
+ */
+void
+uu_alt_exit(int profile)
+{
+	if (profile == UU_PROFILE_DEFAULT) {
+		uu_exit_ok_value = EXIT_SUCCESS;
+		uu_exit_fatal_value = EXIT_FAILURE;
+		uu_exit_usage_value = 2;
+	} else if (profile == UU_PROFILE_LAUNCHER) {
+		uu_exit_ok_value = EXIT_SUCCESS;
+		uu_exit_fatal_value = 124;
+		uu_exit_usage_value = 125;
+	}
+}
+
+/*
+ * Write a warning to stderr: the "<pname>: " prefix if a program name has
+ * been set, then the formatted message.  If format contains no newline at
+ * all, ": <strerror(err)>" and a newline are appended.
+ */
+static void
+uu_warn_internal(int err, const char *format, va_list alist)
+{
+	if (pname != NULL)
+		(void) fprintf(stderr, PNAME_FMT, pname);
+
+	(void) vfprintf(stderr, format, alist);
+
+	if (strchr(format, '\n') != NULL)
+		return;
+
+	(void) fprintf(stderr, ERRNO_FMT, strerror(err));
+}
+
+/*
+ * va_list form of uu_warn(): print a warning using the current errno for
+ * the optional ": <strerror>" suffix.  alist is not va_end()ed here.
+ */
+void
+uu_vwarn(const char *format, va_list alist)
+{
+	uu_warn_internal(errno, format, alist);
+}
+
+/*
+ * Print a printf-style warning to stderr via uu_warn_internal(), using the
+ * errno value current at the time of the call.
+ */
+/*PRINTFLIKE1*/
+void
+uu_warn(const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_warn_internal(errno, format, alist);
+	va_end(alist);
+}
+
+/*
+ * Print a warning (see uu_warn_internal()) and exit with status.  In DEBUG
+ * builds a process for which issetugid() is false calls abort() instead if
+ * UU_DIE_ABORTS is set to a non-empty value in the environment.
+ */
+static void
+uu_die_internal(int status, const char *format, va_list alist)
+{
+	uu_warn_internal(errno, format, alist);
+#ifdef DEBUG
+	if (!issetugid()) {
+		const char *wants_abort = getenv("UU_DIE_ABORTS");
+
+		if (wants_abort != NULL && wants_abort[0] != '\0')
+			abort();
+	}
+#endif
+	exit(status);
+}
+
+/*
+ * va_list form of uu_die(): print the message and exit with UU_EXIT_FATAL.
+ * Does not return.
+ */
+void
+uu_vdie(const char *format, va_list alist)
+{
+	uu_die_internal(UU_EXIT_FATAL, format, alist);
+}
+
+/*
+ * Print a printf-style message and exit with UU_EXIT_FATAL.  Does not
+ * return; uu_die_internal() is declared __NORETURN, so the va_end() below
+ * is never reached.
+ */
+/*PRINTFLIKE1*/
+void
+uu_die(const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_die_internal(UU_EXIT_FATAL, format, alist);
+	va_end(alist);
+}
+
+/*
+ * va_list form of uu_xdie(): print the message and exit with the
+ * caller-supplied status.  Does not return.
+ */
+void
+uu_vxdie(int status, const char *format, va_list alist)
+{
+	uu_die_internal(status, format, alist);
+}
+
+/*
+ * Print a printf-style message and exit with the caller-supplied status.
+ * Does not return; uu_die_internal() is declared __NORETURN, so the
+ * va_end() below is never reached.
+ */
+/*PRINTFLIKE2*/
+void
+uu_xdie(int status, const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_die_internal(status, format, alist);
+	va_end(alist);
+}
+
+/*
+ * Set the program name used as the prefix of uu_warn()/uu_die() messages
+ * and return it.
+ *
+ * The name is the last path component of arg0.  Trailing '/' characters
+ * are removed by writing NULs into arg0, and the stored pointer refers
+ * into arg0 itself.  A NULL arg0 is uncommon but possible; it is handled
+ * here rather than in every caller by falling back to getexecname(), and
+ * then to "unknown_command".
+ */
+const char *
+uu_setpname(char *arg0)
+{
+	char *slash;
+
+	if (arg0 == NULL) {
+		const char *execname = getexecname();
+
+		pname = (execname != NULL) ? execname : "unknown_command";
+		return (pname);
+	}
+
+	/*
+	 * Guard against '/' at end of command invocation: chop trailing
+	 * slashes until the last slash, if any, is followed by a name.
+	 */
+	while ((slash = strrchr(arg0, '/')) != NULL && slash[1] == '\0')
+		*slash = '\0';
+
+	pname = (slash != NULL) ? slash + 1 : arg0;
+
+	return (pname);
+}
+
+/*
+ * Return the program name stored by uu_setpname(), or NULL if none has
+ * been set.
+ */
+const char *
+uu_getpname(void)
+{
+	return (pname);
+}
diff --git a/lib/libuutil/uu_strtoint.c b/lib/libuutil/uu_strtoint.c
new file mode 100644
index 000000000..8fd114836
--- /dev/null
+++ b/lib/libuutil/uu_strtoint.c
@@ -0,0 +1,300 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "libuutil_common.h"
+
+#include <limits.h>
+#include <ctype.h>
+
+/* Largest supported base: digits 0-9 plus letters a-z / A-Z. */
+#define	MAX_BASE	36
+
+#define	IS_DIGIT(x)	((x) >= '0' && (x) <= '9')
+
+/*
+ * Map a character to its digit value: '0'-'9' give 0-9, and 'a'-'z' or
+ * 'A'-'Z' give 10-35.  Every other character yields MAX_BASE, which is
+ * never below a valid base and so always fails a "< base" test.  Without
+ * the explicit 'A'-'Z' range check, the characters between '9' and 'A'
+ * (':' through '@') would be mistaken for the digits 3 through 9.
+ */
+#define	CTOI(x) (((x) >= '0' && (x) <= '9') ? (x) - '0' : \
+	    ((x) >= 'a' && (x) <= 'z') ? (x) + 10 - 'a' : \
+	    ((x) >= 'A' && (x) <= 'Z') ? (x) + 10 - 'A' : MAX_BASE)
+
+/*
+ * Parse the integer in s_arg into *out.
+ *
+ * Accepted syntax: optional leading whitespace, an optional '+' or '-',
+ * an optional "0x"/"0X" prefix when the base is (or is detected as) 16,
+ * one or more digits, and optional trailing whitespace.  A base of 0 picks
+ * 16, 8 or 10 from the prefix; otherwise base must be 2..MAX_BASE.
+ *
+ * If sign is nonzero the result must fit in an int64_t, and a negative
+ * value is stored in two's-complement form; if sign is zero any '-' is
+ * reported as underflow.
+ *
+ * Returns 0 on success.  On failure returns -1, leaves *out untouched and
+ * sets the libuutil error to INVALID_ARGUMENT, EMPTY, INVALID_CHAR,
+ * INVALID_DIGIT, UNDERFLOW or OVERFLOW.  When several problems apply, a
+ * bad character takes precedence over a bad digit, which takes precedence
+ * over a range error.
+ */
+static int
+strtoint(const char *s_arg, uint64_t *out, uint32_t base, int sign)
+{
+	const unsigned char *s = (const unsigned char *)s_arg;
+
+	uint64_t val = 0;
+	uint64_t multmax;
+
+	unsigned c, i;
+
+	int neg = 0;
+
+	int bad_digit = 0;
+	int bad_char = 0;
+	int overflow = 0;
+
+	if (s == NULL || base == 1 || base > MAX_BASE) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (-1);
+	}
+
+	while ((c = *s) != 0 && isspace(c))
+		s++;
+
+	switch (c) {
+	case '-':
+		if (!sign)
+			overflow = 1;		/* becomes underflow below */
+		neg = 1;
+		/*FALLTHRU*/
+	case '+':
+		c = *++s;
+		break;
+	default:
+		break;
+	}
+
+	if (c == '\0') {
+		uu_set_error(UU_ERROR_EMPTY);
+		return (-1);
+	}
+
+	if (base == 0) {
+		if (c != '0')
+			base = 10;
+		else if (s[1] == 'x' || s[1] == 'X')
+			base = 16;
+		else
+			base = 8;
+	}
+
+	if (base == 16 && c == '0' && (s[1] == 'x' || s[1] == 'X')) {
+		c = *(s += 2);
+
+		/*
+		 * A bare "0x" leaves s on the terminating NUL.  The digit
+		 * loop below pre-increments s, so bail out here rather
+		 * than read past the end of the string.  The NUL would
+		 * have been flagged as a bad character anyway.
+		 */
+		if (c == '\0') {
+			uu_set_error(UU_ERROR_INVALID_CHAR);
+			return (-1);
+		}
+	}
+
+	/* First digit: a bad one is noted and treated as 0. */
+	if ((val = CTOI(c)) >= base) {
+		if (IS_DIGIT(c))
+			bad_digit = 1;
+		else
+			bad_char = 1;
+		val = 0;
+	}
+
+	/* Largest value that can still be multiplied by base. */
+	multmax = (uint64_t)UINT64_MAX / (uint64_t)base;
+
+	for (c = *++s; c != '\0'; c = *++s) {
+		if ((i = CTOI(c)) >= base) {
+			if (isspace(c))
+				break;
+			if (IS_DIGIT(c))
+				bad_digit = 1;
+			else
+				bad_char = 1;
+			i = 0;
+		}
+
+		if (val > multmax)
+			overflow = 1;
+
+		val *= base;
+		if ((uint64_t)UINT64_MAX - val < (uint64_t)i)
+			overflow = 1;
+
+		val += i;
+	}
+
+	/* Only whitespace may follow the digits. */
+	while ((c = *s) != 0) {
+		if (!isspace(c))
+			bad_char = 1;
+		s++;
+	}
+
+	if (sign) {
+		if (neg) {
+			if (val > -(uint64_t)INT64_MIN)
+				overflow = 1;
+		} else {
+			if (val > INT64_MAX)
+				overflow = 1;
+		}
+	}
+
+	if (neg)
+		val = -val;
+
+	if (bad_char | bad_digit | overflow) {
+		if (bad_char)
+			uu_set_error(UU_ERROR_INVALID_CHAR);
+		else if (bad_digit)
+			uu_set_error(UU_ERROR_INVALID_DIGIT);
+		else if (overflow) {
+			if (neg)
+				uu_set_error(UU_ERROR_UNDERFLOW);
+			else
+				uu_set_error(UU_ERROR_OVERFLOW);
+		}
+		return (-1);
+	}
+
+	*out = val;
+	return (0);
+}
+
+/*
+ * Parse a signed integer from s and store it in the sz-byte object at v
+ * (sz must be 1, 2, 4 or 8).
+ *
+ * [min, max] is the accepted range and must fit the destination type;
+ * passing both as 0 selects the type's full range.  Returns 0 on success.
+ * On failure returns -1 with the libuutil error set: INVALID_ARGUMENT for
+ * a bad sz or range, UNDERFLOW/OVERFLOW for a value outside [min, max], or
+ * whatever strtoint() reported.
+ */
+int
+uu_strtoint(const char *s, void *v, size_t sz, int base,
+    int64_t min, int64_t max)
+{
+	uint64_t raw;
+	int64_t result;
+
+	if (min > max)
+		goto bad_argument;
+
+	/* The requested range must be representable in sz bytes. */
+	switch (sz) {
+	case 1:
+		if (max > INT8_MAX || min < INT8_MIN)
+			goto bad_argument;
+		break;
+	case 2:
+		if (max > INT16_MAX || min < INT16_MIN)
+			goto bad_argument;
+		break;
+	case 4:
+		if (max > INT32_MAX || min < INT32_MIN)
+			goto bad_argument;
+		break;
+	case 8:
+		if (max > INT64_MAX || min < INT64_MIN)
+			goto bad_argument;
+		break;
+	default:
+		goto bad_argument;
+	}
+
+	/* min == max == 0 means "the whole range of the type". */
+	if (min == 0 && max == 0) {
+		min = -(1ULL << (8 * sz - 1));
+		max = (1ULL << (8 * sz - 1)) - 1;
+	}
+
+	if (strtoint(s, &raw, base, 1) == -1)
+		return (-1);
+
+	result = (int64_t)raw;
+
+	if (result < min) {
+		uu_set_error(UU_ERROR_UNDERFLOW);
+		return (-1);
+	}
+	if (result > max) {
+		uu_set_error(UU_ERROR_OVERFLOW);
+		return (-1);
+	}
+
+	switch (sz) {
+	case 1:
+		*(int8_t *)v = result;
+		break;
+	case 2:
+		*(int16_t *)v = result;
+		break;
+	case 4:
+		*(int32_t *)v = result;
+		break;
+	case 8:
+		*(int64_t *)v = result;
+		break;
+	default:
+		goto bad_argument;	/* excluded by the switch above */
+	}
+	return (0);
+
+bad_argument:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
+
+/*
+ * Parse an unsigned integer from s and store it in the sz-byte object at v
+ * (sz must be 1, 2, 4 or 8).
+ *
+ * [min, max] is the accepted range, and max must fit the destination
+ * type; passing both as 0 selects the type's full range.  Returns 0 on
+ * success.  On failure returns -1 with the libuutil error set:
+ * INVALID_ARGUMENT for a bad sz or range, UNDERFLOW/OVERFLOW for a value
+ * outside [min, max], or whatever strtoint() reported.
+ */
+int
+uu_strtouint(const char *s, void *v, size_t sz, int base,
+    uint64_t min, uint64_t max)
+{
+	uint64_t result;
+
+	if (min > max)
+		goto bad_argument;
+
+	/* The requested maximum must be representable in sz bytes. */
+	switch (sz) {
+	case 1:
+		if (max > UINT8_MAX)
+			goto bad_argument;
+		break;
+	case 2:
+		if (max > UINT16_MAX)
+			goto bad_argument;
+		break;
+	case 4:
+		if (max > UINT32_MAX)
+			goto bad_argument;
+		break;
+	case 8:
+		if (max > UINT64_MAX)
+			goto bad_argument;
+		break;
+	default:
+		goto bad_argument;
+	}
+
+	/* min == max == 0 means "the whole range of the type". */
+	if (min == 0 && max == 0) {
+		/* we have to be careful, since << can overflow */
+		max = (1ULL << (8 * sz - 1)) * 2 - 1;
+	}
+
+	if (strtoint(s, &result, base, 0) == -1)
+		return (-1);
+
+	if (result < min) {
+		uu_set_error(UU_ERROR_UNDERFLOW);
+		return (-1);
+	}
+	if (result > max) {
+		uu_set_error(UU_ERROR_OVERFLOW);
+		return (-1);
+	}
+
+	switch (sz) {
+	case 1:
+		*(uint8_t *)v = result;
+		break;
+	case 2:
+		*(uint16_t *)v = result;
+		break;
+	case 4:
+		*(uint32_t *)v = result;
+		break;
+	case 8:
+		*(uint64_t *)v = result;
+		break;
+	default:
+		goto bad_argument;	/* shouldn't happen */
+	}
+	return (0);
+
+bad_argument:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h
new file mode 100644
index 000000000..c650865f3
--- /dev/null
+++ b/lib/libzfs/include/libzfs.h
@@ -0,0 +1,570 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBZFS_H
+#define	_LIBZFS_H
+
+#include <assert.h>
+#include <libnvpair.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <sys/fs/zfs.h>
+#include <sys/avl.h>
+#include <ucred.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Miscellaneous ZFS constants
+ */
+#define	ZFS_MAXNAMELEN		MAXNAMELEN
+#define	ZPOOL_MAXNAMELEN	MAXNAMELEN
+#define	ZFS_MAXPROPLEN		MAXPATHLEN
+#define	ZPOOL_MAXPROPLEN	MAXPATHLEN
+
+/*
+ * libzfs errors (numbered consecutively, starting at EZFS_NOMEM = 2000)
+ */
+enum {
+	EZFS_NOMEM = 2000,	/* out of memory */
+	EZFS_BADPROP,		/* invalid property value */
+	EZFS_PROPREADONLY,	/* cannot set readonly property */
+	EZFS_PROPTYPE,		/* property does not apply to dataset type */
+	EZFS_PROPNONINHERIT,	/* property is not inheritable */
+	EZFS_PROPSPACE,		/* bad quota or reservation */
+	EZFS_BADTYPE,		/* dataset is not of appropriate type */
+	EZFS_BUSY,		/* pool or dataset is busy */
+	EZFS_EXISTS,		/* pool or dataset already exists */
+	EZFS_NOENT,		/* no such pool or dataset */
+	EZFS_BADSTREAM,		/* bad backup stream */
+	EZFS_DSREADONLY,	/* dataset is readonly */
+	EZFS_VOLTOOBIG,		/* volume is too large for 32-bit system */
+	EZFS_VOLHASDATA,	/* volume already contains data */
+	EZFS_INVALIDNAME,	/* invalid dataset name */
+	EZFS_BADRESTORE,	/* unable to restore to destination */
+	EZFS_BADBACKUP,		/* backup failed */
+	EZFS_BADTARGET,		/* bad attach/detach/replace target */
+	EZFS_NODEVICE,		/* no such device in pool */
+	EZFS_BADDEV,		/* invalid device to add */
+	EZFS_NOREPLICAS,	/* no valid replicas */
+	EZFS_RESILVERING,	/* currently resilvering */
+	EZFS_BADVERSION,	/* unsupported version */
+	EZFS_POOLUNAVAIL,	/* pool is currently unavailable */
+	EZFS_DEVOVERFLOW,	/* too many devices in one vdev */
+	EZFS_BADPATH,		/* must be an absolute path */
+	EZFS_CROSSTARGET,	/* rename or clone across pool or dataset */
+	EZFS_ZONED,		/* used improperly in local zone */
+	EZFS_MOUNTFAILED,	/* failed to mount dataset */
+	EZFS_UMOUNTFAILED,	/* failed to unmount dataset */
+	EZFS_UNSHARENFSFAILED,	/* unshare(1M) failed */
+	EZFS_SHARENFSFAILED,	/* share(1M) failed */
+	EZFS_DEVLINKS,		/* failed to create zvol links */
+	EZFS_PERM,		/* permission denied */
+	EZFS_NOSPC,		/* out of space */
+	EZFS_IO,		/* I/O error */
+	EZFS_INTR,		/* signal received */
+	EZFS_ISSPARE,		/* device is a hot spare */
+	EZFS_INVALCONFIG,	/* invalid vdev configuration */
+	EZFS_RECURSIVE,		/* recursive dependency */
+	EZFS_NOHISTORY,		/* no history object */
+	EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
+	EZFS_SHAREISCSIFAILED,	/* iscsitgtd failed request to share */
+	EZFS_POOLPROPS,		/* couldn't retrieve pool props */
+	EZFS_POOL_NOTSUP,	/* ops not supported for this type of pool */
+	EZFS_POOL_INVALARG,	/* invalid argument for this pool operation */
+	EZFS_NAMETOOLONG,	/* dataset name is too long */
+	EZFS_OPENFAILED,	/* open of device failed */
+	EZFS_NOCAP,		/* couldn't get capacity */
+	EZFS_LABELFAILED,	/* write of label failed */
+	EZFS_ISCSISVCUNAVAIL,	/* iscsi service unavailable */
+	EZFS_BADWHO,		/* invalid permission who */
+	EZFS_BADPERM,		/* invalid permission */
+	EZFS_BADPERMSET,	/* invalid permission set name */
+	EZFS_NODELEGATION,	/* delegated administration is disabled */
+	EZFS_PERMRDONLY,	/* permissions are readonly */
+	EZFS_UNSHARESMBFAILED,	/* failed to unshare over smb */
+	EZFS_SHARESMBFAILED,	/* failed to share over smb */
+	EZFS_BADCACHE,		/* bad cache file */
+	EZFS_ISL2CACHE,		/* device is for the level 2 ARC */
+	EZFS_VDEVNOTSUP,	/* unsupported vdev type */
+	EZFS_NOTSUP,		/* ops not supported on this dataset */
+	EZFS_ACTIVE_SPARE,	/* pool has active shared spare devices */
+	EZFS_UNKNOWN
+};
+
+/*
+ * The following data structures are all part
+ * of the zfs_allow_t data structure, which is
+ * used for printing 'allow' permissions.
+ * It is a linked list of zfs_allow_t's, each of which
+ * contains AVL trees for user/group/sets/...;
+ * each entry in those trees in turn has
+ * AVL trees for the permissions it belongs to, split by
+ * whether they are local, descendent or local+descendent
+ * permissions.  The AVL trees are used primarily for
+ * sorting purposes, but also so that we can quickly find
+ * a given user and/or permission.
+ */
+typedef struct zfs_perm_node {
+	avl_node_t z_node;
+	char z_pname[MAXPATHLEN];	/* presumably the permission name */
+} zfs_perm_node_t;
+
+typedef struct zfs_allow_node {
+	avl_node_t z_node;
+	char z_key[MAXPATHLEN];		/* name, such as joe */
+	avl_tree_t z_localdescend;	/* local+descendent perms */
+	avl_tree_t z_local;		/* local permissions */
+	avl_tree_t z_descend;		/* descendent permissions */
+} zfs_allow_node_t;
+
+typedef struct zfs_allow {
+	struct zfs_allow *z_next;	/* next element of the linked list */
+	char z_setpoint[MAXPATHLEN];
+	avl_tree_t z_sets;
+	avl_tree_t z_crperms;
+	avl_tree_t z_user;
+	avl_tree_t z_group;
+	avl_tree_t z_everyone;
+} zfs_allow_t;
+
+/*
+ * Basic handle types
+ */
+typedef struct zfs_handle zfs_handle_t;
+typedef struct zpool_handle zpool_handle_t;
+typedef struct libzfs_handle libzfs_handle_t;
+
+/*
+ * Library initialization
+ */
+extern libzfs_handle_t *libzfs_init(void);
+extern void libzfs_fini(libzfs_handle_t *);
+
+extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
+extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
+
+extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+
+extern int libzfs_errno(libzfs_handle_t *);
+extern const char *libzfs_error_action(libzfs_handle_t *);
+extern const char *libzfs_error_description(libzfs_handle_t *);
+
+/*
+ * Basic handle functions
+ */
+extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
+extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
+extern void zpool_close(zpool_handle_t *);
+extern const char *zpool_get_name(zpool_handle_t *);
+extern int zpool_get_state(zpool_handle_t *);
+extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t);
+extern void zpool_free_handles(libzfs_handle_t *);
+
+/*
+ * Iterate over all active pools in the system.
+ */
+typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
+extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
+
+/*
+ * Functions to create and destroy pools
+ */
+extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
+    nvlist_t *, nvlist_t *);
+extern int zpool_destroy(zpool_handle_t *);
+extern int zpool_add(zpool_handle_t *, nvlist_t *);
+
+/*
+ * Functions to manipulate pool and vdev state
+ */
+extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
+extern int zpool_clear(zpool_handle_t *, const char *);
+
+extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
+    vdev_state_t *);
+extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
+extern int zpool_vdev_attach(zpool_handle_t *, const char *,
+    const char *, nvlist_t *, int);
+extern int zpool_vdev_detach(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+
+extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
+
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
+    boolean_t *, boolean_t *);
+extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
+
+/*
+ * Functions to manage pool properties
+ */
+extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
+extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
+    size_t proplen, zprop_source_t *);
+extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
+    zprop_source_t *);
+
+extern const char *zpool_prop_to_name(zpool_prop_t);
+extern const char *zpool_prop_values(zpool_prop_t);
+
+/*
+ * Pool health statistics.
+ */
+typedef enum {
+	/*
+	 * The following correspond to faults as defined in the (fault.fs.zfs.*)
+	 * event namespace.  Each is associated with a corresponding message ID.
+	 */
+	ZPOOL_STATUS_CORRUPT_CACHE,	/* corrupt /kernel/drv/zpool.cache */
+	ZPOOL_STATUS_MISSING_DEV_R,	/* missing device with replicas */
+	ZPOOL_STATUS_MISSING_DEV_NR,	/* missing device with no replicas */
+	ZPOOL_STATUS_CORRUPT_LABEL_R,	/* bad device label with replicas */
+	ZPOOL_STATUS_CORRUPT_LABEL_NR,	/* bad device label with no replicas */
+	ZPOOL_STATUS_BAD_GUID_SUM,	/* sum of device guids didn't match */
+	ZPOOL_STATUS_CORRUPT_POOL,	/* pool metadata is corrupted */
+	ZPOOL_STATUS_CORRUPT_DATA,	/* data errors in user (meta)data */
+	ZPOOL_STATUS_FAILING_DEV,	/* device experiencing errors */
+	ZPOOL_STATUS_VERSION_NEWER,	/* newer on-disk version */
+	ZPOOL_STATUS_HOSTID_MISMATCH,	/* last accessed by another system */
+	ZPOOL_STATUS_IO_FAILURE_WAIT,	/* failed I/O, failmode 'wait' */
+	ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
+	ZPOOL_STATUS_FAULTED_DEV_R,	/* faulted device with replicas */
+	ZPOOL_STATUS_FAULTED_DEV_NR,	/* faulted device with no replicas */
+	ZPOOL_STATUS_BAD_LOG,		/* cannot read log chain(s) */
+
+	/*
+	 * The following are not faults per se, but still an error possibly
+	 * requiring administrative attention.  There is no corresponding
+	 * message ID.
+	 */
+	ZPOOL_STATUS_VERSION_OLDER,	/* older on-disk version */
+	ZPOOL_STATUS_RESILVERING,	/* device being resilvered */
+	ZPOOL_STATUS_OFFLINE_DEV,	/* device offline */
+
+	/*
+	 * Finally, the following indicates a healthy pool.
+	 */
+	ZPOOL_STATUS_OK
+} zpool_status_t;
+
+extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
+extern zpool_status_t zpool_import_status(nvlist_t *, char **);
+
+/*
+ * Statistics and configuration functions.
+ */
+extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
+extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
+extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
+
+/*
+ * Import and export functions
+ */
+extern int zpool_export(zpool_handle_t *, boolean_t);
+extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
+    char *altroot);
+extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
+    nvlist_t *, boolean_t);
+
+/*
+ * Search for pools to import
+ */
+extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
+extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
+    char *, uint64_t);
+extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
+    char *);
+extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
+    uint64_t);
+extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);
+
+/*
+ * Miscellaneous pool functions
+ */
+struct zfs_cmd;
+
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
+extern int zpool_upgrade(zpool_handle_t *, uint64_t);
+extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
+extern void zpool_set_history_str(const char *subcommand, int argc,
+    char **argv, char *history_str);
+extern int zpool_stage_history(libzfs_handle_t *, const char *);
+extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
+    size_t len);
+extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
+extern int zpool_get_physpath(zpool_handle_t *, char *);
+/*
+ * Basic handle manipulations.  These functions do not create or destroy the
+ * underlying datasets, only the references to them.
+ */
+extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
+extern void zfs_close(zfs_handle_t *);
+extern zfs_type_t zfs_get_type(const zfs_handle_t *);
+extern const char *zfs_get_name(const zfs_handle_t *);
+extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *);
+
+/*
+ * Property management functions.  Some functions are shared with the kernel,
+ * and are found in sys/fs/zfs.h.
+ */
+
+/*
+ * zfs dataset property management
+ */
+extern const char *zfs_prop_default_string(zfs_prop_t);
+extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
+extern const char *zfs_prop_column_name(zfs_prop_t);
+extern boolean_t zfs_prop_align_right(zfs_prop_t);
+
+extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
+    nvlist_t *, uint64_t, zfs_handle_t *, const char *);
+
+extern const char *zfs_prop_to_name(zfs_prop_t);
+extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
+extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
+    zprop_source_t *, char *, size_t, boolean_t);
+extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
+    zprop_source_t *, char *, size_t);
+extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
+extern int zfs_prop_inherit(zfs_handle_t *, const char *);
+extern const char *zfs_prop_values(zfs_prop_t);
+extern int zfs_prop_is_string(zfs_prop_t prop);
+extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
+
+typedef struct zprop_list {
+	int		pl_prop;	/* presumably a property id - confirm */
+	char		*pl_user_prop;
+	struct zprop_list *pl_next;	/* next node of the list */
+	boolean_t	pl_all;
+	size_t		pl_width;
+	boolean_t	pl_fixed;
+} zprop_list_t;
+
+extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
+
+#define	ZFS_MOUNTPOINT_NONE	"none"
+#define	ZFS_MOUNTPOINT_LEGACY	"legacy"
+
+/*
+ * zpool property management
+ */
+extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **);
+extern const char *zpool_prop_default_string(zpool_prop_t);
+extern uint64_t zpool_prop_default_numeric(zpool_prop_t);
+extern const char *zpool_prop_column_name(zpool_prop_t);
+extern boolean_t zpool_prop_align_right(zpool_prop_t);
+
+/*
+ * Functions shared by zfs and zpool property management.
+ */
+extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all,
+    boolean_t ordered, zfs_type_t type);
+extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
+    zfs_type_t);
+extern void zprop_free_list(zprop_list_t *);
+
+/*
+ * Functions for printing zfs or zpool properties
+ */
+typedef struct zprop_get_cbdata {
+	int cb_sources;
+	int cb_columns[4];	/* presumably holds GET_COL_* ids - confirm */
+	int cb_colwidths[5];	/* presumably indexed by GET_COL_* (1-4) */
+	boolean_t cb_scripted;
+	boolean_t cb_literal;
+	boolean_t cb_first;
+	zprop_list_t *cb_proplist;
+	zfs_type_t cb_type;
+} zprop_get_cbdata_t;
+
+void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
+    const char *, const char *, zprop_source_t, const char *);
+
+#define	GET_COL_NAME		1	/* column ids are 1-based */
+#define	GET_COL_PROPERTY	2
+#define	GET_COL_VALUE		3
+#define	GET_COL_SOURCE		4
+
+/*
+ * Iterator functions.
+ */
+typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
+extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
+extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
+
+/*
+ * Functions to create and destroy datasets.
+ */
+extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
+    nvlist_t *);
+extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
+extern int zfs_destroy(zfs_handle_t *);
+extern int zfs_destroy_snaps(zfs_handle_t *, char *);
+extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
+extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
+extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
+extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
+extern int zfs_send(zfs_handle_t *, const char *, const char *,
+    boolean_t, boolean_t, boolean_t, boolean_t, int);
+extern int zfs_promote(zfs_handle_t *);
+
+/*
+ * Options for zfs_receive().  Each flag is an unsigned 1-bit field so that
+ * it holds exactly 0 or 1; a plain 'int' bit-field may be signed, in which
+ * case storing 1 reads back as -1.
+ */
+typedef struct recvflags {
+	/* print informational messages (ie, -v was specified) */
+	unsigned int verbose : 1;
+	/* the destination is a prefix, not the exact fs (ie, -d) */
+	unsigned int isprefix : 1;
+	/* do not actually do the recv, just check if it would work (ie, -n) */
+	unsigned int dryrun : 1;
+	/* rollback/destroy filesystems as necessary (eg, -F) */
+	unsigned int force : 1;
+	/* set "canmount=off" on all modified filesystems */
+	unsigned int canmountoff : 1;
+	/* byteswap flag is used internally; callers need not specify */
+	unsigned int byteswap : 1;
+} recvflags_t;
+
+extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t,
+    int, avl_tree_t *);
+
+/*
+ * Miscellaneous functions.
+ */
+extern const char *zfs_type_to_name(zfs_type_t);
+extern void zfs_refresh_properties(zfs_handle_t *);
+extern int zfs_name_valid(const char *, zfs_type_t);
+extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
+    zfs_type_t);
+extern int zfs_spa_version(zfs_handle_t *, int *);
+
+/*
+ * dataset permission functions.
+ */
+extern int zfs_perm_set(zfs_handle_t *, nvlist_t *);
+extern int zfs_perm_remove(zfs_handle_t *, nvlist_t *);
+extern int zfs_build_perms(zfs_handle_t *, char *, char *,
+    zfs_deleg_who_type_t, zfs_deleg_inherit_t, nvlist_t **nvlist_t);
+extern int zfs_perm_get(zfs_handle_t *, zfs_allow_t **);
+extern void zfs_free_allows(zfs_allow_t *);
+extern void zfs_deleg_permissions(void);
+
+/*
+ * Mount support functions.
+ */
+extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
+extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
+extern int zfs_mount(zfs_handle_t *, const char *, int);
+extern int zfs_unmount(zfs_handle_t *, const char *, int);
+extern int zfs_unmountall(zfs_handle_t *, int);
+
+/*
+ * Share support functions.
+ */
+extern boolean_t zfs_is_shared(zfs_handle_t *);
+extern int zfs_share(zfs_handle_t *);
+extern int zfs_unshare(zfs_handle_t *);
+
+/*
+ * Protocol-specific share support functions.
+ */
+extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **);
+extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **);
+extern int zfs_share_nfs(zfs_handle_t *);
+extern int zfs_share_smb(zfs_handle_t *);
+extern int zfs_shareall(zfs_handle_t *);
+extern int zfs_unshare_nfs(zfs_handle_t *, const char *);
+extern int zfs_unshare_smb(zfs_handle_t *, const char *);
+extern int zfs_unshareall_nfs(zfs_handle_t *);
+extern int zfs_unshareall_smb(zfs_handle_t *);
+extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
+extern int zfs_unshareall(zfs_handle_t *);
+extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
+extern int zfs_share_iscsi(zfs_handle_t *);
+extern int zfs_unshare_iscsi(zfs_handle_t *);
+extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
+extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *,
+    void *, void *, int, zfs_share_op_t);
+
+/*
+ * When dealing with nvlists, verify() is extremely useful
+ */
+#ifdef NDEBUG
+#define	verify(EX)	((void)(EX))	/* EX is still evaluated */
+#else
+#define	verify(EX)	assert(EX)	/* EX is checked by assert() */
+#endif
+
+/*
+ * Utility function to convert a number to a human-readable form.
+ */
+extern void zfs_nicenum(uint64_t, char *, size_t);
+extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
+
+/*
+ * Given a device or file, determine if it is part of a pool.
+ */
+extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
+    boolean_t *);
+
+/*
+ * ftyp special.  Read the label from a given device.
+ */
+extern int zpool_read_label(int, nvlist_t **);
+
+/*
+ * Create and remove zvol /dev links.
+ */
+extern int zpool_create_zvol_links(zpool_handle_t *);
+extern int zpool_remove_zvol_links(zpool_handle_t *);
+
+/* is this zvol valid for use as a dump device? */
+extern int zvol_check_dump_config(char *);
+
+/*
+ * Enable and disable datasets within a pool by mounting/unmounting and
+ * sharing/unsharing them.
+ */
+extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
+extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBZFS_H */
diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h
new file mode 100644
index 000000000..9f1f66d51
--- /dev/null
+++ b/lib/libzfs/include/libzfs_impl.h
@@ -0,0 +1,193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LIBFS_IMPL_H
+#define	_LIBFS_IMPL_H
+
+#include <sys/dmu.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_acl.h>
+#include <sys/spa.h>
+#include <sys/nvpair.h>
+
+#include <libuutil.h>
+#include <libzfs.h>
+#include <libshare.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef	VERIFY
+#undef	VERIFY
+#endif
+#define	VERIFY	verify
+
+struct libzfs_handle {
+	int libzfs_error;
+	int libzfs_fd;
+	FILE *libzfs_mnttab;
+	FILE *libzfs_sharetab;
+	zpool_handle_t *libzfs_pool_handles; /* presumably linked by zpool_next */
+	uu_avl_pool_t *libzfs_ns_avlpool;
+	uu_avl_t *libzfs_ns_avl;
+	uint64_t libzfs_ns_gen;
+	int libzfs_desc_active;
+	char libzfs_action[1024];
+	char libzfs_desc[1024];
+	char *libzfs_log_str;
+	int libzfs_printerr;
+	void *libzfs_sharehdl; /* libshare handle */
+	uint_t libzfs_shareflags; /* presumably ZFSSHARE_* bits - confirm */
+};
+#define	ZFSSHARE_MISS	0x01	/* Didn't find entry in cache */
+
+struct zfs_handle {
+	libzfs_handle_t *zfs_hdl;
+	zpool_handle_t *zpool_hdl;
+	char zfs_name[ZFS_MAXNAMELEN];
+	zfs_type_t zfs_type; /* type including snapshot */
+	zfs_type_t zfs_head_type; /* type excluding snapshot */
+	dmu_objset_stats_t zfs_dmustats;
+	nvlist_t *zfs_props;
+	nvlist_t *zfs_user_props;
+	boolean_t zfs_mntcheck;
+	char *zfs_mntopts;
+};
+
+/*
+ * ZFS_IS_VOLUME() tests zfs_head_type rather than zfs_type, so unlike a
+ * check of zfs_type it will also catch snapshots of volumes.
+ */
+#define	ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME)
+
+struct zpool_handle {
+	libzfs_handle_t *zpool_hdl;
+	zpool_handle_t *zpool_next;	/* next pool handle in a list */
+	char zpool_name[ZPOOL_MAXNAMELEN];
+	int zpool_state;
+	size_t zpool_config_size;
+	nvlist_t *zpool_config;
+	nvlist_t *zpool_old_config;
+	nvlist_t *zpool_props;
+	diskaddr_t zpool_start_block;
+};
+
+typedef  enum {
+	PROTO_NFS = 0,
+	PROTO_SMB = 1,
+	PROTO_END = 2	/* NOTE(review): presumably an end marker - confirm */
+} zfs_share_proto_t;
+
+/*
+ * The following values are distinct bits, so they can be used as a
+ * bitmask; any new values added must preserve that capability.
+ */
+typedef enum {
+	SHARED_NOT_SHARED = 0x0,
+	SHARED_ISCSI = 0x1,
+	SHARED_NFS = 0x2,
+	SHARED_SMB = 0x4
+} zfs_share_type_t;
+
+int zfs_error(libzfs_handle_t *, int, const char *);
+int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
+void zfs_error_aux(libzfs_handle_t *, const char *, ...);
+void *zfs_alloc(libzfs_handle_t *, size_t);
+void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t);
+char *zfs_strdup(libzfs_handle_t *, const char *);
+int no_memory(libzfs_handle_t *);
+
+int zfs_standard_error(libzfs_handle_t *, int, const char *);
+int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+int zpool_standard_error(libzfs_handle_t *, int, const char *);
+int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
+
+int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
+    size_t *);
+
+
+int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t,
+    nvlist_t *, char **, uint64_t *, const char *);
+int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp,
+    zfs_type_t type);
+
+/*
+ * Use this changelist_gather() flag to force attempting mounts
+ * on each change node regardless of whether or not it is currently
+ * mounted.
+ */
+#define	CL_GATHER_MOUNT_ALWAYS	1
+
+typedef struct prop_changelist prop_changelist_t;
+
+int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t);
+int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *);
+int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *);
+int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *);
+int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **);
+void zcmd_free_nvlists(zfs_cmd_t *);
+
+int changelist_prefix(prop_changelist_t *);
+int changelist_postfix(prop_changelist_t *);
+void changelist_rename(prop_changelist_t *, const char *, const char *);
+void changelist_remove(prop_changelist_t *, const char *);
+void changelist_free(prop_changelist_t *);
+prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int);
+int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *);
+int changelist_haszonedchild(prop_changelist_t *);
+
+void remove_mountpoint(zfs_handle_t *);
+int create_parents(libzfs_handle_t *, char *, int);
+boolean_t isa_child_of(const char *dataset, const char *parent);
+
+zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
+
+int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
+
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
+int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
+boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
+
+void namespace_clear(libzfs_handle_t *);
+
+/*
+ * libshare (sharemgr) interfaces used internally.
+ */
+
+extern int zfs_init_libshare(libzfs_handle_t *, int);
+extern void zfs_uninit_libshare(libzfs_handle_t *);
+extern int zfs_parse_options(char *, zfs_share_proto_t);
+
+extern int zfs_unshare_proto(zfs_handle_t *zhp,
+    const char *, zfs_share_proto_t *);
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBFS_IMPL_H */
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
new file mode 100644
index 000000000..b905bc6cb
--- /dev/null
+++ b/lib/libzfs/libzfs_changelist.c
@@ -0,0 +1,713 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Portions Copyright 2007 Ramprakash Jelari
+ */
+
+#include <libintl.h>
+#include <libuutil.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <zone.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+/*
+ * Structure to keep track of dataset state.  Before changing the 'sharenfs' or
+ * 'mountpoint' property, we record whether the filesystem was previously
+ * mounted/shared.  This prior state dictates whether we remount/reshare the
+ * dataset after the property has been changed.
+ *
+ * The interface consists of the following sequence of functions:
+ *
+ * 	changelist_gather()
+ * 	changelist_prefix()
+ * 	< change property >
+ * 	changelist_postfix()
+ * 	changelist_free()
+ *
+ * Other interfaces:
+ *
+ * changelist_remove() - remove a node from a gathered list
+ * changelist_rename() - renames all datasets appropriately when doing a rename
+ * changelist_unshare() - unshares all the nodes in a given changelist
+ * changelist_haszonedchild() - check if there is any child exported to
+ *				a local zone
+ */
+typedef struct prop_changenode {
+	zfs_handle_t		*cn_handle;	/* dataset; closed on free */
+	int			cn_shared;	/* was shared at gather */
+	int			cn_mounted;	/* was mounted (or forced) */
+	int			cn_zoned;	/* 'zoned' property value */
+	boolean_t		cn_needpost;	/* is postfix() needed? */
+	uu_list_node_t		cn_listnode;	/* linkage in cl_list */
+} prop_changenode_t;
+
+struct prop_changelist {
+	zfs_prop_t		cl_prop;	/* property acted upon */
+	zfs_prop_t		cl_realprop;	/* property requested */
+	zfs_prop_t		cl_shareprop;  /* used with sharenfs/sharesmb */
+	uu_list_pool_t		*cl_pool;	/* pool backing cl_list */
+	uu_list_t		*cl_list;	/* prop_changenode_t nodes */
+	boolean_t		cl_waslegacy;	/* was 'legacy' or 'none' */
+	boolean_t		cl_allchildren;	/* set for 'zoned' */
+	boolean_t		cl_alldependents; /* set for rename */
+	int			cl_mflags;	/* Mount flags */
+	int			cl_gflags;	/* Gather request flags */
+	boolean_t		cl_haszonedchild; /* any zoned node seen */
+	boolean_t		cl_sorted;	/* cl_list kept sorted */
+};
+
+/*
+ * Before the change: unmount filesystems for 'mountpoint', and unshare for
+ * 'sharesmb' (old resource names may need removing).  'sharenfs' is skipped;
+ * it can be re-shared with new options without interrupting service.  On
+ * failure, changelist_postfix() is run on the list and -1 is returned.
+ */
+int
+changelist_prefix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	int ret = 0;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB)
+		return (0);
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+
+		/* after a failure, no later node gets post-processing */
+		if (ret == -1) {
+			cn->cn_needpost = B_FALSE;
+			continue;
+		}
+
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		if (ZFS_IS_VOLUME(cn->cn_handle)) {
+			switch (clp->cl_realprop) {
+			case ZFS_PROP_NAME:
+				/*
+				 * If this was a rename, unshare the zvol, and
+				 * remove the /dev/zvol links.
+				 */
+				(void) zfs_unshare_iscsi(cn->cn_handle);
+
+				if (zvol_remove_link(cn->cn_handle->zfs_hdl,
+				    cn->cn_handle->zfs_name) != 0) {
+					ret = -1;
+					cn->cn_needpost = B_FALSE;
+					(void) zfs_share_iscsi(cn->cn_handle);
+				}
+				break;
+
+			case ZFS_PROP_VOLSIZE:
+				/*
+				 * If this was a change to the volume size, we
+				 * need to unshare and reshare the volume.
+				 */
+				(void) zfs_unshare_iscsi(cn->cn_handle);
+				break;
+			}
+		} else {
+			/*
+			 * Filesystems: act on the effective property.
+			 */
+			switch (clp->cl_prop) {
+			case ZFS_PROP_MOUNTPOINT:
+				if (zfs_unmount(cn->cn_handle, NULL,
+				    clp->cl_mflags) != 0) {
+					ret = -1;
+					cn->cn_needpost = B_FALSE;
+				}
+				break;
+			case ZFS_PROP_SHARESMB:
+				(void) zfs_unshare_smb(cn->cn_handle, NULL);
+				break;
+			}
+		}
+	}
+
+	if (ret == -1)
+		(void) changelist_postfix(clp);
+
+	return (ret);
+}
+
+/*
+ * After the change: remount and/or reshare each gathered dataset that still
+ * needs post-processing (cn_needpost), using the mounted/shared state recorded
+ * by changelist_gather().  A filesystem is remounted if it was mounted, if the
+ * mountpoint was previously 'legacy' or 'none', or if sharenfs/sharesmb is now
+ * enabled; volumes get their /dev/zvol links recreated on rename and their
+ * iSCSI share refreshed.  Every node is visited even after an error.  Returns
+ * 0 if every step succeeded, -1 otherwise.
+ */
+int
+changelist_postfix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	char shareopts[ZFS_MAXPROPLEN];
+	int errors = 0;
+	libzfs_handle_t *hdl;
+
+	/*
+	 * If we're changing the mountpoint, attempt to destroy the underlying
+	 * mountpoint.  All other datasets will have inherited from this dataset
+	 * (in which case their mountpoints exist in the filesystem in the new
+	 * location), or have explicit mountpoints set (in which case they won't
+	 * be in the changelist).
+	 */
+	if ((cn = uu_list_last(clp->cl_list)) == NULL)
+		return (0);
+
+	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT)
+		remove_mountpoint(cn->cn_handle);
+
+	/*
+	 * changelist_prefix() may have unshared entries through libshare, which
+	 * caches data; uninitialize it so a reshare below reinitializes it.
+	 * NOTE(review): cn_handle was already handed to remove_mountpoint()
+	 * above -- confirm whether this NULL check can ever be false.
+	 */
+	if (cn->cn_handle != NULL) {
+		hdl = cn->cn_handle->zfs_hdl;
+		assert(hdl != NULL);
+		zfs_uninit_libshare(hdl);
+	}
+
+	/*
+	 * We walk the datasets in reverse, because we want to mount any parent
+	 * datasets before mounting the children.  We walk all datasets even if
+	 * there are errors.
+	 */
+	for (cn = uu_list_last(clp->cl_list); cn != NULL;
+	    cn = uu_list_prev(clp->cl_list, cn)) {
+
+		boolean_t sharenfs;
+		boolean_t sharesmb;
+
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		/* Only do post-processing if it's required, and only once */
+		if (!cn->cn_needpost)
+			continue;
+		cn->cn_needpost = B_FALSE;
+
+		zfs_refresh_properties(cn->cn_handle);
+
+		if (ZFS_IS_VOLUME(cn->cn_handle)) {
+			/*
+			 * If we're doing a rename, recreate the /dev/zvol
+			 * links.
+			 */
+			if (clp->cl_realprop == ZFS_PROP_NAME &&
+			    zvol_create_link(cn->cn_handle->zfs_hdl,
+			    cn->cn_handle->zfs_name) != 0) {
+				errors++;
+			} else if (cn->cn_shared ||
+			    clp->cl_prop == ZFS_PROP_SHAREISCSI) {
+				if (zfs_prop_get(cn->cn_handle,
+				    ZFS_PROP_SHAREISCSI, shareopts,
+				    sizeof (shareopts), NULL, NULL, 0,
+				    B_FALSE) == 0 &&
+				    strcmp(shareopts, "off") == 0) {
+					errors +=
+					    zfs_unshare_iscsi(cn->cn_handle);
+				} else {
+					errors +=
+					    zfs_share_iscsi(cn->cn_handle);
+				}
+			}
+
+			continue;
+		}
+
+		/*
+		 * Remount if previously mounted or mountpoint was legacy,
+		 * or sharenfs or sharesmb property is set.
+		 */
+		sharenfs = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		sharesmb = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARESMB,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		if ((cn->cn_mounted || clp->cl_waslegacy || sharenfs ||
+		    sharesmb) && !zfs_is_mounted(cn->cn_handle, NULL) &&
+		    zfs_mount(cn->cn_handle, NULL, 0) != 0)
+			errors++;
+
+		/*
+		 * We always re-share even if the filesystem is currently
+		 * shared, so that we can adopt any new options.
+		 */
+		if (sharenfs)
+			errors += zfs_share_nfs(cn->cn_handle);
+		else if (cn->cn_shared || clp->cl_waslegacy)
+			errors += zfs_unshare_nfs(cn->cn_handle, NULL);
+		if (sharesmb)
+			errors += zfs_share_smb(cn->cn_handle);
+		else if (cn->cn_shared || clp->cl_waslegacy)
+			errors += zfs_unshare_smb(cn->cn_handle, NULL);
+	}
+
+	return (errors ? -1 : 0);
+}
+
+/*
+ * Is "dataset" the same as "parent", or a descendant or snapshot beneath it?
+ */
+boolean_t
+isa_child_of(const char *dataset, const char *parent)
+{
+	size_t len = strlen(parent);	/* size_t: strlen() is not an int */
+
+	/* "parent" must be a prefix of "dataset"... */
+	if (strncmp(dataset, parent, len) != 0)
+		return (B_FALSE);
+
+	/* ...ending at a component or snapshot boundary, or the whole name. */
+	if (dataset[len] == '@' || dataset[len] == '/' ||
+	    dataset[len] == '\0')
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * Rewrite the cached name of every gathered handle that lives under 'src' so
+ * that it lives under 'dst' instead.  Handles identify datasets by name, so
+ * after a rename the names held by child handles are stale until fixed up
+ * here.  Nodes outside the 'src' hierarchy (e.g. clones) are left alone.  A
+ * new name too long for the buffer is truncated rather than overrunning it.
+ */
+void
+changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
+{
+	prop_changenode_t *cn;
+	char newname[ZFS_MAXNAMELEN];
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+		/*
+		 * Do not rename a clone that's not in the source hierarchy.
+		 */
+		if (!isa_child_of(cn->cn_handle->zfs_name, src))
+			continue;
+
+		/* Destroy the previous mountpoint if needed. */
+		remove_mountpoint(cn->cn_handle);
+
+		/* New name = dst + (old name minus src); every copy bounded. */
+		(void) strlcpy(newname, dst, sizeof (newname));
+		(void) strlcat(newname, cn->cn_handle->zfs_name + strlen(src),
+		    sizeof (newname));
+
+		(void) strlcpy(cn->cn_handle->zfs_name, newname,
+		    sizeof (cn->cn_handle->zfs_name));
+	}
+}
+
+/*
+ * Given a gathered changelist for the 'sharenfs' or 'sharesmb' property,
+ * unshare all the datasets in the list.
+ */
+int
+changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
+{
+	prop_changenode_t *node;
+	int status = 0;
+
+	if (!(clp->cl_prop == ZFS_PROP_SHARENFS ||
+	    clp->cl_prop == ZFS_PROP_SHARESMB))
+		return (0);
+
+	node = uu_list_first(clp->cl_list);
+	while (node != NULL) {
+		if (zfs_unshare_proto(node->cn_handle, NULL, proto) != 0)
+			status = -1;	/* remember it, but keep going */
+		node = uu_list_next(clp->cl_list, node);
+	}
+	return (status);
+}
+
+/*
+ * Report whether any dataset in the changelist is exported to a local zone.
+ * The flag is recorded by change_one() during changelist_gather(), and only
+ * when running in the global zone.  The boolean_t flag is returned as an int.
+ */
+int
+changelist_haszonedchild(prop_changelist_t *clp)
+{
+	return (clp->cl_haszonedchild);
+}
+
+/*
+ * Remove the node for dataset 'name' from a gathered list, closing its handle.
+ */
+void
+changelist_remove(prop_changelist_t *clp, const char *name)
+{
+	prop_changenode_t *node = uu_list_first(clp->cl_list);
+
+	/* Find the first node whose dataset name matches exactly. */
+	while (node != NULL && strcmp(node->cn_handle->zfs_name, name) != 0)
+		node = uu_list_next(clp->cl_list, node);
+
+	if (node == NULL)
+		return;		/* not in the list: nothing to do */
+
+	uu_list_remove(clp->cl_list, node);
+	zfs_close(node->cn_handle);
+	free(node);
+}
+
+/*
+ * Release any memory associated with a changelist.
+ */
+void
+changelist_free(prop_changelist_t *clp)
+{
+	prop_changenode_t *node;
+	void *walk = NULL;
+
+	if (clp->cl_list != NULL) {
+		/* Close every node's handle, then drop the empty list. */
+		for (node = uu_list_teardown(clp->cl_list, &walk);
+		    node != NULL;
+		    node = uu_list_teardown(clp->cl_list, &walk)) {
+			zfs_close(node->cn_handle);
+			free(node);
+		}
+		uu_list_destroy(clp->cl_list);
+	}
+	if (clp->cl_pool != NULL)
+		uu_list_pool_destroy(clp->cl_pool);
+	free(clp);
+}
+
+static int
+change_one(zfs_handle_t *zhp, void *data)
+{
+	prop_changelist_t *clp = data;
+	char property[ZFS_MAXPROPLEN];
+	char where[64];
+	prop_changenode_t *cn;
+	zprop_source_t sourcetype;
+	zprop_source_t share_sourcetype;
+
+	/*
+	 * Iterator callback: add 'zhp' to the changelist in 'data' if the
+	 * change can affect it.  We only want filesystems that may inherit
+	 * from the target, so one with a locally set value is closed and its
+	 * children are not visited.  A rename or 'zoned' change takes every
+	 * dataset regardless, since all must be unmounted, and a volume being
+	 * renamed is always added.  An added node takes ownership of 'zhp'.
+	 */
+
+	if (!(ZFS_IS_VOLUME(zhp) && clp->cl_realprop == ZFS_PROP_NAME) &&
+	    zfs_prop_get(zhp, clp->cl_prop, property,
+	    sizeof (property), &sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/*
+	 * If we are "watching" sharenfs or sharesmb
+	 * then check out the companion property which is tracked
+	 * in cl_shareprop
+	 */
+	if (clp->cl_shareprop != ZPROP_INVAL &&
+	    zfs_prop_get(zhp, clp->cl_shareprop, property,
+	    sizeof (property), &share_sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (clp->cl_alldependents || clp->cl_allchildren ||
+	    sourcetype == ZPROP_SRC_DEFAULT ||
+	    sourcetype == ZPROP_SRC_INHERITED ||
+	    (clp->cl_shareprop != ZPROP_INVAL &&
+	    (share_sourcetype == ZPROP_SRC_DEFAULT ||
+	    share_sourcetype == ZPROP_SRC_INHERITED))) {
+		if ((cn = zfs_alloc(zfs_get_handle(zhp),
+		    sizeof (prop_changenode_t))) == NULL) {
+			zfs_close(zhp);
+			return (-1);
+		}
+
+		cn->cn_handle = zhp;
+		cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+		    zfs_is_mounted(zhp, NULL);
+		cn->cn_shared = zfs_is_shared(zhp);
+		cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+		cn->cn_needpost = B_TRUE;
+
+		/* Indicate if any child is exported to a local zone. */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			clp->cl_haszonedchild = B_TRUE;
+
+		uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+
+		if (clp->cl_sorted) {
+			uu_list_index_t idx;
+
+			(void) uu_list_find(clp->cl_list, cn, NULL,
+			    &idx);
+			uu_list_insert(clp->cl_list, cn, idx);
+		} else {
+			ASSERT(!clp->cl_alldependents);
+			verify(uu_list_insert_before(clp->cl_list,
+			    uu_list_first(clp->cl_list), cn) == 0);
+		}
+
+		if (!clp->cl_alldependents)
+			return (zfs_iter_children(zhp, change_one, data));
+	} else {
+		zfs_close(zhp);
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+static int
+compare_mountpoints(const void *a, const void *b, void *unused)
+{
+	const prop_changenode_t *ca = a;
+	const prop_changenode_t *cb = b;
+
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+
+	boolean_t hasmounta, hasmountb;
+
+	/*
+	 * When unsharing or unmounting filesystems, we need to do it in
+	 * mountpoint order, so that a mountpoint hierarchy that differs from
+	 * the dataset hierarchy can still be changed.  Mountpoints compare in
+	 * reverse strcmp() order (note the swapped arguments below).  A node
+	 * with no mountpoint (a volume or a snapshot) compares lower than one
+	 * with; NOTE(review): confirm that puts it at the end of the list.
+	 */
+	hasmounta = (zfs_prop_get(ca->cn_handle, ZFS_PROP_MOUNTPOINT, mounta,
+	    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	hasmountb = (zfs_prop_get(cb->cn_handle, ZFS_PROP_MOUNTPOINT, mountb,
+	    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (!hasmounta && hasmountb)
+		return (-1);
+	else if (hasmounta && !hasmountb)
+		return (1);
+	else if (!hasmounta && !hasmountb)
+		return (0);
+	else
+		return (strcmp(mountb, mounta));
+}
+
+/*
+ * Given a ZFS handle and a property, construct a complete list of datasets
+ * that need to be modified as part of this process.  Only 'mountpoint' (which
+ * name, zoned, canmount, volsize and version changes are treated as) and the
+ * share{nfs,smb,iscsi} properties gather anything; others get an empty list.
+ * Each gathered dataset records whether it was mounted and shared beforehand.
+ * Returns NULL on failure; release the result with changelist_free().
+ */
+prop_changelist_t *
+changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
+    int mnt_flags)
+{
+	prop_changelist_t *clp;
+	prop_changenode_t *cn;
+	zfs_handle_t *temp;
+	char property[ZFS_MAXPROPLEN];
+	uu_compare_fn_t *compare = NULL;
+
+	if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
+		return (NULL);
+
+	/*
+	 * For mountpoint-related tasks, we want to sort everything by
+	 * mountpoint, so that we mount and unmount them in the appropriate
+	 * order, regardless of their position in the hierarchy.
+	 */
+	if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
+	    prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS ||
+	    prop == ZFS_PROP_SHARESMB) {
+		compare = compare_mountpoints;
+		clp->cl_sorted = B_TRUE;
+	}
+
+	clp->cl_pool = uu_list_pool_create("changelist_pool",
+	    sizeof (prop_changenode_t),
+	    offsetof(prop_changenode_t, cn_listnode),
+	    compare, 0);
+	if (clp->cl_pool == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	clp->cl_list = uu_list_create(clp->cl_pool, NULL,
+	    clp->cl_sorted ? UU_LIST_SORTED : 0);
+	clp->cl_gflags = gather_flags;
+	clp->cl_mflags = mnt_flags;
+
+	if (clp->cl_list == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * If this is a rename or the 'zoned' property, we pretend we're
+	 * changing the mountpoint and flag it so we can catch all children in
+	 * change_one().
+	 *
+	 * Flag cl_alldependents to catch all children plus the dependents
+	 * (clones) that are not in the hierarchy.
+	 */
+	if (prop == ZFS_PROP_NAME) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_alldependents = B_TRUE;
+	} else if (prop == ZFS_PROP_ZONED) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_allchildren = B_TRUE;
+	} else if (prop == ZFS_PROP_CANMOUNT) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VOLSIZE) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VERSION) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else {
+		clp->cl_prop = prop;
+	}
+	clp->cl_realprop = prop;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARENFS &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB &&
+	    clp->cl_prop != ZFS_PROP_SHAREISCSI)
+		return (clp);
+
+	/*
+	 * Also watch the companion of SHARENFS/SHARESMB.  NOTE(review): else
+	 * cl_shareprop is never set; confirm it starts out as ZPROP_INVAL.
+	 */
+	if (clp->cl_prop == ZFS_PROP_SHARENFS)
+		clp->cl_shareprop = ZFS_PROP_SHARESMB;
+	else if (clp->cl_prop == ZFS_PROP_SHARESMB)
+		clp->cl_shareprop = ZFS_PROP_SHARENFS;
+
+	if (clp->cl_alldependents) {
+		if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) {
+			changelist_free(clp);
+			return (NULL);
+		}
+	} else if (zfs_iter_children(zhp, change_one, clp) != 0) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * We have to re-open ourselves because we auto-close all the handles
+	 * and can't tell the difference.
+	 */
+	if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
+	    ZFS_TYPE_DATASET)) == NULL) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * Always add ourselves to the list.  For an unsorted list we go at the
+	 * end so that we're the last to be unmounted.
+	 */
+	if ((cn = zfs_alloc(zhp->zfs_hdl,
+	    sizeof (prop_changenode_t))) == NULL) {
+		zfs_close(temp);
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	cn->cn_handle = temp;
+	cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+	    zfs_is_mounted(temp, NULL);
+	cn->cn_shared = zfs_is_shared(temp);
+	cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	cn->cn_needpost = B_TRUE;
+
+	uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+	if (clp->cl_sorted) {
+		uu_list_index_t idx;
+		(void) uu_list_find(clp->cl_list, cn, NULL, &idx);
+		uu_list_insert(clp->cl_list, cn, idx);
+	} else {
+		verify(uu_list_insert_after(clp->cl_list,
+		    uu_list_last(clp->cl_list), cn) == 0);
+	}
+
+	/*
+	 * If the mountpoint property was previously 'legacy', or 'none',
+	 * record it as the behavior of changelist_postfix() will be different.
+	 */
+	if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) &&
+	    (zfs_prop_get(zhp, prop, property, sizeof (property),
+	    NULL, NULL, 0, B_FALSE) == 0 &&
+	    (strcmp(property, "legacy") == 0 ||
+	    strcmp(property, "none") == 0))) {
+		/*
+		 * do not automatically mount ex-legacy datasets if
+		 * we specifically set canmount to noauto
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) !=
+		    ZFS_CANMOUNT_NOAUTO)
+			clp->cl_waslegacy = B_TRUE;
+	}
+
+	return (clp);
+}
diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c
new file mode 100644
index 000000000..94640d1b1
--- /dev/null
+++ b/lib/libzfs/libzfs_config.c
@@ -0,0 +1,360 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The pool configuration repository is stored in /etc/zfs/zpool.cache as a
+ * single packed nvlist.  While it would be nice to just read in this
+ * file from userland, this wouldn't work from a local zone.  So we have to have
+ * a zpool ioctl to return the complete configuration for all pools.  In the
+ * global zone, this will be identical to reading the file and unpacking it in
+ * userland.
+ */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <libuutil.h>
+
+#include "libzfs_impl.h"
+
+typedef struct config_node {
+	char		*cn_name;	/* name; the AVL sort key */
+	nvlist_t	*cn_config;	/* private copy of the config */
+	uu_avl_node_t	cn_avl;		/* linkage in libzfs_ns_avl */
+} config_node_t;
+
+/* ARGSUSED */
+static int
+config_node_compare(const void *a, const void *b, void *unused)
+{
+	const config_node_t *lhs = a;
+	const config_node_t *rhs = b;
+	int order;
+
+	/*
+	 * Order nodes by name, clamping strcmp()'s result to exactly
+	 * -1, 0 or 1.
+	 */
+	order = strcmp(lhs->cn_name, rhs->cn_name);
+	if (order == 0)
+		return (0);
+
+	return (order < 0 ? -1 : 1);
+}
+
+void
+namespace_clear(libzfs_handle_t *hdl)
+{
+	config_node_t *node;
+	void *walk = NULL;	/* teardown cursor */
+
+	if (hdl->libzfs_ns_avl != NULL) {
+		for (;;) {
+			node = uu_avl_teardown(hdl->libzfs_ns_avl, &walk);
+			if (node == NULL)
+				break;
+			nvlist_free(node->cn_config);
+			free(node->cn_name);
+			free(node);
+		}
+		uu_avl_destroy(hdl->libzfs_ns_avl);	/* now empty */
+		hdl->libzfs_ns_avl = NULL;
+	}
+	if (hdl->libzfs_ns_avlpool != NULL) {
+		uu_avl_pool_destroy(hdl->libzfs_ns_avlpool);
+		hdl->libzfs_ns_avlpool = NULL;
+	}
+}
+
+/*
+ * Loads the pool namespace, or re-loads it if changed.  Returns 0 on success.
+ */
+static int
+namespace_reload(libzfs_handle_t *hdl)
+{
+	nvlist_t *config;
+	config_node_t *cn;
+	nvpair_t *elem;
+	zfs_cmd_t zc = { 0 };
+	void *cookie;
+
+	if (hdl->libzfs_ns_gen == 0) {
+		/*
+		 * This is the first time we've accessed the configuration
+		 * cache.  Initialize the AVL tree and then fall through to the
+		 * common code.
+		 */
+		if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool",
+		    sizeof (config_node_t),
+		    offsetof(config_node_t, cn_avl),
+		    config_node_compare, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
+
+		if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool,
+		    NULL, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
+	}
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	for (;;) {
+		zc.zc_cookie = hdl->libzfs_ns_gen;
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
+			switch (errno) {
+			case EEXIST:
+				/*
+				 * The namespace hasn't changed.
+				 */
+				zcmd_free_nvlists(&zc);
+				return (0);
+
+			case ENOMEM:	/* grow the buffer and retry */
+				if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+					zcmd_free_nvlists(&zc);
+					return (-1);
+				}
+				break;
+
+			default:
+				zcmd_free_nvlists(&zc);
+				return (zfs_standard_error(hdl, errno,
+				    dgettext(TEXT_DOMAIN, "failed to read "
+				    "pool configuration")));
+			}
+		} else {
+			hdl->libzfs_ns_gen = zc.zc_cookie;
+			break;
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/*
+	 * Clear out any existing configuration information.
+	 */
+	cookie = NULL;
+	while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) {
+		nvlist_free(cn->cn_config);
+		free(cn->cn_name);
+		free(cn);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
+		nvlist_t *child;
+		uu_avl_index_t where;
+
+		if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);	/* NOTE(review): tree left partial? */
+		}
+
+		if ((cn->cn_name = zfs_strdup(hdl,
+		    nvpair_name(elem))) == NULL) {
+			free(cn);
+			nvlist_free(config);
+			return (-1);
+		}
+
+		verify(nvpair_value_nvlist(elem, &child) == 0);
+		if (nvlist_dup(child, &cn->cn_config, 0) != 0) {
+			free(cn->cn_name);
+			free(cn);
+			nvlist_free(config);
+			return (no_memory(hdl));
+		}
+		verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where)
+		    == NULL);
+
+		uu_avl_insert(hdl->libzfs_ns_avl, cn, where);
+	}
+
+	nvlist_free(config);
+	return (0);
+}
+
+/*
+ * Return the pool's cached configuration nvlist (vdevs plus their statistics).
+ * If 'oldconfig' is non-NULL it receives the previously cached configuration.
+ */
+nvlist_t *
+zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig)
+{
+	if (oldconfig != NULL)
+		*oldconfig = zhp->zpool_old_config;
+	return (zhp->zpool_config);
+}
+
+/*
+ * Refresh the cached config and vdev statistics for the pool (used by iostat
+ * to show changes and compute deltas between calls).  Returns -1 only when a
+ * buffer cannot be allocated or unpacked.  A failed ioctl returns 0 with the
+ * pool marked UNAVAIL, and '*missing' set if errno was ENOENT or EINVAL.
+ */
+int
+zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	nvlist_t *config;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	*missing = B_FALSE;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zhp->zpool_config_size == 0)
+		zhp->zpool_config_size = 1 << 16;
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0)
+		return (-1);
+
+	for (;;) {
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_STATS, &zc) == 0) {
+			/*
+			 * The real error is returned in the zc_cookie field.
+			 */
+			error = zc.zc_cookie;
+			break;
+		}
+
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else {
+			/* Test errno before the cleanup call can clobber it. */
+			if (errno == ENOENT || errno == EINVAL)
+				*missing = B_TRUE;
+			zcmd_free_nvlists(&zc);
+			zhp->zpool_state = POOL_STATE_UNAVAIL;
+			return (0);
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	zhp->zpool_config_size = zc.zc_nvlist_dst_size;
+
+	if (zhp->zpool_config != NULL) {
+		uint64_t oldtxg, newtxg;
+
+		verify(nvlist_lookup_uint64(zhp->zpool_config,
+		    ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0);
+		verify(nvlist_lookup_uint64(config,
+		    ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0);
+
+		if (zhp->zpool_old_config != NULL)
+			nvlist_free(zhp->zpool_old_config);
+
+		if (oldtxg != newtxg) {
+			nvlist_free(zhp->zpool_config);
+			zhp->zpool_old_config = NULL;
+		} else {
+			zhp->zpool_old_config = zhp->zpool_config;
+		}
+	}
+
+	zhp->zpool_config = config;
+	if (error)
+		zhp->zpool_state = POOL_STATE_UNAVAIL;
+	else
+		zhp->zpool_state = POOL_STATE_ACTIVE;
+
+	return (0);
+}
+
+/*
+ * Call 'func' on every pool in the system, stopping on a non-zero return.
+ */
+int
+zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
+{
+	config_node_t *node;
+	zpool_handle_t *pool;
+	int rc;
+
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	node = uu_avl_first(hdl->libzfs_ns_avl);
+	while (node != NULL) {
+		if (zpool_open_silent(hdl, node->cn_name, &pool) != 0)
+			return (-1);
+		/* A successful open may still yield no handle; skip it. */
+		if (pool != NULL) {
+			rc = func(pool, data);
+			if (rc != 0)
+				return (rc);
+		}
+		node = uu_avl_next(hdl->libzfs_ns_avl, node);
+	}
+
+	return (0);
+}
+
+/*
+ * Iterate over root datasets, calling the given function for each.  The zfs
+ * handle passed each time must be explicitly closed by the callback.
+ */
+int
+zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
+{
+	config_node_t *node;
+	zfs_handle_t *root;
+	int rc;
+
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	node = uu_avl_first(hdl->libzfs_ns_avl);
+	while (node != NULL) {
+		/* Entries whose dataset handle cannot be made are skipped. */
+		root = make_dataset_handle(hdl, node->cn_name);
+		if (root != NULL && (rc = func(root, data)) != 0)
+			return (rc);
+
+		node = uu_avl_next(hdl->libzfs_ns_avl, node);
+	}
+
+	return (0);
+}
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
new file mode 100644
index 000000000..a8005ffc0
--- /dev/null
+++ b/lib/libzfs/libzfs_dataset.c
@@ -0,0 +1,4248 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libdevinfo.h>
+#include <libintl.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <zone.h>
+#include <fcntl.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/avl.h>
+#include <priv.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stddef.h>
+#include <ucred.h>
+
+#include <sys/spa.h>
+#include <sys/zap.h>
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+#include "zfs_deleg.h"
+
+static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
+
+/*
+ * Map a single dataset type (not a mask of several types) to its localized,
+ * human readable name.  Returns NULL for any value that is not exactly one
+ * of filesystem, snapshot or volume.
+ */
+const char *
+zfs_type_to_name(zfs_type_t type)
+{
+	if (type == ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+
+	if (type == ZFS_TYPE_SNAPSHOT)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+
+	if (type == ZFS_TYPE_VOLUME)
+		return (dgettext(TEXT_DOMAIN, "volume"));
+
+	return (NULL);
+}
+
+/*
+ * Describe the kind of dataset a path most likely names, given the mask of
+ * types the caller was willing to accept.  This is for error reporting when
+ * the dataset could not be opened and its real type is therefore unknown.
+ *
+ * The guess is made in this order:
+ *   1. a mask naming exactly one type is reported as that type;
+ *   2. if snapshots are acceptable and the path contains '@', "snapshot";
+ *   3. otherwise the snapshot bit is disregarded: "filesystem" if
+ *      filesystems are acceptable, else "volume" (asserted to be in the
+ *      mask).
+ */
+static const char *
+path_to_str(const char *path, int types)
+{
+	/* A single requested type is always reported exactly. */
+	if (types == ZFS_TYPE_SNAPSHOT)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+	if (types == ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+	if (types == ZFS_TYPE_VOLUME)
+		return (dgettext(TEXT_DOMAIN, "volume"));
+
+	/*
+	 * Several types were requested, so look at the path.  Only a name
+	 * with a '@' delimiter can be a snapshot; without one the snapshot
+	 * bit plays no further part in the decision.
+	 */
+	if ((types & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+
+	/*
+	 * What remains is filesystem and/or volume.  There is no way to know
+	 * a priori which one the path would have been, so prefer
+	 * "filesystem" whenever it is acceptable.
+	 */
+	if (types & ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+
+	assert(types & ZFS_TYPE_VOLUME);
+	return (dgettext(TEXT_DOMAIN, "volume"));
+}
+
+/*
+ * Validate a ZFS dataset name.  This is used even before trying to open the
+ * dataset, to provide a more meaningful error message.  When 'hdl' is not
+ * NULL, a description of exactly why the name was rejected is recorded on it
+ * with zfs_error_aux(); with a NULL 'hdl' the check is silent.
+ *
+ * 'type' is a mask of acceptable dataset types.  When 'modifying' is set,
+ * names containing '%' are rejected as well.
+ *
+ * Note the return convention: 0 means the name is NOT valid and a non-zero
+ * value (-1) means it is valid, so callers use the result as a boolean.
+ */
+static int
+zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
+    boolean_t modifying)
+{
+	namecheck_err_t why;
+	char what;
+
+	/* Generic syntax check; 'why' and 'what' describe any failure. */
+	if (dataset_namecheck(path, &why, &what) != 0) {
+		if (hdl != NULL) {
+			switch (why) {
+			case NAME_ERR_TOOLONG:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is too long"));
+				break;
+
+			case NAME_ERR_LEADING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "leading slash in name"));
+				break;
+
+			case NAME_ERR_EMPTY_COMPONENT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "empty component in name"));
+				break;
+
+			case NAME_ERR_TRAILING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "trailing slash in name"));
+				break;
+
+			case NAME_ERR_INVALCHAR:
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "invalid character "
+				    "'%c' in name"), what);
+				break;
+
+			case NAME_ERR_MULTIPLE_AT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "multiple '@' delimiters in name"));
+				break;
+
+			case NAME_ERR_NOLETTER:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool doesn't begin with a letter"));
+				break;
+
+			case NAME_ERR_RESERVED:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is reserved"));
+				break;
+
+			case NAME_ERR_DISKLIKE:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "reserved disk name"));
+				break;
+			}
+		}
+
+		return (0);
+	}
+
+	/* A '@' is only acceptable when snapshots are among the types. */
+	if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot delimiter '@' in filesystem name"));
+		return (0);
+	}
+
+	/* If only a snapshot is acceptable, the '@' is mandatory. */
+	if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing '@' delimiter in snapshot name"));
+		return (0);
+	}
+
+	/* '%' is tolerated in names only when nothing is being modified. */
+	if (modifying && strchr(path, '%') != NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid character %c in name"), '%');
+		return (0);
+	}
+
+	/* Non-zero (-1) signals a valid name. */
+	return (-1);
+}
+
+/*
+ * Silently check whether 'name' is a valid name for an object of the given
+ * type.  Pool names are checked by zpool_name_valid(); every other type goes
+ * through zfs_validate_name().  No libzfs handle is supplied to either
+ * checker.  The checker's result is returned unchanged.
+ */
+int
+zfs_name_valid(const char *name, zfs_type_t type)
+{
+	int valid;
+
+	if (type == ZFS_TYPE_POOL)
+		valid = zpool_name_valid(NULL, B_FALSE, name);
+	else
+		valid = zfs_validate_name(NULL, name, type, B_FALSE);
+
+	return (valid);
+}
+
+/*
+ * Build a new nvlist holding only the user-defined properties found in the
+ * raw DSL property list 'props'.  Each user property's value nvlist is added
+ * to the result under the same name.  Returns the new list, or NULL (after
+ * reporting no_memory() on the dataset's libzfs handle) if allocation or
+ * insertion fails.
+ */
+static nvlist_t *
+process_user_props(zfs_handle_t *zhp, nvlist_t *props)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *user_nvl;
+	nvlist_t *value;
+	nvpair_t *pair;
+
+	if (nvlist_alloc(&user_nvl, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	for (pair = nvlist_next_nvpair(props, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(props, pair)) {
+		/* Native properties are not copied. */
+		if (!zfs_prop_user(nvpair_name(pair)))
+			continue;
+
+		verify(nvpair_value_nvlist(pair, &value) == 0);
+		if (nvlist_add_nvlist(user_nvl, nvpair_name(pair),
+		    value) != 0) {
+			nvlist_free(user_nvl);
+			(void) no_memory(hdl);
+			return (NULL);
+		}
+	}
+
+	return (user_nvl);
+}
+
+/*
+ * Open the named pool and, if that succeeds, push the new pool handle onto
+ * the front of the list of pool handles kept on the dataset's libzfs handle.
+ * Returns the pool handle, or NULL if the pool could not be opened.
+ */
+static zpool_handle_t *
+zpool_add_handle(zfs_handle_t *zhp, const char *pool_name)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zpool_handle_t *pool;
+
+	pool = zpool_open_canfail(hdl, pool_name);
+	if (pool == NULL)
+		return (NULL);
+
+	/* Link in front of whatever is already on the list. */
+	if (hdl->libzfs_pool_handles != NULL)
+		pool->zpool_next = hdl->libzfs_pool_handles;
+	hdl->libzfs_pool_handles = pool;
+
+	return (pool);
+}
+
+/*
+ * Search the list of pool handles kept on the dataset's libzfs handle for
+ * one whose name matches 'pool_name' over its first 'len' bytes.  Returns
+ * the matching handle, or NULL if there is none.
+ */
+static zpool_handle_t *
+zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zpool_handle_t *cur;
+
+	for (cur = hdl->libzfs_pool_handles; cur != NULL;
+	    cur = cur->zpool_next) {
+		if (strncmp(pool_name, zpool_get_name(cur), len) == 0)
+			break;
+	}
+
+	return (cur);
+}
+
+/*
+ * Return a handle to the pool that contains the given dataset.  The pool
+ * name is the part of the dataset name before the first '/' or '@'.  A
+ * handle already on the libzfs handle's list is reused; otherwise a new one
+ * is opened and added to that list.
+ */
+static zpool_handle_t *
+zpool_handle(zfs_handle_t *zhp)
+{
+	zpool_handle_t *pool;
+	char *name;
+	int size;
+
+	/* Room for the pool component plus its terminating NUL. */
+	size = strcspn(zhp->zfs_name, "/@") + 1;
+	name = zfs_alloc(zhp->zfs_hdl, size);
+	(void) strlcpy(name, zhp->zfs_name, size);
+
+	pool = zpool_find_handle(zhp, name, size);
+	if (pool == NULL)
+		pool = zpool_add_handle(zhp, name);
+
+	free(name);
+	return (pool);
+}
+
+/*
+ * Close every pool handle on the libzfs handle's list and leave the list
+ * empty.
+ */
+void
+zpool_free_handles(libzfs_handle_t *hdl)
+{
+	zpool_handle_t *cur, *following;
+
+	for (cur = hdl->libzfs_pool_handles; cur != NULL; cur = following) {
+		/* Fetch the link before the handle is closed. */
+		following = cur->zpool_next;
+		zpool_close(cur);
+	}
+
+	hdl->libzfs_pool_handles = NULL;
+}
+
+/*
+ * Fetch the objset statistics and property list for the dataset named in
+ * the handle and cache them on the handle.  The user-defined properties are
+ * additionally split out into their own list.
+ *
+ * On success the handle's previous property lists are freed and replaced,
+ * and 0 is returned.  On failure -1 is returned and the previously cached
+ * property lists are left in place.
+ */
+static int
+get_stats(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *all, *user;
+	int rc;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	/*
+	 * Retry for as long as the only complaint is ENOMEM and the
+	 * destination buffer can still be grown; anything else is fatal.
+	 */
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+		if (errno != ENOMEM ||
+		    zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
+
+	/* The command buffers are released whether or not the read works. */
+	rc = zcmd_read_dst_nvlist(hdl, &zc, &all);
+	zcmd_free_nvlists(&zc);
+	if (rc != 0)
+		return (-1);
+
+	user = process_user_props(zhp, all);
+	if (user == NULL) {
+		nvlist_free(all);
+		return (-1);
+	}
+
+	/* Swap the freshly read lists in for the cached ones. */
+	nvlist_free(zhp->zfs_props);
+	nvlist_free(zhp->zfs_user_props);
+	zhp->zfs_props = all;
+	zhp->zfs_user_props = user;
+
+	return (0);
+}
+
+/*
+ * Refresh the properties currently stored in the handle by re-reading them
+ * with get_stats().  The result is deliberately ignored: if the refresh
+ * fails, the handle keeps its previously cached property lists.
+ */
+void
+zfs_refresh_properties(zfs_handle_t *zhp)
+{
+	(void) get_stats(zhp);
+}
+
+/*
+ * Makes a handle from the given dataset name.  Used by zfs_open() and
+ * zfs_iter_* to create child handles on the fly.
+ *
+ * Returns the new handle, or NULL if memory could not be allocated, the
+ * statistics could not be read, or the dataset was found in an inconsistent
+ * state and was successfully destroyed (errno is then set to ENOENT).  When
+ * the statistics cannot be read, errno is left as the failing call set it so
+ * that callers such as zfs_open() can report it.
+ *
+ * The libzfs handle's history log string is cleared while this function
+ * works and restored on every exit path.
+ */
+zfs_handle_t *
+make_dataset_handle(libzfs_handle_t *hdl, const char *path)
+{
+	zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t));
+	char *logstr;
+	int err;
+
+	if (zhp == NULL)
+		return (NULL);
+
+	zhp->zfs_hdl = hdl;
+
+	/*
+	 * Preserve history log string.
+	 * any changes performed here will be
+	 * logged as an internal event.
+	 */
+	logstr = hdl->libzfs_log_str;
+	hdl->libzfs_log_str = NULL;
+top:
+	(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
+
+	if (get_stats(zhp) != 0) {
+		/*
+		 * This can be the second pass (after a rollback), in which
+		 * case the handle still owns the property lists read on the
+		 * first pass; release them rather than leaking them.  The
+		 * lists are NULL on the first pass.  errno is preserved
+		 * across the cleanup because the caller reports it.
+		 */
+		err = errno;
+		hdl->libzfs_log_str = logstr;
+		nvlist_free(zhp->zfs_props);
+		nvlist_free(zhp->zfs_user_props);
+		free(zhp);
+		errno = err;
+		return (NULL);
+	}
+
+	if (zhp->zfs_dmustats.dds_inconsistent) {
+		zfs_cmd_t zc = { 0 };
+
+		/*
+		 * If it is dds_inconsistent, then we've caught it in
+		 * the middle of a 'zfs receive' or 'zfs destroy', and
+		 * it is inconsistent from the ZPL's point of view, so
+		 * can't be mounted.  However, it could also be that we
+		 * have crashed in the middle of one of those
+		 * operations, in which case we need to get rid of the
+		 * inconsistent state.  We do that by either rolling
+		 * back to the previous snapshot (which will fail if
+		 * there is none), or destroying the filesystem.  Note
+		 * that if we are still in the middle of an active
+		 * 'receive' or 'destroy', then the rollback and destroy
+		 * will fail with EBUSY and we will drive on as usual.
+		 */
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
+			(void) zvol_remove_link(hdl, zhp->zfs_name);
+			zc.zc_objset_type = DMU_OST_ZVOL;
+		} else {
+			zc.zc_objset_type = DMU_OST_ZFS;
+		}
+
+		/*
+		 * If we can successfully destroy it, pretend that it
+		 * never existed.  get_stats() succeeded above, so the
+		 * handle owns property lists that must be released too.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
+			hdl->libzfs_log_str = logstr;
+			nvlist_free(zhp->zfs_props);
+			nvlist_free(zhp->zfs_user_props);
+			free(zhp);
+			errno = ENOENT;
+			return (NULL);
+		}
+		/* If we can successfully roll it back, reget the stats */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
+			goto top;
+	}
+
+	/*
+	 * We've managed to open the dataset and gather statistics.  Determine
+	 * the high-level type.
+	 */
+	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();
+
+	if (zhp->zfs_dmustats.dds_is_snapshot)
+		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();	/* we should never see any other types */
+
+	hdl->libzfs_log_str = logstr;
+	zhp->zpool_hdl = zpool_handle(zhp);
+	return (zhp);
+}
+
+/*
+ * Open the snapshot, filesystem or volume named by 'path'.  'types' is a
+ * mask of the dataset types the caller will accept.  On any failure (bad
+ * name, dataset cannot be opened, or dataset is of a type not in the mask)
+ * an error prefixed with "cannot open '<path>'" is reported on the libzfs
+ * handle and NULL is returned.
+ */
+zfs_handle_t *
+zfs_open(libzfs_handle_t *hdl, const char *path, int types)
+{
+	char errbuf[1024];
+	zfs_handle_t *zhp;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
+
+	/* Reject malformed names without ever touching the dataset. */
+	if (zfs_validate_name(hdl, path, ZFS_TYPE_DATASET, B_FALSE) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid dataset name"));
+		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+		return (NULL);
+	}
+
+	/*
+	 * Making the handle gathers the dataset's stats, which is how we
+	 * find out whether it exists.  errno is cleared first so that the
+	 * value reported on failure comes from this attempt.
+	 */
+	errno = 0;
+	zhp = make_dataset_handle(hdl, path);
+	if (zhp == NULL) {
+		(void) zfs_standard_error(hdl, errno, errbuf);
+		return (NULL);
+	}
+
+	/* The dataset exists; make sure it is of a type the caller wants. */
+	if ((types & zhp->zfs_type) == 0) {
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+		zfs_close(zhp);
+		return (NULL);
+	}
+
+	return (zhp);
+}
+
+/*
+ * Release a ZFS handle: free the cached mount options, both cached property
+ * lists, and finally the handle itself.
+ */
+void
+zfs_close(zfs_handle_t *zhp)
+{
+	/* free() ignores a NULL pointer, so no guard is needed. */
+	free(zhp->zfs_mntopts);
+
+	nvlist_free(zhp->zfs_props);
+	nvlist_free(zhp->zfs_user_props);
+
+	free(zhp);
+}
+
+/*
+ * Store the version property of the pool containing the dataset in
+ * '*spa_version'.  Returns 0 on success, or -1 (leaving '*spa_version'
+ * untouched) if the dataset handle has no pool handle.
+ */
+int
+zfs_spa_version(zfs_handle_t *zhp, int *spa_version)
+{
+	zpool_handle_t *pool = zhp->zpool_hdl;
+
+	if (pool == NULL)
+		return (-1);
+
+	*spa_version = zpool_get_prop_int(pool, ZPOOL_PROP_VERSION, NULL);
+
+	return (0);
+}
+
+/*
+ * Pick the reservation property appropriate for the dataset's pool: pools at
+ * SPA_VERSION_REFRESERVATION or later use refreservation, older pools use
+ * reservation.  Returns 0 with '*resv_prop' set, or -1 if the pool version
+ * cannot be determined.
+ */
+static int
+zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop)
+{
+	int version;
+
+	if (zfs_spa_version(zhp, &version) < 0)
+		return (-1);
+
+	*resv_prop = (version >= SPA_VERSION_REFRESERVATION) ?
+	    ZFS_PROP_REFRESERVATION : ZFS_PROP_RESERVATION;
+
+	return (0);
+}
+
+/*
+ * Given an nvlist of properties to set, validates that they are correct, and
+ * parses any numeric properties (index, boolean, etc) if they are specified as
+ * strings.
+ *
+ * hdl:    libzfs handle on which errors are reported.
+ * type:   type of the dataset the properties are intended for.
+ * nvl:    the properties to validate; it is only read.
+ * zoned:  non-zero if the dataset's 'zoned' property is on.
+ * zhp:    handle of the existing dataset being changed, or NULL when there
+ *         is none; several checks apply only when it is non-NULL.
+ * errbuf: message passed to zfs_error() with every reported error.
+ *
+ * Returns a newly allocated nvlist holding the validated properties, or NULL
+ * after an error has been reported on 'hdl'.
+ */
+nvlist_t *
+zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
+    uint64_t zoned, zfs_handle_t *zhp, const char *errbuf)
+{
+	nvpair_t *elem;
+	uint64_t intval;
+	char *strval;
+	zfs_prop_t prop;
+	nvlist_t *ret;
+	/* -1 means the property was not present in 'nvl' */
+	int chosen_normal = -1;
+	int chosen_utf = -1;
+
+	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
+		const char *propname = nvpair_name(elem);
+
+		/*
+		 * Make sure this property is valid and applies to this type.
+		 */
+		if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
+			if (!zfs_prop_user(propname)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid property '%s'"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			/*
+			 * If this is a user property, make sure it's a
+			 * string, and that it's less than ZAP_MAXNAMELEN.
+			 */
+			if (nvpair_type(elem) != DATA_TYPE_STRING) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be a string"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property name '%s' is too long"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			/* User properties are copied through unchanged. */
+			(void) nvpair_value_string(elem, &strval);
+			if (nvlist_add_string(ret, propname, strval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+			continue;
+		}
+
+		/* Only user properties (handled above) reach snapshots. */
+		if (type == ZFS_TYPE_SNAPSHOT) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "this property can not be modified for snapshots"));
+			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
+			goto error;
+		}
+
+		if (!zfs_prop_valid_for_type(prop, type)) {
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "'%s' does not "
+			    "apply to datasets of this type"), propname);
+			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
+			goto error;
+		}
+
+		/*
+		 * A readonly property is accepted only if it is a set-once
+		 * property and there is no existing dataset handle.
+		 */
+		if (zfs_prop_readonly(prop) &&
+		    (!zfs_prop_setonce(prop) || zhp != NULL)) {
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "'%s' is readonly"),
+			    propname);
+			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
+			goto error;
+		}
+
+		if (zprop_parse_value(hdl, elem, prop, type, ret,
+		    &strval, &intval, errbuf) != 0)
+			goto error;
+
+		/*
+		 * Perform some additional checks for specific properties.
+		 */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+		{
+			int version;
+
+			if (zhp == NULL)
+				break;
+			version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+			if (intval < version) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "Can not downgrade; already at version %u"),
+				    version);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+		}
+
+		case ZFS_PROP_RECORDSIZE:
+		case ZFS_PROP_VOLBLOCKSIZE:
+			/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
+			if (intval < SPA_MINBLOCKSIZE ||
+			    intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be power of 2 from %u "
+				    "to %uk"), propname,
+				    (uint_t)SPA_MINBLOCKSIZE,
+				    (uint_t)SPA_MAXBLOCKSIZE >> 10);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZFS_PROP_SHAREISCSI:
+			if (strcmp(strval, "off") != 0 &&
+			    strcmp(strval, "on") != 0 &&
+			    strcmp(strval, "type=disk") != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be 'on', 'off', or 'type=disk'"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			break;
+
+		case ZFS_PROP_MOUNTPOINT:
+		{
+			namecheck_err_t why;
+
+			/* 'none' and 'legacy' skip all further checks */
+			if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 ||
+			    strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0)
+				break;
+
+			if (mountpoint_namecheck(strval, &why)) {
+				switch (why) {
+				case NAME_ERR_LEADING_SLASH:
+					zfs_error_aux(hdl,
+					    dgettext(TEXT_DOMAIN,
+					    "'%s' must be an absolute path, "
+					    "'none', or 'legacy'"), propname);
+					break;
+				case NAME_ERR_TOOLONG:
+					zfs_error_aux(hdl,
+					    dgettext(TEXT_DOMAIN,
+					    "component of '%s' is too long"),
+					    propname);
+					break;
+				}
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+		}
+
+			/*FALLTHRU*/
+
+		case ZFS_PROP_SHARESMB:
+		case ZFS_PROP_SHARENFS:
+			/*
+			 * For the mountpoint and sharenfs or sharesmb
+			 * properties, check if it can be set in a
+			 * global/non-global zone based on
+			 * the zoned property value:
+			 *
+			 *		global zone	    non-global zone
+			 * --------------------------------------------------
+			 * zoned=on	mountpoint (no)	    mountpoint (yes)
+			 *		sharenfs (no)	    sharenfs (no)
+			 *		sharesmb (no)	    sharesmb (no)
+			 *
+			 * zoned=off	mountpoint (yes)	N/A
+			 *		sharenfs (yes)
+			 *		sharesmb (yes)
+			 */
+			if (zoned) {
+				if (getzoneid() == GLOBAL_ZONEID) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set on "
+					    "dataset in a non-global zone"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_ZONED,
+					    errbuf);
+					goto error;
+				} else if (prop == ZFS_PROP_SHARENFS ||
+				    prop == ZFS_PROP_SHARESMB) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set in "
+					    "a non-global zone"), propname);
+					(void) zfs_error(hdl, EZFS_ZONED,
+					    errbuf);
+					goto error;
+				}
+			} else if (getzoneid() != GLOBAL_ZONEID) {
+				/*
+				 * If zoned property is 'off', this must be in
+				 * a global zone. If not, something is wrong.
+				 */
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' cannot be set while dataset "
+				    "'zoned' property is set"), propname);
+				(void) zfs_error(hdl, EZFS_ZONED, errbuf);
+				goto error;
+			}
+
+			/*
+			 * At this point, it is legitimate to set the
+			 * property. Now we want to make sure that the
+			 * property value is valid if it is sharenfs or
+			 * sharesmb.
+			 */
+			if ((prop == ZFS_PROP_SHARENFS ||
+			    prop == ZFS_PROP_SHARESMB) &&
+			    strcmp(strval, "on") != 0 &&
+			    strcmp(strval, "off") != 0) {
+				zfs_share_proto_t proto;
+
+				if (prop == ZFS_PROP_SHARESMB)
+					proto = PROTO_SMB;
+				else
+					proto = PROTO_NFS;
+
+				/*
+				 * Must be a valid sharing protocol
+				 * option string so init the libshare
+				 * in order to enable the parser and
+				 * then parse the options. We use the
+				 * control API since we don't care about
+				 * the current configuration and don't
+				 * want the overhead of loading it
+				 * until we actually do something.
+				 */
+
+				if (zfs_init_libshare(hdl,
+				    SA_INIT_CONTROL_API) != SA_OK) {
+					/*
+					 * An error occurred so we can't do
+					 * anything
+					 */
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set: problem "
+					    "in share initialization"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+
+				if (zfs_parse_options(strval, proto) != SA_OK) {
+					/*
+					 * There was an error in parsing so
+					 * deal with it by issuing an error
+					 * message and leaving after
+					 * uninitializing the libshare
+					 * interface.
+					 */
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be set to invalid "
+					    "options"), propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					zfs_uninit_libshare(hdl);
+					goto error;
+				}
+				zfs_uninit_libshare(hdl);
+			}
+
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			/* remembered for the cross-check after the loop */
+			chosen_utf = (int)intval;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			/* remembered for the cross-check after the loop */
+			chosen_normal = (int)intval;
+			break;
+		}
+
+		/*
+		 * For changes to existing volumes, we have some additional
+		 * checks to enforce.
+		 */
+		if (type == ZFS_TYPE_VOLUME && zhp != NULL) {
+			uint64_t volsize = zfs_prop_get_int(zhp,
+			    ZFS_PROP_VOLSIZE);
+			uint64_t blocksize = zfs_prop_get_int(zhp,
+			    ZFS_PROP_VOLBLOCKSIZE);
+			char buf[64];
+
+			switch (prop) {
+			case ZFS_PROP_RESERVATION:
+			case ZFS_PROP_REFRESERVATION:
+				if (intval > volsize) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' is greater than current "
+					    "volume size"), propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+				break;
+
+			case ZFS_PROP_VOLSIZE:
+				if (intval % blocksize != 0) {
+					zfs_nicenum(blocksize, buf,
+					    sizeof (buf));
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' must be a multiple of "
+					    "volume block size (%s)"),
+					    propname, buf);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+
+				if (intval == 0) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "'%s' cannot be zero"),
+					    propname);
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If normalization was chosen, but no UTF8 choice was made,
+	 * enforce rejection of non-UTF8 names.
+	 *
+	 * If normalization was chosen, but rejecting non-UTF8 names
+	 * was explicitly not chosen, it is an error.
+	 */
+	if (chosen_normal > 0 && chosen_utf < 0) {
+		if (nvlist_add_uint64(ret,
+		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) {
+			(void) no_memory(hdl);
+			goto error;
+		}
+	} else if (chosen_normal > 0 && chosen_utf == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' must be set 'on' if normalization chosen"),
+		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
+		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+		goto error;
+	}
+
+	/*
+	 * If this is an existing volume, and someone is setting the volsize,
+	 * make sure that it matches the reservation, or add it if necessary.
+	 */
+	if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
+	    nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+	    &intval) == 0) {
+		uint64_t old_volsize = zfs_prop_get_int(zhp,
+		    ZFS_PROP_VOLSIZE);
+		uint64_t old_reservation;
+		uint64_t new_reservation;
+		zfs_prop_t resv_prop;
+
+		if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
+			goto error;
+		old_reservation = zfs_prop_get_int(zhp, resv_prop);
+
+		/*
+		 * Only when the reservation currently equals the volume size
+		 * and the caller is not setting a reservation explicitly is
+		 * the reservation moved along with the new volsize.
+		 */
+		if (old_volsize == old_reservation &&
+		    nvlist_lookup_uint64(ret, zfs_prop_to_name(resv_prop),
+		    &new_reservation) != 0) {
+			if (nvlist_add_uint64(ret,
+			    zfs_prop_to_name(resv_prop), intval) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+		}
+	}
+	return (ret);
+
+error:
+	/* every failure path discards the partially built result */
+	nvlist_free(ret);
+	return (NULL);
+}
+
+/*
+ * Resolve the 'who' of a delegation entry to a numeric id.
+ *
+ * who:      user name, group name, numeric id, or "everyone".  It is not
+ *           consulted when '*who_type' is everyone, create or a named set.
+ * who_type: in/out.  When it is ZFS_DELEG_WHO_UNKNOWN on entry it is set to
+ *           the type that 'who' turned out to be; a user or group type that
+ *           does not match a name of that kind is likewise corrected.
+ * ret_who:  receives the uid or gid, or -1 for everyone, create and named
+ *           sets.
+ *
+ * Returns 0 on success or EZFS_BADWHO if 'who' is NULL (where one is
+ * required) or is neither a known user, a known group, nor a decimal number.
+ */
+static int
+zfs_get_perm_who(const char *who, zfs_deleg_who_type_t *who_type,
+    uint64_t *ret_who)
+{
+	struct passwd *pwd;
+	struct group *grp;
+	uid_t id;
+
+	if (*who_type == ZFS_DELEG_EVERYONE || *who_type == ZFS_DELEG_CREATE ||
+	    *who_type == ZFS_DELEG_NAMED_SET) {
+		*ret_who = -1;
+		return (0);
+	}
+
+	/* Every remaining type needs an actual name or number. */
+	if (who == NULL)
+		return (EZFS_BADWHO);
+
+	if (*who_type == ZFS_DELEG_WHO_UNKNOWN &&
+	    strcmp(who, "everyone") == 0) {
+		*ret_who = -1;
+		*who_type = ZFS_DELEG_EVERYONE;
+		return (0);
+	}
+
+	pwd = getpwnam(who);
+	grp = getgrnam(who);
+
+	if ((*who_type == ZFS_DELEG_USER) && pwd) {
+		*ret_who = pwd->pw_uid;
+	} else if ((*who_type == ZFS_DELEG_GROUP) && grp) {
+		*ret_who = grp->gr_gid;
+	} else if (pwd) {
+		*ret_who = pwd->pw_uid;
+		*who_type = ZFS_DELEG_USER;
+	} else if (grp) {
+		*ret_who = grp->gr_gid;
+		*who_type = ZFS_DELEG_GROUP;
+	} else {
+		char *end;
+
+		/*
+		 * Not a known name, so it has to be a decimal id.  strtol()
+		 * only sets errno on failure, and the failed name lookups
+		 * above may have left it non-zero, so it must be cleared
+		 * first or a perfectly good number would be rejected.  An
+		 * input with no digits at all (end == who, e.g. an empty
+		 * string) is not a number either.
+		 */
+		errno = 0;
+		id = strtol(who, &end, 10);
+		if (errno != 0 || end == who || *end != '\0')
+			return (EZFS_BADWHO);
+
+		*ret_who = id;
+		if (*who_type == ZFS_DELEG_WHO_UNKNOWN)
+			*who_type = ZFS_DELEG_USER;
+	}
+
+	return (0);
+}
+
+/*
+ * Add an entry called 'name' to 'who_nvp'.  With a permission list the entry
+ * holds that list; without one it is a bare boolean entry.  Either addition
+ * is verified to succeed.
+ */
+static void
+zfs_perms_add_to_nvlist(nvlist_t *who_nvp, char *name, nvlist_t *perms_nvp)
+{
+	if (perms_nvp == NULL) {
+		verify(nvlist_add_boolean(who_nvp, name) == 0);
+		return;
+	}
+
+	verify(nvlist_add_nvlist(who_nvp, name, perms_nvp) == 0);
+}
+
+/*
+ * Add the entries for one 'who' and one inheritance flavour to 'who_nvp'.
+ * One entry carries the plain permissions and another, keyed with the upper
+ * case form of the who type, carries the permission set names.  An entry is
+ * added only for a list that has something in it, except that when both
+ * lists are empty both entries are added.
+ *
+ * The key is built from the set name for named sets and from the numeric id
+ * for everything else.
+ */
+static void
+helper(zfs_deleg_who_type_t who_type, uint64_t whoid, char *whostr,
+    zfs_deleg_inherit_t inherit, nvlist_t *who_nvp, nvlist_t *perms_nvp,
+    nvlist_t *sets_nvp)
+{
+	char key[ZFS_MAX_DELEG_NAME];
+	void *who_data;
+	boolean_t want_perms, want_sets;
+
+	who_data = (who_type == ZFS_DELEG_NAMED_SET) ?
+	    whostr : (void *)&whoid;
+
+	want_perms = (nvlist_next_nvpair(perms_nvp, NULL) != NULL);
+	want_sets = (nvlist_next_nvpair(sets_nvp, NULL) != NULL);
+
+	/* Nothing in either list: emit both entries anyway. */
+	if (!(want_perms || want_sets)) {
+		want_perms = B_TRUE;
+		want_sets = B_TRUE;
+	}
+
+	if (want_perms) {
+		zfs_deleg_whokey(key, who_type, inherit, who_data);
+		zfs_perms_add_to_nvlist(who_nvp, key, perms_nvp);
+	}
+
+	if (want_sets) {
+		zfs_deleg_whokey(key, toupper(who_type), inherit, who_data);
+		zfs_perms_add_to_nvlist(who_nvp, key, sets_nvp);
+	}
+}
+
+/*
+ * Add the entries for one 'who' to 'who_nvp'.  Named sets and create-time
+ * permissions get a single group of entries with no inheritance flag; all
+ * other types get one group per inheritance flavour (local, descendent)
+ * selected in 'inherit'.
+ */
+static void
+zfs_perms_add_who_nvlist(nvlist_t *who_nvp, uint64_t whoid, void *whostr,
+    nvlist_t *perms_nvp, nvlist_t *sets_nvp,
+    zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit)
+{
+	if (who_type == ZFS_DELEG_NAMED_SET || who_type == ZFS_DELEG_CREATE) {
+		helper(who_type, whoid, whostr, 0,
+		    who_nvp, perms_nvp, sets_nvp);
+		return;
+	}
+
+	if (inherit & ZFS_DELEG_PERM_LOCAL)
+		helper(who_type, whoid, whostr, ZFS_DELEG_LOCAL,
+		    who_nvp, perms_nvp, sets_nvp);
+
+	if (inherit & ZFS_DELEG_PERM_DESCENDENT)
+		helper(who_type, whoid, whostr, ZFS_DELEG_DESCENDENT,
+		    who_nvp, perms_nvp, sets_nvp);
+}
+
+/*
+ * Release the nvlists zfs_build_perms() works with; any of them may be NULL.
+ */
+static void
+zfs_build_perms_free(nvlist_t *who_nvp, nvlist_t *perms_nvp,
+    nvlist_t *sets_nvp)
+{
+	if (who_nvp != NULL)
+		nvlist_free(who_nvp);
+	if (perms_nvp != NULL)
+		nvlist_free(perms_nvp);
+	if (sets_nvp != NULL)
+		nvlist_free(sets_nvp);
+}
+
+/*
+ * Construct nvlist to pass down to kernel for setting/removing permissions.
+ *
+ * The nvlist is constructed as a series of nvpairs with an optional embedded
+ * nvlist of permissions to remove or set.  The topmost nvpairs are the actual
+ * base attribute names stored in the dsl.
+ * Arguments:
+ *
+ * whostr:   is a comma separated list of users, groups, or a single set name.
+ *           whostr may be null for everyone or create perms.  It is
+ *           tokenized in place with strtok() and is therefore modified.
+ * who_type: is the type of entry in whostr.  Typically this will be
+ *           ZFS_DELEG_WHO_UNKNOWN.
+ * perms:    comma separated list of permissions.  May be null if user
+ *           is requested to remove permissions by who.  It is tokenized in
+ *           place with strtok() and is therefore modified.
+ * inherit:  Specifies the inheritance of the permissions.  Will be either
+ *           ZFS_DELEG_PERM_LOCAL and/or  ZFS_DELEG_PERM_DESCENDENT.
+ * nvp       The constructed nvlist to pass to zfs_perm_set().
+ *           The output nvp will look something like this.
+ *              ul$1234 -> {create ; destroy }
+ *              Ul$1234 -> { @myset }
+ *              s-$@myset - { snapshot; checksum; compression }
+ *
+ * Returns 0 with '*nvp' set on success.  On failure '*nvp' is NULL and the
+ * return value is 1 if an nvlist could not be allocated, or otherwise the
+ * result of zfs_error() for the problem that was reported.
+ */
+int
+zfs_build_perms(zfs_handle_t *zhp, char *whostr, char *perms,
+    zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit, nvlist_t **nvp)
+{
+	nvlist_t *who_nvp = NULL;
+	nvlist_t *perms_nvp = NULL;
+	nvlist_t *sets_nvp = NULL;
+	char errbuf[1024];
+	char *who_tok = NULL;
+	char *perm;
+	int error;
+
+	*nvp = NULL;
+
+	if (perms) {
+		if (nvlist_alloc(&perms_nvp, NV_UNIQUE_NAME, 0) != 0)
+			return (1);
+		if (nvlist_alloc(&sets_nvp, NV_UNIQUE_NAME, 0) != 0) {
+			nvlist_free(perms_nvp);
+			return (1);
+		}
+	}
+
+	if (nvlist_alloc(&who_nvp, NV_UNIQUE_NAME, 0) != 0) {
+		zfs_build_perms_free(NULL, perms_nvp, sets_nvp);
+		return (1);
+	}
+
+	if (who_type == ZFS_DELEG_NAMED_SET) {
+		namecheck_err_t why;
+		char what;
+
+		if (permset_namecheck(whostr, &why, &what) != 0) {
+			zfs_build_perms_free(who_nvp, perms_nvp, sets_nvp);
+
+			if (why == NAME_ERR_NO_AT) {
+				zfs_error_aux(zhp->zfs_hdl,
+				    dgettext(TEXT_DOMAIN,
+				    "set definition must begin with an '@' "
+				    "character"));
+			}
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_BADPERMSET, whostr));
+		}
+	}
+
+	/*
+	 * Build up nvlist(s) of permissions.  Two nvlists are maintained.
+	 * The first nvlist perms_nvp will have normal permissions and the
+	 * other sets_nvp will have only permission set names in it.
+	 *
+	 * This is skipped entirely when there is no permission string:
+	 * handing strtok() a NULL string would make it continue whatever
+	 * string it was last used on.
+	 */
+	if (perms != NULL) {
+		for (perm = strtok(perms, ","); perm != NULL;
+		    perm = strtok(NULL, ",")) {
+			const char *perm_canonical =
+			    zfs_deleg_canonicalize_perm(perm);
+
+			if (perm_canonical) {
+				verify(nvlist_add_boolean(perms_nvp,
+				    perm_canonical) == 0);
+			} else if (perm[0] == '@') {
+				verify(nvlist_add_boolean(sets_nvp,
+				    perm) == 0);
+			} else {
+				zfs_build_perms_free(who_nvp, perms_nvp,
+				    sets_nvp);
+				return (zfs_error(zhp->zfs_hdl, EZFS_BADPERM,
+				    perm));
+			}
+		}
+	}
+
+	if (whostr && who_type != ZFS_DELEG_CREATE) {
+		who_tok = strtok(whostr, ",");
+		if (who_tok == NULL) {
+			zfs_build_perms_free(who_nvp, perms_nvp, sets_nvp);
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN, "Who string is NULL"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
+		}
+	}
+
+	/*
+	 * Now create the nvlist(s).  The body always runs once; when there
+	 * is no who string to walk (everyone/create), who_tok stays NULL and
+	 * the loop ends after that single pass.
+	 */
+	do {
+		uint64_t who_id;
+
+		error = zfs_get_perm_who(who_tok, &who_type,
+		    &who_id);
+		if (error) {
+			zfs_build_perms_free(who_nvp, perms_nvp, sets_nvp);
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "Unable to determine uid/gid for "
+			    "%s "), who_tok != NULL ? who_tok : "");
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
+		}
+
+		/*
+		 * add entries for both local and descendent when required
+		 */
+		zfs_perms_add_who_nvlist(who_nvp, who_id, who_tok,
+		    perms_nvp, sets_nvp, who_type, inherit);
+
+	} while (who_tok != NULL && (who_tok = strtok(NULL, ",")) != NULL);
+
+	/*
+	 * who_nvp holds its own copies of the permission lists (adding an
+	 * nvlist to an nvlist stores a copy), so the working lists are no
+	 * longer needed and would otherwise be leaked.
+	 */
+	zfs_build_perms_free(NULL, perms_nvp, sets_nvp);
+
+	*nvp = who_nvp;
+	return (0);
+}
+
+/*
+ * Send the delegation nvlist 'nvp' to the kernel for the dataset behind
+ * 'zhp' using ZFS_IOC_SET_FSACL.  'unset' is stored in zc_perm_action;
+ * zfs_perm_set() passes B_FALSE and zfs_perm_remove() passes B_TRUE.
+ *
+ * Returns 0 on success, -1 if the nvlist could not be written into the
+ * command, and otherwise the value of zfs_error()/zfs_standard_error().
+ */
+static int
+zfs_perm_set_common(zfs_handle_t *zhp, nvlist_t *nvp, boolean_t unset)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	int saved_errno;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "Cannot update 'allows' for '%s'"),
+	    zhp->zfs_name);
+
+	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, nvp))
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	zc.zc_perm_action = unset;
+
+	error = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SET_FSACL, &zc);
+
+	/*
+	 * Capture errno before cleanup, then release the command's nvlist
+	 * buffers on every path so that no failure case leaks them.
+	 */
+	saved_errno = errno;
+	zcmd_free_nvlists(&zc);
+
+	if (error == 0)
+		return (0);
+
+	if (saved_errno == ENOTSUP) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    gettext("Pool must be upgraded to use 'allow/unallow'"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION, errbuf));
+	}
+
+	return (zfs_standard_error(zhp->zfs_hdl, saved_errno, errbuf));
+}
+
+/*
+ * Hand the delegation nvlist 'nvp' to zfs_perm_set_common() with the
+ * unset flag cleared (B_FALSE) and return its result.
+ */
+int
+zfs_perm_set(zfs_handle_t *zhp, nvlist_t *nvp)
+{
+	int rc;
+
+	rc = zfs_perm_set_common(zhp, nvp, B_FALSE);
+	return (rc);
+}
+
+/*
+ * Hand the delegation nvlist 'perms' to zfs_perm_set_common() with the
+ * unset flag raised (B_TRUE) and return its result.
+ */
+int
+zfs_perm_remove(zfs_handle_t *zhp, nvlist_t *perms)
+{
+	int rc;
+
+	rc = zfs_perm_set_common(zhp, perms, B_TRUE);
+	return (rc);
+}
+
+/*
+ * AVL comparator: orders two nodes by the string in z_pname and reports
+ * the ordering as exactly -1, 0 or 1.
+ */
+static int
+perm_compare(const void *arg1, const void *arg2)
+{
+	const zfs_perm_node_t *lhs = arg1;
+	const zfs_perm_node_t *rhs = arg2;
+	int order = strcmp(lhs->z_pname, rhs->z_pname);
+
+	/* Collapse strcmp()'s arbitrary magnitude to its sign. */
+	return ((order > 0) - (order < 0));
+}
+
+/*
+ * Free every zfs_perm_node_t held in 'tree', then destroy the tree itself.
+ */
+static void
+zfs_destroy_perm_tree(avl_tree_t *tree)
+{
+	void *walk = NULL;
+	zfs_perm_node_t *node;
+
+	for (;;) {
+		node = avl_destroy_nodes(tree, &walk);
+		if (node == NULL)
+			break;
+		free(node);
+	}
+
+	avl_destroy(tree);
+}
+
+/*
+ * Tear down a tree of zfs_allow_node_t entries: for each entry, destroy its
+ * three permission subtrees, free the entry, and finally destroy the tree.
+ */
+static void
+zfs_destroy_tree(avl_tree_t *tree)
+{
+	void *walk = NULL;
+	zfs_allow_node_t *who;
+
+	for (;;) {
+		who = avl_destroy_nodes(tree, &walk);
+		if (who == NULL)
+			break;
+
+		zfs_destroy_perm_tree(&who->z_localdescend);
+		zfs_destroy_perm_tree(&who->z_local);
+		zfs_destroy_perm_tree(&who->z_descend);
+		free(who);
+	}
+
+	avl_destroy(tree);
+}
+
+/*
+ * Release a whole z_next-linked chain of zfs_allow_t structures, including
+ * the five "who" trees carried by each link.  A NULL 'allow' is a no-op.
+ */
+void
+zfs_free_allows(zfs_allow_t *allow)
+{
+	zfs_allow_t *cur;
+	zfs_allow_t *following;
+
+	for (cur = allow; cur != NULL; cur = following) {
+		zfs_destroy_tree(&cur->z_sets);
+		zfs_destroy_tree(&cur->z_crperms);
+		zfs_destroy_tree(&cur->z_user);
+		zfs_destroy_tree(&cur->z_group);
+		zfs_destroy_tree(&cur->z_everyone);
+
+		/* Read the link before the memory holding it goes away. */
+		following = cur->z_next;
+		free(cur);
+	}
+}
+
+/*
+ * Allocate a zfs_allow_t for the setpoint named 'setpoint', create its five
+ * empty "who" trees, and append it after 'prev' when 'prev' is non-NULL.
+ *
+ * Returns the new structure, or NULL if zfs_alloc() fails.
+ *
+ * NOTE(review): the who trees hold zfs_allow_node_t entries but are ordered
+ * by perm_compare(), which reads zfs_perm_node_t's z_pname; presumably the
+ * two structures share a leading layout -- confirm against the header.
+ */
+static zfs_allow_t *
+zfs_alloc_perm_tree(zfs_handle_t *zhp, zfs_allow_t *prev, char *setpoint)
+{
+	zfs_allow_t *entry;
+	avl_tree_t *who_trees[5];
+	int i;
+
+	entry = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_allow_t));
+	if (entry == NULL)
+		return (NULL);
+
+	(void) strlcpy(entry->z_setpoint, setpoint, sizeof (entry->z_setpoint));
+
+	who_trees[0] = &entry->z_sets;
+	who_trees[1] = &entry->z_crperms;
+	who_trees[2] = &entry->z_user;
+	who_trees[3] = &entry->z_group;
+	who_trees[4] = &entry->z_everyone;
+
+	for (i = 0; i < 5; i++) {
+		avl_create(who_trees[i], perm_compare,
+		    sizeof (zfs_allow_node_t),
+		    offsetof(zfs_allow_node_t, z_node));
+	}
+
+	entry->z_next = NULL;
+	if (prev != NULL)
+		prev->z_next = entry;
+
+	return (entry);
+}
+
+/*
+ * Record permission 'perm' for 'allownode' in the right AVL subtree.
+ *
+ * 'ld' picks the requested subtree: ZFS_DELEG_NA selects local+descendent,
+ * ZFS_DELEG_LOCAL selects local, and anything else selects descendent.
+ * The requested subtree is not always where the permission lands: if the
+ * same permission is already present in the opposite subtree, it is removed
+ * from there and, unless the requested subtree already holds it, added to
+ * the local+descendent subtree instead.
+ *
+ * Returns 0 on success and -1 if a new node cannot be allocated.
+ */
+static int
+zfs_coalesce_perm(zfs_handle_t *zhp, zfs_allow_node_t *allownode,
+    char *perm, char ld)
+{
+	zfs_perm_node_t key;
+	zfs_perm_node_t *hit, *alt_hit, *fresh;
+	avl_index_t pos, alt_pos;
+	avl_tree_t *target, *other;
+
+	(void) strlcpy(key.z_pname, perm, sizeof (key.z_pname));
+
+	/* The opposite tree is z_descend except for descendent requests. */
+	other = &allownode->z_descend;
+	if (ld == ZFS_DELEG_NA) {
+		target = &allownode->z_localdescend;
+	} else if (ld == ZFS_DELEG_LOCAL) {
+		target = &allownode->z_local;
+	} else {
+		target = &allownode->z_descend;
+		other = &allownode->z_local;
+	}
+
+	hit = avl_find(target, &key, &pos);
+	alt_hit = avl_find(other, &key, &alt_pos);
+
+	if (alt_hit != NULL) {
+		avl_remove(other, alt_hit);
+		free(alt_hit);
+
+		/* Present on the other side only: promote to both. */
+		if (hit == NULL)
+			target = &allownode->z_localdescend;
+	}
+
+	/* Already recorded in the requested tree; nothing to insert. */
+	if (hit != NULL)
+		return (0);
+
+	fresh = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_perm_node_t));
+	if (fresh == NULL)
+		return (-1);
+
+	*fresh = key;
+	avl_add(target, fresh);
+	return (0);
+}
+
+/*
+ * Fetch the delegated permissions of a dataset and present them as a
+ * linked list of zfs_allow_t structures, one per setpoint.
+ *
+ * The kernel hands back an nvlist keyed by where each permission is set.
+ * Each of those entries holds an nvlist of all of the various "who"
+ * entries and their permissions.  The kernel only knows a permission as
+ * being local or descendent, never both, so the local, descendent and
+ * local+descendent permissions are coalesced here via zfs_coalesce_perm().
+ *
+ * To make this easier for zfs_main to deal with, a series of AVL trees is
+ * used, primarily for sorting purposes as well as the ability to quickly
+ * locate a specific entry.  Each zfs_allow_t carries trees for sets, create
+ * perms, users, groups and everyone, and every node in those trees has
+ * subtrees for local, descendent and local+descendent permissions.
+ *
+ * In each "who" name, character 0 selects the tree, character 1 is the
+ * local/descendent indicator, and the text from character 3 on is the
+ * uid, gid or set name.
+ *
+ * Returns 0 on success; *zfs_perms is set to NULL when the dataset has no
+ * delegations.  Returns -1 on failure (with *zfs_perms reset to NULL once
+ * any partially built list has been freed), or the value of zfs_error()
+ * when the ioctl reports ENOTSUP.
+ */
+int
+zfs_perm_get(zfs_handle_t *zhp, zfs_allow_t **zfs_perms)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	nvlist_t *nvlist;
+	nvlist_t *permnv, *sourcenv;
+	nvpair_t *who_pair, *source_pair;
+	nvpair_t *perm_pair;
+	char errbuf[1024];
+	zfs_allow_t *zallowp, *newallowp;
+	char  ld;
+	char *nvpname;
+	uid_t	uid;
+	gid_t	gid;
+	avl_tree_t *tree;
+	avl_index_t where;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+		return (-1);
+
+	/* Retry with a larger destination buffer for as long as we see ENOMEM. */
+	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
+		if (errno == ENOMEM) {
+			if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, &zc) != 0) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		} else if (errno == ENOTSUP) {
+			zcmd_free_nvlists(&zc);
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    gettext("Pool must be upgraded to use 'allow'"));
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_BADVERSION, errbuf));
+		} else {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &nvlist) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	source_pair = nvlist_next_nvpair(nvlist, NULL);
+
+	if (source_pair == NULL) {
+		/* Nothing delegated; still release the unpacked nvlist. */
+		*zfs_perms = NULL;
+		nvlist_free(nvlist);
+		return (0);
+	}
+
+	*zfs_perms = zfs_alloc_perm_tree(zhp, NULL, nvpair_name(source_pair));
+	if (*zfs_perms == NULL) {
+		/* Allocation failure is an error, not an empty result. */
+		nvlist_free(nvlist);
+		return (-1);
+	}
+
+	zallowp = *zfs_perms;
+
+	for (;;) {
+		struct passwd *pwd;
+		struct group *grp;
+		zfs_allow_node_t *allownode;
+		zfs_allow_node_t  findallownode;
+		zfs_allow_node_t *newallownode;
+
+		(void) strlcpy(zallowp->z_setpoint,
+		    nvpair_name(source_pair),
+		    sizeof (zallowp->z_setpoint));
+
+		if ((error = nvpair_value_nvlist(source_pair, &sourcenv)) != 0)
+			goto abort;
+
+		/*
+		 * Make sure nvlist is composed correctly
+		 */
+		if (zfs_deleg_verify_nvlist(sourcenv)) {
+			goto abort;
+		}
+
+		who_pair = nvlist_next_nvpair(sourcenv, NULL);
+		if (who_pair == NULL) {
+			goto abort;
+		}
+
+		do {
+			error = nvpair_value_nvlist(who_pair, &permnv);
+			if (error) {
+				goto abort;
+			}
+
+			/*
+			 * First build up the key to use
+			 * for looking up in the various
+			 * who trees.
+			 */
+			ld = nvpair_name(who_pair)[1];
+			nvpname = nvpair_name(who_pair);
+			switch (nvpair_name(who_pair)[0]) {
+			case ZFS_DELEG_USER:
+			case ZFS_DELEG_USER_SETS:
+				tree = &zallowp->z_user;
+				uid = atol(&nvpname[3]);
+				pwd = getpwuid(uid);
+				/* Fall back to the numeric id text. */
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "user %s",
+				    (pwd) ? pwd->pw_name :
+				    &nvpair_name(who_pair)[3]);
+				break;
+			case ZFS_DELEG_GROUP:
+			case ZFS_DELEG_GROUP_SETS:
+				tree = &zallowp->z_group;
+				gid = atol(&nvpname[3]);
+				grp = getgrgid(gid);
+				/* Fall back to the numeric id text. */
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "group %s",
+				    (grp) ? grp->gr_name :
+				    &nvpair_name(who_pair)[3]);
+				break;
+			case ZFS_DELEG_CREATE:
+			case ZFS_DELEG_CREATE_SETS:
+				tree = &zallowp->z_crperms;
+				(void) strlcpy(findallownode.z_key, "",
+				    sizeof (findallownode.z_key));
+				break;
+			case ZFS_DELEG_EVERYONE:
+			case ZFS_DELEG_EVERYONE_SETS:
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "everyone");
+				tree = &zallowp->z_everyone;
+				break;
+			case ZFS_DELEG_NAMED_SET:
+			case ZFS_DELEG_NAMED_SET_SETS:
+				(void) snprintf(findallownode.z_key,
+				    sizeof (findallownode.z_key), "%s",
+				    &nvpair_name(who_pair)[3]);
+				tree = &zallowp->z_sets;
+				break;
+			default:
+				/*
+				 * Unknown who type: without this, 'tree'
+				 * and the key would be used uninitialized.
+				 */
+				goto abort;
+			}
+
+			/*
+			 * Place who in tree
+			 */
+			allownode = avl_find(tree, &findallownode, &where);
+			if (allownode == NULL) {
+				if ((newallownode = zfs_alloc(zhp->zfs_hdl,
+				    sizeof (zfs_allow_node_t))) == NULL) {
+					goto abort;
+				}
+				avl_create(&newallownode->z_localdescend,
+				    perm_compare,
+				    sizeof (zfs_perm_node_t),
+				    offsetof(zfs_perm_node_t, z_node));
+				avl_create(&newallownode->z_local,
+				    perm_compare,
+				    sizeof (zfs_perm_node_t),
+				    offsetof(zfs_perm_node_t, z_node));
+				avl_create(&newallownode->z_descend,
+				    perm_compare,
+				    sizeof (zfs_perm_node_t),
+				    offsetof(zfs_perm_node_t, z_node));
+				(void) strlcpy(newallownode->z_key,
+				    findallownode.z_key,
+				    sizeof (findallownode.z_key));
+				avl_insert(tree, newallownode, where);
+				allownode = newallownode;
+			}
+
+			/*
+			 * Now iterate over the permissions and
+			 * place them in the appropriate local,
+			 * descendent or local+descendent tree.
+			 *
+			 * The permissions are added to the tree
+			 * via zfs_coalesce_perm().
+			 */
+			perm_pair = nvlist_next_nvpair(permnv, NULL);
+			if (perm_pair == NULL)
+				goto abort;
+			do {
+				if (zfs_coalesce_perm(zhp, allownode,
+				    nvpair_name(perm_pair), ld) != 0)
+					goto abort;
+			} while ((perm_pair = nvlist_next_nvpair(permnv,
+			    perm_pair)) != NULL);
+		} while ((who_pair = nvlist_next_nvpair(sourcenv,
+		    who_pair)) != NULL);
+
+		source_pair = nvlist_next_nvpair(nvlist, source_pair);
+		if (source_pair == NULL)
+			break;
+
+		/*
+		 * allocate another node from the link list of
+		 * zfs_allow_t structures
+		 */
+		newallowp = zfs_alloc_perm_tree(zhp, zallowp,
+		    nvpair_name(source_pair));
+		if (newallowp == NULL) {
+			goto abort;
+		}
+		zallowp = newallowp;
+	}
+	nvlist_free(nvlist);
+	return (0);
+abort:
+	zfs_free_allows(*zfs_perms);
+	/* Do not hand the caller a pointer to memory that was just freed. */
+	*zfs_perms = NULL;
+	nvlist_free(nvlist);
+	return (-1);
+}
+
+/*
+ * Map a delegation note identifier to its translated explanatory text.
+ * ZFS_DELEG_NOTE_NONE and unrecognized values yield the translation of the
+ * empty string.
+ */
+static char *
+zfs_deleg_perm_note(zfs_deleg_note_t note)
+{
+	char *text;
+
+	/*
+	 * Don't put newlines on end of lines
+	 */
+	switch (note) {
+	case ZFS_DELEG_NOTE_CREATE:
+	case ZFS_DELEG_NOTE_DESTROY:
+	case ZFS_DELEG_NOTE_SNAPSHOT:
+	case ZFS_DELEG_NOTE_ROLLBACK:
+		/* These four notes share one message. */
+		text = dgettext(TEXT_DOMAIN,
+		    "Must also have the 'mount' ability");
+		break;
+	case ZFS_DELEG_NOTE_CLONE:
+		text = dgettext(TEXT_DOMAIN, "Must also have the 'create' "
+		    "ability and 'mount'\n"
+		    "\t\t\t\tability in the origin file system");
+		break;
+	case ZFS_DELEG_NOTE_PROMOTE:
+		text = dgettext(TEXT_DOMAIN, "Must also have the 'mount'\n"
+		    "\t\t\t\tand 'promote' ability in the origin file system");
+		break;
+	case ZFS_DELEG_NOTE_RENAME:
+		text = dgettext(TEXT_DOMAIN, "Must also have the 'mount' "
+		    "and 'create' \n\t\t\t\tability in the new parent");
+		break;
+	case ZFS_DELEG_NOTE_RECEIVE:
+		text = dgettext(TEXT_DOMAIN, "Must also have the 'mount'"
+		    " and 'create' ability");
+		break;
+	case ZFS_DELEG_NOTE_USERPROP:
+		text = dgettext(TEXT_DOMAIN,
+		    "Allows changing any user property");
+		break;
+	case ZFS_DELEG_NOTE_ALLOW:
+		text = dgettext(TEXT_DOMAIN,
+		    "Must also have the permission that is being\n"
+		    "\t\t\t\tallowed");
+		break;
+	case ZFS_DELEG_NOTE_MOUNT:
+		text = dgettext(TEXT_DOMAIN,
+		    "Allows mount/umount of ZFS datasets");
+		break;
+	case ZFS_DELEG_NOTE_SHARE:
+		text = dgettext(TEXT_DOMAIN,
+		    "Allows sharing file systems over NFS or SMB\n"
+		    "\t\t\t\tprotocols");
+		break;
+	case ZFS_DELEG_NOTE_NONE:
+	default:
+		text = dgettext(TEXT_DOMAIN, "");
+		break;
+	}
+
+	return (text);
+}
+
+/*
+ * Category shown in the TYPE column printed by zfs_deleg_permissions().
+ */
+typedef enum {
+	ZFS_DELEG_SUBCOMMAND,	/* labelled "subcommand" */
+	ZFS_DELEG_PROP,		/* labelled "property" */
+	ZFS_DELEG_OTHER		/* labelled "other" */
+} zfs_deleg_perm_type_t;
+
+/*
+ * Classify a permission name: "userprop" is ZFS_DELEG_OTHER and every
+ * other name is ZFS_DELEG_SUBCOMMAND.
+ */
+zfs_deleg_perm_type_t
+zfs_deleg_perm_type(const char *perm)
+{
+	int is_userprop = (strcmp(perm, "userprop") == 0);
+
+	return (is_userprop ? ZFS_DELEG_OTHER : ZFS_DELEG_SUBCOMMAND);
+}
+
+/*
+ * Return the translated label for a permission type, or "" for a value
+ * that is not one of the three known types.
+ */
+static char *
+zfs_deleg_perm_type_str(zfs_deleg_perm_type_t type)
+{
+	char *label = "";
+
+	switch (type) {
+	case ZFS_DELEG_SUBCOMMAND:
+		label = dgettext(TEXT_DOMAIN, "subcommand");
+		break;
+	case ZFS_DELEG_PROP:
+		label = dgettext(TEXT_DOMAIN, "property");
+		break;
+	case ZFS_DELEG_OTHER:
+		label = dgettext(TEXT_DOMAIN, "other");
+		break;
+	}
+
+	return (label);
+}
+
+/*
+ * zprop_iter() callback: print one NAME/TYPE row on stderr for each
+ * delegatable property.  'cb' is unused.  Always returns ZPROP_CONT.
+ */
+/*ARGSUSED*/
+static int
+zfs_deleg_prop_cb(int prop, void *cb)
+{
+	if (!zfs_prop_delegatable(prop))
+		return (ZPROP_CONT);
+
+	(void) fprintf(stderr, "%-15s %-15s\n", zfs_prop_to_name(prop),
+	    zfs_deleg_perm_type_str(ZFS_DELEG_PROP));
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Print a table of delegatable permissions on stderr: a header row, one
+ * row per entry of zfs_deleg_perm_tab (up to its NULL z_perm terminator),
+ * then one row per delegatable property via zfs_deleg_prop_cb().
+ */
+void
+zfs_deleg_permissions(void)
+{
+	int idx = 0;
+
+	(void) fprintf(stderr, "\n%-15s %-15s\t%s\n\n", "NAME",
+	    "TYPE", "NOTES");
+
+	/*
+	 * The table entries come first.
+	 */
+	while (zfs_deleg_perm_tab[idx].z_perm != NULL) {
+		const char *name = zfs_deleg_perm_tab[idx].z_perm;
+		char *kind = zfs_deleg_perm_type_str(zfs_deleg_perm_type(name));
+		char *note = zfs_deleg_perm_note(zfs_deleg_perm_tab[idx].z_note);
+
+		(void) fprintf(stderr, "%-15s %-15s\t%s\n", name, kind, note);
+		idx++;
+	}
+
+	(void) zprop_iter(zfs_deleg_prop_cb, NULL, B_FALSE, B_TRUE,
+	    ZFS_TYPE_DATASET|ZFS_TYPE_VOLUME);
+}
+
+/*
+ * Given a property name and value, set the property for the given dataset.
+ *
+ * The name/value pair is validated with zfs_valid_proplist(), the affected
+ * datasets are gathered into a changelist, and the change is sent with
+ * ZFS_IOC_SET_PROP.  Unless 'canmount' is being set to 'noauto', the
+ * changelist is prefixed before the ioctl and postfixed after it succeeds.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret = -1;
+	prop_changelist_t *cl = NULL;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *nvl = NULL, *realprops;
+	zfs_prop_t prop;
+	boolean_t do_prefix;
+	uint64_t idx;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
+	    zhp->zfs_name);
+
+	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
+	    nvlist_add_string(nvl, propname, propval) != 0) {
+		(void) no_memory(hdl);
+		goto error;
+	}
+
+	if ((realprops = zfs_valid_proplist(hdl, zhp->zfs_type, nvl,
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL)
+		goto error;
+
+	/* From here on 'nvl' is the validated list. */
+	nvlist_free(nvl);
+	nvl = realprops;
+
+	prop = zfs_name_to_prop(propname);
+
+	if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL)
+		goto error;
+
+	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
+		goto error;
+	}
+
+	/*
+	 * If the dataset's canmount property is being set to noauto,
+	 * then we want to prevent unmounting & remounting it.
+	 */
+	do_prefix = !((prop == ZFS_PROP_CANMOUNT) &&
+	    (zprop_string_to_index(prop, propval, &idx,
+	    ZFS_TYPE_DATASET) == 0) && (idx == ZFS_CANMOUNT_NOAUTO));
+
+	if (do_prefix && (ret = changelist_prefix(cl)) != 0)
+		goto error;
+
+	/*
+	 * Execute the corresponding ioctl() to set this property.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) {
+		/*
+		 * A successful changelist_prefix() above left ret at 0;
+		 * restore the failure code so this is not reported as
+		 * success.
+		 */
+		ret = -1;
+		goto error;
+	}
+
+	ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
+	if (ret != 0) {
+		switch (errno) {
+
+		case ENOSPC:
+			/*
+			 * For quotas and reservations, ENOSPC indicates
+			 * something different; setting a quota or reservation
+			 * doesn't use any disk space.
+			 */
+			switch (prop) {
+			case ZFS_PROP_QUOTA:
+			case ZFS_PROP_REFQUOTA:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is less than current used or "
+				    "reserved space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+				break;
+
+			case ZFS_PROP_RESERVATION:
+			case ZFS_PROP_REFRESERVATION:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is greater than available space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+				break;
+
+			default:
+				(void) zfs_standard_error(hdl, errno, errbuf);
+				break;
+			}
+			break;
+
+		case EBUSY:
+			if (prop == ZFS_PROP_VOLBLOCKSIZE)
+				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
+			else
+				(void) zfs_standard_error(hdl, EBUSY, errbuf);
+			break;
+
+		case EROFS:
+			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool and or dataset must be upgraded to set this "
+			    "property or value"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+
+		case ERANGE:
+			if (prop == ZFS_PROP_COMPRESSION) {
+				(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property setting is not allowed on "
+				    "bootable datasets"));
+				(void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+			} else {
+				(void) zfs_standard_error(hdl, errno, errbuf);
+			}
+			break;
+
+		case EOVERFLOW:
+			/*
+			 * This platform can't address a volume this big.
+			 */
+#ifdef _ILP32
+			if (prop == ZFS_PROP_VOLSIZE) {
+				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
+				break;
+			}
+#endif
+			/* FALLTHROUGH */
+		default:
+			(void) zfs_standard_error(hdl, errno, errbuf);
+		}
+	} else {
+		if (do_prefix)
+			ret = changelist_postfix(cl);
+
+		/*
+		 * Refresh the statistics so the new property value
+		 * is reflected.
+		 */
+		if (ret == 0)
+			(void) get_stats(zhp);
+	}
+
+error:
+	/* Shared cleanup for every exit path after the initial setup. */
+	nvlist_free(nvl);
+	zcmd_free_nvlists(&zc);
+	if (cl)
+		changelist_free(cl);
+	return (ret);
+}
+
+/*
+ * Given a property, inherit the value from the parent dataset.
+ *
+ * User properties are handled directly with a single ZFS_IOC_INHERIT_PROP
+ * ioctl.  Native properties must be writable, inheritable and valid for
+ * the dataset type; the affected datasets are gathered into a changelist
+ * that is prefixed before the ioctl and postfixed after it succeeds.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	prop_changelist_t *cl;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	zfs_prop_t prop;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
+
+	if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
+		/*
+		 * For user properties, the amount of work we have to do is very
+		 * small, so just do it here.
+		 */
+		if (!zfs_prop_user(propname)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+		(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
+
+		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0)
+			return (zfs_standard_error(hdl, errno, errbuf));
+
+		return (0);
+	}
+
+	/*
+	 * Verify that this property is inheritable.
+	 */
+	if (zfs_prop_readonly(prop))
+		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
+
+	if (!zfs_prop_inheritable(prop))
+		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
+
+	/*
+	 * Check to see if the value applies to this type
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
+
+	/*
+	 * Normalize the name, to get rid of shorthand abbreviations.
+	 */
+	propname = zfs_prop_to_name(prop);
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
+
+	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
+	}
+
+	/*
+	 * Determine datasets which will be affected by this change, if any.
+	 */
+	if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL)
+		return (-1);
+
+	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
+		goto error;
+	}
+
+	if ((ret = changelist_prefix(cl)) != 0)
+		goto error;
+
+	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) {
+		/*
+		 * Report the failure, then leave through the common exit so
+		 * the changelist is freed rather than leaked.
+		 */
+		ret = zfs_standard_error(hdl, errno, errbuf);
+		goto error;
+	}
+
+	if ((ret = changelist_postfix(cl)) != 0)
+		goto error;
+
+	/*
+	 * Refresh the statistics so the new property is reflected.
+	 */
+	(void) get_stats(zhp);
+
+error:
+	changelist_free(cl);
+	return (ret);
+}
+
+/*
+ * True DSL properties are stored in an nvlist.  This function and
+ * getprop_string() extract them appropriately.
+ *
+ * Look up numeric property 'prop' in zhp->zfs_props.  When it is present,
+ * return its ZPROP_VALUE and point *source at its ZPROP_SOURCE string if
+ * there is one (otherwise *source stays NULL).  When it is absent, return
+ * the property's numeric default and set *source to "".
+ */
+static uint64_t
+getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
+{
+	nvlist_t *propnv;
+	uint64_t result;
+
+	*source = NULL;
+
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(prop), &propnv) != 0) {
+		/* Not stored on the handle: report the default. */
+		*source = "";
+		return (zfs_prop_default_numeric(prop));
+	}
+
+	verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &result) == 0);
+	(void) nvlist_lookup_string(propnv, ZPROP_SOURCE, source);
+
+	return (result);
+}
+
+/*
+ * Look up string property 'prop' in zhp->zfs_props.  When it is present,
+ * return its ZPROP_VALUE and point *source at its ZPROP_SOURCE string if
+ * there is one (otherwise *source stays NULL).  When it is absent, return
+ * the property's default string, or "" if it has none, and set *source
+ * to "".
+ */
+static char *
+getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
+{
+	nvlist_t *propnv;
+	char *result;
+
+	*source = NULL;
+
+	if (nvlist_lookup_nvlist(zhp->zfs_props,
+	    zfs_prop_to_name(prop), &propnv) != 0) {
+		/* Not stored on the handle: report the default. */
+		result = (char *)zfs_prop_default_string(prop);
+		*source = "";
+		return (result != NULL ? result : "");
+	}
+
+	verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &result) == 0);
+	(void) nvlist_lookup_string(propnv, ZPROP_SOURCE, source);
+
+	return (result);
+}
+
+/*
+ * Internal function for getting a numeric property.  Both zfs_prop_get() and
+ * zfs_prop_get_int() are built using this interface.
+ *
+ * Certain properties can be overridden using 'mount -o'.  In this case, scan
+ * the contents of the /etc/mnttab entry, searching for the appropriate options.
+ * If they differ from the on-disk values, report the current values and mark
+ * the source "temporary".
+ *
+ * The value is stored in *val and the raw source string in *source.  'src'
+ * may be NULL; it is written only when a mount option overrides the stored
+ * value.
+ *
+ * Returns 0 on success, -1 on failure, or the value of zfs_error() for the
+ * failures reported through it.
+ */
+static int
+get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
+    char **source, uint64_t *val)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *zplprops = NULL;
+	struct mnttab mnt;
+	char *mntopt_on = NULL;
+	char *mntopt_off = NULL;
+
+	*source = NULL;
+
+	/* Properties that can be overridden by a pair of mount options. */
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+		mntopt_on = MNTOPT_ATIME;
+		mntopt_off = MNTOPT_NOATIME;
+		break;
+
+	case ZFS_PROP_DEVICES:
+		mntopt_on = MNTOPT_DEVICES;
+		mntopt_off = MNTOPT_NODEVICES;
+		break;
+
+	case ZFS_PROP_EXEC:
+		mntopt_on = MNTOPT_EXEC;
+		mntopt_off = MNTOPT_NOEXEC;
+		break;
+
+	case ZFS_PROP_READONLY:
+		mntopt_on = MNTOPT_RO;
+		mntopt_off = MNTOPT_RW;
+		break;
+
+	case ZFS_PROP_SETUID:
+		mntopt_on = MNTOPT_SETUID;
+		mntopt_off = MNTOPT_NOSETUID;
+		break;
+
+	case ZFS_PROP_XATTR:
+		mntopt_on = MNTOPT_XATTR;
+		mntopt_off = MNTOPT_NOXATTR;
+		break;
+
+	case ZFS_PROP_NBMAND:
+		mntopt_on = MNTOPT_NBMAND;
+		mntopt_off = MNTOPT_NONBMAND;
+		break;
+
+	default:
+		/* No mount option pair applies to this property. */
+		break;
+	}
+
+	/*
+	 * Because looking up the mount options is potentially expensive
+	 * (iterating over all of /etc/mnttab), we defer its calculation until
+	 * we're looking up a property which requires its presence.
+	 */
+	if (!zhp->zfs_mntcheck &&
+	    (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
+		struct mnttab entry, search = { 0 };
+		FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;
+
+		search.mnt_special = (char *)zhp->zfs_name;
+		search.mnt_fstype = MNTTYPE_ZFS;
+		rewind(mnttab);
+
+		if (getmntany(mnttab, &entry, &search) == 0) {
+			zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
+			    entry.mnt_mntopts);
+			if (zhp->zfs_mntopts == NULL)
+				return (-1);
+		}
+
+		zhp->zfs_mntcheck = B_TRUE;
+	}
+
+	if (zhp->zfs_mntopts == NULL)
+		mnt.mnt_mntopts = "";
+	else
+		mnt.mnt_mntopts = zhp->zfs_mntopts;
+
+	switch (prop) {
+	case ZFS_PROP_ATIME:
+	case ZFS_PROP_DEVICES:
+	case ZFS_PROP_EXEC:
+	case ZFS_PROP_READONLY:
+	case ZFS_PROP_SETUID:
+	case ZFS_PROP_XATTR:
+	case ZFS_PROP_NBMAND:
+		*val = getprop_uint64(zhp, prop, source);
+
+		/* A mount option that contradicts the stored value wins. */
+		if (hasmntopt(&mnt, mntopt_on) && !*val) {
+			*val = B_TRUE;
+			if (src)
+				*src = ZPROP_SRC_TEMPORARY;
+		} else if (hasmntopt(&mnt, mntopt_off) && *val) {
+			*val = B_FALSE;
+			if (src)
+				*src = ZPROP_SRC_TEMPORARY;
+		}
+		break;
+
+	case ZFS_PROP_CANMOUNT:
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val != ZFS_CANMOUNT_ON)
+			*source = zhp->zfs_name;
+		else
+			*source = "";	/* default */
+		break;
+
+	case ZFS_PROP_QUOTA:
+	case ZFS_PROP_REFQUOTA:
+	case ZFS_PROP_RESERVATION:
+	case ZFS_PROP_REFRESERVATION:
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val == 0)
+			*source = "";	/* default */
+		else
+			*source = zhp->zfs_name;
+		break;
+
+	case ZFS_PROP_MOUNTED:
+		*val = (zhp->zfs_mntopts != NULL);
+		break;
+
+	case ZFS_PROP_NUMCLONES:
+		*val = zhp->zfs_dmustats.dds_num_clones;
+		break;
+
+	case ZFS_PROP_VERSION:
+	case ZFS_PROP_NORMALIZE:
+	case ZFS_PROP_UTF8ONLY:
+	case ZFS_PROP_CASE:
+		/* These are fetched with ZFS_IOC_OBJSET_ZPLPROPS. */
+		if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) ||
+		    zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
+			return (-1);
+		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) {
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "unable to get %s property"),
+			    zfs_prop_to_name(prop));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION,
+			    dgettext(TEXT_DOMAIN, "internal error")));
+		}
+		if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 ||
+		    nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop),
+		    val) != 0) {
+			/*
+			 * zplprops starts out NULL, so it is non-NULL here
+			 * when the read succeeded and only the lookup
+			 * failed; free it so that case does not leak.
+			 */
+			if (zplprops)
+				nvlist_free(zplprops);
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "unable to get %s property"),
+			    zfs_prop_to_name(prop));
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOMEM,
+			    dgettext(TEXT_DOMAIN, "internal error")));
+		}
+		if (zplprops)
+			nvlist_free(zplprops);
+		zcmd_free_nvlists(&zc);
+		break;
+
+	default:
+		switch (zfs_prop_get_type(prop)) {
+		case PROP_TYPE_NUMBER:
+		case PROP_TYPE_INDEX:
+			*val = getprop_uint64(zhp, prop, source);
+			/*
+			 * If we tried to use a default value for a
+			 * readonly property, it means that it was not
+			 * present; return an error.
+			 */
+			if (zfs_prop_readonly(prop) &&
+			    *source && (*source)[0] == '\0') {
+				return (-1);
+			}
+			break;
+
+		case PROP_TYPE_STRING:
+		default:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "cannot get non-numeric property"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "internal error")));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Calculate the source type, given the raw source string.
+ *
+ * Does nothing when 'statbuf' is NULL or *srctype is already
+ * ZPROP_SRC_TEMPORARY.  Otherwise a NULL source maps to ZPROP_SRC_NONE, an
+ * empty source to ZPROP_SRC_DEFAULT, a source equal to the dataset's own
+ * name to ZPROP_SRC_LOCAL, and anything else to ZPROP_SRC_INHERITED with
+ * the source string copied into 'statbuf'.
+ */
+static void
+get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
+    char *statbuf, size_t statlen)
+{
+	if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY)
+		return;
+
+	if (source == NULL) {
+		*srctype = ZPROP_SRC_NONE;
+		return;
+	}
+
+	if (source[0] == '\0') {
+		*srctype = ZPROP_SRC_DEFAULT;
+		return;
+	}
+
+	if (strcmp(source, zhp->zfs_name) == 0) {
+		*srctype = ZPROP_SRC_LOCAL;
+		return;
+	}
+
+	/* Set on some other dataset: report where it came from. */
+	(void) strlcpy(statbuf, source, statlen);
+	*srctype = ZPROP_SRC_INHERITED;
+}
+
+/*
+ * Retrieve a property from the given object.  If 'literal' is specified, then
+ * numbers are left as exact values.  Otherwise, numbers are converted to a
+ * human-readable form.
+ *
+ * Returns 0 on success, or -1 on error.
+ */
+int
+zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
+    zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
+{
+	char *source = NULL;
+	uint64_t val;
+	char *str;
+	const char *strval;
+
+	/*
+	 * Check to see if this property applies to our object
+	 */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (-1);
+
+	if (src)
+		*src = ZPROP_SRC_NONE;
+
+	switch (prop) {
+	case ZFS_PROP_CREATION:
+		/*
+		 * 'creation' is a time_t stored in the statistics.  We convert
+		 * this into a string unless 'literal' is specified.
+		 */
+		{
+			val = getprop_uint64(zhp, prop, &source);
+			time_t time = (time_t)val;
+			struct tm t;
+
+			if (literal ||
+			    localtime_r(&time, &t) == NULL ||
+			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
+			    &t) == 0)
+				(void) snprintf(propbuf, proplen, "%llu", val);
+		}
+		break;
+
+	case ZFS_PROP_MOUNTPOINT:
+		/*
+		 * Getting the precise mountpoint can be tricky.
+		 *
+		 *  - for 'none' or 'legacy', return those values.
+		 *  - for inherited mountpoints, we want to take everything
+		 *    after our ancestor and append it to the inherited value.
+		 *
+		 * If the pool has an alternate root, we want to prepend that
+		 * root to any values we return.
+		 */
+
+		str = getprop_string(zhp, prop, &source);
+
+		if (str[0] == '/') {
+			char buf[MAXPATHLEN];
+			char *root = buf;
+			const char *relpath = zhp->zfs_name + strlen(source);
+
+			if (relpath[0] == '/')
+				relpath++;
+
+			if ((zpool_get_prop(zhp->zpool_hdl,
+			    ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) ||
+			    (strcmp(root, "-") == 0))
+				root[0] = '\0';
+			/*
+			 * Special case an alternate root of '/'. This will
+			 * avoid having multiple leading slashes in the
+			 * mountpoint path.
+			 */
+			if (strcmp(root, "/") == 0)
+				root++;
+
+			/*
+			 * If the mountpoint is '/' then skip over this
+			 * if we are obtaining either an alternate root or
+			 * an inherited mountpoint.
+			 */
+			if (str[1] == '\0' && (root[0] != '\0' ||
+			    relpath[0] != '\0'))
+				str++;
+
+			if (relpath[0] == '\0')
+				(void) snprintf(propbuf, proplen, "%s%s",
+				    root, str);
+			else
+				(void) snprintf(propbuf, proplen, "%s%s%s%s",
+				    root, str, relpath[0] == '@' ? "" : "/",
+				    relpath);
+		} else {
+			/* 'legacy' or 'none' */
+			(void) strlcpy(propbuf, str, proplen);
+		}
+
+		break;
+
+	case ZFS_PROP_ORIGIN:
+		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
+		    proplen);
+		/*
+		 * If there is no parent at all, return failure to indicate that
+		 * it doesn't apply to this dataset.
+		 */
+		if (propbuf[0] == '\0')
+			return (-1);
+		break;
+
+	case ZFS_PROP_QUOTA:
+	case ZFS_PROP_REFQUOTA:
+	case ZFS_PROP_RESERVATION:
+	case ZFS_PROP_REFRESERVATION:
+
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+
+		/*
+		 * If quota or reservation is 0, we translate this into 'none'
+		 * (unless literal is set), and indicate that it's the default
+		 * value.  Otherwise, we print the number nicely and indicate
+		 * that it's set locally.
+		 */
+		if (val == 0) {
+			if (literal)
+				(void) strlcpy(propbuf, "0", proplen);
+			else
+				(void) strlcpy(propbuf, "none", proplen);
+		} else {
+			if (literal)
+				(void) snprintf(propbuf, proplen, "%llu",
+				    (u_longlong_t)val);
+			else
+				zfs_nicenum(val, propbuf, proplen);
+		}
+		break;
+
+	case ZFS_PROP_COMPRESSRATIO:
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
+		    val / 100, (longlong_t)val % 100);
+		break;
+
+	case ZFS_PROP_TYPE:
+		switch (zhp->zfs_type) {
+		case ZFS_TYPE_FILESYSTEM:
+			str = "filesystem";
+			break;
+		case ZFS_TYPE_VOLUME:
+			str = "volume";
+			break;
+		case ZFS_TYPE_SNAPSHOT:
+			str = "snapshot";
+			break;
+		default:
+			abort();
+		}
+		(void) snprintf(propbuf, proplen, "%s", str);
+		break;
+
+	case ZFS_PROP_MOUNTED:
+		/*
+		 * The 'mounted' property is a pseudo-property that describes
+		 * whether the filesystem is currently mounted.  Even though
+		 * it's a boolean value, the typical values of "on" and "off"
+		 * don't make sense, so we translate to "yes" and "no".
+		 */
+		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
+		    src, &source, &val) != 0)
+			return (-1);
+		if (val)
+			(void) strlcpy(propbuf, "yes", proplen);
+		else
+			(void) strlcpy(propbuf, "no", proplen);
+		break;
+
+	case ZFS_PROP_NAME:
+		/*
+		 * The 'name' property is a pseudo-property derived from the
+		 * dataset name.  It is presented as a real property to simplify
+		 * consumers.
+		 */
+		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
+		break;
+
+	default:
+		switch (zfs_prop_get_type(prop)) {
+		case PROP_TYPE_NUMBER:
+			if (get_numeric_property(zhp, prop, src,
+			    &source, &val) != 0)
+				return (-1);
+			if (literal)
+				(void) snprintf(propbuf, proplen, "%llu",
+				    (u_longlong_t)val);
+			else
+				zfs_nicenum(val, propbuf, proplen);
+			break;
+
+		case PROP_TYPE_STRING:
+			(void) strlcpy(propbuf,
+			    getprop_string(zhp, prop, &source), proplen);
+			break;
+
+		case PROP_TYPE_INDEX:
+			if (get_numeric_property(zhp, prop, src,
+			    &source, &val) != 0)
+				return (-1);
+			if (zfs_prop_index_to_string(prop, val, &strval) != 0)
+				return (-1);
+			(void) strlcpy(propbuf, strval, proplen);
+			break;
+
+		default:
+			abort();
+		}
+	}
+
+	get_source(zhp, src, source, statbuf, statlen);
+
+	return (0);
+}
+
+/*
+ * Utility function to get the given numeric property.  Does no validation that
+ * the given property is the appropriate type; should only be used with
+ * hard-coded property types.
+ *
+ * The status of the lookup is discarded, so a failed lookup cannot be told
+ * apart from a property whose value really is 0.  'val' starts out as 0 so
+ * that a lookup which fails without storing anything never hands stack
+ * garbage back to the caller.
+ */
+uint64_t
+zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
+{
+	char *source;
+	uint64_t val = 0;
+
+	(void) get_numeric_property(zhp, prop, NULL, &source, &val);
+
+	return (val);
+}
+
+/*
+ * Sets the given numeric property to 'val' and returns whatever
+ * zfs_prop_set() returns.
+ *
+ * The value is rendered as an exact decimal string.  A human-readable
+ * rendering (zfs_nicenum) keeps only a few significant digits, so any value
+ * that is not a round number would be changed before it is set.
+ */
+int
+zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val)
+{
+	char buf[64];
+
+	(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)val);
+	return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf));
+}
+
+/*
+ * Integer counterpart of zfs_prop_get(): the value is stored through 'value'
+ * rather than being formatted into a string.  Returns 0 on success, -1 if the
+ * numeric lookup fails, or the result of zfs_error_fmt() when the property
+ * is not valid for this handle's dataset type.
+ */
+int
+zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
+    zprop_source_t *src, char *statbuf, size_t statlen)
+{
+	char *source;
+	int err;
+
+	/* Refuse properties that do not apply to this kind of dataset. */
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
+		err = zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE,
+		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
+		    zfs_prop_to_name(prop));
+		return (err);
+	}
+
+	/* The caller's source starts out as "none" until the lookup runs. */
+	if (src != NULL)
+		*src = ZPROP_SRC_NONE;
+
+	err = get_numeric_property(zhp, prop, src, &source, value);
+	if (err != 0)
+		return (-1);
+
+	get_source(zhp, src, source, statbuf, statlen);
+	return (0);
+}
+
+/*
+ * Accessor: hands back the dataset name stored in the handle.
+ */
+const char *
+zfs_get_name(const zfs_handle_t *zhp)
+{
+	const char *name = zhp->zfs_name;
+
+	return (name);
+}
+
+/*
+ * Accessor: hands back the dataset type stored in the handle.
+ */
+zfs_type_t
+zfs_get_type(const zfs_handle_t *zhp)
+{
+	zfs_type_t type = zhp->zfs_type;
+
+	return (type);
+}
+
+/*
+ * Invoke 'func' on every child filesystem of 'zhp'.  Handles that are not
+ * filesystems have nothing to visit and yield 0.  Iteration stops at the
+ * first non-zero callback result, which is returned unchanged.
+ */
+int
+zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *child;
+	int err;
+
+	if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM)
+		return (0);
+
+	for (;;) {
+		/* Every request is issued against the parent's name. */
+		(void) strlcpy(zc.zc_name, zhp->zfs_name,
+		    sizeof (zc.zc_name));
+		if (ioctl(zhp->zfs_hdl->libzfs_fd,
+		    ZFS_IOC_DATASET_LIST_NEXT, &zc) != 0)
+			break;
+
+		/* Private dataset names are not exposed to the callback. */
+		if (dataset_name_hidden(zc.zc_name))
+			continue;
+
+		/*
+		 * A child we cannot open is skipped without complaint; the
+		 * only plausible explanation is that the pool has since been
+		 * removed.
+		 */
+		child = make_dataset_handle(zhp->zfs_hdl, zc.zc_name);
+		if (child == NULL)
+			continue;
+
+		err = func(child, data);
+		if (err != 0)
+			return (err);
+	}
+
+	/*
+	 * ESRCH marks the normal end of the list, and ENOENT means the
+	 * dataset was removed after we obtained the handle.  Neither is
+	 * reported as an error.
+	 */
+	if (errno == ESRCH || errno == ENOENT)
+		return (0);
+
+	return (zfs_standard_error(zhp->zfs_hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
+}
+
+/*
+ * Invoke 'func' on every snapshot of 'zhp'.  A snapshot handle has no
+ * snapshots of its own and yields 0.  Iteration stops at the first non-zero
+ * callback result, which is returned unchanged.
+ */
+int
+zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *snap;
+	int err;
+
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
+		return (0);
+
+	for (;;) {
+		/* Every request is issued against the parent's name. */
+		(void) strlcpy(zc.zc_name, zhp->zfs_name,
+		    sizeof (zc.zc_name));
+		if (ioctl(zhp->zfs_hdl->libzfs_fd,
+		    ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) != 0)
+			break;
+
+		/* Snapshots we cannot open are skipped. */
+		snap = make_dataset_handle(zhp->zfs_hdl, zc.zc_name);
+		if (snap == NULL)
+			continue;
+
+		err = func(snap, data);
+		if (err != 0)
+			return (err);
+	}
+
+	/*
+	 * ESRCH marks the normal end of the list, and ENOENT means the
+	 * dataset was removed after we obtained the handle.  Both cases are
+	 * silently treated as success.
+	 */
+	if (errno == ESRCH || errno == ENOENT)
+		return (0);
+
+	return (zfs_standard_error(zhp->zfs_hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
+}
+
+/*
+ * Visit all children of 'zhp': child filesystems first, then snapshots.
+ * The snapshot pass is skipped if the filesystem pass returns non-zero.
+ */
+int
+zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
+{
+	int ret = zfs_iter_filesystems(zhp, func, data);
+
+	if (ret == 0)
+		ret = zfs_iter_snapshots(zhp, func, data);
+
+	return (ret);
+}
+
+/*
+ * Given a complete name, copy just the portion that refers to the parent
+ * (everything before the last '/') into 'buf' and NUL-terminate it.
+ *
+ * Returns 0 on success.  Returns -1, leaving 'buf' untouched, if 'path'
+ * contains no '/' (i.e. it names a pool) or if the parent name plus its
+ * terminating NUL does not fit in 'buflen' bytes.
+ *
+ * 'buf' may be the very buffer 'path' points to; check_parents() uses this to
+ * strip one trailing component at a time in place.
+ */
+static int
+parent_name(const char *path, char *buf, size_t buflen)
+{
+	const char *loc;
+	size_t len;
+
+	if ((loc = strrchr(path, '/')) == NULL)
+		return (-1);
+
+	/* Never write the terminator outside the caller's buffer. */
+	len = (size_t)(loc - path);
+	if (len >= buflen)
+		return (-1);
+
+	/* memmove() rather than strncpy(): source and target may overlap. */
+	(void) memmove(buf, path, len);
+	buf[len] = '\0';
+
+	return (0);
+}
+
+/*
+ * If accept_ancestor is false, then check to make sure that the given path has
+ * a parent, and that it exists.  If accept_ancestor is true, then find the
+ * closest existing ancestor for the given path.  In prefixlen return the
+ * length of already existing prefix of the given path.  We also fetch the
+ * 'zoned' property, which is used to validate property settings when creating
+ * new datasets.
+ *
+ * Returns 0 on success.  On failure an error has been recorded on 'hdl' and
+ * a non-zero value is returned.  'prefixlen' may be NULL if the caller does
+ * not need the length.
+ */
+static int
+check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
+    boolean_t accept_ancestor, int *prefixlen)
+{
+	zfs_cmd_t zc = { 0 };
+	char parent[ZFS_MAXNAMELEN];
+	char *slash;
+	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	/* Localize the message prefix, as zfs_create() and zfs_clone() do. */
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), path);
+
+	/* get parent, and check to see if this is just a pool */
+	if (parent_name(path, parent, sizeof (parent)) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "missing dataset name"));
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	}
+
+	/*
+	 * check to see if the pool exists; the pool name is the part of
+	 * 'parent' before its first '/', or all of it if there is none
+	 */
+	if ((slash = strchr(parent, '/')) == NULL)
+		slash = parent + strlen(parent);
+	(void) strncpy(zc.zc_name, parent, slash - parent);
+	zc.zc_name[slash - parent] = '\0';
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
+	    errno == ENOENT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no such pool '%s'"), zc.zc_name);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	/* check to see if the parent dataset exists */
+	while ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
+		if (errno == ENOENT && accept_ancestor) {
+			/*
+			 * Go deeper to find an ancestor, give up on top level.
+			 * 'parent' is shortened in place by one component.
+			 */
+			if (parent_name(parent, parent, sizeof (parent)) != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "no such pool '%s'"), zc.zc_name);
+				return (zfs_error(hdl, EZFS_NOENT, errbuf));
+			}
+		} else if (errno == ENOENT) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent does not exist"));
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		} else
+			return (zfs_standard_error(hdl, errno, errbuf));
+	}
+
+	*zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	/* we are in a non-global zone, but parent is in the global zone */
+	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
+		(void) zfs_standard_error(hdl, EPERM, errbuf);
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	/* make sure parent is a filesystem */
+	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "parent is not a filesystem"));
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	zfs_close(zhp);
+	if (prefixlen != NULL)
+		*prefixlen = strlen(parent);
+	return (0);
+}
+
+/*
+ * Reports whether 'path' names an existing dataset whose type is among the
+ * type bits given in 'types'.  An invalid name counts as "does not exist".
+ */
+boolean_t
+zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types)
+{
+	zfs_handle_t *zhp;
+	boolean_t found;
+
+	if (!zfs_validate_name(hdl, path, types, B_FALSE))
+		return (B_FALSE);
+
+	/* Opening a handle is how we learn whether the dataset is there. */
+	zhp = make_dataset_handle(hdl, path);
+	if (zhp == NULL)
+		return (B_FALSE);
+
+	found = (types & zhp->zfs_type) ? B_TRUE : B_FALSE;
+	zfs_close(zhp);
+
+	return (found);
+}
+
+/*
+ * Given a path to 'target', create all the ancestors between
+ * the prefixlen portion of the path, and the target itself.
+ * Fail if the initial prefixlen-ancestor does not already exist.
+ *
+ * 'target' is modified while this runs: each '/' past the prefix is
+ * temporarily replaced by a NUL so that the leading part can be used as a
+ * dataset name.  On failure the string is left cut at the ancestor that
+ * could not be processed, which is also the name shown in the error message.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int
+create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
+{
+	zfs_handle_t *h;
+	char *cp;
+	const char *opname;
+
+	/* make sure prefix exists */
+	cp = target + prefixlen;
+	if (*cp != '/') {
+		assert(strchr(cp, '/') == NULL);
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+	} else {
+		*cp = '\0';
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		*cp = '/';
+	}
+	if (h == NULL)
+		return (-1);
+	zfs_close(h);
+
+	/*
+	 * If 'target' ends exactly at the prefix there is nothing left to
+	 * create, and stepping one past the prefix below would read beyond
+	 * the terminating NUL.
+	 */
+	if (*cp == '\0')
+		return (0);
+
+	/*
+	 * Attempt to create, mount, and share any ancestor filesystems,
+	 * up to the prefixlen-long one.
+	 */
+	for (cp = target + prefixlen + 1;
+	    (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) {
+		char *logstr;
+
+		*cp = '\0';
+
+		h = make_dataset_handle(hdl, target);
+		if (h) {
+			/* it already exists, nothing to do here */
+			zfs_close(h);
+			continue;
+		}
+
+		/* The log string is cleared for the duration of the create. */
+		logstr = hdl->libzfs_log_str;
+		hdl->libzfs_log_str = NULL;
+		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
+		    NULL) != 0) {
+			hdl->libzfs_log_str = logstr;
+			opname = dgettext(TEXT_DOMAIN, "create");
+			goto ancestorerr;
+		}
+
+		hdl->libzfs_log_str = logstr;
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		if (h == NULL) {
+			opname = dgettext(TEXT_DOMAIN, "open");
+			goto ancestorerr;
+		}
+
+		/* From here on 'h' is open and must be closed on failure. */
+		if (zfs_mount(h, NULL, 0) != 0) {
+			zfs_close(h);
+			opname = dgettext(TEXT_DOMAIN, "mount");
+			goto ancestorerr;
+		}
+
+		if (zfs_share(h) != 0) {
+			zfs_close(h);
+			opname = dgettext(TEXT_DOMAIN, "share");
+			goto ancestorerr;
+		}
+
+		zfs_close(h);
+	}
+
+	return (0);
+
+ancestorerr:
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+	    "failed to %s ancestor '%s'"), opname, target);
+	return (-1);
+}
+
+/*
+ * Creates whichever ancestors of 'path' do not exist yet.  Returns 0 on
+ * success and -1 on any failure, including failure to copy the path.
+ */
+int
+zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
+{
+	int prefix;
+	uint64_t zoned;
+	char *path_copy;
+	int rc;
+
+	if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0)
+		return (-1);
+
+	/* create_parents() writes into its argument, so give it a copy. */
+	path_copy = strdup(path);
+	if (path_copy == NULL)
+		return (-1);
+
+	rc = create_parents(hdl, path_copy, prefix);
+	free(path_copy);
+
+	return (rc != 0 ? -1 : 0);
+}
+
+/*
+ * Create a new filesystem or volume.
+ *
+ * 'path' is the full name of the dataset to create and 'type' selects
+ * between a filesystem and a volume.  'props' may be NULL; when given it is
+ * validated with zfs_valid_proplist() and the validated list is what gets
+ * sent.  A volume requires a volume size property.
+ *
+ * Returns 0 on success.  On failure an error has been recorded on 'hdl' and
+ * a non-zero value is returned.
+ */
+int
+zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
+    nvlist_t *props)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	int ioctl_errno;
+	uint64_t size = 0;
+	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
+	char errbuf[1024];
+	uint64_t zoned;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), path);
+
+	/* validate the path, taking care to note the extended error message */
+	if (!zfs_validate_name(hdl, path, type, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* validate parents exist */
+	if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0)
+		return (-1);
+
+	/*
+	 * The failure modes when creating a dataset of a different type over
+	 * one that already exists is a little strange.  In particular, if you
+	 * try to create a dataset on top of an existing dataset, the ioctl()
+	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
+	 * first try to see if the dataset exists.
+	 */
+	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
+	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+	}
+
+	if (type == ZFS_TYPE_VOLUME)
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	/* From here on 'props' is the validated list, which we must free. */
+	if (props && (props = zfs_valid_proplist(hdl, type, props,
+	    zoned, NULL, errbuf)) == 0)
+		return (-1);
+
+	if (type == ZFS_TYPE_VOLUME) {
+		/*
+		 * If we are creating a volume, the size and block size must
+		 * satisfy a few restraints.  First, the blocksize must be a
+		 * valid block size between SPA_{MIN,MAX}BLOCKSIZE.  Second, the
+		 * volsize must be a multiple of the block size, and cannot be
+		 * zero.
+		 */
+		if (props == NULL || nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing volume size"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		if ((ret = nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+		    &blocksize)) != 0) {
+			if (ret == ENOENT) {
+				blocksize = zfs_prop_default_numeric(
+				    ZFS_PROP_VOLBLOCKSIZE);
+			} else {
+				nvlist_free(props);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "missing volume block size"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+			}
+		}
+
+		if (size == 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume size cannot be zero"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+
+		if (size % blocksize != 0) {
+			nvlist_free(props);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume size must be a multiple of volume block "
+			    "size"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+		}
+	}
+
+	/* Release the validated list on the failure path as well. */
+	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+		nvlist_free(props);
+		return (-1);
+	}
+	nvlist_free(props);
+
+	/*
+	 * create the dataset; remember errno right away because the cleanup
+	 * calls below are free to change it before we get to examine it
+	 */
+	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
+	ioctl_errno = errno;
+
+	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(hdl, path);
+		if (ret) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully created, but device links "
+			    "were not created"));
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	/* check for failure (only the ioctl can have failed at this point) */
+	if (ret != 0) {
+		char parent[ZFS_MAXNAMELEN];
+		(void) parent_name(path, parent, sizeof (parent));
+
+		switch (ioctl_errno) {
+		case ENOENT:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+		case EINVAL:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent '%s' is not a filesystem"), parent);
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+
+		case EDOM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "volume block size must be power of 2 from "
+			    "%u to %uk"),
+			    (uint_t)SPA_MINBLOCKSIZE,
+			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
+
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to set this "
+			    "property or value"));
+			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+#ifdef _ILP32
+		case EOVERFLOW:
+			/*
+			 * This platform can't address a volume this big.
+			 */
+			if (type == ZFS_TYPE_VOLUME)
+				return (zfs_error(hdl, EZFS_VOLTOOBIG,
+				    errbuf));
+#endif
+			/* FALLTHROUGH */
+		default:
+			return (zfs_standard_error(hdl, ioctl_errno, errbuf));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Destroys the dataset behind 'zhp'.  It is up to the caller to ensure the
+ * filesystem is not mounted and has no active dependents.  Returns 0 on
+ * success and non-zero on failure.
+ */
+int
+zfs_destroy(zfs_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (ZFS_IS_VOLUME(zhp)) {
+		/*
+		 * A user without permission to unshare the volume cannot
+		 * destroy it either; give up before touching anything.  This
+		 * would only happen for a non-privileged user.
+		 */
+		if (zfs_unshare_iscsi(zhp) != 0)
+			return (-1);
+
+		if (zvol_remove_link(hdl, zhp->zfs_name) != 0)
+			return (-1);
+
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	} else {
+		zc.zc_objset_type = DMU_OST_ZFS;
+	}
+
+	if (zfs_ioctl(hdl, ZFS_IOC_DESTROY, &zc) == 0) {
+		remove_mountpoint(zhp);
+		return (0);
+	}
+
+	return (zfs_standard_error_fmt(hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
+	    zhp->zfs_name));
+}
+
+/*
+ * State shared by zfs_destroy_snaps() and zfs_remove_link_cb() while walking
+ * a dataset and its descendant filesystems.
+ */
+struct destroydata {
+	char *snapname;		/* snapshot name, without the "fs@" part */
+	boolean_t gotone;	/* set once any "<dataset>@snapname" opens */
+	boolean_t closezhp;	/* should the callback close its handle? */
+};
+
+/*
+ * Callback for zfs_destroy_snaps(): for 'zhp' and, recursively, each of its
+ * child filesystems, note whether "<name>@<snapname>" exists and, for
+ * volumes, remove that snapshot's device link.
+ *
+ * 'arg' is a struct destroydata.  Returns whatever the recursive
+ * zfs_iter_filesystems() walk returns.
+ */
+static int
+zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct destroydata *dd = arg;
+	zfs_handle_t *szhp;
+	char name[ZFS_MAXNAMELEN];
+	/*
+	 * Sample the flag before it is forced to B_TRUE below: the value seen
+	 * here decides whether THIS invocation closes 'zhp', while every
+	 * nested invocation will see B_TRUE and close the handle it is given.
+	 */
+	boolean_t closezhp = dd->closezhp;
+	int rv;
+
+	/* Build "<dataset>@<snapname>". */
+	(void) strlcpy(name, zhp->zfs_name, sizeof (name));
+	(void) strlcat(name, "@", sizeof (name));
+	(void) strlcat(name, dd->snapname, sizeof (name));
+
+	/* Record that at least one matching snapshot exists. */
+	szhp = make_dataset_handle(zhp->zfs_hdl, name);
+	if (szhp) {
+		dd->gotone = B_TRUE;
+		zfs_close(szhp);
+	}
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		(void) zvol_remove_link(zhp->zfs_hdl, name);
+		/*
+		 * NB: this is simply a best-effort.  We don't want to
+		 * return an error, because then we wouldn't visit all
+		 * the volumes.
+		 */
+	}
+
+	dd->closezhp = B_TRUE;
+	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
+	if (closezhp)
+		zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Destroys every snapshot called 'snapname' in 'zhp' and its descendants.
+ * Fails with an ENOENT-based error when no such snapshot exists anywhere
+ * in the tree.
+ */
+int
+zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
+{
+	zfs_cmd_t zc = { 0 };
+	struct destroydata dd = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+
+	/* Drop device links and find out whether anything matches at all. */
+	dd.snapname = snapname;
+	(void) zfs_remove_link_cb(zhp, &dd);
+
+	if (!dd.gotone) {
+		return (zfs_standard_error_fmt(hdl, ENOENT,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
+		    zhp->zfs_name, snapname));
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+
+	if (zfs_ioctl(hdl, ZFS_IOC_DESTROY_SNAPS, &zc) == 0)
+		return (0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot destroy '%s@%s'"), zc.zc_name, snapname);
+
+	if (errno == EEXIST) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot is cloned"));
+		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+	}
+
+	return (zfs_standard_error(hdl, errno, errbuf));
+}
+
+/*
+ * Clones the given dataset.  The target must be of the same type as the source.
+ *
+ * 'zhp' must be a snapshot handle (asserted).  'target' is the name of the
+ * clone to create and 'props' is an optional property list which, when
+ * given, is validated before being sent along with the request.
+ *
+ * Returns 0 on success and non-zero on failure.  For a volume, the result of
+ * creating the device link is returned.
+ */
+int
+zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
+{
+	zfs_cmd_t zc = { 0 };
+	char parent[ZFS_MAXNAMELEN];
+	int ret;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_type_t type;
+	uint64_t zoned;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), target);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* validate parents exist */
+	if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0)
+		return (-1);
+
+	/* 'parent' is only used for the ENOENT message further down */
+	(void) parent_name(target, parent, sizeof (parent));
+
+	/* do the clone */
+	if (ZFS_IS_VOLUME(zhp)) {
+		zc.zc_objset_type = DMU_OST_ZVOL;
+		type = ZFS_TYPE_VOLUME;
+	} else {
+		zc.zc_objset_type = DMU_OST_ZFS;
+		type = ZFS_TYPE_FILESYSTEM;
+	}
+
+	if (props) {
+		/* 'props' now refers to the validated list, freed below */
+		if ((props = zfs_valid_proplist(hdl, type, props, zoned,
+		    zhp, errbuf)) == NULL)
+			return (-1);
+
+		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+
+		nvlist_free(props);
+	}
+
+	/* zc_name is the clone to create, zc_value the snapshot to clone */
+	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
+	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc);
+
+	zcmd_free_nvlists(&zc);
+
+	if (ret != 0) {
+		switch (errno) {
+
+		case ENOENT:
+			/*
+			 * The parent doesn't exist.  We should have caught this
+			 * above, but there may a race condition that has since
+			 * destroyed the parent.
+			 *
+			 * At this point, we don't know whether it's the source
+			 * that doesn't exist anymore, or whether the target
+			 * dataset doesn't exist.
+			 */
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
+
+		case EXDEV:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "source and target pools differ"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
+			    errbuf));
+
+		default:
+			return (zfs_standard_error(zhp->zfs_hdl, errno,
+			    errbuf));
+		}
+	} else if (ZFS_IS_VOLUME(zhp)) {
+		/* a cloned volume also needs its device link */
+		ret = zvol_create_link(zhp->zfs_hdl, target);
+	}
+
+	return (ret);
+}
+
+/*
+ * State handed to the snapshot callbacks used by zfs_promote().
+ */
+typedef struct promote_data {
+	char cb_mountpoint[MAXPATHLEN];	/* mountpoint of the origin's parent */
+	const char *cb_target;		/* name of the clone being promoted */
+	const char *cb_errbuf;		/* error prefix used when reporting */
+	uint64_t cb_pivot_txg;		/* creation txg of the origin snapshot */
+} promote_data_t;
+
+/*
+ * Snapshot callback run before a promote.  For each snapshot created at or
+ * before the pivot txg: remove its device link if it belongs to a volume,
+ * and report a conflict if a snapshot with the same short name already
+ * exists under the promote target.  The handle is always closed.
+ */
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	zfs_handle_t *conflict;
+	char snapname[MAXPATHLEN];
+	int rv = 0;
+
+	/* Snapshots taken after the pivot point are of no interest. */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
+		/* A zvol snapshot loses its device link. */
+		if (ZFS_IS_VOLUME(zhp))
+			(void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
+
+		/* Would "<target>@<snap>" collide with something? */
+		(void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
+		(void) strlcat(snapname, strchr(zhp->zfs_name, '@'),
+		    sizeof (snapname));
+
+		conflict = make_dataset_handle(zhp->zfs_hdl, snapname);
+		if (conflict != NULL) {
+			zfs_close(conflict);
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot name '%s' from origin \n"
+			    "conflicts with '%s' from target"),
+			    zhp->zfs_name, snapname);
+			rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS,
+			    pd->cb_errbuf);
+		}
+	}
+
+	zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Snapshot callback run after a promote attempt: create the device link for
+ * every zvol snapshot created at or before the pivot txg.  Always closes the
+ * handle and returns 0.
+ */
+static int
+promote_snap_done_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	uint64_t txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
+
+	/* Snapshots taken after the pivot point are left alone. */
+	if (txg <= pd->cb_pivot_txg && ZFS_IS_VOLUME(zhp))
+		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Promotes the given clone fs to be the clone parent.
+ *
+ * 'zhp' must not be a snapshot and must have an origin.  Returns 0 on
+ * success.  On failure a non-zero value is returned; most failure paths
+ * record an error on the library handle first.
+ */
+int
+zfs_promote(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	char parent[MAXPATHLEN];
+	char *cp;
+	int ret;
+	zfs_handle_t *pzhp;
+	promote_data_t pd;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot promote '%s'"), zhp->zfs_name);
+
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots can not be promoted"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+
+	/* An empty origin means this dataset is not a clone. */
+	(void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent));
+	if (parent[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "not a cloned filesystem"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+	/*
+	 * Cut the origin at '@' to get the name of the dataset it belongs to.
+	 * NOTE(review): assumes the origin always contains '@'; there is no
+	 * NULL check on the strchr() result.
+	 */
+	cp = strchr(parent, '@');
+	*cp = '\0';
+
+	/* Walk the snapshots we will be moving */
+	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+	if (pzhp == NULL)
+		return (-1);
+	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+	zfs_close(pzhp);
+	pd.cb_target = zhp->zfs_name;
+	pd.cb_errbuf = errbuf;
+	/* from here on 'pzhp' refers to the origin's parent dataset */
+	pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
+	if (pzhp == NULL)
+		return (-1);
+	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+	if (ret != 0) {
+		zfs_close(pzhp);
+		return (-1);
+	}
+
+	/* issue the ioctl */
+	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
+	    sizeof (zc.zc_value));
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+
+	if (ret != 0) {
+		/* saved because the calls below may change errno */
+		int save_errno = errno;
+
+		/* the promote failed: re-create links under the old parent */
+		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
+		zfs_close(pzhp);
+
+		switch (save_errno) {
+		case EEXIST:
+			/*
+			 * There is a conflicting snapshot name.  We
+			 * should have caught this above, but they could
+			 * have renamed something in the mean time.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "conflicting snapshot name from parent '%s'"),
+			    parent);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, save_errno, errbuf));
+		}
+	} else {
+		/* the promote succeeded: create links under the clone */
+		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
+	}
+
+	/* only the success path reaches here; failures closed pzhp above */
+	zfs_close(pzhp);
+	return (ret);
+}
+
+/*
+ * Argument block for zfs_create_link_cb().
+ */
+struct createdata {
+	const char *cd_snapname;	/* snapshot name, without the "fs@" part */
+	int cd_ifexists;		/* passed to zvol_create_link_common() */
+};
+
+/*
+ * Callback that creates the device link for "<volume>@<snapname>" when 'zhp'
+ * is a volume, then recurses into the child filesystems of 'zhp'.  The
+ * handle is closed before returning the result of the recursive walk.
+ */
+static int
+zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct createdata *cd = arg;
+	char name[MAXPATHLEN];
+	int ret;
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		(void) snprintf(name, sizeof (name), "%s@%s",
+		    zhp->zfs_name, cd->cd_snapname);
+
+		/*
+		 * Best effort only: an error here is deliberately ignored,
+		 * since returning it would stop the walk before all the
+		 * volumes have been visited.
+		 */
+		(void) zvol_create_link_common(zhp->zfs_hdl, name,
+		    cd->cd_ifexists);
+	}
+
+	ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * Takes a snapshot of the given dataset.
+ *
+ * 'path' is the full snapshot name ("dataset@snap").  When 'recursive' is
+ * set, the flag is passed to the kernel in zc_cookie and, on success, device
+ * links are created for matching volume snapshots below the dataset.
+ * 'props' is an optional property list, validated before use.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
+    nvlist_t *props)
+{
+	const char *delim;
+	char parent[ZFS_MAXNAMELEN];
+	zfs_handle_t *zhp;
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	int ioctl_errno;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot snapshot '%s'"), path);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	if (props) {
+		/* 'props' now refers to the validated list, freed below */
+		if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
+		    props, B_FALSE, NULL, errbuf)) == NULL)
+			return (-1);
+
+		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+
+		nvlist_free(props);
+	}
+
+	/* make sure the parent exists and is of the appropriate type */
+	delim = strchr(path, '@');
+	(void) strncpy(parent, path, delim - path);
+	parent[delim - path] = '\0';
+
+	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
+	    ZFS_TYPE_VOLUME)) == NULL) {
+		zcmd_free_nvlists(&zc);
+		return (-1);
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+	zc.zc_cookie = recursive;
+	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc);
+	/*
+	 * Remember why the ioctl failed right away: the cleanup and message
+	 * formatting calls that run before the error is reported are free to
+	 * change errno.
+	 */
+	ioctl_errno = errno;
+
+	zcmd_free_nvlists(&zc);
+
+	/*
+	 * if it was recursive, the one that actually failed will be in
+	 * zc.zc_name.
+	 */
+	if (ret != 0)
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
+
+	if (ret == 0 && recursive) {
+		struct createdata cd;
+
+		cd.cd_snapname = delim + 1;
+		cd.cd_ifexists = B_FALSE;
+		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
+	}
+	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(zhp->zfs_hdl, path);
+		if (ret != 0) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully snapshotted, but device links "
+			    "were not created"));
+			zfs_close(zhp);
+			return (-1);
+		}
+	}
+
+	/* only the ioctl can have failed if we get here with ret != 0 */
+	if (ret != 0)
+		(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
+
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * Destroy any more recent snapshots.  We invoke this callback on any dependents
+ * of the snapshot first.  If the 'cb_dependent' member is non-zero, then this
+ * is a dependent and we should just destroy it without checking the transaction
+ * group.
+ *
+ * This structure carries the state shared between zfs_rollback() and the
+ * rollback_destroy() callback described above.
+ */
+typedef struct rollback_data {
+	const char	*cb_target;		/* the snapshot */
+	uint64_t	cb_create;		/* creation time reference */
+	boolean_t	cb_error;		/* set if any destroy failed */
+	boolean_t	cb_dependent;		/* visiting a dependent now */
+	boolean_t	cb_force;		/* unmount with MS_FORCE */
+} rollback_data_t;
+
+/*
+ * Iteration callback for zfs_rollback().
+ *
+ * When not visiting a dependent: if 'zhp' is a snapshot other than the
+ * rollback target and was created after it, destroy its dependents and then
+ * the snapshot itself.  When visiting a dependent: unmount it through a
+ * changelist and destroy it unconditionally.
+ *
+ * Failures are accumulated in cb_error rather than returned, so that the
+ * walk continues; the handle is always closed and 0 is always returned.
+ */
+static int
+rollback_destroy(zfs_handle_t *zhp, void *data)
+{
+	rollback_data_t *cbp = data;
+
+	if (!cbp->cb_dependent) {
+		if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 &&
+		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
+		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
+		    cbp->cb_create) {
+			char *logstr;
+
+			/* Dependents go first, in "dependent" mode. */
+			cbp->cb_dependent = B_TRUE;
+			cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE,
+			    rollback_destroy, cbp);
+			cbp->cb_dependent = B_FALSE;
+
+			/* The log string is cleared around the destroy. */
+			logstr = zhp->zfs_hdl->libzfs_log_str;
+			zhp->zfs_hdl->libzfs_log_str = NULL;
+			cbp->cb_error |= zfs_destroy(zhp);
+			zhp->zfs_hdl->libzfs_log_str = logstr;
+		}
+	} else {
+		/* We must destroy this clone; first unmount it */
+		prop_changelist_t *clp;
+
+		clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+		    cbp->cb_force ? MS_FORCE: 0);
+		if (clp == NULL || changelist_prefix(clp) != 0) {
+			cbp->cb_error = B_TRUE;
+			/* Don't leak a changelist whose prefix step failed. */
+			if (clp != NULL)
+				changelist_free(clp);
+			zfs_close(zhp);
+			return (0);
+		}
+		if (zfs_destroy(zhp) != 0)
+			cbp->cb_error = B_TRUE;
+		else
+			changelist_remove(clp, zhp->zfs_name);
+		(void) changelist_postfix(clp);
+		changelist_free(clp);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Given a dataset, rollback to a specific snapshot, discarding any
+ * data changes since then and making it the active dataset.
+ *
+ * Any snapshots more recent than the target are destroyed, along with
+ * their dependents.
+ *
+ * 'zhp' must be a filesystem or volume and 'snap' the snapshot to roll back
+ * to; 'force' is passed on to the unmount of any dependents.  Returns 0 on
+ * success and non-zero on failure.
+ */
+int
+zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
+{
+	rollback_data_t cb = { 0 };
+	int err;
+	zfs_cmd_t zc = { 0 };
+	boolean_t restore_resv = 0;
+	uint64_t old_volsize = 0, new_volsize;
+	zfs_prop_t resv_prop;
+
+	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
+	    zhp->zfs_type == ZFS_TYPE_VOLUME);
+
+	/*
+	 * Destroy all recent snapshots and their dependents.
+	 */
+	cb.cb_force = force;
+	cb.cb_target = snap->zfs_name;
+	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
+	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
+
+	if (cb.cb_error)
+		return (-1);
+
+	/*
+	 * Now that we have verified that the snapshot is the latest,
+	 * rollback to the given snapshot.
+	 */
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		/*
+		 * Collect everything that can fail before tearing down the
+		 * minor node, so that an early error does not leave the
+		 * volume without its device link.
+		 */
+		if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
+			return (-1);
+		old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
+		restore_resv =
+		    (old_volsize == zfs_prop_get_int(zhp, resv_prop));
+		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+			return (-1);
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	/*
+	 * We rely on zfs_iter_children() to verify that there are no
+	 * newer snapshots for the given dataset.  Therefore, we can
+	 * simply pass the name on to the ioctl() call.  There is still
+	 * an unlikely race condition where the user has taken a
+	 * snapshot since we verified that this was the most recent.
+	 */
+	if ((err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) {
+		/* Capture errno before anything else can overwrite it. */
+		int saved_errno = errno;
+
+		/*
+		 * The rollback did not happen, so put back the device link
+		 * we removed above rather than altering the system state.
+		 */
+		if (zhp->zfs_type == ZFS_TYPE_VOLUME)
+			(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+
+		(void) zfs_standard_error_fmt(zhp->zfs_hdl, saved_errno,
+		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
+		    zhp->zfs_name);
+		return (err);
+	}
+
+	/*
+	 * For volumes, if the pre-rollback volsize matched the pre-
+	 * rollback reservation and the volsize has changed then set
+	 * the reservation property to the post-rollback volsize.
+	 * Make a new handle since the rollback closed the dataset.
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		zfs_handle_t *vol;
+
+		/*
+		 * NOTE(review): if a fresh handle cannot be made the link is
+		 * not re-created and 0 is still returned, as before.
+		 */
+		vol = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name);
+		if (vol != NULL) {
+			err = zvol_create_link(vol->zfs_hdl, vol->zfs_name);
+			if (err != 0) {
+				zfs_close(vol);
+				return (err);
+			}
+			if (restore_resv) {
+				new_volsize = zfs_prop_get_int(vol,
+				    ZFS_PROP_VOLSIZE);
+				if (old_volsize != new_volsize)
+					err = zfs_prop_set_int(vol, resv_prop,
+					    new_volsize);
+			}
+			zfs_close(vol);
+		}
+	}
+	return (err);
+}
+
+/*
+ * Iterate over all dependents for a given dataset.  This includes both
+ * hierarchical dependents (children) and data dependents (snapshots and
+ * clones).  The bulk of the processing occurs in get_dependents() in
+ * libzfs_graph.c.
+ */
+int
+zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion,
+    zfs_iter_f func, void *data)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char **names;
+	size_t ndeps;
+	zfs_handle_t *dep;
+	int idx;
+	int rc = 0;
+
+	/* Ask the graph code for the ordered list of dependent names. */
+	if (get_dependents(hdl, allowrecursion, zhp->zfs_name,
+	    &names, &ndeps) != 0)
+		return (-1);
+
+	/*
+	 * Hand each dependent to the callback, stopping at the first
+	 * non-zero return.  Names for which no handle can be made are
+	 * skipped.
+	 */
+	for (idx = 0; idx < ndeps && rc == 0; idx++) {
+		dep = make_dataset_handle(hdl, names[idx]);
+		if (dep != NULL)
+			rc = func(dep, data);
+	}
+
+	/* The name array and every string in it belong to us. */
+	for (idx = 0; idx < ndeps; idx++)
+		free(names[idx]);
+	free(names);
+
+	return (rc);
+}
+
+/*
+ * Renames the given dataset.
+ *
+ * For a snapshot, 'target' may be a full name or abbreviated to "snap" or
+ * "@snap"; either way the snapshot must stay within its dataset.  For any
+ * other type, 'target' must be in the same pool and must not be a descendant
+ * of the dataset itself, and 'recursive' is rejected.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
+{
+	int ret;
+	zfs_cmd_t zc = { 0 };
+	char *delim;
+	prop_changelist_t *cl = NULL;
+	zfs_handle_t *zhrp = NULL;
+	char *parentname = NULL;
+	char parent[ZFS_MAXNAMELEN];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+
+	/* if we have the same exact name, just return success */
+	if (strcmp(zhp->zfs_name, target) == 0)
+		return (0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot rename to '%s'"), target);
+
+	/*
+	 * Make sure the target name is valid
+	 */
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		if ((strchr(target, '@') == NULL) ||
+		    *target == '@') {
+			/*
+			 * Snapshot target name is abbreviated,
+			 * reconstruct full dataset name
+			 */
+			(void) strlcpy(parent, zhp->zfs_name,
+			    sizeof (parent));
+			delim = strchr(parent, '@');
+			/*
+			 * Keep the '@' when the target lacks one, otherwise
+			 * drop it since the target brings its own.
+			 */
+			if (strchr(target, '@') == NULL)
+				*(++delim) = '\0';
+			else
+				*delim = '\0';
+			(void) strlcat(parent, target, sizeof (parent));
+			target = parent;
+		} else {
+			/*
+			 * Make sure we're renaming within the same dataset.
+			 */
+			delim = strchr(target, '@');
+			if (strncmp(zhp->zfs_name, target, delim - target)
+			    != 0 || zhp->zfs_name[delim - target] != '@') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "snapshots must be part of same "
+				    "dataset"));
+				return (zfs_error(hdl, EZFS_CROSSTARGET,
+				    errbuf));
+			}
+		}
+		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	} else {
+		uint64_t unused;
+		size_t namelen;
+
+		if (recursive) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "recursive rename must be a snapshot"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		}
+
+		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+		/* validate parents */
+		if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0)
+			return (-1);
+
+		(void) parent_name(target, parent, sizeof (parent));
+
+		/* make sure we're in the same pool */
+		verify((delim = strchr(target, '/')) != NULL);
+		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
+		    zhp->zfs_name[delim - target] != '/') {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "datasets must be within same pool"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+		}
+
+		/*
+		 * new name cannot be a child of the current dataset name.
+		 * A bare prefix match is not enough: "pool/foobar" starts
+		 * with "pool/foo" but is not below it, so also require the
+		 * match to end on a component boundary.
+		 */
+		namelen = strlen(zhp->zfs_name);
+		if (strncmp(parent, zhp->zfs_name, namelen) == 0 &&
+		    (parent[namelen] == '\0' || parent[namelen] == '/')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "New dataset name cannot be a descendent of "
+			    "current dataset name"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+	}
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
+
+	if (getzoneid() == GLOBAL_ZONEID &&
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
+	}
+
+	if (recursive) {
+		struct destroydata dd;
+
+		parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
+		if (parentname == NULL) {
+			ret = -1;
+			goto error;
+		}
+		/*
+		 * Split "fs@snap" in place; 'delim + 1' keeps pointing at the
+		 * old snapshot name for the rest of the function.
+		 */
+		delim = strchr(parentname, '@');
+		*delim = '\0';
+		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET);
+		if (zhrp == NULL) {
+			ret = -1;
+			goto error;
+		}
+
+		dd.snapname = delim + 1;
+		dd.gotone = B_FALSE;
+		dd.closezhp = B_TRUE;
+
+		/* We remove any zvol links prior to renaming them */
+		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
+		if (ret) {
+			goto error;
+		}
+	} else {
+		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL)
+			return (-1);
+
+		if (changelist_haszonedchild(cl)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "child dataset with inherited mountpoint is used "
+			    "in a non-global zone"));
+			(void) zfs_error(hdl, EZFS_ZONED, errbuf);
+			/* 'ret' was never assigned on this path. */
+			ret = -1;
+			goto error;
+		}
+
+		if ((ret = changelist_prefix(cl)) != 0)
+			goto error;
+	}
+
+	if (ZFS_IS_VOLUME(zhp))
+		zc.zc_objset_type = DMU_OST_ZVOL;
+	else
+		zc.zc_objset_type = DMU_OST_ZFS;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
+
+	zc.zc_cookie = recursive;
+
+	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) {
+		/*
+		 * if it was recursive, the one that actually failed will
+		 * be in zc.zc_name
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot rename '%s'"), zc.zc_name);
+
+		if (recursive && errno == EEXIST) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "a child dataset already has a snapshot "
+			    "with the new name"));
+			(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
+		} else {
+			(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
+		}
+
+		/*
+		 * On failure, we still want to remount any filesystems that
+		 * were previously mounted, so we don't alter the system state.
+		 */
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = delim + 1;
+			cd.cd_ifexists = B_TRUE;
+			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
+			(void) changelist_postfix(cl);
+		}
+	} else {
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = strchr(target, '@') + 1;
+			cd.cd_ifexists = B_TRUE;
+			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
+			changelist_rename(cl, zfs_get_name(zhp), target);
+			ret = changelist_postfix(cl);
+		}
+	}
+
+error:
+	if (parentname) {
+		free(parentname);
+	}
+	if (zhrp) {
+		zfs_close(zhrp);
+	}
+	if (cl) {
+		changelist_free(cl);
+	}
+	return (ret);
+}
+
+/*
+ * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
+ * poke devfsadm to create the /dev link, and then wait for the link to appear.
+ */
+int
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	/* Plain creation: do not ask for ENOENT to be ignored. */
+	const int ifexists = B_FALSE;
+
+	return (zvol_create_link_common(hdl, dataset, ifexists));
+}
+
+/*
+ * Create the minor node for the zvol 'dataset' and make sure its /dev link
+ * shows up.
+ *
+ * An already existing minor node (EEXIST) is not an error.  When 'ifexists'
+ * is non-zero, a dataset unknown to the kernel (ENOENT) is quietly ignored
+ * as well.  Returns 0 on success and non-zero on failure.
+ */
+static int
+zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
+{
+	zfs_cmd_t zc = { 0 };
+	di_devlink_handle_t dhdl;
+	priv_set_t *priv_effective;
+	int privileged = 0;
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	/*
+	 * Issue the appropriate ioctl.
+	 */
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+		switch (errno) {
+		case EEXIST:
+			/*
+			 * Silently ignore the case where the link already
+			 * exists.  This allows 'zfs volinit' to be run multiple
+			 * times without errors.
+			 */
+			return (0);
+
+		case ENOENT:
+			/*
+			 * Dataset does not exist in the kernel.  If we
+			 * don't care (see zfs_rename), then ignore the
+			 * error quietly.
+			 */
+			if (ifexists) {
+				return (0);
+			}
+
+			/* FALLTHROUGH */
+
+		default:
+			return (zfs_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset));
+		}
+	}
+
+	/*
+	 * If privileged call devfsadm and wait for the links to
+	 * magically appear.
+	 * Otherwise, print out an informational message.
+	 */
+
+	/*
+	 * If the effective set cannot be allocated or read, fall back to
+	 * the unprivileged path instead of inspecting a NULL or unfilled set.
+	 */
+	priv_effective = priv_allocset();
+	if (priv_effective != NULL) {
+		if (getppriv(PRIV_EFFECTIVE, priv_effective) == 0)
+			privileged =
+			    (priv_isfullset(priv_effective) == B_TRUE);
+		priv_freeset(priv_effective);
+	}
+
+	if (privileged) {
+		if ((dhdl = di_devlink_init(ZFS_DRIVER,
+		    DI_MAKE_LINK)) == NULL) {
+			/* Save errno: the calls below may overwrite it. */
+			int err = errno;
+
+			/* Never use strerror() output as a format string. */
+			zfs_error_aux(hdl, "%s", strerror(err));
+			(void) zfs_error_fmt(hdl, err,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset);
+			/* Undo the minor node we just created. */
+			(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
+			return (-1);
+		} else {
+			(void) di_devlink_fini(&dhdl);
+		}
+	} else {
+		char pathname[MAXPATHLEN];
+		struct stat64 statbuf;
+		int i;
+
+#define	MAX_WAIT	10
+
+		/*
+		 * This is the poor man's way of waiting for the link
+		 * to show up.  If after 10 seconds we still don't
+		 * have it, then print out a message.
+		 */
+		(void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
+		    dataset);
+
+		for (i = 0; i != MAX_WAIT; i++) {
+			if (stat64(pathname, &statbuf) == 0)
+				break;
+			(void) sleep(1);
+		}
+		if (i == MAX_WAIT)
+			(void) printf(gettext("%s may not be immediately "
+			    "available\n"), pathname);
+	}
+
+	return (0);
+}
+
+/*
+ * Remove a minor node for the given zvol and the associated /dev links.
+ */
+int
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) == 0)
+		return (0);
+
+	/*
+	 * A link that is already gone is not an error; this lets
+	 * 'zfs volfini' be run repeatedly without complaint.
+	 */
+	if (errno == ENXIO)
+		return (0);
+
+	return (zfs_standard_error_fmt(hdl, errno,
+	    dgettext(TEXT_DOMAIN, "cannot remove device "
+	    "links for '%s'"), dataset));
+}
+
+nvlist_t *
+zfs_get_user_props(zfs_handle_t *zhp)
+{
+	/* Plain accessor: hands back the list stored on the handle. */
+	nvlist_t *props = zhp->zfs_user_props;
+
+	return (props);
+}
+
+/*
+ * This function is used by 'zfs list' to determine the exact set of columns to
+ * display, and their maximum widths.  This does two main things:
+ *
+ *      - If this is a list of all properties, then expand the list to include
+ *        all native properties, and set a flag so that for each dataset we look
+ *        for new unique user properties and add them to the list.
+ *
+ *      - For non fixed-width properties, keep track of the maximum width seen
+ *        so that we can size the column appropriately.
+ */
+int
+zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zprop_list_t *cur;
+	zprop_list_t **tail, **first_user;
+	nvlist_t *userprops, *propval;
+	nvpair_t *pair;
+	char *strval;
+	char buf[ZFS_MAXPROPLEN];
+
+	if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0)
+		return (-1);
+
+	userprops = zfs_get_user_props(zhp);
+
+	cur = *plp;
+	if (cur->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) {
+		/*
+		 * Listing everything and this dataset has user properties:
+		 * skip ahead to the first non-native entry of the list.
+		 */
+		first_user = plp;
+		while (*first_user != NULL &&
+		    (*first_user)->pl_prop != ZPROP_INVAL)
+			first_user = &(*first_user)->pl_next;
+
+		for (pair = nvlist_next_nvpair(userprops, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(userprops, pair)) {
+			const char *name = nvpair_name(pair);
+
+			/*
+			 * Look for this user property among the entries we
+			 * already have; 'tail' ends up at the matching link
+			 * or at the terminating NULL link.
+			 */
+			tail = first_user;
+			while (*tail != NULL &&
+			    strcmp((*tail)->pl_user_prop, name) != 0)
+				tail = &(*tail)->pl_next;
+
+			if (*tail != NULL)
+				continue;
+
+			/* Not seen yet: append a new entry for it. */
+			cur = zfs_alloc(hdl, sizeof (zprop_list_t));
+			if (cur == NULL)
+				return (-1);
+			cur->pl_user_prop = zfs_strdup(hdl, name);
+			if (cur->pl_user_prop == NULL) {
+				free(cur);
+				return (-1);
+			}
+
+			cur->pl_prop = ZPROP_INVAL;
+			cur->pl_width = strlen(name);
+			cur->pl_all = B_TRUE;
+			*tail = cur;
+		}
+	}
+
+	/*
+	 * Widen every non-fixed column to fit this dataset's value.
+	 */
+	for (cur = *plp; cur != NULL; cur = cur->pl_next) {
+		size_t len;
+
+		if (cur->pl_fixed)
+			continue;
+
+		if (cur->pl_prop != ZPROP_INVAL) {
+			if (zfs_prop_get(zhp, cur->pl_prop,
+			    buf, sizeof (buf), NULL, NULL, 0, B_FALSE) != 0)
+				continue;
+			len = strlen(buf);
+		} else {
+			if (nvlist_lookup_nvlist(userprops,
+			    cur->pl_user_prop, &propval) != 0)
+				continue;
+			verify(nvlist_lookup_string(propval,
+			    ZPROP_VALUE, &strval) == 0);
+			len = strlen(strval);
+		}
+
+		if (len > cur->pl_width)
+			cur->pl_width = len;
+	}
+
+	return (0);
+}
+
+/*
+ * Ask the kernel, via ZFS_IOC_ISCSI_PERM_CHECK, about 'dataset' on behalf of
+ * the credential 'cred'.  The effective uid, effective gid and group list
+ * taken from 'cred' are packed into an nvlist and passed as the ioctl source.
+ *
+ * Returns the ioctl result, or non-zero if the request could not be built.
+ */
+int
+zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *nvp;
+	gid_t gid;
+	uid_t uid;
+	const gid_t *groups;
+	int group_cnt;
+	int error;
+
+	if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
+		return (no_memory(hdl));
+
+	uid = ucred_geteuid(cred);
+	gid = ucred_getegid(cred);
+	group_cnt = ucred_getgroups(cred, &groups);
+
+	if (uid == (uid_t)-1 || gid == (gid_t)-1 || group_cnt == -1) {
+		/* Don't leak the list on a bad credential. */
+		nvlist_free(nvp);
+		return (1);
+	}
+
+	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) {
+		nvlist_free(nvp);
+		return (1);
+	}
+
+	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) {
+		nvlist_free(nvp);
+		return (1);
+	}
+
+	if (nvlist_add_uint32_array(nvp,
+	    ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) {
+		nvlist_free(nvp);
+		return (1);
+	}
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, nvp)) {
+		/* Don't leak the list when it cannot be packed. */
+		nvlist_free(nvp);
+		return (-1);
+	}
+
+	/*
+	 * NOTE(review): whatever zcmd_write_src_nvlist() attached to 'zc' is
+	 * not released here; confirm whether it needs an explicit free.
+	 */
+	error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc);
+	nvlist_free(nvp);
+	return (error);
+}
+
+int
+zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
+    void *export, void *sharetab, int sharemax, zfs_share_op_t operation)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/* What to act on: the dataset and the path being shared. */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
+
+	/* The share request itself; buffers are passed by address. */
+	zc.zc_share.z_sharetype = operation;
+	zc.zc_share.z_sharemax = sharemax;
+	zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export;
+	zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab;
+
+	return (ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc));
+}
diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c
new file mode 100644
index 000000000..e7cbf2386
--- /dev/null
+++ b/lib/libzfs/libzfs_graph.c
@@ -0,0 +1,662 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Iterate over all children of the current object.  This includes the normal
+ * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
+ * walk all datasets in the pool, and construct a directed graph of the form:
+ *
+ * 			home
+ *                        |
+ *                   +----+----+
+ *                   |         |
+ *                   v         v             ws
+ *                  bar       baz             |
+ *                             |              |
+ *                             v              v
+ *                          @yesterday ----> foo
+ *
+ * In order to construct this graph, we have to walk every dataset in the pool,
+ * because the clone parent is stored as a property of the child, not the
+ * parent.  The parent only keeps track of the number of clones.
+ *
+ * In the normal case (without clones) this would be rather expensive.  To avoid
+ * unnecessary computation, we first try a walk of the subtree hierarchy
+ * starting from the initial node.  At each dataset, we construct a node in the
+ * graph and an edge leading from its parent.  If we don't see any snapshots
+ * with a non-zero clone count, then we are finished.
+ *
+ * If we do find a cloned snapshot, then we finish the walk of the current
+ * subtree, but indicate that we need to do a complete walk.  We then perform a
+ * global walk of all datasets, avoiding the subtree we already processed.
+ *
+ * At the end of this, we'll end up with a directed graph of all relevant (and
+ * possibly some irrelevant) datasets in the system.  We need to both find our
+ * limiting subgraph and determine a safe ordering in which to destroy the
+ * datasets.  We do a topological ordering of our graph starting at our target
+ * dataset, and then walk the results in reverse.
+ *
+ * It's possible for the graph to have cycles if, for example, the user renames
+ * a clone to be the parent of its origin snapshot.  The user can request to
+ * generate an error in this case, or ignore the cycle and continue.
+ *
+ * When removing datasets, we want to destroy the snapshots in chronological
+ * order (because this is the most efficient method).  In order to accomplish
+ * this, we store the creation transaction group with each vertex and keep each
+ * vertex's edges sorted according to this value.  The topological sort will
+ * automatically walk the snapshots in the correct order.
+ */
+
+#include <assert.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+#include "zfs_namecheck.h"
+
+#define	MIN_EDGECOUNT	4
+
+/*
+ * Vertex structure.  Indexed by dataset name, this structure maintains a list
+ * of edges to other vertices.
+ */
+struct zfs_edge;
+typedef struct zfs_vertex {
+	char			zv_dataset[ZFS_MAXNAMELEN]; /* dataset name (key) */
+	struct zfs_vertex	*zv_next;	/* next vertex on same hash chain */
+	int			zv_visited;	/* 0 or one of the VISIT_* states */
+	uint64_t		zv_txg;		/* creation txg; edges sort on it */
+	struct zfs_edge		**zv_edges;	/* array of outgoing edges */
+	int			zv_edgecount;	/* slots of zv_edges in use */
+	int			zv_edgealloc;	/* slots of zv_edges allocated */
+} zfs_vertex_t;
+
+/*
+ * Values stored in zv_visited.  A vertex that has only ever been created as
+ * the destination of an edge keeps the initial value.
+ */
+enum {
+	VISIT_SEEN = 1,		/* walked as a source while building the graph */
+	VISIT_SORT_PRE,		/* topo_sort() has entered this vertex */
+	VISIT_SORT_POST		/* topo_sort() has emitted this vertex */
+};
+
+/*
+ * Edge structure.  Simply maintains a pointer to the destination vertex.  There
+ * is no need to store the source vertex, since we only use edges in the context
+ * of the source vertex.
+ */
+typedef struct zfs_edge {
+	zfs_vertex_t		*ze_dest;	/* vertex this edge points to */
+	/* NOTE(review): ze_next is not referenced in this file's code. */
+	struct zfs_edge		*ze_next;
+} zfs_edge_t;
+
+#define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
+
+/*
+ * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
+ */
+typedef struct zfs_graph {
+	zfs_vertex_t		**zg_hash;	/* hash buckets (vertex chains) */
+	size_t			zg_size;	/* number of hash buckets */
+	size_t			zg_nvertex;	/* number of vertices created */
+	const char		*zg_root;	/* dataset the graph was built for */
+	/*
+	 * Running balance kept by the graph walk: snapshot clone counts are
+	 * added and clones whose origin lies under zg_root are subtracted.
+	 */
+	int			zg_clone_count;
+} zfs_graph_t;
+
+/*
+ * Allocate a new edge pointing to the target vertex.
+ */
+static zfs_edge_t *
+zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
+{
+	zfs_edge_t *edge;
+
+	/* On allocation failure NULL is passed straight back. */
+	edge = zfs_alloc(hdl, sizeof (*edge));
+	if (edge != NULL)
+		edge->ze_dest = dest;
+
+	return (edge);
+}
+
+/*
+ * Destroy an edge.
+ */
+static void
+zfs_edge_destroy(zfs_edge_t *zep)
+{
+	/* An edge owns nothing but its own storage. */
+	free(zep);
+}
+
+/*
+ * Allocate a new vertex with the given name.
+ */
+static zfs_vertex_t *
+zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_vertex_t *vertex;
+
+	vertex = zfs_alloc(hdl, sizeof (*vertex));
+	if (vertex == NULL)
+		return (NULL);
+
+	assert(strlen(dataset) < ZFS_MAXNAMELEN);
+
+	(void) strlcpy(vertex->zv_dataset, dataset,
+	    sizeof (vertex->zv_dataset));
+
+	/* Start with a small edge array; it is grown on demand. */
+	vertex->zv_edges = zfs_alloc(hdl, MIN_EDGECOUNT * sizeof (void *));
+	if (vertex->zv_edges == NULL) {
+		free(vertex);
+		return (NULL);
+	}
+	vertex->zv_edgealloc = MIN_EDGECOUNT;
+
+	return (vertex);
+}
+
+/*
+ * Destroy a vertex.  Frees up any associated edges.
+ */
+static void
+zfs_vertex_destroy(zfs_vertex_t *zvp)
+{
+	int slot = 0;
+
+	/* Release every edge in use, then the array, then the vertex. */
+	while (slot < zvp->zv_edgecount) {
+		zfs_edge_destroy(zvp->zv_edges[slot]);
+		slot++;
+	}
+
+	free(zvp->zv_edges);
+	free(zvp);
+}
+
+/*
+ * Given a vertex, add an edge to the destination vertex.
+ *
+ * The edge array is doubled when it is full.  Returns 0 on success, or -1 if
+ * either the edge or the larger array could not be allocated; in that case
+ * the vertex is left unchanged.
+ */
+static int
+zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
+    zfs_vertex_t *dest)
+{
+	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
+
+	if (zep == NULL)
+		return (-1);
+
+	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
+		void *ptr;
+
+		if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
+		    zvp->zv_edgealloc * sizeof (void *),
+		    zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL) {
+			/* The edge was never linked in; don't leak it. */
+			zfs_edge_destroy(zep);
+			return (-1);
+		}
+
+		zvp->zv_edges = ptr;
+		zvp->zv_edgealloc *= 2;
+	}
+
+	zvp->zv_edges[zvp->zv_edgecount++] = zep;
+
+	return (0);
+}
+
+/*
+ * qsort() comparator for arrays of edge pointers: orders edges by the
+ * creation txg of their destination vertex, lowest first.
+ */
+static int
+zfs_edge_compare(const void *a, const void *b)
+{
+	const zfs_edge_t *const *lhs = a;
+	const zfs_edge_t *const *rhs = b;
+	uint64_t ltxg = (*lhs)->ze_dest->zv_txg;
+	uint64_t rtxg = (*rhs)->ze_dest->zv_txg;
+
+	if (ltxg == rtxg)
+		return (0);
+
+	return (ltxg < rtxg ? -1 : 1);
+}
+
+/*
+ * Sort the given vertex edges according to the creation txg of each vertex.
+ */
+static void
+zfs_vertex_sort_edges(zfs_vertex_t *zvp)
+{
+	/* Nothing to order when the vertex has no outgoing edges. */
+	if (zvp->zv_edgecount != 0) {
+		qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
+		    zfs_edge_compare);
+	}
+}
+
+/*
+ * Construct a new graph object.  We allow the size to be specified as a
+ * parameter so in the future we can size the hash according to the number of
+ * datasets in the pool.
+ */
+static zfs_graph_t *
+zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
+{
+	zfs_graph_t *graph;
+
+	graph = zfs_alloc(hdl, sizeof (*graph));
+	if (graph == NULL)
+		return (NULL);
+
+	/* One bucket pointer per hash slot. */
+	graph->zg_hash = zfs_alloc(hdl, size * sizeof (zfs_vertex_t *));
+	if (graph->zg_hash == NULL) {
+		free(graph);
+		return (NULL);
+	}
+
+	graph->zg_size = size;
+	/* The name is borrowed from the caller, not copied. */
+	graph->zg_root = dataset;
+	graph->zg_clone_count = 0;
+
+	return (graph);
+}
+
+/*
+ * Destroy a graph object.  We have to iterate over all the hash chains,
+ * destroying each vertex in the process.
+ */
+static void
+zfs_graph_destroy(zfs_graph_t *zgp)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < zgp->zg_size; bucket++) {
+		zfs_vertex_t *vertex = zgp->zg_hash[bucket];
+
+		/* Fetch the successor before the vertex is freed. */
+		while (vertex != NULL) {
+			zfs_vertex_t *following = vertex->zv_next;
+
+			zfs_vertex_destroy(vertex);
+			vertex = following;
+		}
+	}
+
+	free(zgp->zg_hash);
+	free(zgp);
+}
+
+/*
+ * Graph hash function.  Classic bernstein k=33 hash function, taken from
+ * usr/src/cmd/sgs/tools/common/strhash.c
+ */
+static size_t
+zfs_graph_hash(zfs_graph_t *zgp, const char *str)
+{
+	size_t hash = 5381;
+
+	/* hash = hash * 33 + c for every character of the name */
+	for (; *str != '\0'; str++)
+		hash = hash * 33 + *str;
+
+	/* Reduce to a bucket index. */
+	return (hash % zgp->zg_size);
+}
+
+/*
+ * Given a dataset name, finds the associated vertex, creating it if necessary.
+ */
+static zfs_vertex_t *
+zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
+    uint64_t txg)
+{
+	size_t bucket = zfs_graph_hash(zgp, dataset);
+	zfs_vertex_t *vertex = zgp->zg_hash[bucket];
+
+	/* Search the chain for an existing vertex with this name. */
+	while (vertex != NULL) {
+		if (strcmp(vertex->zv_dataset, dataset) == 0) {
+			/* Fill in the txg only if none was recorded yet. */
+			if (vertex->zv_txg == 0)
+				vertex->zv_txg = txg;
+			return (vertex);
+		}
+		vertex = vertex->zv_next;
+	}
+
+	/* Not found: make a new vertex and push it on the chain head. */
+	vertex = zfs_vertex_create(hdl, dataset);
+	if (vertex == NULL)
+		return (NULL);
+
+	vertex->zv_txg = txg;
+	vertex->zv_next = zgp->zg_hash[bucket];
+	zgp->zg_hash[bucket] = vertex;
+	zgp->zg_nvertex++;
+
+	return (vertex);
+}
+
+/*
+ * Given two dataset names, create an edge between them.  For the source vertex,
+ * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
+ * created it as a destination of another edge.  If 'dest' is NULL, then this
+ * is an individual vertex (i.e. the starting vertex), so don't add an edge.
+ */
+static int
+zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
+    const char *dest, uint64_t txg)
+{
+	zfs_vertex_t *from, *to;
+
+	from = zfs_graph_lookup(hdl, zgp, source, 0);
+	if (from == NULL)
+		return (-1);
+	from->zv_visited = VISIT_SEEN;
+
+	/* A lone vertex: nothing to connect. */
+	if (dest == NULL)
+		return (0);
+
+	to = zfs_graph_lookup(hdl, zgp, dest, txg);
+	if (to == NULL)
+		return (-1);
+
+	return (zfs_vertex_add_edge(hdl, from, to) != 0 ? -1 : 0);
+}
+
+/*
+ * Iterate over all children of the given dataset, adding any vertices
+ * as necessary.  Returns -1 if there was an error, or 0 otherwise.
+ * This is a simple recursive algorithm - the ZFS namespace typically
+ * is very flat.  We manually invoke the necessary ioctl() calls to
+ * avoid the overhead and additional semantics of zfs_open().
+ */
+static int
+iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_vertex_t *zvp;
+
+	/*
+	 * Look up the source vertex, and avoid it if we've seen it before.
+	 */
+	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
+	if (zvp == NULL)
+		return (-1);
+	if (zvp->zv_visited == VISIT_SEEN)
+		return (0);
+
+	/*
+	 * Iterate over all children.  After each successful ioctl, zc.zc_name
+	 * is used as the child's name, so the parent's name is copied back in
+	 * before every call.  The rest of 'zc' is left alone between calls
+	 * (presumably it carries the iteration position; verify).
+	 */
+	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
+
+		/*
+		 * Ignore private dataset names.
+		 */
+		if (dataset_name_hidden(zc.zc_name))
+			continue;
+
+		/*
+		 * Get statistics for this dataset, to determine the type of the
+		 * dataset and clone statistics.  If this fails, the dataset has
+		 * since been removed, and we're pretty much screwed anyway.
+		 */
+		/* Clear the origin so a value from a previous child can't linger. */
+		zc.zc_objset_stats.dds_origin[0] = '\0';
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			continue;
+
+		/* A non-empty origin means this child is a clone. */
+		if (zc.zc_objset_stats.dds_origin[0] != '\0') {
+			/* Add an edge from the origin to the clone. */
+			if (zfs_graph_add(hdl, zgp,
+			    zc.zc_objset_stats.dds_origin, zc.zc_name,
+			    zc.zc_objset_stats.dds_creation_txg) != 0)
+				return (-1);
+			/*
+			 * Count origins only if they are contained in the graph
+			 */
+			if (isa_child_of(zc.zc_objset_stats.dds_origin,
+			    zgp->zg_root))
+				zgp->zg_clone_count--;
+		}
+
+		/*
+		 * Add an edge between the parent and the child.
+		 */
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+
+		/*
+		 * Recursively visit child
+		 */
+		if (iterate_children(hdl, zgp, zc.zc_name))
+			return (-1);
+	}
+
+	/*
+	 * Now iterate over all snapshots.  The command is wiped first so that
+	 * nothing left behind by the child walk carries over.
+	 */
+	bzero(&zc, sizeof (zc));
+
+	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
+
+		/*
+		 * Get statistics for this dataset, to determine the type of the
+		 * dataset and clone statistics.  If this fails, the dataset has
+		 * since been removed, and we're pretty much screwed anyway.
+		 */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			continue;
+
+		/*
+		 * Add an edge between the parent and the child.
+		 */
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
+
+		/*
+		 * Add this snapshot's clones to the balance; it returns to
+		 * zero only if every one of them is subtracted again above.
+		 */
+		zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
+	}
+
+	/* Mark the vertex so that a later walk does not descend into it again. */
+	zvp->zv_visited = VISIT_SEEN;
+
+	return (0);
+}
+
+/*
+ * Returns false if there are no snapshots with dependent clones in this
+ * subtree or if all of those clones are also in this subtree.  Returns
+ * true if there is an error or there are external dependents.
+ */
+static boolean_t
+external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/*
+	 * iterate_children() only looks at the children, so examine the
+	 * starting dataset itself here: is it a clone, or does it have clones?
+	 */
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+		return (B_TRUE);
+
+	if (zc.zc_objset_stats.dds_origin[0] != '\0') {
+		/* It is a clone: record the edge from its origin. */
+		if (zfs_graph_add(hdl, zgp,
+		    zc.zc_objset_stats.dds_origin, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (B_TRUE);
+		if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
+			zgp->zg_clone_count--;
+	}
+
+	/* Clones of the dataset itself always count as external. */
+	if (zc.zc_objset_stats.dds_num_clones)
+		return (B_TRUE);
+
+	/* A failed subtree walk is reported as "external" too. */
+	if (iterate_children(hdl, zgp, dataset))
+		return (B_TRUE);
+
+	/* A non-zero balance means some clone lives outside the subtree. */
+	return (zgp->zg_clone_count != 0);
+}
+
+/*
+ * Construct a complete graph of all necessary vertices.  First, iterate over
+ * only our object's children.  If no cloned snapshots are found, or all of
+ * the cloned snapshots are in this subtree then return a graph of the subtree.
+ * Otherwise, start at the root of the pool and iterate over all datasets.
+ */
+static zfs_graph_t *
+construct_graph(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
+
+	if (zgp == NULL)
+		return (zgp);
+
+	/*
+	 * A name without a '/' goes straight to the pool-wide walk; anything
+	 * else gets the cheaper subtree walk first and falls back to the
+	 * pool-wide walk only if that reports external dependents.
+	 */
+	if ((strchr(dataset, '/') == NULL) ||
+	    (external_dependents(hdl, zgp, dataset))) {
+		/*
+		 * Determine pool name and try again.
+		 */
+		size_t len = strcspn(dataset, "/@") + 1;
+		char *pool = zfs_alloc(hdl, len);
+
+		if (pool == NULL) {
+			zfs_graph_destroy(zgp);
+			return (NULL);
+		}
+		(void) strlcpy(pool, dataset, len);
+
+		if (iterate_children(hdl, zgp, pool) == -1 ||
+		    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
+			free(pool);
+			zfs_graph_destroy(zgp);
+			return (NULL);
+		}
+		free(pool);
+	}
+
+	/* Make sure the starting dataset itself has a vertex. */
+	if (zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
+
+	return (zgp);
+}
+
+/*
+ * Given a graph, do a recursive topological sort into the given array.  This is
+ * really just a depth first search, so that the deepest nodes appear first.
+ * We hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
+ */
+static int
+topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
+    size_t *idx, zfs_vertex_t *zgv)
+{
+	int e;
+
+	if (zgv->zv_visited >= VISIT_SORT_PRE) {
+		/*
+		 * Reaching a vertex that is still being processed means
+		 * the graph has a cycle; unless the caller allows that,
+		 * report it as an error.
+		 */
+		if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "recursive dependency at '%s'"),
+			    zgv->zv_dataset);
+			return (zfs_error(hdl, EZFS_RECURSIVE,
+			    dgettext(TEXT_DOMAIN,
+			    "cannot determine dependent datasets")));
+		}
+
+		/* Already handled by the sort; nothing more to do. */
+		return (0);
+	}
+
+	zgv->zv_visited = VISIT_SORT_PRE;
+
+	/* Visit the destinations in creation-txg order, depth first. */
+	zfs_vertex_sort_edges(zgv);
+	for (e = 0; e < zgv->zv_edgecount; e++) {
+		zfs_vertex_t *next = zgv->zv_edges[e]->ze_dest;
+
+		if (topo_sort(hdl, allowrecursion, result, idx, next) != 0)
+			return (-1);
+	}
+
+	/* The recursion above may already have emitted this vertex. */
+	if (zgv->zv_visited == VISIT_SORT_POST)
+		return (0);
+
+	/* Append a private copy of the name to the result array. */
+	result[*idx] = zfs_alloc(hdl, strlen(zgv->zv_dataset) + 1);
+	if (result[*idx] == NULL)
+		return (-1);
+	(void) strcpy(result[*idx], zgv->zv_dataset);
+
+	*idx += 1;
+	zgv->zv_visited = VISIT_SORT_POST;
+	return (0);
+}
+
+/*
+ * The only public interface for this file.  Do the dirty work of constructing a
+ * child list for the given object.  Construct the graph, do the topological
+ * sort, and then return the array of strings to the caller.
+ *
+ * The 'allowrecursion' parameter controls behavior when cycles are found.  If
+ * it is set, then the cycle is ignored and the results returned as if the cycle
+ * did not exist.  If it is not set, then the routine will generate an error if
+ * a cycle is found.
+ */
+int
+get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
+    const char *dataset, char ***result, size_t *count)
+{
+	zfs_graph_t *zgp;
+	zfs_vertex_t *zvp;
+
+	if ((zgp = construct_graph(hdl, dataset)) == NULL)
+		return (-1);
+
+	/* One slot per vertex is the most the sort can ever emit. */
+	if ((*result = zfs_alloc(hdl,
+	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
+		free(*result);
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	*count = 0;
+	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
+		size_t i;
+
+		/*
+		 * The sort may have emitted some names before failing;
+		 * free those along with the array itself.
+		 */
+		for (i = 0; i < *count; i++)
+			free((*result)[i]);
+		free(*result);
+		zfs_graph_destroy(zgp);
+		return (-1);
+	}
+
+	/*
+	 * Get rid of the last entry, which is our starting vertex and not
+	 * strictly a dependent.
+	 */
+	assert(*count > 0);
+	free((*result)[*count - 1]);
+	(*count)--;
+
+	zfs_graph_destroy(zgp);
+
+	return (0);
+}
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
new file mode 100644
index 000000000..d67776889
--- /dev/null
+++ b/lib/libzfs/libzfs_import.c
@@ -0,0 +1,1311 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Pool import support functions.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device.  If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ * 	pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded.  Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed.  Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <devid.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <sys/vdev_impl.h>
+
+#include "libzfs.h"
+#include "libzfs_impl.h"
+
+/*
+ * Intermediate structures used to gather configuration information.
+ */
+typedef struct config_entry {
+	uint64_t		ce_txg;		/* pool txg of this label */
+	nvlist_t		*ce_config;	/* label config nvlist */
+	struct config_entry	*ce_next;
+} config_entry_t;
+
+typedef struct vdev_entry {
+	uint64_t		ve_guid;	/* top-level vdev guid */
+	config_entry_t		*ve_configs;	/* one entry per distinct txg */
+	struct vdev_entry	*ve_next;
+} vdev_entry_t;
+
+typedef struct pool_entry {
+	uint64_t		pe_guid;	/* pool guid */
+	vdev_entry_t		*pe_vdevs;	/* top-level vdevs seen */
+	struct pool_entry	*pe_next;
+} pool_entry_t;
+
+typedef struct name_entry {
+	char			*ne_name;	/* device path (strdup'd) */
+	uint64_t		ne_guid;	/* guid of that vdev */
+	struct name_entry	*ne_next;
+} name_entry_t;
+
+typedef struct pool_list {
+	pool_entry_t		*pools;		/* pool/vdev/txg hierarchy */
+	name_entry_t		*names;		/* vdev guid -> path list */
+} pool_list_t;
+
+static char *
+get_devid(const char *path)
+{
+	ddi_devid_t id;
+	char *minor_name = NULL;
+	char *encoded = NULL;
+	int dev_fd = open(path, O_RDONLY);
+
+	/* Returns the encoded devid string for 'path', or NULL on failure. */
+	if (dev_fd < 0)
+		return (NULL);
+	if (devid_get(dev_fd, &id) == 0) {
+		if (devid_get_minor_name(dev_fd, &minor_name) == 0)
+			encoded = devid_str_encode(id, minor_name);
+		/* Free any minor name handed back, whether or not we used it. */
+		if (minor_name != NULL)
+			devid_str_free(minor_name);
+		devid_free(id);
+	}
+	(void) close(dev_fd);
+
+	return (encoded);
+}
+
+
+/*
+ * Go through and fix up any path and/or devid information for the given vdev
+ * configuration, recursing into children.  Returns 0, or -1 on nvlist failure.
+ */
+static int
+fix_paths(nvlist_t *nv, name_entry_t *names)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	uint64_t guid;
+	name_entry_t *ne, *best;
+	char *path, *devid;
+	int matched, err;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if (fix_paths(child[c], names) != 0)
+				return (-1);
+		return (0);
+	}
+
+	/*
+	 * This is a leaf (file or disk) vdev.  In either case, go through
+	 * the name list and see if we find a matching guid.  If so, replace
+	 * the path and see if we can calculate a new devid.
+	 *
+	 * There may be multiple names associated with a particular guid, in
+	 * which case we have overlapping slices or multiple paths to the same
+	 * disk.  If this is the case, then we want to pick the path that is
+	 * the most similar to the original, where "most similar" is the number
+	 * of matching characters starting from the end of the path.  This will
+	 * preserve slice numbers even if the disks have been reorganized, and
+	 * will also catch preferred disk names if multiple paths exist.
+	 */
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
+		path = NULL;
+
+	matched = 0;
+	best = NULL;
+	for (ne = names; ne != NULL; ne = ne->ne_next) {
+		if (ne->ne_guid == guid) {
+			const char *src, *dst;
+			int count;
+
+			if (path == NULL) {
+				best = ne;
+				break;
+			}
+
+			src = ne->ne_name + strlen(ne->ne_name) - 1;
+			dst = path + strlen(path) - 1;
+			for (count = 0; src >= ne->ne_name && dst >= path;
+			    src--, dst--, count++)
+				if (*src != *dst)
+					break;
+
+			/*
+			 * At this point, 'count' is the number of characters
+			 * matched from the end.
+			 */
+			if (count > matched || best == NULL) {
+				best = ne;
+				matched = count;
+			}
+		}
+	}
+
+	if (best == NULL)
+		return (0);
+
+	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
+		return (-1);
+
+	if ((devid = get_devid(best->ne_name)) == NULL) {
+		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+		return (0);
+	}
+
+	/* Free our devid string whether or not the add succeeded. */
+	err = nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid);
+	devid_str_free(devid);
+	return (err != 0 ? -1 : 0);
+}
+
+/*
+ * Add 'config' (always consumed) to the known-device lists; -1 on failure.
+ */
+static int
+add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
+    nvlist_t *config)
+{
+	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	name_entry_t *ne;
+
+	/*
+	 * If this is a hot spare not currently in use or level 2 cache
+	 * device, add it to the list of names to translate, but don't do
+	 * anything else.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &state) == 0 &&
+	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
+		nvlist_free(config);	/* only the guid was needed; don't leak */
+		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+			return (-1);
+		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+			free(ne);
+			return (-1);
+		}
+		ne->ne_guid = vdev_guid;
+		ne->ne_next = pl->names;
+		pl->names = ne;
+		return (0);
+	}
+
+	/*
+	 * If we have a valid config but cannot read any of these fields, then
+	 * it means we have a half-initialized label.  In vdev_label_init()
+	 * we write a label with txg == 0 so that we can identify the device
+	 * in case the user refers to the same disk later on.  If we fail to
+	 * create the pool, we'll be left with a label in this state
+	 * which should not be considered part of a valid pool.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &pool_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+	    &vdev_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
+	    &top_guid) != 0 ||
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+	    &txg) != 0 || txg == 0) {
+		nvlist_free(config);
+		return (0);
+	}
+
+	/*
+	 * First, see if we know about this pool.  If not, then add it to the
+	 * list of known pools.
+	 */
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+		if (pe->pe_guid == pool_guid)
+			break;
+	}
+
+	if (pe == NULL) {
+		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		pe->pe_guid = pool_guid;
+		pe->pe_next = pl->pools;
+		pl->pools = pe;
+	}
+
+	/*
+	 * Second, see if we know about this toplevel vdev.  Add it if it's
+	 * missing.
+	 */
+	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+		if (ve->ve_guid == top_guid)
+			break;
+	}
+
+	if (ve == NULL) {
+		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		ve->ve_guid = top_guid;
+		ve->ve_next = pe->pe_vdevs;
+		pe->pe_vdevs = ve;
+	}
+
+	/*
+	 * Third, see if we have a config with a matching transaction group.  If
+	 * so, then we do nothing.  Otherwise, add it to the list of known
+	 * configs.
+	 */
+	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
+		if (ce->ce_txg == txg)
+			break;
+	}
+
+	if (ce == NULL) {
+		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+		ce->ce_txg = txg;
+		ce->ce_config = config;
+		ce->ce_next = ve->ve_configs;
+		ve->ve_configs = ce;
+	} else {
+		nvlist_free(config);
+	}
+
+	/*
+	 * At this point we've successfully added our config to the list of
+	 * known configs.  The last thing to do is add the vdev guid -> path
+	 * mappings so that we can fix up the configuration as necessary before
+	 * doing the import.
+	 */
+	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+		return (-1);
+
+	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+		free(ne);
+		return (-1);
+	}
+
+	ne->ne_guid = vdev_guid;
+	ne->ne_next = pl->names;
+	pl->names = ne;
+
+	return (0);
+}
+
+/*
+ * Report in *isactive whether pool 'name' is open with 'guid'; -1 on error.
+ */
+static int
+pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
+    boolean_t *isactive)
+{
+	zpool_handle_t *pool;
+	uint64_t open_guid;
+
+	if (zpool_open_silent(hdl, name, &pool) != 0)
+		return (-1);
+
+	/* Success with a NULL handle is reported as "not active". */
+	*isactive = B_FALSE;
+	if (pool != NULL) {
+		/* Only the guid is needed, so the handle is closed at once. */
+		verify(nvlist_lookup_uint64(pool->zpool_config,
+		    ZPOOL_CONFIG_POOL_GUID, &open_guid) == 0);
+		zpool_close(pool);
+
+		*isactive = (open_guid == guid);
+	}
+
+	return (0);
+}
+
+static nvlist_t *	/* nvlist returned by POOL_TRYIMPORT, or NULL */
+refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
+{
+	nvlist_t *nvl;
+	zfs_cmd_t zc = { 0 };
+	int err;
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
+		return (NULL);
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc,
+	    zc.zc_nvlist_conf_size * 2) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (NULL);
+	}
+
+	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
+	    &zc)) != 0 && errno == ENOMEM) {	/* ENOMEM: expand dst, retry */
+		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+			zcmd_free_nvlists(&zc);
+			return (NULL);
+		}
+	}
+
+	if (err) {	/* the ioctl failed with something other than ENOMEM */
+		(void) zpool_standard_error(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot discover pools"));
+		zcmd_free_nvlists(&zc);
+		return (NULL);
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
+		zcmd_free_nvlists(&zc);
+		return (NULL);
+	}
+
+	zcmd_free_nvlists(&zc);
+	return (nvl);
+}
+
+/*
+ * Convert our list of pools into the definitive set of configurations.  We
+ * start by picking the best config for each toplevel vdev.  Once that's done,
+ * we assemble the toplevel vdevs into a full config for the pool.  We make a
+ * pass to fix up any incorrect paths, and then add it to the main list to
+ * return to the user.  Returns NULL on error or if no pool was added.
+ */
+static nvlist_t *
+get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
+{
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
+	nvlist_t **spares, **l2cache;
+	uint_t i, nspares, nl2cache;
+	boolean_t config_seen;
+	uint64_t best_txg;
+	char *name, *hostname;
+	uint64_t version, guid;
+	uint_t children = 0;
+	nvlist_t **child = NULL;
+	uint_t c;
+	boolean_t isactive;
+	uint64_t hostid;
+	nvlist_t *nvl;
+	boolean_t found_one = B_FALSE;
+
+	if (nvlist_alloc(&ret, 0, 0) != 0)
+		goto nomem;
+
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+		uint64_t id;
+
+		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		config_seen = B_FALSE;
+
+		/*
+		 * Iterate over all toplevel vdevs.  Grab the pool configuration
+		 * from the first one we find, and then go through the rest and
+		 * add them as necessary to the 'vdevs' member of the config.
+		 */
+		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+
+			/*
+			 * Determine the best configuration for this vdev by
+			 * selecting the config with the latest transaction
+			 * group.
+			 */
+			best_txg = 0;
+			for (ce = ve->ve_configs; ce != NULL;
+			    ce = ce->ce_next) {
+
+				if (ce->ce_txg > best_txg) {
+					tmp = ce->ce_config;
+					best_txg = ce->ce_txg;
+				}
+			}
+
+			if (!config_seen) {
+				/*
+				 * Copy the relevant pieces of data to the pool
+				 * configuration:
+				 *
+				 *	version
+				 * 	pool guid
+				 * 	name
+				 * 	pool state
+				 *	hostid (if available)
+				 *	hostname (if available)
+				 */
+				uint64_t state;
+
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_VERSION, &version) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_VERSION, version) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
+					goto nomem;
+				verify(nvlist_lookup_string(tmp,
+				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
+				if (nvlist_add_string(config,
+				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
+					goto nomem;
+				hostid = 0;	/* 0: label has no hostid */
+				if (nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
+					if (nvlist_add_uint64(config,
+					    ZPOOL_CONFIG_HOSTID, hostid) != 0)
+						goto nomem;
+					verify(nvlist_lookup_string(tmp,
+					    ZPOOL_CONFIG_HOSTNAME,
+					    &hostname) == 0);
+					if (nvlist_add_string(config,
+					    ZPOOL_CONFIG_HOSTNAME,
+					    hostname) != 0)
+						goto nomem;
+				}
+
+				config_seen = B_TRUE;
+			}
+
+			/*
+			 * Add this top-level vdev to the child array.
+			 */
+			verify(nvlist_lookup_nvlist(tmp,
+			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
+			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+			if (id >= children) {	/* grow child[] to hold 'id' */
+				nvlist_t **newchild;
+
+				newchild = zfs_alloc(hdl, (id + 1) *
+				    sizeof (nvlist_t *));
+				if (newchild == NULL)
+					goto nomem;
+
+				for (c = 0; c < children; c++)
+					newchild[c] = child[c];
+
+				free(child);
+				child = newchild;
+				children = id + 1;
+			}
+			if (nvlist_dup(nvtop, &child[id], 0) != 0)
+				goto nomem;
+
+		}
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+
+		/*
+		 * Look for any missing top-level vdevs.  If this is the case,
+		 * create a faked up 'missing' vdev as a placeholder.  We cannot
+		 * simply compress the child array, because the kernel performs
+		 * certain checks to make sure the vdev IDs match their location
+		 * in the configuration.
+		 */
+		for (c = 0; c < children; c++)
+			if (child[c] == NULL) {
+				nvlist_t *missing;
+				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
+				    0) != 0)
+					goto nomem;
+				if (nvlist_add_string(missing,
+				    ZPOOL_CONFIG_TYPE,
+				    VDEV_TYPE_MISSING) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_ID, c) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+					nvlist_free(missing);
+					goto nomem;
+				}
+				child[c] = missing;
+			}
+
+		/*
+		 * Put all of this pool's top-level vdevs into a root vdev.
+		 */
+		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
+		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+		    child, children) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+
+		for (c = 0; c < children; c++)
+			nvlist_free(child[c]);
+		free(child);
+		children = 0;
+		child = NULL;
+
+		/*
+		 * Go through and fix up any paths and/or devids based on our
+		 * known list of vdev GUID -> path mappings.
+		 */
+		if (fix_paths(nvroot, pl->names) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+
+		/*
+		 * Add the root vdev to this pool's configuration.
+		 */
+		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    nvroot) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
+		nvlist_free(nvroot);
+
+		/*
+		 * zdb uses this path to report on active pools that were
+		 * imported or created using -R.
+		 */
+		if (active_ok)
+			goto add_pool;
+
+		/*
+		 * Determine if this pool is currently active, in which case we
+		 * can't actually import it.
+		 */
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+
+		if (pool_active(hdl, name, guid, &isactive) != 0)
+			goto error;
+
+		if (isactive) {
+			nvlist_free(config);
+			config = NULL;
+			continue;
+		}
+
+		if ((nvl = refresh_config(hdl, config)) == NULL)
+			goto error;
+
+		nvlist_free(config);
+		config = nvl;
+
+		/*
+		 * Go through and update the paths for spares, now that we have
+		 * them.
+		 */
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) == 0);
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) == 0) {
+			for (i = 0; i < nspares; i++) {
+				if (fix_paths(spares[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		/*
+		 * Update the paths for l2cache devices.
+		 */
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    &l2cache, &nl2cache) == 0) {
+			for (i = 0; i < nl2cache; i++) {
+				if (fix_paths(l2cache[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		/*
+		 * Restore the original information read from the actual label.
+		 */
+		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
+		    DATA_TYPE_UINT64);
+		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
+		    DATA_TYPE_STRING);
+		if (hostid != 0) {
+			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
+			    hostid) == 0);
+			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
+			    hostname) == 0);
+		}
+
+add_pool:
+		/*
+		 * Add this pool to the list of configs.
+		 */
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		if (nvlist_add_nvlist(ret, name, config) != 0)
+			goto nomem;
+
+		found_one = B_TRUE;
+		nvlist_free(config);
+		config = NULL;
+	}
+
+	if (!found_one) {	/* nothing added: return NULL */
+		nvlist_free(ret);
+		ret = NULL;
+	}
+
+	return (ret);
+
+nomem:
+	(void) no_memory(hdl);
+error:
+	nvlist_free(config);
+	nvlist_free(ret);
+	for (c = 0; c < children; c++)
+		nvlist_free(child[c]);
+	free(child);
+
+	return (NULL);
+}
+
+/*
+ * Return the offset of label 'l'; the second half sit at the end of 'size'.
+ */
+static uint64_t
+label_offset(uint64_t size, int l)
+{
+	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
+	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
+	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
+}
+
+/*
+ * Given a file descriptor, return in *config the nvlist of the first usable
+ * label, or NULL if there is none.  Returns -1 only when malloc() fails.
+ */
+int
+zpool_read_label(int fd, nvlist_t **config)
+{
+	struct stat64 statbuf;
+	int l;
+	vdev_label_t *label;
+	uint64_t state, txg, size;
+
+	*config = NULL;
+
+	if (fstat64(fd, &statbuf) == -1)
+		return (0);	/* not an error: *config stays NULL */
+	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+
+	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
+		return (-1);
+
+	for (l = 0; l < VDEV_LABELS; l++) {
+		if (pread64(fd, label, sizeof (vdev_label_t),
+		    label_offset(size, l)) != sizeof (vdev_label_t))
+			continue;
+
+		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
+		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
+			continue;
+
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state > POOL_STATE_L2CACHE) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
+		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0)) {
+			nvlist_free(*config);
+			continue;
+		}
+
+		free(label);	/* first acceptable label wins */
+		return (0);
+	}
+
+	free(label);
+	*config = NULL;		/* no label passed the checks */
+	return (0);
+}
+
+/*
+ * Given a list of directories to search, find all pools stored on disk.  This
+ * includes partial pools which are not available to import.  If no args are
+ * given (argc is 0), then the default directory (/dev/dsk) is searched.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.  Returns NULL on error or if nothing is found.
+ */
+static nvlist_t *
+zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
+    boolean_t active_ok, char *poolname, uint64_t guid)
+{
+	int i;
+	DIR *dirp = NULL;
+	struct dirent64 *dp;
+	char path[MAXPATHLEN];
+	char *end;
+	size_t pathleft;
+	struct stat64 statbuf;
+	nvlist_t *ret = NULL, *config;
+	static char *default_dir = "/dev/dsk";
+	int fd;
+	pool_list_t pools = { 0 };
+	pool_entry_t *pe, *penext;
+	vdev_entry_t *ve, *venext;
+	config_entry_t *ce, *cenext;
+	name_entry_t *ne, *nenext;
+
+	verify(poolname == NULL || guid == 0);
+
+	if (argc == 0) {
+		argc = 1;
+		argv = &default_dir;
+	}
+
+	/*
+	 * Go through and read the label configuration information from every
+	 * possible device, organizing the information according to pool GUID
+	 * and toplevel GUID.
+	 */
+	for (i = 0; i < argc; i++) {
+		char *rdsk;
+		int dfd;
+
+		/* use realpath to normalize the path */
+		if (realpath(argv[i], path) == 0) {
+			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    argv[i]);
+			goto error;
+		}
+		end = &path[strlen(path)];
+		*end++ = '/';
+		*end = 0;
+		pathleft = &path[sizeof (path)] - end;
+
+		/*
+		 * Using raw devices instead of block devices when we're
+		 * reading the labels skips a bunch of slow operations during
+		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
+		 */
+		if (strcmp(path, "/dev/dsk/") == 0)
+			rdsk = "/dev/rdsk/";
+		else
+			rdsk = path;
+
+		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
+		    (dirp = fdopendir(dfd)) == NULL) {
+			zfs_error_aux(hdl, "%s", strerror(errno));
+			if (dfd >= 0)	/* fdopendir() failed; fd is ours */
+				(void) close(dfd);
+			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    rdsk);
+			goto error;
+		}
+
+		/*
+		 * This is not MT-safe, but we have no MT consumers of libzfs
+		 */
+		while ((dp = readdir64(dirp)) != NULL) {
+			const char *name = dp->d_name;
+			if (name[0] == '.' &&
+			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+				continue;
+
+			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
+				continue;
+
+			/*
+			 * Ignore failed stats.  We only want regular
+			 * files, character devs and block devs.
+			 */
+			if (fstat64(fd, &statbuf) != 0 ||
+			    (!S_ISREG(statbuf.st_mode) &&
+			    !S_ISCHR(statbuf.st_mode) &&
+			    !S_ISBLK(statbuf.st_mode))) {
+				(void) close(fd);
+				continue;
+			}
+
+			if ((zpool_read_label(fd, &config)) != 0) {
+				(void) close(fd);
+				(void) no_memory(hdl);
+				goto error;
+			}
+
+			(void) close(fd);
+
+			if (config != NULL) {
+				boolean_t matched = B_TRUE;
+
+				if (poolname != NULL) {
+					char *pname;
+
+					matched = nvlist_lookup_string(config,
+					    ZPOOL_CONFIG_POOL_NAME,
+					    &pname) == 0 &&
+					    strcmp(poolname, pname) == 0;
+				} else if (guid != 0) {
+					uint64_t this_guid;
+
+					matched = nvlist_lookup_uint64(config,
+					    ZPOOL_CONFIG_POOL_GUID,
+					    &this_guid) == 0 &&
+					    guid == this_guid;
+				}
+				if (!matched) {
+					nvlist_free(config);
+					config = NULL;
+					continue;
+				}
+				/* use the non-raw path for the config */
+				(void) strlcpy(end, name, pathleft);
+				if (add_config(hdl, &pools, path, config) != 0)
+					goto error;
+			}
+		}
+
+		(void) closedir(dirp);
+		dirp = NULL;
+	}
+
+	ret = get_configs(hdl, &pools, active_ok);
+
+error:
+	for (pe = pools.pools; pe != NULL; pe = penext) {
+		penext = pe->pe_next;
+		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
+			venext = ve->ve_next;
+			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
+				cenext = ce->ce_next;
+				nvlist_free(ce->ce_config);
+				free(ce);
+			}
+			free(ve);
+		}
+		free(pe);
+	}
+
+	for (ne = pools.names; ne != NULL; ne = nenext) {
+		nenext = ne->ne_next;
+		free(ne->ne_name);
+		free(ne);
+	}
+
+	if (dirp)
+		(void) closedir(dirp);
+
+	return (ret);
+}
+
+nvlist_t *	/* scan argv[] dirs; no name/guid filter, active_ok off */
+zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
+}
+
+nvlist_t *	/* same scan, keeping only labels whose pool name is 'pool' */
+zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
+    char *pool)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
+}
+
+nvlist_t *	/* same scan, keeping only labels whose pool guid is 'guid' */
+zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
+    uint64_t guid)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
+}
+
+nvlist_t *	/* unfiltered scan with active_ok set (see get_configs()) */
+zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
+{
+	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
+}
+
+/*
+ * Given a cache file, return the contents as a list of importable pools.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.  Returns NULL on any failure.
+ */
+nvlist_t *
+zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
+    char *poolname, uint64_t guid)
+{
+	char *buf;
+	int fd;
+	struct stat64 statbuf;
+	nvlist_t *raw, *src, *dst;
+	nvlist_t *pools;
+	nvpair_t *elem;
+	char *name;
+	uint64_t this_guid;
+	boolean_t active;
+
+	verify(poolname == NULL || guid == 0);
+
+	if ((fd = open(cachefile, O_RDONLY)) < 0) {
+		zfs_error_aux(hdl, "%s", strerror(errno));
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
+		return (NULL);
+	}
+
+	if (fstat64(fd, &statbuf) != 0) {
+		zfs_error_aux(hdl, "%s", strerror(errno));
+		(void) close(fd);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
+		return (NULL);
+	}
+
+	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
+		(void) close(fd);
+		return (NULL);
+	}
+
+	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
+		(void) close(fd);
+		free(buf);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN,
+		    "failed to read cache file contents"));
+		return (NULL);
+	}
+
+	(void) close(fd);
+
+	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
+		free(buf);
+		(void) zfs_error(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN,
+		    "invalid or corrupt cache file contents"));
+		return (NULL);
+	}
+
+	free(buf);
+
+	/*
+	 * Go through and get the current state of the pools and refresh their
+	 * state.
+	 */
+	if (nvlist_alloc(&pools, 0, 0) != 0) {
+		(void) no_memory(hdl);
+		nvlist_free(raw);
+		return (NULL);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
+		verify(nvpair_value_nvlist(elem, &src) == 0);
+
+		verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		if (poolname != NULL && strcmp(poolname, name) != 0)
+			continue;
+
+		verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
+		    &this_guid) == 0);
+		if (guid != 0) {	/* this_guid is already set above */
+			verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
+			    &this_guid) == 0);
+			if (guid != this_guid)
+				continue;
+		}
+
+		if (pool_active(hdl, name, this_guid, &active) != 0) {
+			nvlist_free(raw);
+			nvlist_free(pools);
+			return (NULL);
+		}
+
+		if (active)
+			continue;	/* skip pools already active */
+
+		if ((dst = refresh_config(hdl, src)) == NULL) {
+			nvlist_free(raw);
+			nvlist_free(pools);
+			return (NULL);
+		}
+
+		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
+			(void) no_memory(hdl);
+			nvlist_free(dst);
+			nvlist_free(raw);
+			nvlist_free(pools);
+			return (NULL);
+		}
+		nvlist_free(dst);
+	}
+
+	nvlist_free(raw);
+	return (pools);
+}
+
+
+boolean_t
+find_guid(nvlist_t *nv, uint64_t guid)
+{
+	nvlist_t **kids;
+	uint64_t self;
+	uint_t idx, nkids;
+
+	/* The guid lookup is verify()'d, so every node must carry one. */
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &self) == 0);
+	if (self == guid)
+		return (B_TRUE);
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_FALSE);	/* no children to descend into */
+	/* Otherwise the guid matches only if some child subtree has it. */
+	for (idx = 0; idx < nkids; idx++)
+		if (find_guid(kids[idx], guid))
+			return (B_TRUE);
+	return (B_FALSE);
+}
+
+typedef struct aux_cbdata {
+	const char	*cb_type;	/* name of aux array to search */
+	uint64_t	cb_guid;	/* vdev guid being looked for */
+	zpool_handle_t	*cb_zhp;	/* matching pool, left open, or NULL */
+} aux_cbdata_t;
+
+static int	/* zpool_iter() callback: 1 on a guid match, else 0 */
+find_aux(zpool_handle_t *zhp, void *data)
+{
+	aux_cbdata_t *cbp = data;
+	nvlist_t **list;
+	uint_t i, count;
+	uint64_t guid;
+	nvlist_t *nvroot;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
+	    &list, &count) == 0) {
+		for (i = 0; i < count; i++) {
+			verify(nvlist_lookup_uint64(list[i],
+			    ZPOOL_CONFIG_GUID, &guid) == 0);
+			if (guid == cbp->cb_guid) {
+				cbp->cb_zhp = zhp;	/* handed back open */
+				return (1);
+			}
+		}
+	}
+
+	zpool_close(zhp);	/* no match: release the handle here */
+	return (0);
+}
+
+/*
+ * Determines if the device open on 'fd' is in use by a pool.  If so, *inuse is
+ * set and the pool's state and name are returned.  The name in *namestr is
+ * allocated and must be freed by the caller.  Returns 0, or -1 on error.
+ */
+int
+zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
+    boolean_t *inuse)
+{
+	nvlist_t *config;
+	char *name;
+	boolean_t ret;
+	uint64_t guid, vdev_guid;
+	zpool_handle_t *zhp;
+	nvlist_t *pool_config;
+	uint64_t stateval, isspare;
+	aux_cbdata_t cb = { 0 };
+	boolean_t isactive;
+
+	*inuse = B_FALSE;
+
+	if (zpool_read_label(fd, &config) != 0) {
+		(void) no_memory(hdl);
+		return (-1);
+	}
+
+	if (config == NULL)
+		return (0);
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &stateval) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+	    &vdev_guid) == 0);
+
+	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+	}
+
+	switch (stateval) {
+	case POOL_STATE_EXPORTED:
+		ret = B_TRUE;
+		break;
+
+	case POOL_STATE_ACTIVE:
+		/*
+		 * For an active pool, we have to determine if it's really part
+		 * of a currently active pool (in which case the pool will exist
+		 * and the guid will be the same), or whether it's part of an
+		 * active pool that was disconnected without being explicitly
+		 * exported.
+		 */
+		if (pool_active(hdl, name, guid, &isactive) != 0) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if (isactive) {
+			/*
+			 * Because the device may have been removed while
+			 * offlined, we only report it as active if the vdev is
+			 * still present in the config.  Otherwise, pretend like
+			 * it's not in use.
+			 */
+			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
+			    (pool_config = zpool_get_config(zhp, NULL))
+			    != NULL) {
+				nvlist_t *nvroot;
+
+				verify(nvlist_lookup_nvlist(pool_config,
+				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+				ret = find_guid(nvroot, vdev_guid);
+			} else {
+				ret = B_FALSE;
+			}
+
+			/*
+			 * If this is an active spare within another pool, we
+			 * treat it like an unused hot spare.  This allows the
+			 * user to create a pool with a hot spare that currently
+			 * in use within another pool.  Since we return B_TRUE,
+			 * libdiskmgt will continue to prevent generic consumers
+			 * from using the device.
+			 */
+			if (ret && nvlist_lookup_uint64(config,
+			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
+				stateval = POOL_STATE_SPARE;
+
+			if (zhp != NULL)
+				zpool_close(zhp);
+		} else {
+			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
+			ret = B_TRUE;
+		}
+		break;
+
+	case POOL_STATE_SPARE:
+		/*
+		 * For a hot spare, it can be either definitively in use, or
+		 * potentially active.  To determine if it's in use, we iterate
+		 * over all pools in the system and search for one with a spare
+		 * with a matching guid.
+		 *
+		 * Due to the shared nature of spares, we don't actually report
+		 * the potentially active case as in use.  This means the user
+		 * can freely create pools on the hot spares of exported pools,
+		 * but to do otherwise makes the resulting code complicated, and
+		 * we end up having to deal with this case anyway.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		cb.cb_type = ZPOOL_CONFIG_SPARES;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = B_TRUE;
+		} else {
+			ret = B_FALSE;
+		}
+		break;
+
+	case POOL_STATE_L2CACHE:
+
+		/*
+		 * Check if any pool is currently using this l2cache device.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = B_TRUE;
+		} else {
+			ret = B_FALSE;
+		}
+		break;
+
+	default:
+		ret = B_FALSE;
+	}
+
+
+	if (ret) {
+		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
+			if (cb.cb_zhp)
+				zpool_close(cb.cb_zhp);
+			nvlist_free(config);
+			return (-1);
+		}
+		*state = (pool_state_t)stateval;
+	}
+
+	if (cb.cb_zhp)
+		zpool_close(cb.cb_zhp);
+
+	nvlist_free(config);
+	*inuse = ret;
+	return (0);
+}
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
new file mode 100644
index 000000000..7c5c7f3ec
--- /dev/null
+++ b/lib/libzfs/libzfs_mount.c
@@ -0,0 +1,1399 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Routines to manage ZFS mounts.  We separate all the nasty routines that have
+ * to deal with the OS.  The following functions are the main entry points --
+ * they are used by mount and unmount and when changing a filesystem's
+ * mountpoint.
+ *
+ * 	zfs_is_mounted()
+ * 	zfs_mount()
+ * 	zfs_unmount()
+ * 	zfs_unmountall()
+ *
+ * This file also contains the functions used to manage sharing filesystems via
+ * NFS and iSCSI:
+ *
+ * 	zfs_is_shared()
+ * 	zfs_share()
+ * 	zfs_unshare()
+ *
+ * 	zfs_is_shared_nfs()
+ * 	zfs_is_shared_smb()
+ * 	zfs_is_shared_iscsi()
+ * 	zfs_share_proto()
+ * 	zfs_shareall()
+ * 	zfs_share_iscsi()
+ * 	zfs_unshare_nfs()
+ * 	zfs_unshare_smb()
+ * 	zfs_unshareall_nfs()
+ *	zfs_unshareall_smb()
+ *	zfs_unshareall()
+ *	zfs_unshareall_bypath()
+ * 	zfs_unshare_iscsi()
+ *
+ * The following functions are available for pool consumers, and will
+ * mount/unmount and share/unshare all datasets within pool:
+ *
+ * 	zpool_enable_datasets()
+ * 	zpool_disable_datasets()
+ */
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+
+#include <libshare.h>
+#include <sys/systeminfo.h>
+#define	MAXISALEN	257	/* based on sysinfo(2) man page */
+
+/* Forward declarations for share helpers defined later in this file. */
+static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
+zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
+    zfs_share_proto_t);
+
+/*
+ * Entry points resolved from libiscsitgt.so.1 by zfs_iscsi_init().  They are
+ * all left NULL if the library or any one of the symbols cannot be found, so
+ * every caller must check for NULL before calling through them.
+ */
+static int (*iscsitgt_zfs_share)(const char *);
+static int (*iscsitgt_zfs_unshare)(const char *);
+static int (*iscsitgt_zfs_is_shared)(const char *);
+static int (*iscsitgt_svc_online)();
+
+/*
+ * The share protocols table must be in the same order as the
+ * zfs_share_proto_t enum in libzfs_impl.h: entries are looked up by indexing
+ * the table with the protocol value (proto_table[proto]).
+ */
+typedef struct {
+	zfs_prop_t p_prop;	/* dataset property holding the share options */
+	char *p_name;		/* protocol name ("nfs", "smb") */
+	int p_share_err;	/* libzfs error code reported on share failure */
+	int p_unshare_err;	/* libzfs error code for unshare failure */
+} proto_table_t;
+
+proto_table_t proto_table[PROTO_END] = {
+	{ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
+	{ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
+};
+
+/*
+ * PROTO_END-terminated protocol lists.  The share/unshare routines in this
+ * file walk one of these lists until they reach PROTO_END.
+ */
+
+/* NFS only */
+zfs_share_proto_t nfs_only[] = {
+	PROTO_NFS,
+	PROTO_END
+};
+
+/* SMB only */
+zfs_share_proto_t smb_only[] = {
+	PROTO_SMB,
+	PROTO_END
+};
+/* every protocol in proto_table */
+zfs_share_proto_t share_all_proto[] = {
+	PROTO_NFS,
+	PROTO_SMB,
+	PROTO_END
+};
+
+/*
+ * Library initializer: try to load libiscsitgt.so.1 and resolve the four
+ * entry points used for iSCSI sharing.  The interface is all-or-nothing: if
+ * the library or any symbol is missing, every pointer is reset to NULL and
+ * the library handle is released again, so callers only ever need to test
+ * the individual pointer they are about to use.
+ */
+#pragma init(zfs_iscsi_init)
+static void
+zfs_iscsi_init(void)
+{
+	void *libiscsitgt;
+
+	libiscsitgt = dlopen("/lib/libiscsitgt.so.1", RTLD_LAZY | RTLD_GLOBAL);
+	if (libiscsitgt == NULL) {
+		iscsitgt_zfs_share = NULL;
+		iscsitgt_zfs_unshare = NULL;
+		iscsitgt_zfs_is_shared = NULL;
+		iscsitgt_svc_online = NULL;
+		return;
+	}
+
+	iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
+	    "iscsitgt_zfs_share");
+	iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
+	    "iscsitgt_zfs_unshare");
+	iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
+	    "iscsitgt_zfs_is_shared");
+	/* cast matches the declared type of iscsitgt_svc_online */
+	iscsitgt_svc_online = (int (*)())dlsym(libiscsitgt,
+	    "iscsitgt_svc_online");
+
+	if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_unshare == NULL ||
+	    iscsitgt_zfs_is_shared == NULL || iscsitgt_svc_online == NULL) {
+		iscsitgt_zfs_share = NULL;
+		iscsitgt_zfs_unshare = NULL;
+		iscsitgt_zfs_is_shared = NULL;
+		iscsitgt_svc_online = NULL;
+		/* don't keep a half-usable library mapped */
+		(void) dlclose(libiscsitgt);
+	}
+}
+
+/*
+ * Scan the cached sharetab stream for a line whose first tab-separated field
+ * equals 'mountpoint' and whose third field equals the name of 'proto'.
+ * Returns the matching zfs_share_type_t value, or SHARED_NOT_SHARED if there
+ * is no sharetab or no matching line.
+ */
+static zfs_share_type_t
+is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
+{
+	char line[MAXPATHLEN];
+
+	if (hdl->libzfs_sharetab == NULL)
+		return (SHARED_NOT_SHARED);
+
+	/* always start from the top of the file */
+	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
+
+	while (fgets(line, sizeof (line), hdl->libzfs_sharetab) != NULL) {
+		char *sep, *field;
+
+		/* field 1: the mountpoint */
+		if ((sep = strchr(line, '\t')) == NULL)
+			continue;
+		*sep = '\0';
+		if (strcmp(line, mountpoint) != 0)
+			continue;
+
+		/* field 2: skipped */
+		field = sep + 1;
+		if ((sep = strchr(field, '\t')) == NULL)
+			continue;
+
+		/* field 3: the protocol name */
+		field = sep + 1;
+		if ((sep = strchr(field, '\t')) == NULL)
+			continue;
+		*sep = '\0';
+		if (strcmp(field, proto_table[proto].p_name) != 0)
+			continue;
+
+		if (proto == PROTO_NFS)
+			return (SHARED_NFS);
+		if (proto == PROTO_SMB)
+			return (SHARED_SMB);
+		return (0);
+	}
+
+	return (SHARED_NOT_SHARED);
+}
+
+/*
+ * Report whether 'dirname' contains no entries other than "." and "..".
+ * A directory that cannot be opened is reported as empty so that the
+ * subsequent mount attempt fails with a more informative error.
+ */
+static boolean_t
+dir_is_empty(const char *dirname)
+{
+	DIR *dirp = opendir(dirname);
+	struct dirent64 *ent;
+	boolean_t empty = B_TRUE;
+
+	if (dirp == NULL)
+		return (B_TRUE);
+
+	/* stop at the first entry that is neither "." nor ".." */
+	while (empty && (ent = readdir64(dirp)) != NULL) {
+		if (strcmp(ent->d_name, ".") != 0 &&
+		    strcmp(ent->d_name, "..") != 0)
+			empty = B_FALSE;
+	}
+
+	(void) closedir(dirp);
+	return (empty);
+}
+
+/*
+ * Look up 'special' among the ZFS entries of the cached mnttab stream.  When
+ * an entry is found, B_TRUE is returned and, if 'where' is non-NULL, *where
+ * receives a zfs_strdup() copy of the mountpoint (the caller frees it).
+ * Otherwise B_FALSE is returned and *where is left untouched.
+ */
+boolean_t
+is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
+{
+	struct mnttab entry;
+	struct mnttab search = { 0 };
+
+	/*
+	 * Match on the special device rather than the mountpoint; this also
+	 * finds mounts whose mountpoint property is 'legacy'.
+	 */
+	search.mnt_fstype = MNTTYPE_ZFS;
+	search.mnt_special = (char *)special;
+
+	rewind(zfs_hdl->libzfs_mnttab);
+	if (getmntany(zfs_hdl->libzfs_mnttab, &entry, &search) == 0) {
+		if (where != NULL)
+			*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Handle-based wrapper around is_mounted(): checks whether the dataset named
+ * by 'zhp' is mounted and, if 'where' is non-NULL, returns a copy of its
+ * mountpoint in *where.
+ */
+boolean_t
+zfs_is_mounted(zfs_handle_t *zhp, char **where)
+{
+	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
+}
+
+/*
+ * Decide whether the dataset can be mounted by libzfs.  The mountpoint
+ * property is written to 'buf' as soon as the dataset type is known to
+ * support it.  B_FALSE is returned when the type has no mountpoint property,
+ * the mountpoint is 'none' or 'legacy', canmount is off, or the dataset is
+ * zoned and we are running in the global zone.  '*source' (if non-NULL) is
+ * only filled in when B_TRUE is returned.
+ */
+static boolean_t
+zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
+    zprop_source_t *source)
+{
+	zprop_source_t srctype;
+	char srcname[ZFS_MAXNAMELEN];
+
+	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
+		return (B_FALSE);
+
+	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
+	    &srctype, srcname, sizeof (srcname), B_FALSE) == 0);
+
+	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0)
+		return (B_FALSE);
+	if (strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
+		return (B_FALSE);
+
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
+		return (B_FALSE);
+
+	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
+		if (getzoneid() == GLOBAL_ZONEID)
+			return (B_FALSE);
+	}
+
+	if (source != NULL)
+		*source = srctype;
+
+	return (B_TRUE);
+}
+
+/*
+ * Mount the given filesystem.
+ *
+ * 'options' is an optional mount option string (NULL means none) and 'flags'
+ * is OR-ed into the flags passed to mount(2).  Datasets that are not
+ * mountable (see zfs_is_mountable()) are silently skipped and 0 is returned.
+ * Returns 0 on success, or the result of zfs_error_fmt() with
+ * EZFS_MOUNTFAILED on failure.
+ */
+int
+zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
+{
+	struct stat buf;
+	char mountpoint[ZFS_MAXPROPLEN];
+	char mntopts[MNT_LINE_MAX];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	/* work on a private, writable copy of the option string */
+	if (options == NULL)
+		mntopts[0] = '\0';
+	else
+		(void) strlcpy(mntopts, options, sizeof (mntopts));
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
+		return (0);
+
+	/* Create the directory if it doesn't already exist */
+	if (lstat(mountpoint, &buf) != 0) {
+		if (mkdirp(mountpoint, 0755) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "failed to create mountpoint"));
+			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+			    mountpoint));
+		}
+	}
+
+	/*
+	 * Determine if the mountpoint is empty.  If not, refuse to perform the
+	 * mount.  We don't perform this check if MS_OVERLAY is specified, which
+	 * would defeat the point.  We also avoid this check if 'remount' is
+	 * specified.
+	 */
+	if ((flags & MS_OVERLAY) == 0 &&
+	    strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
+	    !dir_is_empty(mountpoint)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "directory is not empty"));
+		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
+	}
+
+	/* perform the mount */
+	if (mount(zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags,
+	    MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
+		/*
+		 * Generic errors are nasty, but there are just way too many
+		 * from mount(), and they're well-understood.  We pick a few
+		 * common ones to improve upon.
+		 */
+		if (errno == EBUSY) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "mountpoint or dataset is busy"));
+		} else if (errno == EPERM) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Insufficient privileges"));
+		} else {
+			zfs_error_aux(hdl, strerror(errno));
+		}
+
+		/* note: this message names the dataset, not the mountpoint */
+		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+		    zhp->zfs_name));
+	}
+
+	return (0);
+}
+
+/*
+ * Unmount the filesystem mounted at 'mountpoint', passing 'flags' through to
+ * umount2().  Returns 0 on success; on failure the errno text is recorded as
+ * the auxiliary error and the EZFS_UMOUNTFAILED result is returned.
+ */
+static int
+unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
+{
+	if (umount2(mountpoint, flags) == 0)
+		return (0);
+
+	zfs_error_aux(hdl, strerror(errno));
+	return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
+	    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+	    mountpoint));
+}
+
+/*
+ * Unmount the given filesystem.
+ *
+ * If 'mountpoint' is NULL the current mountpoint is looked up in the cached
+ * mnttab (filesystems only); if no entry is found there is nothing to do.
+ * The filesystem is unshared for all protocols before it is unmounted, and
+ * re-shared if the unmount itself fails.  Returns 0 on success or when
+ * nothing was mounted, -1 on failure.
+ */
+int
+zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
+{
+	struct mnttab search = { 0 }, entry;
+	char *mntpt = NULL;
+
+	/* check to see if need to unmount the filesystem */
+	search.mnt_special = zhp->zfs_name;
+	search.mnt_fstype = MNTTYPE_ZFS;
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
+	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
+
+		/*
+		 * mountpoint may have come from a call to
+		 * getmnt/getmntany if it isn't NULL. If it is NULL,
+		 * we know it comes from getmntany which can then get
+		 * overwritten later. We strdup it to play it safe.
+		 */
+		if (mountpoint == NULL)
+			mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
+		else
+			mntpt = zfs_strdup(zhp->zfs_hdl, mountpoint);
+
+		/*
+		 * Unshare and unmount the filesystem
+		 */
+		if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0) {
+			/* release our private copy on this path too */
+			free(mntpt);
+			return (-1);
+		}
+
+		if (unmount_one(zhp->zfs_hdl, mntpt, flags) != 0) {
+			free(mntpt);
+			/* best effort: restore the shares we just removed */
+			(void) zfs_shareall(zhp);
+			return (-1);
+		}
+		free(mntpt);
+	}
+
+	return (0);
+}
+
+/*
+ * Unmount this filesystem and any children inheriting the mountpoint property.
+ * To do this, just act like we're changing the mountpoint property, but don't
+ * remount the filesystems afterwards.
+ *
+ * Returns -1 if the changelist cannot be gathered, otherwise the result of
+ * changelist_prefix().
+ */
+int
+zfs_unmountall(zfs_handle_t *zhp, int flags)
+{
+	prop_changelist_t *clp;
+	int ret;
+
+	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags);
+	if (clp == NULL)
+		return (-1);
+
+	/* only the "prefix" half is run; there is no matching postfix call */
+	ret = changelist_prefix(clp);
+	changelist_free(clp);
+
+	return (ret);
+}
+
+/*
+ * Report whether the dataset is shared by any means: volumes are checked for
+ * an iSCSI share, everything else is checked against each protocol listed in
+ * share_all_proto.
+ */
+boolean_t
+zfs_is_shared(zfs_handle_t *zhp)
+{
+	zfs_share_type_t shared = 0;
+	zfs_share_proto_t *pp = share_all_proto;
+
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_is_shared_iscsi(zhp));
+
+	/* accumulate the share type bits of every protocol */
+	while (*pp != PROTO_END) {
+		shared |= zfs_is_shared_proto(zhp, NULL, *pp);
+		pp++;
+	}
+
+	return (shared ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Share the dataset: volumes are shared over iSCSI, all other datasets are
+ * shared for every protocol in share_all_proto.
+ */
+int
+zfs_share(zfs_handle_t *zhp)
+{
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_share_iscsi(zhp));
+
+	return (zfs_share_proto(zhp, share_all_proto));
+}
+
+/*
+ * Unshare the dataset: volumes are unshared from iSCSI, all other datasets go
+ * through zfs_unshareall().
+ */
+int
+zfs_unshare(zfs_handle_t *zhp)
+{
+	if (ZFS_IS_VOLUME(zhp))
+		return (zfs_unshare_iscsi(zhp));
+
+	return (zfs_unshareall(zhp));
+}
+
+/*
+ * Check whether the filesystem is currently mounted and shared via 'proto'.
+ * Returns the share type reported by is_shared(), or SHARED_NOT_SHARED.  When
+ * the filesystem is shared and 'where' is non-NULL, *where receives the
+ * mountpoint string, which the caller must free.
+ */
+zfs_share_type_t
+zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
+{
+	char *mntpt;
+	zfs_share_type_t type;
+
+	if (!zfs_is_mounted(zhp, &mntpt))
+		return (SHARED_NOT_SHARED);
+
+	type = is_shared(zhp->zfs_hdl, mntpt, proto);
+
+	/* hand the mountpoint to the caller only for a shared filesystem */
+	if (type != 0 && where != NULL) {
+		*where = mntpt;
+		return (type);
+	}
+
+	free(mntpt);
+	return (type != 0 ? type : SHARED_NOT_SHARED);
+}
+
+/*
+ * Returns true if the filesystem is shared over NFS; see
+ * zfs_is_shared_proto() for the meaning of 'where'.
+ */
+boolean_t
+zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
+{
+	return (zfs_is_shared_proto(zhp, where,
+	    PROTO_NFS) != SHARED_NOT_SHARED);
+}
+
+/*
+ * Returns true if the filesystem is shared over SMB; see
+ * zfs_is_shared_proto() for the meaning of 'where'.
+ */
+boolean_t
+zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
+{
+	return (zfs_is_shared_proto(zhp, where,
+	    PROTO_SMB) != SHARED_NOT_SHARED);
+}
+
+/*
+ * Make sure things will work if libshare isn't installed by using
+ * wrapper functions that check to see that the pointers to functions
+ * initialized in _zfs_init_libshare() are actually present.
+ *
+ * Each pointer below corresponds to the libshare symbol of the same name
+ * without the leading underscore, and stays NULL when libshare could not be
+ * loaded.
+ */
+
+static sa_handle_t (*_sa_init)(int);
+static void (*_sa_fini)(sa_handle_t);
+static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
+static int (*_sa_enable_share)(sa_share_t, char *);
+static int (*_sa_disable_share)(sa_share_t, char *);
+static char *(*_sa_errorstr)(int);
+static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
+static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
+static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
+static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
+    char *, char *, zprop_source_t, char *, char *, char *);
+static void (*_sa_update_sharetab_ts)(sa_handle_t);
+
+/*
+ * _zfs_init_libshare()
+ *
+ * Find the libshare.so.1 entry points that we use here and save the
+ * values to be used later. This is triggered by the runtime loader.
+ * Make sure the correct ISA version is loaded.
+ *
+ * The set of entry points is all-or-nothing: if any symbol is missing,
+ * every pointer is reset to NULL and the library is closed, so that no
+ * pointer into the unloaded library is left behind.
+ */
+
+#pragma init(_zfs_init_libshare)
+static void
+_zfs_init_libshare(void)
+{
+	void *libshare;
+	char path[MAXPATHLEN];
+	char isa[MAXISALEN];
+
+	/* 64-bit builds load the library from the ISA-specific subdirectory */
+#if defined(_LP64)
+	if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1)
+		isa[0] = '\0';
+#else
+	isa[0] = '\0';
+#endif
+	(void) snprintf(path, MAXPATHLEN,
+	    "/usr/lib/%s/libshare.so.1", isa);
+
+	if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
+		_sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
+		_sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
+		_sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
+		    dlsym(libshare, "sa_find_share");
+		_sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
+		    "sa_enable_share");
+		_sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
+		    "sa_disable_share");
+		_sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr");
+		_sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *))
+		    dlsym(libshare, "sa_parse_legacy_options");
+		_sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
+		    dlsym(libshare, "sa_needs_refresh");
+		_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
+		    dlsym(libshare, "sa_get_zfs_handle");
+		_sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
+		    sa_share_t, char *, char *, zprop_source_t, char *,
+		    char *, char *))dlsym(libshare, "sa_zfs_process_share");
+		_sa_update_sharetab_ts = (void (*)(sa_handle_t))
+		    dlsym(libshare, "sa_update_sharetab_ts");
+		if (_sa_init == NULL || _sa_fini == NULL ||
+		    _sa_find_share == NULL || _sa_enable_share == NULL ||
+		    _sa_disable_share == NULL || _sa_errorstr == NULL ||
+		    _sa_parse_legacy_options == NULL ||
+		    _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
+		    _sa_zfs_process_share == NULL ||
+		    _sa_update_sharetab_ts == NULL) {
+			_sa_init = NULL;
+			_sa_fini = NULL;
+			/* previously left dangling after dlclose() */
+			_sa_find_share = NULL;
+			_sa_disable_share = NULL;
+			_sa_enable_share = NULL;
+			_sa_errorstr = NULL;
+			_sa_parse_legacy_options = NULL;
+			_sa_needs_refresh = NULL;
+			_sa_get_zfs_handle = NULL;
+			_sa_zfs_process_share = NULL;
+			_sa_update_sharetab_ts = NULL;
+			(void) dlclose(libshare);
+		}
+	}
+}
+
+/*
+ * zfs_init_libshare(zhandle, service)
+ *
+ * Make sure the libshare handle cached in 'zhandle' is initialized.
+ * 'service' is passed straight through to sa_init().  Returns SA_OK on
+ * success, SA_CONFIG_ERR if libshare is not available, and SA_NO_MEMORY
+ * if no share handle could be obtained.
+ */
+int
+zfs_init_libshare(libzfs_handle_t *zhandle, int service)
+{
+	/* libshare (or one of its entry points) could not be loaded */
+	if (_sa_init == NULL)
+		return (SA_CONFIG_ERR);
+
+	if (zhandle->libzfs_shareflags & ZFSSHARE_MISS) {
+		/*
+		 * A previous lookup missed the cache, most likely because
+		 * the dataset was only just created.  Clear the flag and
+		 * let libshare compare timestamps: if some other process
+		 * changed the configuration, throw the handle away and
+		 * build a fresh one.
+		 */
+		zhandle->libzfs_shareflags &= ~ZFSSHARE_MISS;
+		if (_sa_needs_refresh != NULL &&
+		    _sa_needs_refresh(zhandle->libzfs_sharehdl)) {
+			zfs_uninit_libshare(zhandle);
+			zhandle->libzfs_sharehdl = _sa_init(service);
+		}
+	}
+
+	/* first use: create the handle lazily */
+	if (zhandle != NULL && zhandle->libzfs_sharehdl == NULL)
+		zhandle->libzfs_sharehdl = _sa_init(service);
+
+	return (zhandle->libzfs_sharehdl == NULL ? SA_NO_MEMORY : SA_OK);
+}
+
+/*
+ * zfs_uninit_libshare(zhandle)
+ *
+ * Release the libshare handle cached in 'zhandle', if there is one, and
+ * clear the cached pointer.  Safe to call repeatedly and with a NULL
+ * handle.
+ */
+void
+zfs_uninit_libshare(libzfs_handle_t *zhandle)
+{
+	if (zhandle == NULL || zhandle->libzfs_sharehdl == NULL)
+		return;
+
+	if (_sa_fini != NULL)
+		_sa_fini(zhandle->libzfs_sharehdl);
+
+	zhandle->libzfs_sharehdl = NULL;
+}
+
+/*
+ * zfs_parse_options(options, proto)
+ *
+ * Run 'options' through libshare's legacy option parser for the given
+ * protocol.  A NULL group argument is passed, which indicates that this
+ * is a "parse" only.  Returns SA_CONFIG_ERR when libshare is unavailable.
+ */
+int
+zfs_parse_options(char *options, zfs_share_proto_t proto)
+{
+	if (_sa_parse_legacy_options == NULL)
+		return (SA_CONFIG_ERR);
+
+	return (_sa_parse_legacy_options(NULL, options,
+	    proto_table[proto].p_name));
+}
+
+/*
+ * zfs_sa_find_share(handle, path)
+ *
+ * NULL-safe wrapper around sa_find_share: looks up the share for 'path'
+ * in the configuration, or returns NULL when libshare is unavailable.
+ */
+static sa_share_t
+zfs_sa_find_share(sa_handle_t handle, char *path)
+{
+	if (_sa_find_share == NULL)
+		return (NULL);
+
+	return (_sa_find_share(handle, path));
+}
+
+/*
+ * zfs_sa_enable_share(share, proto)
+ *
+ * NULL-safe wrapper around sa_enable_share, which enables a share for
+ * the specified protocol.  Returns SA_CONFIG_ERR when libshare is
+ * unavailable.
+ */
+static int
+zfs_sa_enable_share(sa_share_t share, char *proto)
+{
+	if (_sa_enable_share == NULL)
+		return (SA_CONFIG_ERR);
+
+	return (_sa_enable_share(share, proto));
+}
+
+/*
+ * zfs_sa_disable_share(share, proto)
+ *
+ * NULL-safe wrapper around sa_disable_share, which disables a share for
+ * the specified protocol.  Returns SA_CONFIG_ERR when libshare is
+ * unavailable.
+ */
+static int
+zfs_sa_disable_share(sa_share_t share, char *proto)
+{
+	if (_sa_disable_share == NULL)
+		return (SA_CONFIG_ERR);
+
+	return (_sa_disable_share(share, proto));
+}
+
+/*
+ * Share the given filesystem according to the options in the specified
+ * protocol specific properties (sharenfs, sharesmb).  We rely
+ * on "libshare" to do the dirty work for us.
+ *
+ * 'proto' is a PROTO_END-terminated list of protocols to share with.
+ * Returns 0 on success (including when the dataset is not mountable or
+ * sharing is turned off) and -1 on failure.
+ */
+static int
+zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	char shareopts[ZFS_MAXPROPLEN];
+	char sourcestr[ZFS_MAXPROPLEN];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	sa_share_t share;
+	zfs_share_proto_t *curr_proto;
+	zprop_source_t sourcetype;
+	int ret;
+
+	/* datasets that cannot be mounted are never shared */
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
+		return (0);
+
+	/*
+	 * NOTE(review): the NFS error code is used here regardless of which
+	 * protocols were requested.
+	 */
+	if ((ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
+		(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
+		    zfs_get_name(zhp), _sa_errorstr != NULL ?
+		    _sa_errorstr(ret) : "");
+		return (-1);
+	}
+
+	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
+		/*
+		 * Skip this protocol if there are no share options.
+		 */
+		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
+		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
+		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
+		    strcmp(shareopts, "off") == 0)
+			continue;
+
+		/*
+		 * If the 'zoned' property is set, then zfs_is_mountable()
+		 * will have already bailed out if we are in the global zone.
+		 * But local zones cannot be NFS servers, so we ignore it for
+		 * local zones as well.
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
+			continue;
+
+		share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
+		if (share == NULL) {
+			/*
+			 * This may be a new file system that was just
+			 * created so isn't in the internal cache
+			 * (second time through). Rather than
+			 * reloading the entire configuration, we can
+			 * assume ZFS has done the checking and it is
+			 * safe to add this to the internal
+			 * configuration.
+			 */
+			if (_sa_zfs_process_share(hdl->libzfs_sharehdl,
+			    NULL, NULL, mountpoint,
+			    proto_table[*curr_proto].p_name, sourcetype,
+			    shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
+				(void) zfs_error_fmt(hdl,
+				    proto_table[*curr_proto].p_share_err,
+				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+				    zfs_get_name(zhp));
+				return (-1);
+			}
+			/* remember the miss; zfs_init_libshare() acts on it */
+			hdl->libzfs_shareflags |= ZFSSHARE_MISS;
+			share = zfs_sa_find_share(hdl->libzfs_sharehdl,
+			    mountpoint);
+		}
+		if (share != NULL) {
+			int err;
+			err = zfs_sa_enable_share(share,
+			    proto_table[*curr_proto].p_name);
+			if (err != SA_OK) {
+				(void) zfs_error_fmt(hdl,
+				    proto_table[*curr_proto].p_share_err,
+				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+				    zfs_get_name(zhp));
+				return (-1);
+			}
+		} else {
+			/* still not found after adding it to the cache */
+			(void) zfs_error_fmt(hdl,
+			    proto_table[*curr_proto].p_share_err,
+			    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+			    zfs_get_name(zhp));
+			return (-1);
+		}
+
+	}
+	return (0);
+}
+
+
+/* Share the filesystem over NFS only. */
+int
+zfs_share_nfs(zfs_handle_t *zhp)
+{
+	return (zfs_share_proto(zhp, nfs_only));
+}
+
+/* Share the filesystem over SMB only. */
+int
+zfs_share_smb(zfs_handle_t *zhp)
+{
+	return (zfs_share_proto(zhp, smb_only));
+}
+
+/* Share the filesystem over every protocol in share_all_proto. */
+int
+zfs_shareall(zfs_handle_t *zhp)
+{
+	return (zfs_share_proto(zhp, share_all_proto));
+}
+
+/*
+ * Unshare a filesystem by mountpoint.
+ *
+ * 'name' is the dataset name used in error messages, 'mountpoint' is the
+ * path to look up in the libshare configuration and 'proto' selects the
+ * protocol to disable.  Returns 0 on success, otherwise the result of
+ * zfs_error_fmt().
+ */
+static int
+unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
+    zfs_share_proto_t proto)
+{
+	sa_share_t share;
+	int err;
+	char *mntpt;
+	/*
+	 * Mountpoint could get trashed if libshare calls getmntany
+	 * which it does during API initialization, so strdup the
+	 * value.
+	 */
+	mntpt = zfs_strdup(hdl, mountpoint);
+
+	/* make sure libshare initialized */
+	if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
+		free(mntpt);	/* don't need the copy anymore */
+		/*
+		 * Initialization fails with SA_CONFIG_ERR precisely when
+		 * libshare could not be loaded, in which case _sa_errorstr
+		 * is NULL too and must not be called.
+		 */
+		return (zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+		    name, _sa_errorstr != NULL ? _sa_errorstr(err) : ""));
+	}
+
+	share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
+	free(mntpt);	/* don't need the copy anymore */
+
+	if (share != NULL) {
+		err = zfs_sa_disable_share(share, proto_table[proto].p_name);
+		if (err != SA_OK) {
+			return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+			    name, _sa_errorstr != NULL ?
+			    _sa_errorstr(err) : ""));
+		}
+	} else {
+		return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
+		    name));
+	}
+	return (0);
+}
+
+/*
+ * Unshare the given filesystem for every protocol in the PROTO_END-terminated
+ * list 'proto'.  When 'mountpoint' is NULL the mountpoint is looked up in the
+ * cached mnttab (filesystems only); if none is found there is nothing to do.
+ * Returns 0 on success and -1 as soon as one unshare fails.
+ */
+int
+zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
+    zfs_share_proto_t *proto)
+{
+	struct mnttab entry;
+	struct mnttab search = { 0 };
+	zfs_share_proto_t *pp;
+	char *mntpt;
+	int ret = 0;
+
+	/* set up the mnttab search for this dataset */
+	search.mnt_special = (char *)zfs_get_name(zhp);
+	search.mnt_fstype = MNTTYPE_ZFS;
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
+
+	/* work on a private copy; libshare may clobber the mnttab entry */
+	if (mountpoint != NULL) {
+		mntpt = zfs_strdup(zhp->zfs_hdl, mountpoint);
+	} else if ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0) {
+		mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
+	} else {
+		/* not mounted: nothing can be shared */
+		return (0);
+	}
+
+	for (pp = proto; *pp != PROTO_END; pp++) {
+		if (is_shared(zhp->zfs_hdl, mntpt, *pp) &&
+		    unshare_one(zhp->zfs_hdl, zhp->zfs_name,
+		    mntpt, *pp) != 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	free(mntpt);
+	return (ret);
+}
+
+/* Unshare the filesystem from NFS only; 'mountpoint' may be NULL. */
+int
+zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
+{
+	return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
+}
+
+/* Unshare the filesystem from SMB only; 'mountpoint' may be NULL. */
+int
+zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
+{
+	return (zfs_unshare_proto(zhp, mountpoint, smb_only));
+}
+
+/*
+ * Same as zfs_unmountall(), but for NFS and SMB unshares.
+ *
+ * 'proto' is the PROTO_END-terminated list of protocols handed to
+ * changelist_unshare().  Returns -1 if the changelist cannot be gathered,
+ * otherwise the result of changelist_unshare().
+ */
+int
+zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
+{
+	prop_changelist_t *clp;
+	int ret;
+
+	/*
+	 * NOTE(review): the changelist is always gathered on the sharenfs
+	 * property, even when only SMB is being unshared -- confirm that
+	 * this is intended.
+	 */
+	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
+	if (clp == NULL)
+		return (-1);
+
+	ret = changelist_unshare(clp, proto);
+	changelist_free(clp);
+
+	return (ret);
+}
+
+/* zfs_unshareall_proto() restricted to NFS. */
+int
+zfs_unshareall_nfs(zfs_handle_t *zhp)
+{
+	return (zfs_unshareall_proto(zhp, nfs_only));
+}
+
+/* zfs_unshareall_proto() restricted to SMB. */
+int
+zfs_unshareall_smb(zfs_handle_t *zhp)
+{
+	return (zfs_unshareall_proto(zhp, smb_only));
+}
+
+/* zfs_unshareall_proto() for every protocol in share_all_proto. */
+int
+zfs_unshareall(zfs_handle_t *zhp)
+{
+	return (zfs_unshareall_proto(zhp, share_all_proto));
+}
+
+/*
+ * Unshare this dataset only, identified by 'mountpoint', from every protocol
+ * in share_all_proto.
+ */
+int
+zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
+{
+	return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
+}
+
+/*
+ * Remove the mountpoint directory associated with the current dataset, if
+ * appropriate.  The directory is only removed when:
+ *
+ *	- The dataset is mountable according to zfs_is_mountable()
+ *	- The mountpoint property is the default or inherited
+ *
+ * rmdir() is used, so a directory that still has contents is left in place.
+ * Any other directories we leave alone.
+ */
+void
+remove_mountpoint(zfs_handle_t *zhp)
+{
+	char mountpoint[ZFS_MAXPROPLEN];
+	zprop_source_t source;
+
+	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
+	    &source))
+		return;
+
+	/* explicitly chosen mountpoints are left alone */
+	if (source != ZPROP_SRC_DEFAULT && source != ZPROP_SRC_INHERITED)
+		return;
+
+	/*
+	 * Errors are deliberately ignored: the filesystem may have been
+	 * removed or moved in the meantime, and a failure here is of no
+	 * use to the administrator.
+	 */
+	(void) rmdir(mountpoint);
+}
+
+/*
+ * Report whether the volume is currently shared over iSCSI.  Always false
+ * when the iSCSI target library could not be loaded.
+ */
+boolean_t
+zfs_is_shared_iscsi(zfs_handle_t *zhp)
+{
+	/*
+	 * If the iscsi daemon isn't running then we aren't shared
+	 */
+	if (iscsitgt_svc_online != NULL && iscsitgt_svc_online() == 1)
+		return (B_FALSE);
+
+	return (iscsitgt_zfs_is_shared != NULL &&
+	    iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
+}
+
+/*
+ * Share the volume over iSCSI if its shareiscsi property asks for it.
+ * Returns 0 on success or when sharing is off, otherwise the result of
+ * zfs_error_fmt().
+ */
+int
+zfs_share_iscsi(zfs_handle_t *zhp)
+{
+	char opts[ZFS_MAXPROPLEN];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	const char *dataset = zhp->zfs_name;
+	int error;
+
+	/*
+	 * Nothing to do if the property cannot be read or is "off".
+	 */
+	if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, opts,
+	    sizeof (opts), NULL, NULL, 0, B_FALSE) != 0 ||
+	    strcmp(opts, "off") == 0)
+		return (0);
+
+	if (iscsitgt_zfs_share != NULL && iscsitgt_zfs_share(dataset) == 0)
+		return (0);
+
+	/*
+	 * Use the more specific error when the failure was EPERM and
+	 * the service is not available.
+	 */
+	error = EZFS_SHAREISCSIFAILED;
+	if (iscsitgt_svc_online && errno == EPERM &&
+	    (iscsitgt_svc_online() != 0))
+		error = EZFS_ISCSISVCUNAVAIL;
+
+	return (zfs_error_fmt(hdl, error,
+	    dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
+}
+
+/*
+ * Remove the iSCSI share of a volume.  Returns 0 when the volume is not
+ * shared or was unshared successfully, otherwise the result of
+ * zfs_error_fmt() with EZFS_UNSHAREISCSIFAILED.
+ */
+int
+zfs_unshare_iscsi(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	const char *dataset = zfs_get_name(zhp);
+
+	/*
+	 * Nothing to do if the volume is not shared.
+	 * NOTE(review): this compares a boolean_t result against the
+	 * zfs_share_type_t constant SHARED_ISCSI -- confirm the values agree.
+	 */
+	if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI)
+		return (0);
+
+	/*
+	 * A failure with ENODEV means the zvol wasn't shared in the first
+	 * place, which counts as success.
+	 */
+	if (iscsitgt_zfs_unshare != NULL &&
+	    (iscsitgt_zfs_unshare(dataset) == 0 || errno == ENODEV))
+		return (0);
+
+	if (errno == EPERM)
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Insufficient privileges to unshare iscsi"));
+
+	return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
+	    dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
+}
+
+/*
+ * Growable array of dataset handles collected by mount_cb() for
+ * zpool_enable_datasets().
+ */
+typedef struct mount_cbdata {
+	zfs_handle_t	**cb_datasets;	/* array of collected handles */
+	int 		cb_used;	/* number of slots in use */
+	int		cb_alloc;	/* number of slots allocated */
+} mount_cbdata_t;
+
+/*
+ * zfs_iter_filesystems() callback: append 'zhp' to the mount_cbdata_t array
+ * in 'data' and recurse into its children.  Handles that are neither
+ * filesystems nor volumes, or whose canmount property is 'noauto', are
+ * closed and skipped.  Ownership of 'zhp' passes to the array on success;
+ * on every other path the handle is closed here.  Returns 0 on success and
+ * non-zero on failure.
+ */
+static int
+mount_cb(zfs_handle_t *zhp, void *data)
+{
+	mount_cbdata_t *cbp = data;
+
+	if (!(zfs_get_type(zhp) & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME))) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/* array full: double its capacity */
+	if (cbp->cb_alloc == cbp->cb_used) {
+		void *ptr;
+
+		if ((ptr = zfs_realloc(zhp->zfs_hdl,
+		    cbp->cb_datasets, cbp->cb_alloc * sizeof (void *),
+		    cbp->cb_alloc * 2 * sizeof (void *))) == NULL) {
+			/* the handle was never stored, so nobody else frees it */
+			zfs_close(zhp);
+			return (-1);
+		}
+		cbp->cb_datasets = ptr;
+
+		cbp->cb_alloc *= 2;
+	}
+
+	cbp->cb_datasets[cbp->cb_used++] = zhp;
+
+	return (zfs_iter_filesystems(zhp, mount_cb, cbp));
+}
+
+/*
+ * qsort() comparator for an array of zfs_handle_t pointers.  Filesystems
+ * sort before all other dataset types and are ordered by their mountpoint
+ * property; the remaining datasets are ordered by name.
+ */
+static int
+dataset_cmp(const void *a, const void *b)
+{
+	/* qsort hands us pointers to the array slots, not the handles */
+	zfs_handle_t **za = (zfs_handle_t **)a;
+	zfs_handle_t **zb = (zfs_handle_t **)b;
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+	boolean_t gota, gotb;
+
+	if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
+		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
+		verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
+		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (gota && gotb)
+		return (strcmp(mounta, mountb));
+
+	if (gota)
+		return (-1);
+	if (gotb)
+		return (1);
+
+	/* compare the handles themselves, not the slot pointers */
+	return (strcmp(zfs_get_name(*za), zfs_get_name(*zb)));
+}
+
+/*
+ * Mount and share all datasets within the given pool.  This assumes that no
+ * datasets within the pool are currently mounted.  Because users can create
+ * complicated nested hierarchies of mountpoints, we first gather all the
+ * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
+ * we have the list of all filesystems, we iterate over them in order and mount
+ * and/or share each one.
+ *
+ * 'mntopts' and 'flags' are passed through to zfs_mount().  Returns 0 if
+ * everything was mounted and shared, and -1 if gathering the datasets, an
+ * allocation, or any individual mount or share failed.
+ */
+#pragma weak zpool_mount_datasets = zpool_enable_datasets
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+	mount_cbdata_t cb = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_handle_t *zfsp;
+	int i, ret = -1;
+	int *good;
+
+	/*
+	 * Gather all non-snap datasets within the pool.
+	 */
+	if ((cb.cb_datasets = zfs_alloc(hdl, 4 * sizeof (void *))) == NULL)
+		return (-1);
+	cb.cb_alloc = 4;
+
+	if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
+		goto out;
+
+	cb.cb_datasets[0] = zfsp;
+	cb.cb_used = 1;
+
+	if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0)
+		goto out;
+
+	/*
+	 * Sort the datasets by mountpoint.
+	 */
+	qsort(cb.cb_datasets, cb.cb_used, sizeof (void *), dataset_cmp);
+
+	/*
+	 * And mount all the datasets, keeping track of which ones
+	 * succeeded or failed.  The allocation is checked the same way
+	 * as the one above; ret is still -1 at this point.
+	 * NOTE(review): entries for failed mounts are never written, so
+	 * this relies on zfs_alloc() returning zeroed memory -- confirm.
+	 */
+	good = zfs_alloc(zhp->zpool_hdl, cb.cb_used * sizeof (int));
+	if (good == NULL)
+		goto out;
+
+	ret = 0;
+	for (i = 0; i < cb.cb_used; i++) {
+		if (zfs_mount(cb.cb_datasets[i], mntopts, flags) != 0)
+			ret = -1;
+		else
+			good[i] = 1;
+	}
+
+	/*
+	 * Then share all the ones that need to be shared. This needs
+	 * to be a separate pass in order to avoid excessive reloading
+	 * of the configuration.
+	 */
+	for (i = 0; i < cb.cb_used; i++) {
+		if (good[i] && zfs_share(cb.cb_datasets[i]) != 0)
+			ret = -1;
+	}
+
+	free(good);
+
+out:
+	/* close every handle gathered so far and release the array */
+	for (i = 0; i < cb.cb_used; i++)
+		zfs_close(cb.cb_datasets[i]);
+	free(cb.cb_datasets);
+
+	return (ret);
+}
+
+
+/*
+ * Callback handed to zpool_iter_zvol(): remove the iSCSI share of a single
+ * volume.
+ *
+ *	dataset	- name of the dataset being visited
+ *	data	- the libzfs handle, passed through as an opaque pointer
+ *
+ * Returns 0 if the dataset was skipped or unshared successfully, and -1 if
+ * zfs_unshare_iscsi() failed.
+ */
+static int
+zvol_cb(const char *dataset, void *data)
+{
+	libzfs_handle_t *hdl = data;
+	zfs_handle_t *zhp;
+	int ret = 0;
+
+	/*
+	 * Ignore snapshots and ignore failures from non-existent datasets.
+	 */
+	if (strchr(dataset, '@') != NULL ||
+	    (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
+		return (0);
+
+	if (zfs_unshare_iscsi(zhp) != 0)
+		ret = -1;
+
+	/*
+	 * Release the handle on the failure path as well; returning early
+	 * without closing it would leak the handle.
+	 */
+	zfs_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * qsort() comparator for an array of 'char *' mountpoint strings.  The
+ * operands are handed to strcmp() in swapped order, so the resulting order
+ * is descending.
+ */
+static int
+mountpoint_compare(const void *a, const void *b)
+{
+	char **lhs = (char **)a;
+	char **rhs = (char **)b;
+
+	return (strcmp(*rhs, *lhs));
+}
+
+/*
+ * Unshare and unmount all datasets within the given pool.  We don't want to
+ * rely on traversing the DSL to discover the filesystems within the pool,
+ * because this may be expensive (if not all of them are mounted), and can fail
+ * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
+ * gather all the filesystems that are currently mounted.
+ *
+ *	zhp	- handle of the pool whose datasets are to be disabled
+ *	force	- when set, MS_FORCE is passed to each unmount
+ *
+ * Returns 0 once everything has been unshared and unmounted, and -1 as soon
+ * as any step fails (remaining work is abandoned at that point).
+ */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+	int used, alloc;
+	struct mnttab entry;
+	size_t namelen;
+	char **mountpoints = NULL;
+	zfs_handle_t **datasets = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	int i;
+	int ret = -1;
+	int flags = (force ? MS_FORCE : 0);
+
+	/*
+	 * First unshare all zvols.
+	 */
+	if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
+		return (-1);
+
+	namelen = strlen(zhp->zpool_name);
+
+	/* Scan the mount table from the start on every call. */
+	rewind(hdl->libzfs_mnttab);
+	used = alloc = 0;
+	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
+		/*
+		 * Ignore non-ZFS entries.
+		 */
+		if (entry.mnt_fstype == NULL ||
+		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
+			continue;
+
+		/*
+		 * Ignore filesystems not within this pool.  The special
+		 * device must be the pool name itself or the pool name
+		 * followed by '/', so a pool whose name merely starts with
+		 * ours is not matched.
+		 */
+		if (entry.mnt_mountp == NULL ||
+		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
+		    (entry.mnt_special[namelen] != '/' &&
+		    entry.mnt_special[namelen] != '\0'))
+			continue;
+
+		/*
+		 * At this point we've found a filesystem within our pool.  Add
+		 * it to our growing list.  Both arrays start at 8 slots and
+		 * are doubled together whenever they fill up.
+		 */
+		if (used == alloc) {
+			if (alloc == 0) {
+				if ((mountpoints = zfs_alloc(hdl,
+				    8 * sizeof (void *))) == NULL)
+					goto out;
+
+				if ((datasets = zfs_alloc(hdl,
+				    8 * sizeof (void *))) == NULL)
+					goto out;
+
+				alloc = 8;
+			} else {
+				void *ptr;
+
+				if ((ptr = zfs_realloc(hdl, mountpoints,
+				    alloc * sizeof (void *),
+				    alloc * 2 * sizeof (void *))) == NULL)
+					goto out;
+				mountpoints = ptr;
+
+				if ((ptr = zfs_realloc(hdl, datasets,
+				    alloc * sizeof (void *),
+				    alloc * 2 * sizeof (void *))) == NULL)
+					goto out;
+				datasets = ptr;
+
+				alloc *= 2;
+			}
+		}
+
+		/*
+		 * 'used' is only advanced after both slots have been filled,
+		 * so the cleanup loop at 'out' never touches a half-written
+		 * entry.
+		 */
+		if ((mountpoints[used] = zfs_strdup(hdl,
+		    entry.mnt_mountp)) == NULL)
+			goto out;
+
+		/*
+		 * This is allowed to fail, in case there is some I/O error.  It
+		 * is only used to determine if we need to remove the underlying
+		 * mountpoint, so failure is not fatal.
+		 */
+		datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
+
+		used++;
+	}
+
+	/*
+	 * At this point, we have the entire list of filesystems, so sort it by
+	 * mountpoint.  mountpoint_compare() reverses the strcmp() operands,
+	 * so a nested mountpoint such as "/a/b" sorts ahead of its parent
+	 * "/a".  Only 'mountpoints' is sorted; afterwards datasets[i] no
+	 * longer pairs with mountpoints[i], and the loops below never assume
+	 * that it does.
+	 */
+	qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
+
+	/*
+	 * Walk through and first unshare everything.
+	 */
+	for (i = 0; i < used; i++) {
+		zfs_share_proto_t *curr_proto;
+		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
+		    curr_proto++) {
+			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
+			    unshare_one(hdl, mountpoints[i],
+			    mountpoints[i], *curr_proto) != 0)
+				goto out;
+		}
+	}
+
+	/*
+	 * Now unmount everything, removing the underlying directories as
+	 * appropriate.
+	 */
+	for (i = 0; i < used; i++) {
+		if (unmount_one(hdl, mountpoints[i], flags) != 0)
+			goto out;
+	}
+
+	/* Handles that could not be created above are simply skipped. */
+	for (i = 0; i < used; i++) {
+		if (datasets[i])
+			remove_mountpoint(datasets[i]);
+	}
+
+	ret = 0;
+out:
+	/* Common exit: release every handle and string gathered so far. */
+	for (i = 0; i < used; i++) {
+		if (datasets[i])
+			zfs_close(datasets[i]);
+		free(mountpoints[i]);
+	}
+	free(datasets);
+	free(mountpoints);
+
+	return (ret);
+}
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
new file mode 100644
index 000000000..dc5407bef
--- /dev/null
+++ b/lib/libzfs/libzfs_pool.c
@@ -0,0 +1,3062 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <alloca.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <devid.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <zone.h>
+#include <sys/efi_partition.h>
+#include <sys/vtoc.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio.h>
+#include <strings.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+/* Forward declaration; pool_uses_efi() calls this before it is defined. */
+static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
+
+/*
+ * Name (including man page section) of the boot loader installation command
+ * for the architecture this file is compiled for.
+ */
+#if defined(__i386) || defined(__amd64)
+#define	BOOTCMD	"installgrub(1M)"
+#else
+#define	BOOTCMD	"installboot(1M)"
+#endif
+
+/*
+ * ====================================================================
+ *   zpool property functions
+ * ====================================================================
+ */
+
+/*
+ * Issue ZFS_IOC_POOL_GET_PROPS for the pool and unpack the reply into
+ * zhp->zpool_props.  Returns 0 on success and -1 on any failure.
+ */
+static int
+zpool_get_all_props(zpool_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	int err = -1;
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	/*
+	 * Keep retrying for as long as the ioctl fails with ENOMEM and the
+	 * destination buffer can still be expanded; any other failure ends
+	 * the attempt.
+	 */
+	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
+		if (errno != ENOMEM || zcmd_expand_dst_nvlist(hdl, &zc) != 0)
+			goto done;
+	}
+
+	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) == 0)
+		err = 0;
+
+done:
+	zcmd_free_nvlists(&zc);
+
+	return (err);
+}
+
+/*
+ * Re-read the pool's properties.  The previous nvlist is released only after
+ * the new one has been fetched successfully; on failure (-1) it is not
+ * freed.
+ */
+static int
+zpool_props_refresh(zpool_handle_t *zhp)
+{
+	nvlist_t *stale = zhp->zpool_props;
+
+	if (zpool_get_all_props(zhp) == 0) {
+		nvlist_free(stale);
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Look up a string-valued pool property in zhp->zpool_props.  If the
+ * property is not present, the property's default string is returned
+ * instead, or "-" when there is no default.  When 'src' is non-NULL it
+ * receives the source of the value (ZPROP_SRC_DEFAULT for the fallback).
+ */
+static char *
+zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
+    zprop_source_t *src)
+{
+	nvlist_t *propnv;
+	zprop_source_t origin;
+	uint64_t rawsrc;
+	char *str;
+
+	if (nvlist_lookup_nvlist(zhp->zpool_props, zpool_prop_to_name(prop),
+	    &propnv) != 0) {
+		origin = ZPROP_SRC_DEFAULT;
+		str = (char *)zpool_prop_default_string(prop);
+		if (str == NULL)
+			str = "-";
+	} else {
+		verify(nvlist_lookup_uint64(propnv, ZPROP_SOURCE,
+		    &rawsrc) == 0);
+		origin = rawsrc;
+		verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &str) == 0);
+	}
+
+	if (src != NULL)
+		*src = origin;
+
+	return (str);
+}
+
+/*
+ * Look up a numeric pool property.  The properties are fetched first if the
+ * handle does not have them yet.  When 'src' is non-NULL it receives the
+ * source of the value, except on the early-return path taken when the
+ * properties cannot be fetched, which leaves '*src' untouched.
+ */
+uint64_t
+zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
+{
+	nvlist_t *nv;
+	uint64_t val;
+	zprop_source_t origin;
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) != 0) {
+		/*
+		 * zpool_get_all_props() has most likely failed because
+		 * the pool is faulted, but if all we need is the top level
+		 * vdev's guid then get it from the zhp config nvlist.
+		 */
+		if (prop != ZPOOL_PROP_GUID)
+			return (zpool_prop_default_numeric(prop));
+
+		if (nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nv) != 0)
+			return (zpool_prop_default_numeric(prop));
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &val) != 0)
+			return (zpool_prop_default_numeric(prop));
+
+		return (val);
+	}
+
+	if (nvlist_lookup_nvlist(zhp->zpool_props, zpool_prop_to_name(prop),
+	    &nv) != 0) {
+		origin = ZPROP_SRC_DEFAULT;
+		val = zpool_prop_default_numeric(prop);
+	} else {
+		/* 'val' briefly holds the source before the real value. */
+		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &val) == 0);
+		origin = val;
+		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &val) == 0);
+	}
+
+	if (src != NULL)
+		*src = origin;
+
+	return (val);
+}
+
+/*
+ * Translate a vdev state into the (localized) string shown to the user.
+ * 'aux' is only consulted for VDEV_STATE_CANT_OPEN, where it decides between
+ * "FAULTED" and "UNAVAIL".  States not listed here yield "UNKNOWN".
+ */
+char *
+zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
+{
+	if (state == VDEV_STATE_CLOSED || state == VDEV_STATE_OFFLINE)
+		return (gettext("OFFLINE"));
+
+	if (state == VDEV_STATE_REMOVED)
+		return (gettext("REMOVED"));
+
+	if (state == VDEV_STATE_CANT_OPEN) {
+		if (aux != VDEV_AUX_CORRUPT_DATA && aux != VDEV_AUX_BAD_LOG)
+			return (gettext("UNAVAIL"));
+		return (gettext("FAULTED"));
+	}
+
+	if (state == VDEV_STATE_FAULTED)
+		return (gettext("FAULTED"));
+
+	if (state == VDEV_STATE_DEGRADED)
+		return (gettext("DEGRADED"));
+
+	if (state == VDEV_STATE_HEALTHY)
+		return (gettext("ONLINE"));
+
+	return (gettext("UNKNOWN"));
+}
+
+/*
+ * Get a zpool property value for 'prop' and return the value in
+ * a pre-allocated buffer.
+ *
+ *	zhp	- pool handle to query
+ *	prop	- property to format
+ *	buf,len	- caller-supplied output buffer and its size
+ *	srctype	- if non-NULL, receives the source of the value
+ *
+ * Returns 0 on success.  Returns -1 if the pool's properties cannot be
+ * fetched (for any property other than the name) or if an index property
+ * has no string form for its current value.
+ */
+int
+zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
+    zprop_source_t *srctype)
+{
+	uint64_t intval;
+	const char *strval;
+	zprop_source_t src = ZPROP_SRC_NONE;
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	/*
+	 * An unavailable pool only has a name and a health; everything else
+	 * is reported as "-".
+	 */
+	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+		if (prop == ZPOOL_PROP_NAME)
+			(void) strlcpy(buf, zpool_get_name(zhp), len);
+		else if (prop == ZPOOL_PROP_HEALTH)
+			(void) strlcpy(buf, "FAULTED", len);
+		else
+			(void) strlcpy(buf, "-", len);
+
+		/*
+		 * Report a source on this path too, so that a caller which
+		 * passed an uninitialized 'srctype' does not read garbage
+		 * after a successful return.
+		 */
+		if (srctype)
+			*srctype = src;
+		return (0);
+	}
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
+	    prop != ZPOOL_PROP_NAME)
+		return (-1);
+
+	switch (zpool_prop_get_type(prop)) {
+	case PROP_TYPE_STRING:
+		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
+		    len);
+		break;
+
+	case PROP_TYPE_NUMBER:
+		intval = zpool_get_prop_int(zhp, prop, &src);
+
+		switch (prop) {
+		case ZPOOL_PROP_SIZE:
+		case ZPOOL_PROP_USED:
+		case ZPOOL_PROP_AVAILABLE:
+			(void) zfs_nicenum(intval, buf, len);
+			break;
+
+		case ZPOOL_PROP_CAPACITY:
+			(void) snprintf(buf, len, "%llu%%",
+			    (u_longlong_t)intval);
+			break;
+
+		case ZPOOL_PROP_HEALTH:
+			/*
+			 * The health string also depends on the auxiliary
+			 * state kept in the root vdev's stats.
+			 */
+			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+			verify(nvlist_lookup_uint64_array(nvroot,
+			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+
+			(void) strlcpy(buf, zpool_state_to_name(intval,
+			    vs->vs_aux), len);
+			break;
+		default:
+			/*
+			 * Cast to match "%llu", exactly as the capacity case
+			 * above does; uint64_t is not 'unsigned long long'
+			 * on every platform.
+			 */
+			(void) snprintf(buf, len, "%llu",
+			    (u_longlong_t)intval);
+			break;
+		}
+		break;
+
+	case PROP_TYPE_INDEX:
+		intval = zpool_get_prop_int(zhp, prop, &src);
+		if (zpool_prop_index_to_string(prop, intval, &strval)
+		    != 0)
+			return (-1);
+		(void) strlcpy(buf, strval, len);
+		break;
+
+	default:
+		abort();
+	}
+
+	if (srctype)
+		*srctype = src;
+
+	return (0);
+}
+
+/*
+ * Decide whether 'bootfs' is acceptable as the bootfs of pool 'pool': it
+ * must pass zfs_name_valid() as a filesystem or snapshot name, and it must
+ * be either the pool itself or a name that continues with '/' right after
+ * the pool name.
+ */
+static boolean_t
+bootfs_name_valid(const char *pool, char *bootfs)
+{
+	int poollen = strlen(pool);
+	char next;
+
+	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
+		return (B_FALSE);
+
+	if (strncmp(pool, bootfs, poollen) != 0)
+		return (B_FALSE);
+
+	/* The character right after the pool-name prefix decides. */
+	next = bootfs[poollen];
+	if (next != '/' && next != '\0')
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Walk a vdev configuration recursively and report whether any device in
+ * it carries an EFI label.  A node without a ZPOOL_CONFIG_CHILDREN array is
+ * checked directly with read_efi_label(); any other node is true as soon as
+ * one of its children is.
+ */
+static boolean_t
+pool_uses_efi(nvlist_t *config)
+{
+	nvlist_t **kids;
+	uint_t nkids;
+	uint_t i = 0;
+
+	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (read_efi_label(config, NULL) >= 0);
+
+	while (i < nkids) {
+		if (pool_uses_efi(kids[i]))
+			return (B_TRUE);
+		i++;
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * A pool counts as bootable when its bootfs property can be read and is
+ * something other than "-".
+ */
+static boolean_t
+pool_is_bootable(zpool_handle_t *zhp)
+{
+	char bootfs[ZPOOL_MAXNAMELEN];
+
+	if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
+	    sizeof (bootfs), NULL) != 0)
+		return (B_FALSE);
+
+	return (strncmp(bootfs, "-", sizeof (bootfs)) != 0);
+}
+
+
+/*
+ * Given an nvlist of zpool properties to be set, validate that they are
+ * correct, and parse any numeric properties (index, boolean, etc) if they are
+ * specified as strings.
+ *
+ *	hdl		- library handle; receives error details
+ *	poolname	- name of the pool the properties are meant for
+ *	props		- properties requested by the caller
+ *	version		- pool version the request is checked against
+ *	create_or_import - B_TRUE when called for pool creation or import
+ *	errbuf		- message prefix passed on to zfs_error()
+ *
+ * Returns a newly allocated nvlist with the parsed properties, which the
+ * caller must free, or NULL after recording an error on 'hdl'.
+ */
+static nvlist_t *
+zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
+    nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
+{
+	nvpair_t *elem;
+	nvlist_t *retprops;
+	zpool_prop_t prop;
+	char *strval;
+	uint64_t intval;
+	char *slash;
+	struct stat64 statbuf;
+	zpool_handle_t *zhp;
+	nvlist_t *nvroot;
+
+	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+		const char *propname = nvpair_name(elem);
+
+		/*
+		 * Make sure this property is valid and applies to this type.
+		 */
+		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property '%s'"), propname);
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+			goto error;
+		}
+
+		if (zpool_prop_readonly(prop)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
+			    "is readonly"), propname);
+			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
+			goto error;
+		}
+
+		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
+		    &strval, &intval, errbuf) != 0)
+			goto error;
+
+		/*
+		 * Perform additional checking for specific properties.
+		 */
+		switch (prop) {
+		case ZPOOL_PROP_VERSION:
+			if (intval < version || intval > SPA_VERSION) {
+				/*
+				 * intval is 64 bits wide, so it has to be
+				 * printed with a matching conversion; "%d"
+				 * would read the wrong amount of argument
+				 * data.
+				 */
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' number %llu is invalid."),
+				    propname, (u_longlong_t)intval);
+				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZPOOL_PROP_BOOTFS:
+			if (create_or_import) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' cannot be set at creation "
+				    "or import time"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (version < SPA_VERSION_BOOTFS) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool must be upgraded to support "
+				    "'%s' property"), propname);
+				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+				goto error;
+			}
+
+			/*
+			 * bootfs property value has to be a dataset name and
+			 * the dataset has to be in the same pool as it sets to.
+			 */
+			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
+			    strval)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
+				    "is an invalid name"), strval);
+				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+				goto error;
+			}
+
+			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "could not open pool '%s'"), poolname);
+				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
+				goto error;
+			}
+			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+
+			/*
+			 * bootfs property cannot be set on a disk which has
+			 * been EFI labeled.
+			 */
+			if (pool_uses_efi(nvroot)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' not supported on "
+				    "EFI labeled devices"), propname);
+				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
+				zpool_close(zhp);
+				goto error;
+			}
+			zpool_close(zhp);
+			break;
+
+		case ZPOOL_PROP_ALTROOT:
+			if (!create_or_import) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' can only be set during pool "
+				    "creation or import"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (strval[0] != '/') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "bad alternate root '%s'"), strval);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				goto error;
+			}
+			break;
+
+		case ZPOOL_PROP_CACHEFILE:
+			if (strval[0] == '\0')
+				break;
+
+			if (strcmp(strval, "none") == 0)
+				break;
+
+			if (strval[0] != '/') {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' must be empty, an "
+				    "absolute path, or 'none'"), propname);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				goto error;
+			}
+
+			/*
+			 * strval starts with '/', so strrchr() always finds
+			 * a slash here.
+			 */
+			slash = strrchr(strval, '/');
+
+			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
+			    strcmp(slash, "/..") == 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' is not a valid file"), strval);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				goto error;
+			}
+
+			/*
+			 * Temporarily cut the string at the last slash so
+			 * that it names the containing directory.
+			 */
+			*slash = '\0';
+
+			if (strval[0] != '\0' &&
+			    (stat64(strval, &statbuf) != 0 ||
+			    !S_ISDIR(statbuf.st_mode))) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' is not a valid directory"),
+				    strval);
+				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
+				/*
+				 * Put the separator back before bailing out,
+				 * so the string is left exactly as we found
+				 * it on the failure path as well.
+				 */
+				*slash = '/';
+				goto error;
+			}
+
+			*slash = '/';
+			break;
+		}
+	}
+
+	return (retprops);
+error:
+	nvlist_free(retprops);
+	return (NULL);
+}
+
+/*
+ * Set a single pool property, given as the pair propname=propval.  The
+ * request is checked with zpool_valid_proplist() and then handed to the
+ * ZFS_IOC_POOL_SET_PROPS ioctl; on success the cached properties of the
+ * handle are refreshed.  Returns 0 on success and non-zero on failure.
+ */
+int
+zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	char errbuf[1024];
+	nvlist_t *request = NULL;
+	nvlist_t *checked;
+	uint64_t version;
+	int err;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
+	    zhp->zpool_name);
+
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) != 0)
+		return (zfs_error(hdl, EZFS_POOLPROPS, errbuf));
+
+	/*
+	 * Wrap the single name/value pair in an nvlist so that it can go
+	 * through the common validation routine.
+	 */
+	if (nvlist_alloc(&request, NV_UNIQUE_NAME, 0) != 0)
+		return (no_memory(hdl));
+
+	if (nvlist_add_string(request, propname, propval) != 0) {
+		nvlist_free(request);
+		return (no_memory(hdl));
+	}
+
+	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
+	checked = zpool_valid_proplist(hdl, zhp->zpool_name, request,
+	    version, B_FALSE, errbuf);
+
+	/* The raw request is no longer needed, whatever the outcome. */
+	nvlist_free(request);
+	if (checked == NULL)
+		return (-1);
+
+	/*
+	 * Execute the corresponding ioctl() to set this property.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zcmd_write_src_nvlist(hdl, &zc, checked) != 0) {
+		nvlist_free(checked);
+		return (-1);
+	}
+
+	err = zfs_ioctl(hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
+
+	zcmd_free_nvlists(&zc);
+	nvlist_free(checked);
+
+	if (err == 0)
+		(void) zpool_props_refresh(zhp);
+	else
+		(void) zpool_standard_error(hdl, errno, errbuf);
+
+	return (err);
+}
+
+/*
+ * Expand the property list with zprop_expand_list() and then widen every
+ * non-fixed column so that it is at least as wide as this pool's current
+ * value for that property.  Returns 0 on success, -1 if the expansion fails.
+ */
+int
+zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char buf[ZFS_MAXPROPLEN];
+	zprop_list_t *cur;
+
+	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
+		return (-1);
+
+	for (cur = *plp; cur != NULL; cur = cur->pl_next) {
+		size_t need;
+
+		if (cur->pl_fixed || cur->pl_prop == ZPROP_INVAL)
+			continue;
+
+		if (zpool_get_prop(zhp, cur->pl_prop, buf, sizeof (buf),
+		    NULL) != 0)
+			continue;
+
+		need = strlen(buf);
+		if (need > cur->pl_width)
+			cur->pl_width = need;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Validate the given pool name.  When 'hdl' is non-NULL, the reason for a
+ * rejection is recorded on it with zfs_error_aux().  'isopen' is B_TRUE
+ * when the name refers to an existing pool being opened, which skips the
+ * extended reserved-name check.  Returns B_TRUE if the name is acceptable.
+ */
+boolean_t
+zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
+{
+	namecheck_err_t why;
+	char what;
+
+	if (pool_namecheck(pool, &why, &what) == 0) {
+		/*
+		 * The rules for reserved pool names were extended at a later
+		 * point.  But we need to support users with existing pools
+		 * that may now be invalid.  So we only check for this
+		 * expanded set of names during a create (or import), and
+		 * only in userland.
+		 */
+		if (!isopen &&
+		    (strncmp(pool, "mirror", 6) == 0 ||
+		    strncmp(pool, "raidz", 5) == 0 ||
+		    strncmp(pool, "spare", 5) == 0 ||
+		    strcmp(pool, "log") == 0)) {
+			if (hdl != NULL)
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "name is reserved"));
+			return (B_FALSE);
+		}
+
+		return (B_TRUE);
+	}
+
+	/*
+	 * The basic name check failed.  Without a handle there is nowhere
+	 * to put the explanation.
+	 */
+	if (hdl == NULL)
+		return (B_FALSE);
+
+	switch (why) {
+	case NAME_ERR_TOOLONG:
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "name is too long"));
+		break;
+
+	case NAME_ERR_INVALCHAR:
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "invalid character "
+		    "'%c' in pool name"), what);
+		break;
+
+	case NAME_ERR_NOLETTER:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "name must begin with a letter"));
+		break;
+
+	case NAME_ERR_RESERVED:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "name is reserved"));
+		break;
+
+	case NAME_ERR_DISKLIKE:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool name is reserved"));
+		break;
+
+	case NAME_ERR_LEADING_SLASH:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "leading slash in name"));
+		break;
+
+	case NAME_ERR_EMPTY_COMPONENT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "empty component in name"));
+		break;
+
+	case NAME_ERR_TRAILING_SLASH:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "trailing slash in name"));
+		break;
+
+	case NAME_ERR_MULTIPLE_AT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "multiple '@' delimiters in name"));
+		break;
+
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Open a handle to the given pool, even if the pool is currently in the
+ * FAULTED state.  Returns NULL, with an error recorded on 'hdl', when the
+ * name is invalid, the handle cannot be allocated, the pool's stats cannot
+ * be read, or the pool does not exist.
+ */
+zpool_handle_t *
+zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
+{
+	zpool_handle_t *zhp;
+	boolean_t gone;
+
+	/*
+	 * Reject syntactically invalid names up front.
+	 */
+	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
+		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+		    pool);
+		return (NULL);
+	}
+
+	zhp = zfs_alloc(hdl, sizeof (zpool_handle_t));
+	if (zhp == NULL)
+		return (NULL);
+
+	zhp->zpool_hdl = hdl;
+	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
+
+	if (zpool_refresh_stats(zhp, &gone) == 0) {
+		if (!gone)
+			return (zhp);
+
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
+		(void) zfs_error_fmt(hdl, EZFS_NOENT,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
+	}
+
+	/* Either the refresh failed or the pool is missing. */
+	zpool_close(zhp);
+	return (NULL);
+}
+
+/*
+ * Like zpool_open_canfail(), but without name validation and without an
+ * error being reported for a missing pool.  Used when iterating over pools
+ * (because the configuration cache may be out of date).
+ *
+ * Returns -1 if the handle cannot be allocated or the stats cannot be read;
+ * '*ret' is not written in that case.  Otherwise returns 0 with '*ret' set
+ * to the new handle, or to NULL when the pool no longer exists.
+ */
+int
+zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
+{
+	zpool_handle_t *zhp;
+	boolean_t gone;
+
+	zhp = zfs_alloc(hdl, sizeof (zpool_handle_t));
+	if (zhp == NULL)
+		return (-1);
+
+	zhp->zpool_hdl = hdl;
+	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
+
+	if (zpool_refresh_stats(zhp, &gone) != 0) {
+		zpool_close(zhp);
+		return (-1);
+	}
+
+	if (gone) {
+		zpool_close(zhp);
+		zhp = NULL;
+	}
+
+	*ret = zhp;
+	return (0);
+}
+
+/*
+ * Similar to zpool_open_canfail(), but a pool in state POOL_STATE_UNAVAIL is
+ * refused: EZFS_POOLUNAVAIL is reported and NULL is returned.
+ */
+zpool_handle_t *
+zpool_open(libzfs_handle_t *hdl, const char *pool)
+{
+	zpool_handle_t *zhp = zpool_open_canfail(hdl, pool);
+
+	if (zhp != NULL && zhp->zpool_state == POOL_STATE_UNAVAIL) {
+		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
+		zpool_close(zhp);
+		zhp = NULL;
+	}
+
+	return (zhp);
+}
+
+/*
+ * Close the handle: free whichever of the config, old config and property
+ * nvlists are present, then free the handle itself.
+ */
+void
+zpool_close(zpool_handle_t *zhp)
+{
+	if (zhp->zpool_config != NULL) {
+		nvlist_free(zhp->zpool_config);
+	}
+
+	if (zhp->zpool_old_config != NULL) {
+		nvlist_free(zhp->zpool_old_config);
+	}
+
+	if (zhp->zpool_props != NULL) {
+		nvlist_free(zhp->zpool_props);
+	}
+
+	free(zhp);
+}
+
+/*
+ * Accessor for the pool name.  The returned pointer refers to storage
+ * inside the handle itself.
+ */
+const char *
+zpool_get_name(zpool_handle_t *zhp)
+{
+	const char *name = zhp->zpool_name;
+
+	return (name);
+}
+
+
+/*
+ * Accessor for the pool state recorded in the handle (ACTIVE or
+ * UNAVAILABLE).
+ */
+int
+zpool_get_state(zpool_handle_t *zhp)
+{
+	int state = zhp->zpool_state;
+
+	return (state);
+}
+
+/*
+ * Create the named pool, using the provided vdev list.  It is assumed
+ * that the consumer has already validated the contents of the nvlist, so we
+ * don't have to worry about error semantics.
+ *
+ *	hdl	- library handle; receives error details
+ *	pool	- name of the pool to create
+ *	nvroot	- vdev configuration for the new pool
+ *	props	- optional pool properties (checked for NULL below)
+ *	fsprops	- optional properties for the pool's root filesystem
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
+    nvlist_t *props, nvlist_t *fsprops)
+{
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *zc_fsprops = NULL;
+	nvlist_t *zc_props = NULL;
+	char msg[1024];
+	char *altroot;
+	int ret = -1;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), pool);
+
+	/* B_FALSE: this is a create, so the extended reserved names apply. */
+	if (!zpool_name_valid(hdl, B_FALSE, pool))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
+		return (-1);
+
+	/* From here on, failures leave through create_failed for cleanup. */
+	if (props) {
+		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
+		    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
+			goto create_failed;
+		}
+	}
+
+	if (fsprops) {
+		uint64_t zoned;
+		char *zonestr;
+
+		/* Treat the request as zoned only if it says zoned=on. */
+		zoned = ((nvlist_lookup_string(fsprops,
+		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
+		    strcmp(zonestr, "on") == 0);
+
+		if ((zc_fsprops = zfs_valid_proplist(hdl,
+		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
+			goto create_failed;
+		}
+		/*
+		 * The filesystem properties travel nested inside the pool
+		 * properties, so make sure a pool property list exists.
+		 */
+		if (!zc_props &&
+		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
+			goto create_failed;
+		}
+		if (nvlist_add_nvlist(zc_props,
+		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
+			goto create_failed;
+		}
+	}
+
+	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
+		goto create_failed;
+
+	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+
+	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
+
+		/*
+		 * This failure path cleans up by itself and returns directly
+		 * instead of going through create_failed.
+		 * NOTE(review): errno is examined only after these cleanup
+		 * calls -- confirm that none of them can modify it.
+		 */
+		zcmd_free_nvlists(&zc);
+		nvlist_free(zc_props);
+		nvlist_free(zc_fsprops);
+
+		switch (errno) {
+		case EBUSY:
+			/*
+			 * This can happen if the user has specified the same
+			 * device multiple times.  We can't reliably detect this
+			 * until we try to add it and see we already have a
+			 * label.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case EOVERFLOW:
+			/*
+			 * This occurs when one of the devices is below
+			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
+			 * device was the problem device since there's no
+			 * reliable way to determine device size from userland.
+			 */
+			{
+				char buf[64];
+
+				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "one or more devices is less than the "
+				    "minimum size (%s)"), buf);
+			}
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case ENOSPC:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is out of space"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		case ENOTBLK:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cache device must be a disk or disk slice"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
+		default:
+			return (zpool_standard_error(hdl, errno, msg));
+		}
+	}
+
+	/*
+	 * If this is an alternate root pool, then we automatically set the
+	 * mountpoint of the root dataset to be '/'.
+	 * NOTE(review): 'props' is NULL-checked above but passed unguarded
+	 * here; presumably nvlist_lookup_string() rejects a NULL list --
+	 * confirm.
+	 */
+	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
+	    &altroot) == 0) {
+		zfs_handle_t *zhp;
+
+		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
+		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
+		    "/") == 0);
+
+		zfs_close(zhp);
+	}
+
+	/* Reached on success (ret == 0) and by the early failures above. */
+create_failed:
+	zcmd_free_nvlists(&zc);
+	nvlist_free(zc_props);
+	nvlist_free(zc_fsprops);
+	return (ret);
+}
+
+/*
+ * Destroy the given pool.  It is up to the caller to ensure that there are no
+ * datasets left in the pool.
+ *
+ * For an active pool the root filesystem is opened first, so that its
+ * mountpoint can be removed once the pool is gone.  Returns 0 on success
+ * and -1 on failure.
+ */
+int
+zpool_destroy(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *zfp = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+
+	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
+	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
+		return (-1);
+
+	if (zpool_remove_zvol_links(zhp) != 0) {
+		/*
+		 * Don't leak the root filesystem handle opened above when
+		 * bailing out early.
+		 */
+		if (zfp)
+			zfs_close(zfp);
+		return (-1);
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot destroy '%s'"), zhp->zpool_name);
+
+		if (errno == EROFS) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is read only"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		} else {
+			(void) zpool_standard_error(hdl, errno, msg);
+		}
+
+		if (zfp)
+			zfs_close(zfp);
+		return (-1);
+	}
+
+	/* zfp is NULL when the pool was not active to begin with. */
+	if (zfp) {
+		remove_mountpoint(zfp);
+		zfs_close(zfp);
+	}
+
+	return (0);
+}
+
+/*
+ * Add the given vdevs to the pool.  The caller must have already performed the
+ * necessary verification to ensure that the vdev specification is well-formed.
+ *
+ *	zhp	- handle of the pool to extend
+ *	nvroot	- configuration describing the vdevs to add
+ *
+ * Returns 0 on success.  On failure an error is recorded on the library
+ * handle and a non-zero value is returned.
+ */
+int
+zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
+{
+	zfs_cmd_t zc = { 0 };
+	int ret;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot add to '%s'"), zhp->zpool_name);
+
+	/* Hot spares need a pool version that knows about them. */
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_SPARES &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add hot spares"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
+	/* On a bootable pool, refuse spares that carry an EFI label. */
+	if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
+	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
+		uint64_t s;
+
+		for (s = 0; s < nspares; s++) {
+			char *path;
+
+			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
+			    &path) == 0 && pool_uses_efi(spares[s])) {
+				/*
+				 * zpool_vdev_name() hands back an allocated
+				 * string that the caller owns, so release it
+				 * once the message has been formatted.
+				 */
+				char *name = zpool_vdev_name(hdl, NULL,
+				    spares[s]);
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "device '%s' contains an EFI label and "
+				    "cannot be used on root pools."),
+				    name);
+				free(name);
+				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+			}
+		}
+	}
+
+	/* Likewise, cache devices need a new enough pool version. */
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_L2CACHE &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+	    &l2cache, &nl2cache) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add cache devices"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
+		return (-1);
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
+		switch (errno) {
+		case EBUSY:
+			/*
+			 * This can happen if the user has specified the same
+			 * device multiple times.  We can't reliably detect this
+			 * until we try to add it and see we already have a
+			 * label.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case EOVERFLOW:
+			/*
+			 * This occurs when one of the devices is below
+			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
+			 * device was the problem device since there's no
+			 * reliable way to determine device size from userland.
+			 */
+			{
+				char buf[64];
+
+				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
+
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "device is less than the minimum "
+				    "size (%s)"), buf);
+			}
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to add these vdevs"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
+			break;
+
+		case EDOM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "root pool can not have multiple vdevs"
+			    " or separate logs"));
+			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
+			break;
+
+		case ENOTBLK:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cache device must be a disk or disk slice"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		default:
+			(void) zpool_standard_error(hdl, errno, msg);
+		}
+
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+
+	zcmd_free_nvlists(&zc);
+
+	return (ret);
+}
+
+/*
+ * Exports the pool from the system.  The caller must ensure that there are no
+ * mounted datasets in the pool.
+ *
+ * The zvol links under /dev are removed first; if that fails, -1 is returned
+ * and no export is attempted.  'force' is passed to the kernel in zc_cookie.
+ * Returns 0 on success, otherwise the result of the libzfs error routine.
+ */
+int
+zpool_export(zpool_handle_t *zhp, boolean_t force)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+
+	if (zpool_remove_zvol_links(zhp) != 0)
+		return (-1);
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot export '%s'"), zhp->zpool_name);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_cookie = force;
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
+		switch (errno) {
+		case EXDEV:
+			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+			    "use '-f' to override the following errors:\n"
+			    "'%s' has an active shared spare which could be"
+			    " used by other pools once '%s' is exported."),
+			    zhp->zpool_name, zhp->zpool_name);
+			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
+			    msg));
+		default:
+			/*
+			 * 'msg' is already fully formatted, so it is passed
+			 * as a plain message (as every other routine in this
+			 * file does) instead of being reinterpreted as a
+			 * printf-style format string.
+			 */
+			return (zpool_standard_error(zhp->zpool_hdl, errno,
+			    msg));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * zpool_import() is a contracted interface and should keep its signature
+ * whenever possible.
+ *
+ * It is a thin wrapper around zpool_import_props(): when 'altroot' is given,
+ * a one-entry property list carrying the altroot is built and passed along;
+ * otherwise no properties are passed.  Applications that need to set other
+ * properties at import time should call zpool_import_props() directly.
+ */
+int
+zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+    char *altroot)
+{
+	nvlist_t *props = NULL;
+	int ret;
+
+	if (altroot != NULL) {
+		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+
+		if (nvlist_add_string(props,
+		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
+			nvlist_free(props);
+			goto nomem;
+		}
+	}
+
+	ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
+	if (props != NULL)
+		nvlist_free(props);
+	return (ret);
+
+nomem:
+	/* Shared exit for both property-list construction failures. */
+	return (zfs_error_fmt(hdl, EZFS_NOMEM,
+	    dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname));
+}
+
+/*
+ * Import the given pool using the known configuration and a list of
+ * properties to be set. The configuration should have come from
+ * zpool_find_import(). The 'newname' parameters control whether the pool
+ * is imported with a different name.
+ *
+ * 'importfaulted' is handed to the kernel in zc_cookie.  After a successful
+ * import the pool is reopened so that its zvol links can be created.
+ * Returns 0 on success and -1 on failure, except for an invalid 'newname',
+ * where the result of zfs_error_fmt() is returned.
+ */
+int
+zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+    nvlist_t *props, boolean_t importfaulted)
+{
+	zfs_cmd_t zc = { 0 };
+	char *thename;
+	char *origname;
+	int ret;
+	char errbuf[1024];
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &origname) == 0);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot import pool '%s'"), origname);
+
+	if (newname != NULL) {
+		if (!zpool_name_valid(hdl, B_FALSE, newname))
+			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    newname));
+		thename = (char *)newname;
+	} else {
+		thename = origname;
+	}
+
+	if (props) {
+		uint64_t version;
+
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+		    &version) == 0);
+
+		/*
+		 * From here on 'props' refers to the list returned by
+		 * zpool_valid_proplist(), not the caller's list; that
+		 * returned list is what gets freed below.
+		 */
+		if ((props = zpool_valid_proplist(hdl, origname,
+		    props, version, B_TRUE, errbuf)) == NULL) {
+			return (-1);
+		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
+			nvlist_free(props);
+			return (-1);
+		}
+	}
+
+	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
+		/*
+		 * The property buffer may already have been written into
+		 * 'zc' above; release it along with the property list.
+		 */
+		zcmd_free_nvlists(&zc);
+		nvlist_free(props);
+		return (-1);
+	}
+
+	zc.zc_cookie = (uint64_t)importfaulted;
+	ret = 0;
+	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+		char desc[1024];
+		if (newname == NULL)
+			(void) snprintf(desc, sizeof (desc),
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    thename);
+		else
+			(void) snprintf(desc, sizeof (desc),
+			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
+			    origname, thename);
+
+		switch (errno) {
+		case ENOTSUP:
+			/*
+			 * Unsupported version.
+			 */
+			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
+			break;
+
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
+			break;
+
+		default:
+			(void) zpool_standard_error(hdl, errno, desc);
+		}
+
+		ret = -1;
+	} else {
+		zpool_handle_t *zhp;
+
+		/*
+		 * This should never fail, but play it safe anyway.
+		 */
+		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
+			ret = -1;
+		} else if (zhp != NULL) {
+			ret = zpool_create_zvol_links(zhp);
+			zpool_close(zhp);
+		}
+
+	}
+
+	zcmd_free_nvlists(&zc);
+	nvlist_free(props);
+
+	return (ret);
+}
+
+/*
+ * Ask the kernel to scrub the pool; 'type' is passed through in zc_cookie.
+ * Returns 0 on success.  EBUSY is reported as EZFS_RESILVERING, anything
+ * else goes through zpool_standard_error().
+ */
+int
+zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+
+	zc.zc_cookie = type;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCRUB, &zc) != 0) {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+
+		return (errno == EBUSY ?
+		    zfs_error(hdl, EZFS_RESILVERING, msg) :
+		    zpool_standard_error(hdl, errno, msg));
+	}
+
+	return (0);
+}
+
+/*
+ * Recursively search the vdev tree rooted at 'nv' for a matching vdev and
+ * return its nvlist, or NULL if nothing matches.  With a NULL 'search' the
+ * match is made by 'guid' (and only against vdevs that carry
+ * ZPOOL_CONFIG_NOT_PRESENT); otherwise it is made by device path.
+ *
+ * 'avail_spare' is set to TRUE if the match refers to an AVAIL spare (it was
+ * found through a spares list); it is left untouched if it's an INUSE spare.
+ * 'l2cache' is set to TRUE when the match was found through an l2cache list.
+ * 'log', when non-NULL, is set to TRUE if the child subtree holding the match
+ * is flagged as a log device.
+ */
+static nvlist_t *
+vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
+    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+{
+	nvlist_t **kids;
+	nvlist_t *match;
+	uint_t i, nkids;
+	uint64_t nvguid, absent, islog;
+	uint64_t whole = 0;
+	char *devpath;
+
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &nvguid) == 0);
+
+	if (search == NULL) {
+		/*
+		 * If the device has never been present since import, the only
+		 * reliable way to match the vdev is by GUID.
+		 */
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+		    &absent) == 0 && nvguid == guid)
+			return (nv);
+	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH,
+	    &devpath) == 0) {
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &whole);
+		if (!whole) {
+			if (strcmp(search, devpath) == 0)
+				return (nv);
+		} else if (strlen(search) == strlen(devpath) - 2 &&
+		    strncmp(search, devpath, strlen(search)) == 0) {
+			/*
+			 * For whole disks, the internal path has 's0', but the
+			 * path passed in by the user doesn't.
+			 */
+			return (nv);
+		}
+	}
+
+	/* A vdev without a children array ends the search at this node. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (NULL);
+
+	for (i = 0; i < nkids; i++) {
+		match = vdev_to_nvlist_iter(kids[i], search, guid,
+		    avail_spare, l2cache, NULL);
+		if (match == NULL)
+			continue;
+
+		/*
+		 * The 'is_log' value is only set for the toplevel vdev, not
+		 * the leaf vdevs.  So we always lookup the log device from
+		 * the root of the vdev tree (where 'log' is non-NULL).
+		 */
+		if (log != NULL &&
+		    nvlist_lookup_uint64(kids[i], ZPOOL_CONFIG_IS_LOG,
+		    &islog) == 0 && islog)
+			*log = B_TRUE;
+		return (match);
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &kids, &nkids) == 0) {
+		for (i = 0; i < nkids; i++) {
+			match = vdev_to_nvlist_iter(kids[i], search, guid,
+			    avail_spare, l2cache, NULL);
+			if (match == NULL)
+				continue;
+			*avail_spare = B_TRUE;
+			return (match);
+		}
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &kids, &nkids) == 0) {
+		for (i = 0; i < nkids; i++) {
+			match = vdev_to_nvlist_iter(kids[i], search, guid,
+			    avail_spare, l2cache, NULL);
+			if (match == NULL)
+				continue;
+			*l2cache = B_TRUE;
+			return (match);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Locate a vdev in the pool's config by name.  'path' is interpreted as a
+ * GUID when it parses completely as a non-zero decimal number, as an absolute
+ * device path when it starts with '/', and otherwise as a name relative to
+ * /dev/dsk/.  The out-flags are reset to B_FALSE before the search ('log'
+ * only when non-NULL).  Returns the vdev's nvlist or NULL.
+ */
+nvlist_t *
+zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
+    boolean_t *l2cache, boolean_t *log)
+{
+	char fullpath[MAXPATHLEN];
+	const char *search;
+	char *endp;
+	nvlist_t *vdev_tree;
+	uint64_t guid;
+
+	guid = strtoull(path, &endp, 10);
+	if (guid != 0 && *endp == '\0') {
+		/* Pure GUID: the tree walk matches on 'guid' instead. */
+		search = NULL;
+	} else if (path[0] == '/') {
+		search = path;
+	} else {
+		(void) snprintf(fullpath, sizeof (fullpath), "%s%s",
+		    "/dev/dsk/", path);
+		search = fullpath;
+	}
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &vdev_tree) == 0);
+
+	if (log != NULL)
+		*log = B_FALSE;
+	*l2cache = B_FALSE;
+	*avail_spare = B_FALSE;
+
+	return (vdev_to_nvlist_iter(vdev_tree, search, guid, avail_spare,
+	    l2cache, log));
+}
+
+/*
+ * Returns 0 if the vdev's config carries any of the offline, faulted or
+ * removed entries, and 1 otherwise.  Only the presence of an entry is
+ * tested; its value is ignored.
+ */
+static int
+vdev_online(nvlist_t *nv)
+{
+	const char *const down_keys[] = {
+		ZPOOL_CONFIG_OFFLINE,
+		ZPOOL_CONFIG_FAULTED,
+		ZPOOL_CONFIG_REMOVED
+	};
+	uint64_t ignored;
+	size_t k;
+
+	for (k = 0; k < sizeof (down_keys) / sizeof (down_keys[0]); k++) {
+		if (nvlist_lookup_uint64(nv, down_keys[k], &ignored) == 0)
+			return (0);
+	}
+
+	return (1);
+}
+
+/*
+ * Get phys_path for a root pool.
+ *
+ * For a top-level disk its phys_path is copied into 'physpath'; for a
+ * top-level mirror the phys_paths of all its children are joined with single
+ * spaces.  The buffer size is not passed in, so the caller must supply a
+ * buffer large enough for the result.
+ *
+ * NOTE(review): the mirror branch appends to whatever string 'physpath'
+ * already holds, so the caller is expected to pass in an empty string.
+ *
+ * Return 0 on success; non-zero on failure:
+ *	-1	the pool is not bootable
+ *	-2	the vdev tree has no children
+ *	-3	a top-level vdev has no type
+ *	-4	a mirror has no children
+ *	-5	a mirror child has no phys_path
+ *	-6	the joined path would exceed MAXNAMELEN
+ *	-7	a top-level vdev is neither a disk nor a mirror
+ *	-8	a device is offline, faulted or removed
+ */
+int
+zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
+{
+	nvlist_t *vdev_root;
+	nvlist_t **child;
+	uint_t count;
+	uint_t i;
+
+	/*
+	 * Make sure this is a root pool, as phys_path doesn't mean
+	 * anything to a non-root pool.
+	 */
+	if (!pool_is_bootable(zhp))
+		return (-1);
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config,
+	    ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
+
+	if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
+	    &child, &count) != 0)
+		return (-2);
+
+	for (i = 0; i < count; i++) {
+		nvlist_t **child2;
+		uint_t count2;
+		char *type;
+		char *tmppath;
+		uint_t j;
+
+		if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
+		    != 0)
+			return (-3);
+
+		if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+			if (!vdev_online(child[i]))
+				return (-8);
+			verify(nvlist_lookup_string(child[i],
+			    ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
+			/*
+			 * Copy the terminator too, so the result is a valid
+			 * string even if the buffer was not zeroed.
+			 */
+			(void) memcpy(physpath, tmppath, strlen(tmppath) + 1);
+		} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
+			if (nvlist_lookup_nvlist_array(child[i],
+			    ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
+				return (-4);
+
+			for (j = 0; j < count2; j++) {
+				if (!vdev_online(child2[j]))
+					return (-8);
+				if (nvlist_lookup_string(child2[j],
+				    ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
+					return (-5);
+
+				if ((strlen(physpath) + strlen(tmppath)) >
+				    MAXNAMELEN)
+					return (-6);
+
+				if (strlen(physpath) == 0) {
+					/* First entry: copy with terminator. */
+					(void) memcpy(physpath, tmppath,
+					    strlen(tmppath) + 1);
+				} else {
+					(void) strcat(physpath, " ");
+					(void) strcat(physpath, tmppath);
+				}
+			}
+		} else {
+			return (-7);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Returns TRUE if 'guid' appears in the nvlist array named 'type' directly
+ * under the pool's vdev tree.  This is used to check for hot spares (INUSE
+ * or not) and level 2 cache devices.
+ */
+static boolean_t
+is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
+{
+	nvlist_t *vdev_tree;
+	nvlist_t **devs;
+	uint_t ndevs, d;
+	uint64_t devguid;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &vdev_tree) == 0);
+
+	/* No array of that type means nothing can match. */
+	if (nvlist_lookup_nvlist_array(vdev_tree, type, &devs, &ndevs) != 0)
+		return (B_FALSE);
+
+	for (d = 0; d < ndevs; d++) {
+		verify(nvlist_lookup_uint64(devs[d], ZPOOL_CONFIG_GUID,
+		    &devguid) == 0);
+		if (devguid == guid)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Bring the vdev named by 'path' online.  'flags' is a set of the
+ * ZFS_ONLINE_* flags and is passed to the kernel in zc_obj.  Hot spares are
+ * rejected with EZFS_ISSPARE.  On success the state reported back by the
+ * kernel is stored in '*newstate' and 0 is returned.
+ */
+int
+zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
+    vdev_state_t *newstate)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char msg[1024];
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	/* Covers both available spares and spares that are in use. */
+	if (is_spare || is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES))
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	zc.zc_obj = flags;
+	zc.zc_cookie = VDEV_STATE_ONLINE;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, msg));
+
+	*newstate = zc.zc_cookie;
+	return (0);
+}
+
+/*
+ * Take the vdev named by 'path' offline.  When 'istmp' is set the request is
+ * flagged ZFS_OFFLINE_TEMPORARY.  Hot spares are rejected with EZFS_ISSPARE.
+ * Returns 0 on success.
+ */
+int
+zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char msg[1024];
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (is_spare || is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES))
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
+	zc.zc_cookie = VDEV_STATE_OFFLINE;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	/*
+	 * EBUSY: there are no other replicas of this device.
+	 */
+	if (errno == EBUSY)
+		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+
+	return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * Mark the vdev identified by 'guid' as faulted.  Returns 0 on success.
+ * EBUSY from the kernel is reported as EZFS_NOREPLICAS; any other failure
+ * goes through zpool_standard_error().
+ */
+int
+zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	/* '%llu' needs an unsigned long long; uint64_t may be another type. */
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_guid = guid;
+	zc.zc_cookie = VDEV_STATE_FAULTED;
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	switch (errno) {
+	case EBUSY:
+
+		/*
+		 * There are no other replicas of this device.
+		 */
+		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+
+	default:
+		return (zpool_standard_error(hdl, errno, msg));
+	}
+
+}
+
+/*
+ * Mark the vdev identified by 'guid' as degraded.  Returns 0 on success,
+ * otherwise the result of zpool_standard_error().
+ */
+int
+zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	/* '%llu' needs an unsigned long long; uint64_t may be another type. */
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_guid = guid;
+	zc.zc_cookie = VDEV_STATE_DEGRADED;
+
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * Returns TRUE if 'tgt' is child number 'which' of a two-way spare vdev
+ * somewhere in the tree rooted at 'search', i.e. if the given nvlist is a
+ * vdev that was originally swapped in as a hot spare (or is the device such
+ * a spare is standing in for, depending on 'which').
+ */
+static boolean_t
+is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
+{
+	nvlist_t **kids;
+	uint_t nkids, k;
+	char *vtype;
+
+	/* Leaf vdevs have no children and therefore cannot match. */
+	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &kids,
+	    &nkids) != 0)
+		return (B_FALSE);
+
+	verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, &vtype) == 0);
+
+	if (strcmp(vtype, VDEV_TYPE_SPARE) == 0 && nkids == 2 &&
+	    kids[which] == tgt)
+		return (B_TRUE);
+
+	for (k = 0; k < nkids; k++) {
+		if (is_replacing_spare(kids[k], tgt, which))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Attach new_disk (fully described by nvroot) to old_disk.
+ * If 'replacing' is specified, the new disk will replace the old one.
+ *
+ * 'nvroot' must contain exactly one child vdev.  'old_disk' is resolved with
+ * zpool_find_vdev() and may not be an available hot spare or a cache device.
+ * Returns 0 on success.  Failures detected before the ioctl return the
+ * result of zfs_error() (or -1 directly); a failed ioctl is reported through
+ * the libzfs error routines and -1 is returned.
+ */
+int
+zpool_vdev_attach(zpool_handle_t *zhp,
+    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	int ret;
+	nvlist_t *tgt;
+	boolean_t avail_spare, l2cache, islog;
+	uint64_t val;
+	char *path, *newname;
+	nvlist_t **child;
+	uint_t children;
+	nvlist_t *config_root;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	boolean_t rootpool = pool_is_bootable(zhp);
+
+	/* Build the message prefix shared by every error report below. */
+	if (replacing)
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot replace %s with %s"), old_disk, new_disk);
+	else
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot attach %s to %s"), new_disk, old_disk);
+
+	/*
+	 * If this is a root pool, make sure that we're not attaching an
+	 * EFI labeled device.
+	 */
+	if (rootpool && pool_uses_efi(nvroot)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "EFI labeled devices are not supported on root pools."));
+		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
+	}
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
+	    &islog)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (avail_spare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	if (l2cache)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
+	/* The kernel identifies the target by GUID; zc_cookie carries mode. */
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+	zc.zc_cookie = replacing;
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0 || children != 1) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
+	}
+
+	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
+
+	/* 'newname' is allocated here and freed on every path below. */
+	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
+		return (-1);
+
+	/*
+	 * If the target is a hot spare that has been swapped in, we can only
+	 * replace it with another hot spare.
+	 *
+	 * Note that the zpool_find_vdev() call overwrites 'avail_spare' and
+	 * 'l2cache' with the flags of the new device.
+	 */
+	if (replacing &&
+	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
+	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
+	    NULL) == NULL || !avail_spare) &&
+	    is_replacing_spare(config_root, tgt, 1)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can only be replaced by another hot spare"));
+		free(newname);
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	/*
+	 * If we are attempting to replace a spare, it cannot be applied to an
+	 * already spared device.
+	 */
+	if (replacing &&
+	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+	    zpool_find_vdev(zhp, newname, &avail_spare,
+	    &l2cache, NULL) != NULL && avail_spare &&
+	    is_replacing_spare(config_root, tgt, 0)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device has already been replaced with a spare"));
+		free(newname);
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	free(newname);
+
+	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
+		return (-1);
+
+	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
+
+	/* The config buffer is no longer needed whatever the outcome. */
+	zcmd_free_nvlists(&zc);
+
+	if (ret == 0) {
+		if (rootpool) {
+			/*
+			 * XXX - This should be removed once we can
+			 * automatically install the bootblocks on the
+			 * newly attached disk.
+			 */
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please "
+			    "be sure to invoke %s to make '%s' bootable.\n"),
+			    BOOTCMD, new_disk);
+		}
+		return (0);
+	}
+
+	/* Translate the ioctl's errno into a libzfs error report. */
+	switch (errno) {
+	case ENOTSUP:
+		/*
+		 * Can't attach to or replace this type of vdev.
+		 */
+		if (replacing) {
+			if (islog)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "cannot replace a log with a spare"));
+			else
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "cannot replace a replacing device"));
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "can only attach to mirrors and top-level "
+			    "disks"));
+		}
+		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
+		break;
+
+	case EINVAL:
+		/*
+		 * The new device must be a single disk.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+		    new_disk);
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case EOVERFLOW:
+		/*
+		 * The new device is too small.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device is too small"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case EDOM:
+		/*
+		 * The new device has a different alignment requirement.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "devices have different sector alignment"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		break;
+
+	case ENAMETOOLONG:
+		/*
+		 * The resulting top-level vdev spec won't fit in the label.
+		 */
+		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
+		break;
+
+	default:
+		(void) zpool_standard_error(hdl, errno, msg);
+	}
+
+	return (-1);
+}
+
+/*
+ * Detach the device named by 'path' from its parent vdev.  Available hot
+ * spares and cache devices are refused.  Returns 0 on success; a failed
+ * ioctl is reported through the libzfs error routines and yields -1.
+ */
+int
+zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char msg[1024];
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (is_spare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	if (is_l2cache)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
+		return (0);
+
+	if (errno == ENOTSUP) {
+		/*
+		 * Can't detach from this type of vdev.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
+		    "applicable to mirror and replacing vdevs"));
+		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
+	} else if (errno == EBUSY) {
+		/*
+		 * There are no other replicas of this device.
+		 */
+		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
+	} else {
+		(void) zpool_standard_error(hdl, errno, msg);
+	}
+
+	return (-1);
+}
+
+/*
+ * Remove the device named by 'path' from the pool.  Currently, this is
+ * supported only for inactive hot spares and level 2 cache devices; anything
+ * else is rejected before the kernel is asked.  Returns 0 on success.
+ */
+int
+zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char msg[1024];
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache, NULL);
+	if (vdev == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (!(is_spare || is_l2cache)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "only inactive hot spares or cache devices "
+		    "can be removed"));
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+	}
+
+	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, msg));
+
+	return (0);
+}
+
+/*
+ * Clear the errors for the pool, or for the single device named by 'path'
+ * when one is given.  Available hot spares are refused; cache devices are
+ * allowed.  Returns 0 on success.
+ */
+int
+zpool_clear(zpool_handle_t *zhp, const char *path)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+	boolean_t is_spare, is_l2cache;
+	nvlist_t *vdev;
+	char msg[1024];
+
+	/* The message names the device if there is one, else the pool. */
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
+	    path ? path : zhp->zpool_name);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (path) {
+		vdev = zpool_find_vdev(zhp, path, &is_spare, &is_l2cache,
+		    NULL);
+		if (vdev == NULL)
+			return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+		/*
+		 * Don't allow error clearing for hot spares.  Do allow
+		 * error clearing for l2cache devices.
+		 */
+		if (is_spare)
+			return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+		verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+		    &zc.zc_guid) == 0);
+	}
+
+	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) != 0)
+		return (zpool_standard_error(hdl, errno, msg));
+
+	return (0);
+}
+
+/*
+ * Similar to zpool_clear(), but takes a GUID (used by fmd).  Returns 0 on
+ * success, otherwise the result of zpool_standard_error().
+ */
+int
+zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	/* '%llx' needs an unsigned long long; uint64_t may be another type. */
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
+	    (u_longlong_t)guid);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_guid = guid;
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
+}
+
+/*
+ * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
+ * hierarchy, calling 'cb' with each non-directory entry's path (relative to
+ * /dev/zvol/dsk) and 'data'.  Entries whose name starts with '.' are skipped.
+ *
+ * Returns 0 if the hierarchy does not exist or was walked completely, the
+ * first non-zero value returned by 'cb' (which stops the walk), or -1 on
+ * error.
+ *
+ * 'paths' is used as an explicit stack of pending paths with 'curr' as the
+ * index of its top; it is doubled in size whenever it fills up.
+ */
+int
+zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
+    void *data)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char (*paths)[MAXPATHLEN];
+	size_t size = 4;
+	int curr, fd, base, ret = 0;
+	DIR *dirp;
+	struct dirent *dp;
+	struct stat st;
+
+	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
+		return (errno == ENOENT ? 0 : -1);
+
+	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
+		int err = errno;
+		(void) close(base);
+		return (err == ENOENT ? 0 : -1);
+	}
+
+	/*
+	 * Oddly this wasn't a directory -- ignore that failure since we
+	 * know there are no links lower in the (non-existent) hierarchy.
+	 */
+	if (!S_ISDIR(st.st_mode)) {
+		(void) close(base);
+		return (0);
+	}
+
+	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
+		(void) close(base);
+		return (-1);
+	}
+
+	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
+	curr = 0;
+
+	while (curr >= 0) {
+		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
+			goto err;
+
+		if (S_ISDIR(st.st_mode)) {
+			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
+				goto err;
+
+			if ((dirp = fdopendir(fd)) == NULL) {
+				(void) close(fd);
+				goto err;
+			}
+
+			/*
+			 * From here on 'fd' belongs to 'dirp': closedir()
+			 * closes it, so it must not be closed separately.
+			 */
+			while ((dp = readdir(dirp)) != NULL) {
+				if (dp->d_name[0] == '.')
+					continue;
+
+				if ((size_t)(curr + 1) == size) {
+					paths = zfs_realloc(hdl, paths,
+					    size * sizeof (paths[0]),
+					    size * 2 * sizeof (paths[0]));
+					if (paths == NULL) {
+						(void) closedir(dirp);
+						goto err;
+					}
+
+					size *= 2;
+				}
+
+				/*
+				 * Keep a copy of the directory path on top of
+				 * the stack and turn the slot below it into
+				 * the path of this entry.
+				 */
+				(void) strlcpy(paths[curr + 1], paths[curr],
+				    sizeof (paths[curr + 1]));
+				(void) strlcat(paths[curr], "/",
+				    sizeof (paths[curr]));
+				(void) strlcat(paths[curr], dp->d_name,
+				    sizeof (paths[curr]));
+				curr++;
+			}
+
+			(void) closedir(dirp);
+
+		} else {
+			if ((ret = cb(paths[curr], data)) != 0)
+				break;
+		}
+
+		/* Pop the directory copy or the entry just reported. */
+		curr--;
+	}
+
+	free(paths);
+	(void) close(base);
+
+	return (ret);
+
+err:
+	free(paths);
+	(void) close(base);
+	return (-1);
+}
+
+/*
+ * Callback state pairing a pool handle with a boolean flag.
+ * NOTE(review): nothing in the visible part of this file references this
+ * type; the field meanings are presumed from their names -- confirm before
+ * relying on them.
+ */
+typedef struct zvol_cb {
+	zpool_handle_t *zcb_pool;	/* pool handle */
+	boolean_t zcb_create;		/* presumably create vs. remove */
+} zvol_cb_t;
+
+/*
+ * Dataset iterator callback: create the link for 'zhp' if it is a volume,
+ * then recurse into its snapshots and into its child filesystems.  A failure
+ * to create the link itself is ignored.  The handle is always closed before
+ * returning.  'data' is unused.
+ */
+/*ARGSUSED*/
+static int
+do_zvol_create(zfs_handle_t *zhp, void *data)
+{
+	int err = 0;
+
+	if (ZFS_IS_VOLUME(zhp)) {
+		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+		err = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
+	}
+
+	/* Only descend into children if the snapshot pass succeeded. */
+	if (err != 0) {
+		zfs_close(zhp);
+		return (err);
+	}
+
+	err = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
+	zfs_close(zhp);
+
+	return (err);
+}
+
+/*
+ * Walk every dataset below the pool's root dataset and make any necessary
+ * minor nodes for the zvols found.  Returns the result of the walk, or 0 if
+ * no handle for the root dataset could be made.
+ */
+int
+zpool_create_zvol_links(zpool_handle_t *zhp)
+{
+	zfs_handle_t *root;
+	int ret;
+
+	root = make_dataset_handle(zhp->zpool_hdl, zhp->zpool_name);
+
+	/*
+	 * If the pool is unavailable, just return success.
+	 */
+	if (root == NULL)
+		return (0);
+
+	ret = zfs_iter_filesystems(root, do_zvol_create, NULL);
+	zfs_close(root);
+
+	return (ret);
+}
+
+/*
+ * zpool_iter_zvol() callback: remove the link for 'dataset'.  'data' is the
+ * zpool_handle_t of the pool being processed.
+ */
+static int
+do_zvol_remove(const char *dataset, void *data)
+{
+	zpool_handle_t *pool = data;
+	libzfs_handle_t *hdl = pool->zpool_hdl;
+
+	return (zvol_remove_link(hdl, dataset));
+}
+
+/*
+ * Remove the minor nodes of every zvol in the pool.  The zvols are found by
+ * walking the /dev links rather than the datasets, so that a corrupted pool
+ * doesn't impede this operation.
+ */
+int
+zpool_remove_zvol_links(zpool_handle_t *zhp)
+{
+	int ret;
+
+	ret = zpool_iter_zvol(zhp, do_zvol_remove, zhp);
+
+	return (ret);
+}
+
+/*
+ * Convert from a devid string to a path.
+ *
+ * Returns a newly allocated copy of the first device name found for the
+ * devid under /dev, which the caller must free(), or NULL if the devid
+ * cannot be decoded, no name is found, or memory runs out.
+ */
+static char *
+devid_to_path(char *devid_str)
+{
+	ddi_devid_t devid;
+	char *minor;
+	char *path;
+	devid_nmlist_t *list = NULL;
+	int ret;
+
+	if (devid_str_decode(devid_str, &devid, &minor) != 0)
+		return (NULL);
+
+	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
+
+	devid_str_free(minor);
+	devid_free(devid);
+
+	if (ret != 0)
+		return (NULL);
+
+	/*
+	 * The name list is released whether or not the copy succeeded, so
+	 * it is not leaked when strdup() runs out of memory.
+	 */
+	path = strdup(list[0].devname);
+	devid_free_nmlist(list);
+
+	return (path);
+}
+
+/*
+ * Convert from a path to a devid string.  The device is opened read-only to
+ * obtain its devid and minor name.  Returns the encoded devid string, or
+ * NULL if the device cannot be opened or either lookup fails.
+ */
+static char *
+path_to_devid(const char *path)
+{
+	ddi_devid_t devid;
+	char *minor = NULL;
+	char *encoded = NULL;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return (NULL);
+
+	if (devid_get(fd, &devid) != 0) {
+		(void) close(fd);
+		return (NULL);
+	}
+
+	if (devid_get_minor_name(fd, &minor) == 0)
+		encoded = devid_str_encode(devid, minor);
+	if (minor != NULL)
+		devid_str_free(minor);
+	devid_free(devid);
+
+	(void) close(fd);
+
+	return (encoded);
+}
+
+/*
+ * Issue the necessary ioctl() to update the stored path value for the vdev.  We
+ * ignore any failure here, since a common case is for an unprivileged user to
+ * type 'zpool status', and we'll display the correct information anyway.
+ *
+ * The vdev is identified to the kernel by the GUID found in 'nv'.
+ */
+static void
+set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/*
+	 * strlcpy() always terminates the destination, which strncpy() does
+	 * not do when the source is as long as the buffer.
+	 */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+	    &zc.zc_guid) == 0);
+
+	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
+}
+
+/*
+ * Given a vdev, return the name to display in iostat.  If the vdev has a path,
+ * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
+ * We also check if this is a whole disk, in which case we strip off the
+ * trailing 's0' slice name.
+ *
+ * This routine is also responsible for identifying when disks have been
+ * reconfigured in a new location.  The kernel will have opened the device by
+ * devid, but the path will still refer to the old location.  To catch this, we
+ * first do a path -> devid translation (which is fast for the common case).  If
+ * the devid matches, we're done.  If not, we do a reverse devid -> path
+ * translation and issue the appropriate ioctl() to update the path of the vdev.
+ * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
+ * of these checks.
+ *
+ * The returned string is a copy made with zfs_strdup(); NULL is returned if
+ * that allocation fails.
+ */
+char *
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
+{
+	char *path, *devid;
+	uint64_t value;
+	char buf[64];
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+	    &value) == 0) {
+		/* the device is not present: display its GUID instead */
+		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+		    &value) == 0);
+		(void) snprintf(buf, sizeof (buf), "%llu",
+		    (u_longlong_t)value);
+		path = buf;
+	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
+
+		/*
+		 * If the device is dead (faulted, offline, etc) then don't
+		 * bother opening it.  Otherwise we may be forcing the user to
+		 * open a misbehaving device, which can have undesirable
+		 * effects.
+		 */
+		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&vs, &vsc) != 0 ||
+		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
+		    zhp != NULL &&
+		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
+			/*
+			 * Determine if the current path is correct.
+			 */
+			char *newdevid = path_to_devid(path);
+
+			if (newdevid == NULL ||
+			    strcmp(devid, newdevid) != 0) {
+				char *newpath;
+
+				if ((newpath = devid_to_path(devid)) != NULL) {
+					/*
+					 * Update the path appropriately.
+					 */
+					set_path(zhp, nv, newpath);
+					if (nvlist_add_string(nv,
+					    ZPOOL_CONFIG_PATH, newpath) == 0)
+						verify(nvlist_lookup_string(nv,
+						    ZPOOL_CONFIG_PATH,
+						    &path) == 0);
+					free(newpath);
+				}
+			}
+
+			if (newdevid)
+				devid_str_free(newdevid);
+		}
+
+		if (strncmp(path, "/dev/dsk/", 9) == 0)
+			path += 9;
+
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &value) == 0 && value) {
+			size_t len = strlen(path);
+			char *tmp = zfs_strdup(hdl, path);
+
+			if (tmp == NULL)
+				return (NULL);
+			/*
+			 * Drop the trailing two-character slice name.  Names
+			 * too short to carry one are left alone so that we
+			 * never write before the start of the buffer.
+			 */
+			if (len >= 2)
+				tmp[len - 2] = '\0';
+			return (tmp);
+		}
+	} else {
+		/* no path at all: fall back to the vdev type */
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
+
+		/*
+		 * If it's a raidz device, we need to stick in the parity level.
+		 */
+		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+			    &value) == 0);
+			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
+			    (u_longlong_t)value);
+			path = buf;
+		}
+	}
+
+	return (zfs_strdup(hdl, path));
+}
+
+/*
+ * qsort() comparator for zbookmark_t entries: orders them by a raw byte
+ * comparison of the whole structure.
+ */
+static int
+zbookmark_compare(const void *a, const void *b)
+{
+	int order;
+
+	order = memcmp(a, b, sizeof (zbookmark_t));
+	return (order);
+}
+
+/*
+ * Retrieve the persistent error log, uniquify the members, and return to the
+ * caller.
+ *
+ * Returns 0 on success; when the pool reports no errors, *nverrlistp is left
+ * untouched.  Returns -1 if the bookmark buffer cannot be allocated or the
+ * ioctl fails, and the result of no_memory() if the nvlist cannot be built.
+ * On the latter failure the partially built list is freed and *nverrlistp is
+ * set to NULL, so the caller never has to clean up after an error.
+ */
+int
+zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
+{
+	zfs_cmd_t zc = { 0 };
+	uint64_t count;
+	zbookmark_t *zb = NULL;
+	uint64_t i;
+
+	/*
+	 * Retrieve the raw error list from the kernel.  If the number of errors
+	 * has increased, allocate more space and continue until we get the
+	 * entire list.
+	 */
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
+	    &count) == 0);
+	if (count == 0)
+		return (0);
+	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
+	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
+		return (-1);
+	zc.zc_nvlist_dst_size = count;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	for (;;) {
+		int err;
+
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
+		    &zc) == 0)
+			break;
+
+		/* capture errno before free() has a chance to disturb it */
+		err = errno;
+		free((void *)(uintptr_t)zc.zc_nvlist_dst);
+		if (err != ENOMEM)
+			return (-1);
+
+		/* the kernel told us how many entries it needs; retry */
+		count = zc.zc_nvlist_dst_size;
+		if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
+		    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
+			return (-1);
+	}
+
+	/*
+	 * Sort the resulting bookmarks.  This is a little confusing due to the
+	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
+	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
+	 * _not_ copied as part of the process.  So we point the start of our
+	 * array appropriately and decrement the total number of elements.
+	 */
+	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
+	    zc.zc_nvlist_dst_size;
+	count -= zc.zc_nvlist_dst_size;
+
+	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
+
+	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
+
+	/*
+	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
+	 */
+	for (i = 0; i < count; i++) {
+		nvlist_t *nv;
+
+		/* ignoring zb_blkid and zb_level for now */
+		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
+		    zb[i-1].zb_object == zb[i].zb_object)
+			continue;
+
+		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
+			goto nomem;
+		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
+		    zb[i].zb_objset) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
+		    zb[i].zb_object) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
+			nvlist_free(nv);
+			goto nomem;
+		}
+		nvlist_free(nv);
+	}
+
+	free((void *)(uintptr_t)zc.zc_nvlist_dst);
+	return (0);
+
+nomem:
+	/* don't hand a half-built list back to the caller */
+	nvlist_free(*nverrlistp);
+	*nverrlistp = NULL;
+	free((void *)(uintptr_t)zc.zc_nvlist_dst);
+	return (no_memory(zhp->zpool_hdl));
+}
+
+/*
+ * Upgrade a ZFS pool to the on-disk version given by 'new_version'.
+ *
+ * Returns 0 on success; on failure the error is reported through
+ * zpool_standard_error_fmt() and its return value is passed back.
+ */
+int
+zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	/* bounded copy, matching the other ioctl wrappers in this file */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_cookie = new_version;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
+		return (zpool_standard_error_fmt(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
+		    zhp->zpool_name));
+	return (0);
+}
+
+/*
+ * Build the history string for a command: 'subcommand' followed by each of
+ * argv[1..argc-1], separated by single spaces.  'history_str' is treated as
+ * a buffer of HIS_MAX_RECORD_LEN bytes.  Arguments are appended whole; the
+ * first one that would not fit, and everything after it, is left out.
+ */
+void
+zpool_set_history_str(const char *subcommand, int argc, char **argv,
+    char *history_str)
+{
+	int i;
+
+	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
+	for (i = 1; i < argc; i++) {
+		/*
+		 * The separator, the argument and the terminating NUL must
+		 * all fit, so the combined length has to stay strictly below
+		 * the buffer size; otherwise strlcat() would silently chop
+		 * the end of the argument.
+		 */
+		if (strlen(history_str) + 1 + strlen(argv[i]) >=
+		    HIS_MAX_RECORD_LEN)
+			break;
+		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
+		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
+	}
+}
+
+/*
+ * Stage command history for logging: store a private copy of 'history_str'
+ * in the library handle, replacing any string staged earlier.  Returns
+ * EINVAL for a NULL or over-long string, the result of no_memory() if the
+ * copy cannot be allocated, and 0 otherwise.
+ */
+int
+zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
+{
+	char *copy;
+
+	if (history_str == NULL ||
+	    strlen(history_str) > HIS_MAX_RECORD_LEN)
+		return (EINVAL);
+
+	/* free(NULL) is harmless, so no guard is needed */
+	free(hdl->libzfs_log_str);
+
+	copy = strdup(history_str);
+	hdl->libzfs_log_str = copy;
+	if (copy == NULL)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+/*
+ * Perform ioctl to get some command history of a pool.
+ *
+ * 'buf' receives up to '*len' bytes of history, read starting at logical
+ * offset '*off'.
+ *
+ * On success 0 is returned, '*off' is the next logical offset to read from
+ * and '*len' is the number of bytes actually placed in 'buf'.  On failure
+ * the error is reported through the libzfs error routines, their return
+ * value is passed back, and '*off' and '*len' are left unchanged.
+ */
+static int
+get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
+{
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_history = (uint64_t)(uintptr_t)buf;
+	zc.zc_history_len = *len;
+	zc.zc_history_offset = *off;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) == 0) {
+		*len = zc.zc_history_len;
+		*off = zc.zc_history_offset;
+		return (0);
+	}
+
+	/* translate the failure into a libzfs error */
+	switch (errno) {
+	case EPERM:
+		return (zfs_error_fmt(hdl, EZFS_PERM,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot show history for pool '%s'"),
+		    zhp->zpool_name));
+	case ENOENT:
+		return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
+		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
+		    "'%s'"), zhp->zpool_name));
+	case ENOTSUP:
+		return (zfs_error_fmt(hdl, EZFS_BADVERSION,
+		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
+		    "'%s', pool must be upgraded"), zhp->zpool_name));
+	default:
+		return (zpool_standard_error_fmt(hdl, errno,
+		    dgettext(TEXT_DOMAIN,
+		    "cannot get history for '%s'"), zhp->zpool_name));
+	}
+}
+
+/*
+ * Process the buffer of nvlists, unpacking and storing each nvlist record
+ * into 'records'.  'leftover' is set to the number of bytes that weren't
+ * processed as there wasn't a complete record.
+ *
+ * Each record is an 8-byte little-endian length followed by that many bytes
+ * of packed nvlist.  '*records' is grown as needed and '*numrecords' counts
+ * the entries stored so far.
+ *
+ * Returns 0 on success.  Returns ENOMEM if a record cannot be unpacked or the
+ * array cannot be grown; in that case '*leftover' is not set, and '*records'
+ * and '*numrecords' still describe only the records stored successfully, so
+ * the caller can free them.
+ */
+static int
+zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
+    nvlist_t ***records, uint_t *numrecords)
+{
+	uint64_t reclen;
+	nvlist_t *nv;
+	nvlist_t **grown;
+	int i;
+
+	while (bytes_read > sizeof (reclen)) {
+
+		/* get length of packed record (stored as little endian) */
+		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
+			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
+
+		/*
+		 * Stop at a partial record.  Subtract from bytes_read rather
+		 * than adding to reclen so that a bogus, huge length cannot
+		 * wrap around and pass the check.
+		 */
+		if (bytes_read - sizeof (reclen) < reclen)
+			break;
+
+		/* unpack record */
+		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
+			return (ENOMEM);
+		bytes_read -= sizeof (reclen) + reclen;
+		buf += sizeof (reclen) + reclen;
+
+		/* add record to nvlist array */
+		(*numrecords)++;
+		if (ISP2(*numrecords + 1)) {
+			grown = realloc(*records,
+			    *numrecords * 2 * sizeof (nvlist_t *));
+			if (grown == NULL) {
+				/*
+				 * The old array is still valid; drop only
+				 * the record we could not store.
+				 */
+				nvlist_free(nv);
+				(*numrecords)--;
+				return (ENOMEM);
+			}
+			*records = grown;
+		}
+		(*records)[*numrecords - 1] = nv;
+	}
+
+	*leftover = bytes_read;
+	return (0);
+}
+
+#define	HIS_BUF_LEN	(128*1024)
+
+/*
+ * Retrieve the command history of a pool.
+ *
+ * The history is read in HIS_BUF_LEN-sized chunks until a read returns no
+ * data.  On success (0) '*nvhisp' is a newly allocated nvlist holding every
+ * record as an nvlist array under ZPOOL_HIST_RECORD.  On failure the error
+ * from get_history() or zpool_history_unpack() is returned and '*nvhisp' is
+ * not touched.
+ */
+int
+zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
+{
+	char chunk[HIS_BUF_LEN];
+	uint64_t offset = 0;
+	nvlist_t **recs = NULL;
+	uint_t nrecs = 0;
+	int err, idx;
+
+	for (;;) {
+		uint64_t nread = sizeof (chunk);
+		uint64_t unparsed;
+
+		err = get_history(zhp, chunk, &offset, &nread);
+		if (err != 0)
+			break;
+
+		/* if nothing else was read in, we're at EOF, just return */
+		if (nread == 0)
+			break;
+
+		err = zpool_history_unpack(chunk, nread, &unparsed,
+		    &recs, &nrecs);
+		if (err != 0)
+			break;
+
+		/* back up so the trailing partial record is read again */
+		offset -= unparsed;
+	}
+
+	if (err == 0) {
+		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
+		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
+		    recs, nrecs) == 0);
+	}
+
+	/* the records were copied into the nvlist (or are unwanted) */
+	for (idx = 0; idx < nrecs; idx++)
+		nvlist_free(recs[idx]);
+	free(recs);
+
+	return (err);
+}
+
+/*
+ * Format a human-readable location for object 'obj' of dataset object
+ * 'dsobj' into 'pathname' (at most 'len' bytes):
+ *
+ *   - "<metadata>:<0xOBJ>" when dsobj is 0 (the MOS);
+ *   - "<0xDSOBJ>:<0xOBJ>" when the dataset name cannot be looked up;
+ *   - "MOUNTPOINT/PATH" when the dataset is mounted and the object's path
+ *     is known, "DATASET:PATH" when it is known but not mounted;
+ *   - "DATASET:<0xOBJ>" when the object's path cannot be looked up.
+ *
+ * The 64-bit values are cast to u_longlong_t so that they match the "%llx"
+ * conversions regardless of how uint64_t is defined.
+ */
+void
+zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
+    char *pathname, size_t len)
+{
+	zfs_cmd_t zc = { 0 };
+	boolean_t mounted = B_FALSE;
+	char *mntpnt = NULL;
+	char dsname[MAXNAMELEN];
+
+	if (dsobj == 0) {
+		/* special case for the MOS */
+		(void) snprintf(pathname, len, "<metadata>:<0x%llx>",
+		    (u_longlong_t)obj);
+		return;
+	}
+
+	/* get the dataset's name */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_obj = dsobj;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd,
+	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
+		/* just write out a path of two object numbers */
+		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
+		    (u_longlong_t)dsobj, (u_longlong_t)obj);
+		return;
+	}
+	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
+
+	/* find out if the dataset is mounted */
+	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
+
+	/* get the corrupted object's path */
+	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
+	zc.zc_obj = obj;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
+	    &zc) == 0) {
+		if (mounted) {
+			(void) snprintf(pathname, len, "%s%s", mntpnt,
+			    zc.zc_value);
+		} else {
+			(void) snprintf(pathname, len, "%s:%s",
+			    dsname, zc.zc_value);
+		}
+	} else {
+		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
+		    (u_longlong_t)obj);
+	}
+	free(mntpnt);
+}
+
+#define	RDISK_ROOT	"/dev/rdsk"
+#define	BACKUP_SLICE	"s2"
+/*
+ * Don't start the slice at the default block of 34; many storage
+ * devices will use a stripe width of 128k, so start there instead.
+ */
+#define	NEW_START_BLOCK	256
+
+/*
+ * Read the EFI label from the config, if a label does not exist then
+ * pass back the error to the caller. If the caller has passed a non-NULL
+ * diskaddr argument then we set it to the starting address of the EFI
+ * partition.
+ *
+ * The raw device is located by appending the last component of the vdev's
+ * path to RDISK_ROOT.  Returns -1 if the config has no path, the path has no
+ * '/' component, or the device cannot be opened; otherwise returns whatever
+ * efi_alloc_and_read() returned.
+ */
+static int
+read_efi_label(nvlist_t *config, diskaddr_t *sb)
+{
+	char *path, *slash;
+	int fd;
+	char diskname[MAXPATHLEN];
+	int err = -1;
+
+	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
+		return (err);
+
+	/* a path without any '/' cannot be mapped onto RDISK_ROOT */
+	if ((slash = strrchr(path, '/')) == NULL)
+		return (err);
+
+	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
+	    slash);
+	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
+		struct dk_gpt *vtoc;
+
+		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
+			if (sb != NULL)
+				*sb = vtoc->efi_parts[0].p_start;
+			efi_free(vtoc);
+		}
+		(void) close(fd);
+	}
+	return (err);
+}
+
+/*
+ * Determine where a partition starts on a disk in the current
+ * configuration.
+ *
+ * The vdev tree is walked depth first; the answer comes from the first
+ * whole-disk leaf whose EFI label can be read.  MAXOFFSET_T is returned when
+ * no such leaf exists.
+ */
+static diskaddr_t
+find_start_block(nvlist_t *config)
+{
+	nvlist_t **kids;
+	uint_t idx, nkids;
+	uint64_t is_whole;
+	diskaddr_t start = MAXOFFSET_T;
+
+	if (nvlist_lookup_nvlist_array(config,
+	    ZPOOL_CONFIG_CHILDREN, &kids, &nkids) == 0) {
+		/* interior vdev: the first child with an answer wins */
+		for (idx = 0; idx < nkids; idx++) {
+			start = find_start_block(kids[idx]);
+			if (start != MAXOFFSET_T)
+				return (start);
+		}
+		return (MAXOFFSET_T);
+	}
+
+	/* leaf vdev: only whole disks carry a label we wrote */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_WHOLE_DISK,
+	    &is_whole) != 0 || !is_whole)
+		return (MAXOFFSET_T);
+
+	if (read_efi_label(config, &start) < 0)
+		return (MAXOFFSET_T);
+
+	return (start);
+}
+
+/*
+ * Label an individual disk.  The name provided is the short name,
+ * stripped of any leading /dev path.
+ *
+ * An EFI label is written to the disk's backup slice under RDISK_ROOT.
+ * Partition 0 is tagged V_USR and named "zfs"; partition 8 is the reserved
+ * area that follows it.  For an existing pool ('zhp' non-NULL) the starting
+ * block is taken from the pool (and cached in zhp->zpool_start_block); for a
+ * new pool NEW_START_BLOCK is used.
+ *
+ * Returns 0 on success, or the result of zfs_error() on failure.
+ */
+int
+zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
+{
+	char path[MAXPATHLEN];
+	struct dk_gpt *vtoc;
+	int fd;
+	size_t resv = EFI_MIN_RESV_SIZE;
+	uint64_t slice_size;
+	diskaddr_t start_block;
+	char errbuf[1024];
+
+	/* prepare an error message just in case */
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
+
+	if (zhp) {
+		nvlist_t *nvroot;
+
+		if (pool_is_bootable(zhp)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "EFI labeled devices are not supported on root "
+			    "pools."));
+			return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
+		}
+
+		verify(nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+
+		/* look the start block up once, then reuse the cached value */
+		if (zhp->zpool_start_block == 0)
+			start_block = find_start_block(nvroot);
+		else
+			start_block = zhp->zpool_start_block;
+		zhp->zpool_start_block = start_block;
+	} else {
+		/* new pool */
+		start_block = NEW_START_BLOCK;
+	}
+
+	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
+	    BACKUP_SLICE);
+
+	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+		/*
+		 * This shouldn't happen.  We've long since verified that this
+		 * is a valid device.
+		 */
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "unable to open device"));
+		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
+	}
+
+	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
+		/*
+		 * The only way this can fail is if we run out of memory, or we
+		 * were unable to read the disk's capacity
+		 */
+		if (errno == ENOMEM)
+			(void) no_memory(hdl);
+
+		(void) close(fd);
+		/*
+		 * NOTE(review): 'name' is passed here although the message
+		 * has no conversion specifier for it.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "unable to read disk capacity"), name);
+
+		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	}
+
+	/*
+	 * The data slice spans from start_block up to the reserved area at
+	 * the end of the usable LBA range.
+	 */
+	slice_size = vtoc->efi_last_u_lba + 1;
+	slice_size -= EFI_MIN_RESV_SIZE;
+	/* no existing label was found in the pool: use the default start */
+	if (start_block == MAXOFFSET_T)
+		start_block = NEW_START_BLOCK;
+	slice_size -= start_block;
+
+	vtoc->efi_parts[0].p_start = start_block;
+	vtoc->efi_parts[0].p_size = slice_size;
+
+	/*
+	 * Why we use V_USR: V_BACKUP confuses users, and is considered
+	 * disposable by some EFI utilities (since EFI doesn't have a backup
+	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
+	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
+	 * etc. were all pretty specific.  V_USR is as close to reality as we
+	 * can get, in the absence of V_OTHER.
+	 */
+	vtoc->efi_parts[0].p_tag = V_USR;
+	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
+
+	/* the reserved partition immediately follows the data slice */
+	vtoc->efi_parts[8].p_start = slice_size + start_block;
+	vtoc->efi_parts[8].p_size = resv;
+	vtoc->efi_parts[8].p_tag = V_RESERVED;
+
+	if (efi_write(fd, vtoc) != 0) {
+		/*
+		 * Some block drivers (like pcata) may not support EFI
+		 * GPT labels.  Print out a helpful error message dir-
+		 * ecting the user to manually label the disk and give
+		 * a specific slice.
+		 */
+		(void) close(fd);
+		efi_free(vtoc);
+
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "try using fdisk(1M) and then provide a specific slice"));
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	}
+
+	(void) close(fd);
+	efi_free(vtoc);
+	return (0);
+}
+
+/*
+ * Recursively check that neither 'config' nor any vdev beneath it is of a
+ * type that cannot back a dump device (raidz, file, log or missing).  The
+ * first offending vdev is reported through zfs_error() using 'errbuf' and
+ * B_FALSE is returned; B_TRUE means the whole subtree is acceptable.
+ */
+static boolean_t
+supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
+{
+	nvlist_t **kids;
+	uint_t nkids, idx;
+	char *vtype;
+	boolean_t rejected;
+
+	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &vtype) == 0);
+
+	rejected = (strcmp(vtype, VDEV_TYPE_RAIDZ) == 0 ||
+	    strcmp(vtype, VDEV_TYPE_FILE) == 0 ||
+	    strcmp(vtype, VDEV_TYPE_LOG) == 0 ||
+	    strcmp(vtype, VDEV_TYPE_MISSING) == 0);
+	if (rejected) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "vdev type '%s' is not supported"), vtype);
+		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
+		return (B_FALSE);
+	}
+
+	/* a vdev without children has nothing more to check */
+	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_TRUE);
+
+	for (idx = 0; idx < nkids; idx++) {
+		if (!supported_dump_vdev_type(hdl, kids[idx], errbuf))
+			return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
+/*
+ * check if this zvol is allowable for use as a dump device; zero if
+ * it is, > 0 if it isn't, < 0 if it isn't a zvol
+ *
+ * 'arg' is the full device path.  The pool name is taken from the component
+ * that follows ZVOL_FULL_DEV_DIR; the pool must have exactly one top-level
+ * vdev and every vdev in it must pass supported_dump_vdev_type().  Every
+ * exit taken after the library handle is created goes through 'out' so that
+ * the handle is always released.
+ */
+int
+zvol_check_dump_config(char *arg)
+{
+	zpool_handle_t *zhp = NULL;
+	nvlist_t *config, *nvroot;
+	char *p, *volname;
+	nvlist_t **top;
+	uint_t toplevels;
+	libzfs_handle_t *hdl;
+	char errbuf[1024];
+	char poolname[ZPOOL_MAXNAMELEN];
+	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
+	int ret = 1;
+
+	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
+		return (-1);
+	}
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "dump is not supported on device '%s'"), arg);
+
+	if ((hdl = libzfs_init()) == NULL)
+		return (1);
+	libzfs_print_on_error(hdl, B_TRUE);
+
+	volname = arg + pathlen;
+
+	/* check the configuration of the pool */
+	if ((p = strchr(volname, '/')) == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "malformed dataset name"));
+		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
+		goto out;
+	} else if (p - volname >= sizeof (poolname)) {
+		/*
+		 * Bound the copy below by the buffer we actually copy into,
+		 * so the pool name and its terminator always fit.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset name is too long"));
+		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
+		goto out;
+	} else {
+		(void) strncpy(poolname, volname, p - volname);
+		poolname[p - volname] = '\0';
+	}
+
+	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "could not open pool '%s'"), poolname);
+		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
+		goto out;
+	}
+	config = zpool_get_config(zhp, NULL);
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "could not obtain vdev configuration for  '%s'"), poolname);
+		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
+		goto out;
+	}
+
+	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &top, &toplevels) == 0);
+	if (toplevels != 1) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' has multiple top level vdevs"), poolname);
+		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
+		goto out;
+	}
+
+	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
+		goto out;
+	}
+	ret = 0;
+
+out:
+	/* zhp is still NULL if we bailed out before opening the pool */
+	if (zhp)
+		zpool_close(zhp);
+	libzfs_fini(hdl);
+	return (ret);
+}
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
new file mode 100644
index 000000000..a3ed5cea8
--- /dev/null
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -0,0 +1,2102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libdevinfo.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/avl.h>
+#include <stddef.h>
+
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+#include <fletcher.c> /* XXX */
+
+static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
+    int, avl_tree_t *, char **);
+
+/*
+ * Routines for dealing with the AVL tree of fs-nvlists
+ */
+/*
+ * One entry in the snapshot-GUID AVL tree: maps a snapshot's GUID to the
+ * nvlist of the filesystem that contains it and to the snapshot's name.
+ */
+typedef struct fsavl_node {
+	avl_node_t fn_node;	/* AVL linkage */
+	nvlist_t *fn_nvfs;	/* nvlist of the containing filesystem */
+	char *fn_snapname;	/* snapshot name (borrowed from the nvpair) */
+	uint64_t fn_guid;	/* snapshot GUID; the tree's sort key */
+} fsavl_node_t;
+
+/*
+ * AVL comparator for fsavl_node_t: orders nodes by ascending snapshot GUID.
+ */
+static int
+fsavl_compare(const void *arg1, const void *arg2)
+{
+	const fsavl_node_t *lhs = arg1;
+	const fsavl_node_t *rhs = arg2;
+
+	if (lhs->fn_guid < rhs->fn_guid)
+		return (-1);
+	if (lhs->fn_guid > rhs->fn_guid)
+		return (+1);
+	return (0);
+}
+
+/*
+ * Given the GUID of a snapshot, find its containing filesystem and
+ * (optionally) name.
+ *
+ * Returns the filesystem's nvlist, or NULL if no snapshot with that GUID is
+ * in the tree.  When 'snapname' is non-NULL and a match is found, it is
+ * pointed at the snapshot's name.
+ */
+static nvlist_t *
+fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
+{
+	fsavl_node_t key;
+	fsavl_node_t *hit;
+
+	/* only the GUID takes part in the comparison */
+	key.fn_guid = snapguid;
+
+	hit = avl_find(avl, &key, NULL);
+	if (hit == NULL)
+		return (NULL);
+
+	if (snapname)
+		*snapname = hit->fn_snapname;
+	return (hit->fn_nvfs);
+}
+
+/*
+ * Free every node of a tree built by fsavl_create(), then the tree itself.
+ * A NULL tree is accepted and ignored.
+ */
+static void
+fsavl_destroy(avl_tree_t *avl)
+{
+	void *walk = NULL;
+	fsavl_node_t *victim;
+
+	if (avl == NULL)
+		return;
+
+	for (victim = avl_destroy_nodes(avl, &walk); victim != NULL;
+	    victim = avl_destroy_nodes(avl, &walk))
+		free(victim);
+
+	avl_destroy(avl);
+	free(avl);
+}
+
+/*
+ * Build an AVL tree, keyed by snapshot GUID, from the "snaps" list of every
+ * filesystem nvlist in 'fss'.  Returns the new tree, or NULL if memory runs
+ * out (in which case anything already built is torn down).  The nodes point
+ * at the filesystem nvlists and snapshot names rather than copying them.
+ */
+static avl_tree_t *
+fsavl_create(nvlist_t *fss)
+{
+	avl_tree_t *fsavl;
+	nvpair_t *fselem = NULL;
+
+	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
+		return (NULL);
+
+	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
+	    offsetof(fsavl_node_t, fn_node));
+
+	/* outer loop: one nvpair per filesystem */
+	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
+		nvlist_t *nvfs, *snaps;
+		nvpair_t *snapelem = NULL;
+
+		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
+		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+
+		/* inner loop: one nvpair (name -> guid) per snapshot */
+		while ((snapelem =
+		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
+			fsavl_node_t *fn;
+			uint64_t guid;
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
+			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
+				fsavl_destroy(fsavl);
+				return (NULL);
+			}
+			fn->fn_nvfs = nvfs;
+			fn->fn_snapname = nvpair_name(snapelem);
+			fn->fn_guid = guid;
+
+			/*
+			 * Note: if there are multiple snaps with the
+			 * same GUID, we ignore all but one.
+			 */
+			if (avl_find(fsavl, fn, NULL) == NULL)
+				avl_add(fsavl, fn);
+			else
+				free(fn);
+		}
+	}
+
+	return (fsavl);
+}
+
+/*
+ * Routines for dealing with the giant nvlist of fs-nvlists, etc.
+ *
+ * send_data_t is the state threaded through the send_iterate_*() callbacks
+ * while that nvlist is being gathered.
+ */
+typedef struct send_data {
+	/* guid of the "from" snapshot in the fs currently being walked */
+	uint64_t parent_fromsnap_guid;
+	/* "snaps" list under construction for the current fs */
+	nvlist_t *parent_snaps;
+	/* the result: fs guid string -> fs nvlist */
+	nvlist_t *fss;
+	/* "snapprops" list under construction for the current fs */
+	nvlist_t *snapprops;
+	/* short snapshot names to compare against (either may be NULL) */
+	const char *fromsnap;
+	const char *tosnap;
+
+	/*
+	 * The header nvlist is of the following format:
+	 * {
+	 *   "tosnap" -> string
+	 *   "fromsnap" -> string (if incremental)
+	 *   "fss" -> {
+	 *	id -> {
+	 *
+	 *	 "name" -> string (full name; for debugging)
+	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
+	 *
+	 *	 "props" -> { name -> value (only if set here) }
+	 *	 "snaps" -> { name (lastname) -> number (guid) }
+	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
+	 *
+	 *	 "origin" -> number (guid) (if clone)
+	 *	 "sent" -> boolean (not on-disk)
+	 *	}
+	 *   }
+	 * }
+	 *
+	 */
+} send_data_t;
+
+static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
+
+/*
+ * Snapshot iterator callback: record the snapshot's guid and its locally
+ * set properties in the send_data_t passed as 'arg', and note its guid as
+ * the parent "from" snapshot when it matches.  Closes 'zhp' and always
+ * returns 0.
+ */
+static int
+send_iterate_snap(zfs_handle_t *zhp, void *arg)
+{
+	send_data_t *sd = arg;
+	uint64_t snap_guid = zhp->zfs_dmustats.dds_guid;
+	char *shortname = strrchr(zhp->zfs_name, '@') + 1;
+	nvlist_t *props;
+	boolean_t is_from, stands_in_for_from;
+
+	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, shortname,
+	    snap_guid));
+
+	/*
+	 * NB: if there is no fromsnap here (it's a newly created fs in
+	 * an incremental replication), we will substitute the tosnap.
+	 */
+	is_from = (sd->fromsnap && strcmp(shortname, sd->fromsnap) == 0);
+	stands_in_for_from = (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
+	    strcmp(shortname, sd->tosnap) == 0);
+	if (is_from || stands_in_for_from)
+		sd->parent_fromsnap_guid = snap_guid;
+
+	VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+	send_iterate_prop(zhp, props);
+	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, shortname, props));
+	nvlist_free(props);
+
+	zfs_close(zhp);
+	return (0);
+}
+
+/*
+ * Copy into 'nv' the properties of 'zhp' that are set on the dataset itself:
+ * read-only native properties are skipped, and (apart from quota and
+ * reservation) so is any property whose source is not this dataset.  String
+ * and user properties are added as strings, everything else as uint64.
+ */
+static void
+send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
+{
+	nvpair_t *elem = NULL;
+
+	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
+		char *propname = nvpair_name(elem);
+		zfs_prop_t prop = zfs_name_to_prop(propname);
+		nvlist_t *propnv;
+
+		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
+			continue;
+
+		verify(nvpair_value_nvlist(elem, &propnv) == 0);
+		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) {
+			/* these guys are modifiable, but have no source */
+			uint64_t value;
+			verify(nvlist_lookup_uint64(propnv,
+			    ZPROP_VALUE, &value) == 0);
+			/* never recorded for snapshots */
+			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
+				continue;
+		} else {
+			char *source;
+			if (nvlist_lookup_string(propnv,
+			    ZPROP_SOURCE, &source) != 0)
+				continue;
+			/* only keep values whose source is this dataset */
+			if (strcmp(source, zhp->zfs_name) != 0)
+				continue;
+		}
+
+		if (zfs_prop_user(propname) ||
+		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
+			char *value;
+			verify(nvlist_lookup_string(propnv,
+			    ZPROP_VALUE, &value) == 0);
+			VERIFY(0 == nvlist_add_string(nv, propname, value));
+		} else {
+			uint64_t value;
+			verify(nvlist_lookup_uint64(propnv,
+			    ZPROP_VALUE, &value) == 0);
+			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
+		}
+	}
+}
+
+/*
+ * Filesystem iterator callback: add an nvlist describing 'zhp' (name,
+ * parent "from" snapshot guid, origin guid for clones, properties,
+ * snapshots and their properties) to sd->fss under the filesystem's guid,
+ * then recurse into its child filesystems.
+ *
+ * 'zhp' is always closed before returning, on failure as well as success.
+ * Returns -1 if a clone's origin snapshot cannot be opened, otherwise the
+ * result of iterating over the children.
+ */
+static int
+send_iterate_fs(zfs_handle_t *zhp, void *arg)
+{
+	send_data_t *sd = arg;
+	nvlist_t *nvfs, *nv;
+	int rv;
+	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
+	uint64_t guid = zhp->zfs_dmustats.dds_guid;
+	char guidstring[64];
+
+	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
+	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
+	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
+	    sd->parent_fromsnap_guid));
+
+	if (zhp->zfs_dmustats.dds_origin[0]) {
+		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
+		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+		if (origin == NULL) {
+			/* release what we hold before bailing out */
+			nvlist_free(nvfs);
+			zfs_close(zhp);
+			return (-1);
+		}
+		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
+		    origin->zfs_dmustats.dds_guid));
+		/* only the guid was needed from the origin handle */
+		zfs_close(origin);
+	}
+
+	/* iterate over props */
+	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
+	send_iterate_prop(zhp, nv);
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
+	nvlist_free(nv);
+
+	/* iterate over snaps, and set sd->parent_fromsnap_guid */
+	sd->parent_fromsnap_guid = 0;
+	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
+	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
+	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
+	nvlist_free(sd->parent_snaps);
+	nvlist_free(sd->snapprops);
+
+	/* add this fs to nvlist */
+	(void) snprintf(guidstring, sizeof (guidstring),
+	    "0x%llx", (longlong_t)guid);
+	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
+	nvlist_free(nvfs);
+
+	/* iterate over children */
+	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
+
+	/* restore the value our own siblings expect to see */
+	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
+
+	zfs_close(zhp);
+	return (rv);
+}
+
+/*
+ * Build the nvlist describing 'fsname' and everything beneath it and hand
+ * it back through 'nvlp'.  If 'avlp' is non-NULL, a snapshot-GUID AVL tree
+ * over that nvlist is built as well.
+ *
+ * Returns 0 on success.  On failure '*nvlp' is set to NULL (and '*avlp' too
+ * if the walk itself failed) and EZFS_BADTYPE, EZFS_NOMEM or the walk's
+ * error is returned; if the dataset cannot be opened, neither output is
+ * touched.
+ */
+static int
+gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
+    const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
+{
+	send_data_t walk = { 0 };
+	zfs_handle_t *top;
+	int rv;
+
+	top = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (top == NULL)
+		return (EZFS_BADTYPE);
+
+	walk.fromsnap = fromsnap;
+	walk.tosnap = tosnap;
+	VERIFY(0 == nvlist_alloc(&walk.fss, NV_UNIQUE_NAME, 0));
+
+	rv = send_iterate_fs(top, &walk);
+	if (rv != 0) {
+		nvlist_free(walk.fss);
+		if (avlp != NULL)
+			*avlp = NULL;
+		*nvlp = NULL;
+		return (rv);
+	}
+
+	if (avlp != NULL) {
+		*avlp = fsavl_create(walk.fss);
+		if (*avlp == NULL) {
+			nvlist_free(walk.fss);
+			*nvlp = NULL;
+			return (EZFS_NOMEM);
+		}
+	}
+
+	*nvlp = walk.fss;
+	return (0);
+}
+
+/*
+ * Routines for dealing with the sorted snapshot functionality
+ *
+ * zfs_node_t wraps one snapshot handle so that it can be linked into an AVL
+ * tree ordered by zfs_snapshot_compare().
+ */
+typedef struct zfs_node {
+	zfs_handle_t	*zn_handle;	/* the snapshot's handle */
+	avl_node_t	zn_avlnode;	/* AVL linkage */
+} zfs_node_t;
+
+/*
+ * Snapshot iterator callback: wrap 'zhp' in a zfs_node_t and insert it into
+ * the AVL tree passed as 'data'.  On success the handle stays open, owned by
+ * the tree node.  If the node cannot be allocated, the handle is closed and
+ * -1 is returned.
+ */
+static int
+zfs_sort_snaps(zfs_handle_t *zhp, void *data)
+{
+	avl_tree_t *avl = data;
+	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
+
+	if (node == NULL) {
+		/* nobody else will see this handle, so release it here */
+		zfs_close(zhp);
+		return (-1);
+	}
+
+	node->zn_handle = zhp;
+	avl_add(avl, node);
+	return (0);
+}
+
+/*
+ * AVL comparator for zfs_node_t: orders snapshots by creation time, using
+ * the hidden CREATETXG property to get an absolute ordering.
+ */
+/* ARGSUSED */
+static int
+zfs_snapshot_compare(const void *larg, const void *rarg)
+{
+	zfs_handle_t *lhs = ((zfs_node_t *)larg)->zn_handle;
+	zfs_handle_t *rhs = ((zfs_node_t *)rarg)->zn_handle;
+	uint64_t ltxg = zfs_prop_get_int(lhs, ZFS_PROP_CREATETXG);
+	uint64_t rtxg = zfs_prop_get_int(rhs, ZFS_PROP_CREATETXG);
+
+	if (ltxg > rtxg)
+		return (+1);
+	if (ltxg < rtxg)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Like zfs_iter_snapshots(), but invokes 'callback' on the snapshots of
+ * 'zhp' in creation order.  The snapshots are first collected into a
+ * temporary AVL tree, then visited in order.  The return value is the
+ * bitwise OR of the collection result and every callback result.
+ */
+static int
+zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
+{
+	avl_tree_t sorted;
+	zfs_node_t *cur;
+	void *walk = NULL;
+	int rv;
+
+	avl_create(&sorted, zfs_snapshot_compare,
+	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
+
+	rv = zfs_iter_snapshots(zhp, zfs_sort_snaps, &sorted);
+
+	/* visit oldest first; every callback runs even after a failure */
+	cur = avl_first(&sorted);
+	while (cur != NULL) {
+		rv |= callback(cur->zn_handle, data);
+		cur = AVL_NEXT(&sorted, cur);
+	}
+
+	/* tear the temporary tree down again */
+	while ((cur = avl_destroy_nodes(&sorted, &walk)) != NULL)
+		free(cur);
+	avl_destroy(&sorted);
+
+	return (rv);
+}
+
+/*
+ * Routines specific to "zfs send"
+ *
+ * send_dump_data_t is the state shared by the dump_*() callbacks while the
+ * snapshots of a filesystem are being sent.
+ */
+typedef struct send_dump_data {
+	/* these are all just the short snapname (the part after the @) */
+	const char *fromsnap;
+	const char *tosnap;
+	/* short name of the snapshot handled most recently */
+	char lastsnap[ZFS_MAXNAMELEN];
+	/* seenfrom/seento: fromsnap/tosnap has been reached in the walk */
+	boolean_t seenfrom, seento, replicate, doall, fromorigin;
+	/* print a line to stderr for each snapshot sent */
+	boolean_t verbose;
+	/* file descriptor the stream is written to */
+	int outfd;
+	boolean_t err;
+	nvlist_t *fss;
+	avl_tree_t *fsavl;
+} send_dump_data_t;
+
+/*
+ * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
+ * NULL) to the file descriptor specified by outfd.
+ *
+ * 'zhp' must be a snapshot handle, and 'fromorigin' may only be set when no
+ * 'fromsnap' name is given.  Returns 0 on success; on failure the ioctl's
+ * errno is translated into a libzfs error, which is reported and returned.
+ */
+static int
+dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
+    int outfd)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
+
+	/* zc_name: snapshot to send; zc_value: incremental source, if any */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	if (fromsnap)
+		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
+	zc.zc_cookie = outfd;
+	zc.zc_obj = fromorigin;
+
+	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
+		char errbuf[1024];
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "warning: cannot send '%s'"), zhp->zfs_name);
+
+		switch (errno) {
+
+		case EXDEV:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not an earlier snapshot from the same fs"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+
+		case ENOENT:
+			/*
+			 * If the snapshot being sent does exist, then it must
+			 * be the incremental source that is missing.
+			 */
+			if (zfs_dataset_exists(hdl, zc.zc_name,
+			    ZFS_TYPE_SNAPSHOT)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "incremental source (@%s) does not exist"),
+				    zc.zc_value);
+			}
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+		/* these are all reported as a bad backup stream */
+		case EDQUOT:
+		case EFBIG:
+		case EIO:
+		case ENOLINK:
+		case ENOSPC:
+		case ENOSTR:
+		case ENXIO:
+		case EPIPE:
+		case ERANGE:
+		case EFAULT:
+		case EROFS:
+			zfs_error_aux(hdl, strerror(errno));
+			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, errno, errbuf));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Snapshot iteration callback used by dump_filesystem().  'arg' is the
+ * send_dump_data_t.  Snapshots are skipped until the incremental source
+ * has been seen and after the target has been seen; every snapshot in
+ * between is sent incrementally from the previously visited one, whose
+ * short name is tracked in sdd->lastsnap.  The handle is always closed
+ * here.  Returns the dump_ioctl() result, or 0 for a skipped snapshot.
+ */
+static int
+dump_snapshot(zfs_handle_t *zhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	const char *snapname = strchr(zhp->zfs_name, '@') + 1;
+	int err = 0;
+
+	if (sdd->fromsnap && !sdd->seenfrom &&
+	    strcmp(sdd->fromsnap, snapname) == 0) {
+		/* this is the incremental source: remember it, send nothing */
+		sdd->seenfrom = B_TRUE;
+		(void) strcpy(sdd->lastsnap, snapname);
+	} else if (!sdd->seento && sdd->seenfrom) {
+		boolean_t useorigin;
+
+		if (sdd->verbose) {
+			(void) fprintf(stderr, "sending from @%s to %s\n",
+			    sdd->lastsnap, zhp->zfs_name);
+		}
+
+		/* with no previous snapshot, optionally send from the origin */
+		useorigin = sdd->lastsnap[0] == '\0' &&
+		    (sdd->fromorigin || sdd->replicate);
+		err = dump_ioctl(zhp, sdd->lastsnap, useorigin, sdd->outfd);
+
+		if (!sdd->seento && strcmp(sdd->tosnap, snapname) == 0)
+			sdd->seento = B_TRUE;
+
+		(void) strcpy(sdd->lastsnap, snapname);
+	}
+
+	zfs_close(zhp);
+	return (err);
+}
+
+/*
+ * Send the requested snapshot(s) of the single filesystem 'zhp'; 'arg' is
+ * the send_dump_data_t.  If <fs>@tosnap does not exist a warning is
+ * printed, sdd->err is set and 0 is returned.  With 'doall' every
+ * snapshot between fromsnap and tosnap is sent via dump_snapshot();
+ * otherwise only tosnap is sent.  Returns the result of the underlying
+ * send, or -1 if the target snapshot cannot be opened.
+ */
+static int
+dump_filesystem(zfs_handle_t *zhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	int fd = zhp->zfs_hdl->libzfs_fd;
+	boolean_t nofrom = B_FALSE;
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *tozhp;
+	char toname[ZFS_MAXNAMELEN];
+	int rv;
+
+	/* the target snapshot has to exist on this filesystem */
+	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
+	    zhp->zfs_name, sdd->tosnap);
+	if (ioctl(fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+		(void) fprintf(stderr, "WARNING: "
+		    "could not send %s@%s: does not exist\n",
+		    zhp->zfs_name, sdd->tosnap);
+		sdd->err = B_TRUE;
+		return (0);
+	}
+
+	if (sdd->replicate && sdd->fromsnap) {
+		/*
+		 * If this fs does not have fromsnap, and we're doing
+		 * recursive, we need to send a full stream from the
+		 * beginning (or an incremental from the origin if this
+		 * is a clone).  If we're doing non-recursive, then let
+		 * them get the error.
+		 */
+		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
+		    zhp->zfs_name, sdd->fromsnap);
+		if (ioctl(fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
+			nofrom = B_TRUE;
+	}
+
+	if (!sdd->doall) {
+		/* single snapshot: open tosnap and send it directly */
+		(void) snprintf(toname, sizeof (toname), "%s@%s",
+		    zfs_get_name(zhp), sdd->tosnap);
+		tozhp = zfs_open(zhp->zfs_hdl, toname, ZFS_TYPE_SNAPSHOT);
+		if (tozhp == NULL)
+			return (-1);
+
+		rv = dump_ioctl(tozhp, nofrom ? NULL : sdd->fromsnap,
+		    sdd->fromorigin || nofrom, sdd->outfd);
+		sdd->seento = B_TRUE;
+		zfs_close(tozhp);
+		return (rv);
+	}
+
+	/* reset the per-filesystem traversal state */
+	sdd->lastsnap[0] = 0;
+	sdd->seento = 0;
+	sdd->seenfrom = 0;
+	if (sdd->fromsnap == NULL || nofrom)
+		sdd->seenfrom = B_TRUE;
+
+	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
+
+	if (!sdd->seenfrom) {
+		(void) fprintf(stderr,
+		    "WARNING: could not send %s@%s:\n"
+		    "incremental source (%s@%s) does not exist\n",
+		    zhp->zfs_name, sdd->tosnap,
+		    zhp->zfs_name, sdd->fromsnap);
+		sdd->err = B_TRUE;
+	} else if (!sdd->seento) {
+		(void) fprintf(stderr,
+		    "WARNING: could not send %s@%s:\n"
+		    "incremental source (%s@%s) "
+		    "is not earlier than it\n",
+		    zhp->zfs_name, sdd->tosnap,
+		    zhp->zfs_name, sdd->fromsnap);
+		sdd->err = B_TRUE;
+	}
+
+	return (rv);
+}
+
+/*
+ * Send every filesystem that is part of this operation; 'arg' is the
+ * send_dump_data_t.  Without 'replicate' only 'rzhp' itself is sent.
+ * With 'replicate' the sdd->fss list is walked repeatedly: a filesystem
+ * whose origin is in the list but has not been marked "sent" is deferred
+ * to a later pass, and each filesystem is marked "sent" once it has been
+ * dumped.  Returns 0 on success, -1 if a filesystem cannot be opened, or
+ * the first non-zero dump_filesystem() result.
+ */
+static int
+dump_filesystems(zfs_handle_t *rzhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	boolean_t needagain, progress;
+
+	if (!sdd->replicate)
+		return (dump_filesystem(rzhp, sdd));
+
+	do {
+		nvpair_t *fspair = NULL;
+
+		needagain = progress = B_FALSE;
+
+		while ((fspair =
+		    nvlist_next_nvpair(sdd->fss, fspair)) != NULL) {
+			nvlist_t *fslist;
+			nvlist_t *origin_nv;
+			uint64_t origin_guid = 0;
+			zfs_handle_t *zhp;
+			char *fsname;
+			int err;
+
+			VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
+			if (nvlist_lookup_boolean(fslist, "sent") == 0)
+				continue;
+
+			VERIFY(nvlist_lookup_string(fslist, "name",
+			    &fsname) == 0);
+			(void) nvlist_lookup_uint64(fslist, "origin",
+			    &origin_guid);
+
+			origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
+			if (origin_nv && nvlist_lookup_boolean(origin_nv,
+			    "sent") == ENOENT) {
+				/*
+				 * origin has not been sent yet;
+				 * skip this clone.
+				 */
+				needagain = B_TRUE;
+				continue;
+			}
+
+			zhp = zfs_open(rzhp->zfs_hdl, fsname,
+			    ZFS_TYPE_DATASET);
+			if (zhp == NULL)
+				return (-1);
+			err = dump_filesystem(zhp, sdd);
+			VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
+			progress = B_TRUE;
+			zfs_close(zhp);
+			if (err)
+				return (err);
+		}
+
+		/* a deferred clone requires that this pass sent something */
+		if (needagain)
+			assert(progress);
+	} while (needagain);
+
+	return (0);
+}
+
+/*
+ * Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL.
+ * If 'doall', dump all intermediate snaps.
+ * If 'replicate', dump special header and do recursively.
+ *
+ * For 'replicate' or 'doall' the individual streams are framed: a BEGIN
+ * record (whose payload is the packed header nvlist when replicating),
+ * an END record carrying the checksum of that header, the streams
+ * themselves, and a final END record.
+ *
+ * An empty (zero-length) 'fromsnap' is rejected.  Early failures return
+ * the value produced by zfs_error()/zfs_standard_error() or
+ * gather_nvlist(); otherwise the result is non-zero if dumping failed or
+ * any filesystem could not be sent, and 0 on success.
+ */
+int
+zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    boolean_t replicate, boolean_t doall, boolean_t fromorigin,
+    boolean_t verbose, int outfd)
+{
+	char errbuf[1024];
+	send_dump_data_t sdd = { 0 };
+	int err;
+	nvlist_t *fss = NULL;
+	avl_tree_t *fsavl = NULL;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot send '%s'"), zhp->zfs_name);
+
+	if (fromsnap && fromsnap[0] == '\0') {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "zero-length incremental source"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
+	}
+
+	if (replicate || doall) {
+		dmu_replay_record_t drr = { 0 };
+		char *packbuf = NULL;
+		size_t buflen = 0;
+		zio_cksum_t zc = { 0 };
+
+		assert(fromsnap || doall);
+
+		if (replicate) {
+			nvlist_t *hdrnv;
+
+			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
+			if (fromsnap) {
+				VERIFY(0 == nvlist_add_string(hdrnv,
+				    "fromsnap", fromsnap));
+			}
+			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
+
+			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
+			    fromsnap, tosnap, &fss, &fsavl);
+			if (err) {
+				/* don't leak the partially built header */
+				nvlist_free(hdrnv);
+				return (err);
+			}
+			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
+			err = nvlist_pack(hdrnv, &packbuf, &buflen,
+			    NV_ENCODE_XDR, 0);
+			nvlist_free(hdrnv);
+			if (err) {
+				fsavl_destroy(fsavl);
+				nvlist_free(fss);
+				return (zfs_standard_error(zhp->zfs_hdl,
+				    err, errbuf));
+			}
+		}
+
+		/* write first begin record */
+		drr.drr_type = DRR_BEGIN;
+		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
+		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
+		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
+		    sizeof (drr.drr_u.drr_begin.drr_toname),
+		    "%s@%s", zhp->zfs_name, tosnap);
+		drr.drr_payloadlen = buflen;
+		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
+		err = write(outfd, &drr, sizeof (drr));
+
+		/* write header nvlist */
+		if (err != -1) {
+			fletcher_4_incremental_native(packbuf, buflen, &zc);
+			err = write(outfd, packbuf, buflen);
+		}
+		free(packbuf);
+		if (err == -1) {
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+
+		/*
+		 * write end record; both writes above succeeded if we
+		 * get here, so no further check of 'err' is needed
+		 */
+		bzero(&drr, sizeof (drr));
+		drr.drr_type = DRR_END;
+		drr.drr_u.drr_end.drr_checksum = zc;
+		err = write(outfd, &drr, sizeof (drr));
+		if (err == -1) {
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+	}
+
+	/* dump each stream */
+	sdd.fromsnap = fromsnap;
+	sdd.tosnap = tosnap;
+	sdd.outfd = outfd;
+	sdd.replicate = replicate;
+	sdd.doall = doall;
+	sdd.fromorigin = fromorigin;
+	sdd.fss = fss;
+	sdd.fsavl = fsavl;
+	sdd.verbose = verbose;
+	err = dump_filesystems(zhp, &sdd);
+	fsavl_destroy(fsavl);
+	nvlist_free(fss);
+
+	if (replicate || doall) {
+		/*
+		 * write final end record.  NB: want to do this even if
+		 * there was some error, because it might not be totally
+		 * failed.
+		 */
+		dmu_replay_record_t drr = { 0 };
+		drr.drr_type = DRR_END;
+		if (write(outfd, &drr, sizeof (drr)) == -1) {
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+	}
+
+	return (err || sdd.err);
+}
+
+/*
+ * Routines specific to "zfs recv"
+ */
+
+/*
+ * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short
+ * reads.  If 'zc' is non-NULL the data read is folded into that running
+ * fletcher-4 checksum, byteswapped when 'byteswap' is set.
+ *
+ * Returns 0 on success.  If the read fails or the stream ends before
+ * 'ilen' bytes have arrived, an EZFS_BADSTREAM error is reported and the
+ * zfs_error() result is returned.
+ */
+static int
+recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
+    boolean_t byteswap, zio_cksum_t *zc)
+{
+	char *cp = buf;
+	int rv;
+	int len = ilen;
+
+	/*
+	 * Only advance on a successful read: adjusting cp/len by a
+	 * negative return value would move the pointer before the buffer.
+	 * Stopping once len reaches zero also avoids a trailing
+	 * zero-length read().
+	 */
+	while (len > 0) {
+		rv = read(fd, cp, len);
+		if (rv <= 0)
+			break;
+		cp += rv;
+		len -= rv;
+	}
+
+	/* len != 0 covers read errors, premature EOF and a negative ilen */
+	if (len != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to read from stream"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
+		    "cannot receive")));
+	}
+
+	if (zc) {
+		if (byteswap)
+			fletcher_4_incremental_byteswap(buf, ilen, zc);
+		else
+			fletcher_4_incremental_native(buf, ilen, zc);
+	}
+	return (0);
+}
+
+/*
+ * Read 'len' bytes of packed nvlist from 'fd' (checksummed into 'zc' by
+ * recv_read()) and unpack them into '*nvp'.  Returns 0 on success, ENOMEM
+ * if the temporary buffer cannot be allocated, the recv_read() error if
+ * the read fails, or EINVAL if the data does not unpack.
+ */
+static int
+recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
+    boolean_t byteswap, zio_cksum_t *zc)
+{
+	char *packed = zfs_alloc(hdl, len);
+	int rc;
+
+	if (packed == NULL)
+		return (ENOMEM);
+
+	rc = recv_read(hdl, fd, packed, len, byteswap, zc);
+	if (rc == 0) {
+		rc = nvlist_unpack(packed, len, nvp, 0);
+		free(packed);
+		if (rc == 0)
+			return (0);
+
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (malformed nvlist)"));
+		return (EINVAL);
+	}
+
+	/* the read itself failed; pass its error through */
+	free(packed);
+	return (rc);
+}
+
+/*
+ * Rename dataset 'name' to 'tryname'.  If 'tryname' is NULL or that
+ * rename fails, and the component of 'name' starting at 'baselen' does
+ * not already begin with "recv-", rename it to a temporary
+ * "recv-<pid>-<seq>" name instead so a later pass can retry.  The name
+ * chosen is stored in 'newname'.
+ *
+ * Returns 0 if the rename to 'tryname' succeeded, EAGAIN whenever the
+ * temporary-name path was taken, -1 if the dataset or its changelist
+ * could not be obtained, the changelist_prefix() error, or otherwise the
+ * error from the first rename attempt.
+ */
+static int
+recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
+    int baselen, char *newname, recvflags_t flags)
+{
+	static int seq;
+	zfs_cmd_t zc = { 0 };
+	int err;
+	prop_changelist_t *clp;
+	zfs_handle_t *zhp;
+
+	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
+	if (zhp == NULL)
+		return (-1);
+	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+	    flags.force ? MS_FORCE : 0);
+	zfs_close(zhp);
+	if (clp == NULL)
+		return (-1);
+	err = changelist_prefix(clp);
+	if (err) {
+		/* the changelist is ours to release on every path */
+		changelist_free(clp);
+		return (err);
+	}
+
+	/*
+	 * The source of the rename is the same for both attempts below, so
+	 * fill it in up front; otherwise the fallback rename would be
+	 * issued with an empty zc_name whenever tryname is NULL.
+	 */
+	zc.zc_objset_type = DMU_OST_ZFS;
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	if (tryname) {
+		(void) strcpy(newname, tryname);
+
+		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
+
+		if (flags.verbose) {
+			(void) printf("attempting rename %s to %s\n",
+			    zc.zc_name, zc.zc_value);
+		}
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+		if (err == 0)
+			changelist_rename(clp, name, tryname);
+	} else {
+		err = ENOENT;
+	}
+
+	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
+		seq++;
+
+		/* the snprintf() below supplies the terminating NUL */
+		(void) strncpy(newname, name, baselen);
+		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
+		    "recv-%u-%u", getpid(), seq);
+		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
+
+		if (flags.verbose) {
+			(void) printf("failed - trying rename %s to %s\n",
+			    zc.zc_name, zc.zc_value);
+		}
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+		if (err == 0)
+			changelist_rename(clp, name, newname);
+		if (err && flags.verbose) {
+			(void) printf("failed (%u) - "
+			    "will try again on next pass\n", errno);
+		}
+		/* even a successful temporary rename needs another pass */
+		err = EAGAIN;
+	} else if (flags.verbose) {
+		if (err == 0)
+			(void) printf("success\n");
+		else
+			(void) printf("failed (%u)\n", errno);
+	}
+
+	(void) changelist_postfix(clp);
+	changelist_free(clp);
+
+	return (err);
+}
+
+/*
+ * Destroy dataset 'name'.  If the destroy ioctl fails, fall back to
+ * recv_rename() with no target name, which moves the dataset to a
+ * temporary name (returned in 'newname') so a later pass can retry.
+ *
+ * Returns 0 if the dataset was destroyed, -1 if the dataset or its
+ * changelist could not be obtained, the changelist_prefix() error, or
+ * the recv_rename() result when the destroy failed.
+ */
+static int
+recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
+    char *newname, recvflags_t flags)
+{
+	zfs_cmd_t zc = { 0 };
+	int err = 0;
+	prop_changelist_t *clp;
+	zfs_handle_t *zhp;
+
+	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
+	if (zhp == NULL)
+		return (-1);
+	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
+	    flags.force ? MS_FORCE : 0);
+	zfs_close(zhp);
+	if (clp == NULL)
+		return (-1);
+	err = changelist_prefix(clp);
+	if (err) {
+		/* the changelist is ours to release on every path */
+		changelist_free(clp);
+		return (err);
+	}
+
+	zc.zc_objset_type = DMU_OST_ZFS;
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	if (flags.verbose)
+		(void) printf("attempting destroy %s\n", zc.zc_name);
+	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
+
+	if (err == 0) {
+		if (flags.verbose)
+			(void) printf("success\n");
+		changelist_remove(clp, zc.zc_name);
+	}
+
+	(void) changelist_postfix(clp);
+	changelist_free(clp);
+
+	/* could not destroy it: get it out of the way by renaming */
+	if (err != 0)
+		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
+
+	return (err);
+}
+
+/* Search state passed to guid_to_name_cb() by guid_to_name(). */
+typedef struct guid_to_name_data {
+	uint64_t guid;	/* guid being searched for */
+	char *name;	/* caller's buffer; receives the matching name */
+} guid_to_name_data_t;
+
+/*
+ * zfs_iter_children() callback for guid_to_name(); 'arg' is the
+ * guid_to_name_data_t.  If this dataset's guid is the one being searched
+ * for, its name is copied into the caller's buffer and EEXIST is returned
+ * to stop the iteration; otherwise the search recurses into the dataset's
+ * children.  The handle is closed here on every path.
+ */
+static int
+guid_to_name_cb(zfs_handle_t *zhp, void *arg)
+{
+	guid_to_name_data_t *gtnd = arg;
+	int err;
+
+	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
+		(void) strcpy(gtnd->name, zhp->zfs_name);
+		/* close the handle on the match path too, or it leaks */
+		zfs_close(zhp);
+		return (EEXIST);
+	}
+	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
+	zfs_close(zhp);
+	return (err);
+}
+
+/*
+ * Find the local dataset whose guid is 'guid' and store its name in
+ * 'name'.  The children of 'parent' are searched first (unless 'parent'
+ * names a snapshot); failing that, the search is repeated starting from
+ * the first component of 'parent', i.e. the text before its first '/'.
+ *
+ * Returns 0 if a match was found, ENOENT otherwise.
+ */
+static int
+guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
+    char *name)
+{
+	/* exhaustive search all local snapshots */
+	guid_to_name_data_t gtnd;
+	int err = 0;
+	zfs_handle_t *zhp;
+	char pname[ZFS_MAXNAMELEN];
+	char *cp;
+
+	gtnd.guid = guid;
+	gtnd.name = name;
+
+	if (strchr(parent, '@') == NULL) {
+		zhp = make_dataset_handle(hdl, parent);
+		if (zhp != NULL) {
+			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
+			zfs_close(zhp);
+			if (err == EEXIST)
+				return (0);
+		}
+	}
+
+	/*
+	 * Truncate a private copy at the first '/' rather than
+	 * temporarily writing through the caller's const string.
+	 */
+	(void) strlcpy(pname, parent, sizeof (pname));
+	cp = strchr(pname, '/');
+	if (cp)
+		*cp = '\0';
+	zhp = make_dataset_handle(hdl, pname);
+
+	if (zhp) {
+		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
+		zfs_close(zhp);
+	}
+
+	return (err == EEXIST ? 0 : ENOENT);
+
+}
+
+/*
+ * Return true if dataset guid1 is created before guid2.
+ *
+ * A guid of 0 stands for "no snapshot": the result is 0 if guid2 is 0,
+ * otherwise 1 if guid1 is 0.  Both guids are otherwise looked up in 'avl'
+ * and the corresponding snapshots are opened and compared with
+ * zfs_snapshot_compare().  Returns -1 if either snapshot cannot be
+ * opened.
+ */
+static int
+created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
+    uint64_t guid1, uint64_t guid2)
+{
+	nvlist_t *nvfs;
+	char *fsname, *snapname;
+	char buf[ZFS_MAXNAMELEN];
+	int rv;
+	zfs_node_t zn1, zn2;
+
+	if (guid2 == 0)
+		return (0);
+	if (guid1 == 0)
+		return (1);
+
+	nvfs = fsavl_find(avl, guid1, &snapname);
+	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
+	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
+	if (zn1.zn_handle == NULL)
+		return (-1);
+
+	nvfs = fsavl_find(avl, guid2, &snapname);
+	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
+	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
+	if (zn2.zn_handle == NULL) {
+		/*
+		 * The second open failed, so there is nothing to close
+		 * for zn2; release the first handle instead of leaking it.
+		 */
+		zfs_close(zn1.zn_handle);
+		return (-1);
+	}
+
+	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
+
+	zfs_close(zn1.zn_handle);
+	zfs_close(zn2.zn_handle);
+
+	return (rv);
+}
+
+/*
+ * Bring the local datasets under 'tofs' in line with the filesystems and
+ * snapshots described by an incremental replication stream ('stream_nv',
+ * indexed by guid in 'stream_avl').  For each local filesystem this:
+ *   - promotes it when its origin differs from the stream's and
+ *     created_before() says the stream's origin is the earlier one;
+ *   - destroys local snapshots/filesystems whose guids are absent from
+ *     the stream (only when flags.force is set);
+ *   - applies the stream's "snapprops" to matching snapshots;
+ *   - renames snapshots and filesystems whose names differ from the
+ *     stream's.
+ * The whole pass is repeated for as long as a pass both left work undone
+ * and made progress.
+ *
+ * Returns 0 immediately for a dry run.  Otherwise returns the
+ * gather_nvlist() error, -1 if created_before() fails, or 'needagain'
+ * (non-zero when work remained that could not be completed).
+ */
+static int
+recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
+    recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
+{
+	nvlist_t *local_nv;
+	avl_tree_t *local_avl;
+	nvpair_t *fselem, *nextfselem;
+	char *tosnap, *fromsnap;
+	char newname[ZFS_MAXNAMELEN];
+	int error;
+	boolean_t needagain, progress;
+
+	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
+	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
+
+	if (flags.dryrun)
+		return (0);
+
+again:
+	needagain = progress = B_FALSE;
+
+	/* re-gather the local state on every pass; earlier passes changed it */
+	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
+	    &local_nv, &local_avl)) != 0)
+		return (error);
+
+	/*
+	 * Process deletes and renames
+	 */
+	for (fselem = nvlist_next_nvpair(local_nv, NULL);
+	    fselem; fselem = nextfselem) {
+		nvlist_t *nvfs, *snaps;
+		nvlist_t *stream_nvfs = NULL;
+		nvpair_t *snapelem, *nextsnapelem;
+		uint64_t fromguid = 0;
+		uint64_t originguid = 0;
+		uint64_t stream_originguid = 0;
+		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
+		char *fsname, *stream_fsname;
+
+		/* fetch the successor first so 'continue' advances the loop */
+		nextfselem = nvlist_next_nvpair(local_nv, fselem);
+
+		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
+		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
+		    &parent_fromsnap_guid));
+		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
+
+		/*
+		 * First find the stream's fs, so we can check for
+		 * a different origin (due to "zfs promote")
+		 */
+		for (snapelem = nvlist_next_nvpair(snaps, NULL);
+		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
+			uint64_t thisguid;
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
+
+			if (stream_nvfs != NULL)
+				break;
+		}
+
+		/* check for promote */
+		/*
+		 * NOTE(review): stream_nvfs may still be NULL here; this
+		 * relies on nvlist_lookup_uint64() tolerating a NULL
+		 * nvlist -- confirm.
+		 */
+		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
+		    &stream_originguid);
+		if (stream_nvfs && originguid != stream_originguid) {
+			switch (created_before(hdl, local_avl,
+			    stream_originguid, originguid)) {
+			case 1: {
+				/* promote it! */
+				zfs_cmd_t zc = { 0 };
+				nvlist_t *origin_nvfs;
+				char *origin_fsname;
+
+				if (flags.verbose)
+					(void) printf("promoting %s\n", fsname);
+
+				origin_nvfs = fsavl_find(local_avl, originguid,
+				    NULL);
+				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
+				    "name", &origin_fsname));
+				(void) strlcpy(zc.zc_value, origin_fsname,
+				    sizeof (zc.zc_value));
+				(void) strlcpy(zc.zc_name, fsname,
+				    sizeof (zc.zc_name));
+				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+				if (error == 0)
+					progress = B_TRUE;
+				break;
+			}
+			default:
+				break;
+			case -1:
+				/* created_before() failed; give up entirely */
+				fsavl_destroy(local_avl);
+				nvlist_free(local_nv);
+				return (-1);
+			}
+			/*
+			 * We had/have the wrong origin, therefore our
+			 * list of snapshots is wrong.  Need to handle
+			 * them on the next pass.
+			 */
+			needagain = B_TRUE;
+			continue;
+		}
+
+		/* reconcile each local snapshot with the stream */
+		for (snapelem = nvlist_next_nvpair(snaps, NULL);
+		    snapelem; snapelem = nextsnapelem) {
+			uint64_t thisguid;
+			char *stream_snapname;
+			nvlist_t *found, *props;
+
+			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			found = fsavl_find(stream_avl, thisguid,
+			    &stream_snapname);
+
+			/* check for delete */
+			if (found == NULL) {
+				char name[ZFS_MAXNAMELEN];
+
+				if (!flags.force)
+					continue;
+
+				(void) snprintf(name, sizeof (name), "%s@%s",
+				    fsname, nvpair_name(snapelem));
+
+				error = recv_destroy(hdl, name,
+				    strlen(fsname)+1, newname, flags);
+				if (error)
+					needagain = B_TRUE;
+				else
+					progress = B_TRUE;
+				continue;
+			}
+
+			stream_nvfs = found;
+
+			/* apply the stream's properties for this snapshot */
+			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
+			    &props) && 0 == nvlist_lookup_nvlist(props,
+			    stream_snapname, &props)) {
+				zfs_cmd_t zc = { 0 };
+
+				zc.zc_cookie = B_TRUE; /* clear current props */
+				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
+				    "%s@%s", fsname, nvpair_name(snapelem));
+				if (zcmd_write_src_nvlist(hdl, &zc,
+				    props) == 0) {
+					(void) zfs_ioctl(hdl,
+					    ZFS_IOC_SET_PROP, &zc);
+					zcmd_free_nvlists(&zc);
+				}
+			}
+
+			/* check for different snapname */
+			if (strcmp(nvpair_name(snapelem),
+			    stream_snapname) != 0) {
+				char name[ZFS_MAXNAMELEN];
+				char tryname[ZFS_MAXNAMELEN];
+
+				(void) snprintf(name, sizeof (name), "%s@%s",
+				    fsname, nvpair_name(snapelem));
+				/*
+				 * NOTE(review): the size passed below is
+				 * sizeof (name), not sizeof (tryname); the
+				 * two arrays are the same size so the bound
+				 * is still right.
+				 */
+				(void) snprintf(tryname, sizeof (name), "%s@%s",
+				    fsname, stream_snapname);
+
+				error = recv_rename(hdl, name, tryname,
+				    strlen(fsname)+1, newname, flags);
+				if (error)
+					needagain = B_TRUE;
+				else
+					progress = B_TRUE;
+			}
+
+			/* remember the guid of the local copy of fromsnap */
+			if (strcmp(stream_snapname, fromsnap) == 0)
+				fromguid = thisguid;
+		}
+
+		/* check for delete */
+		if (stream_nvfs == NULL) {
+			if (!flags.force)
+				continue;
+
+			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
+			    newname, flags);
+			if (error)
+				needagain = B_TRUE;
+			else
+				progress = B_TRUE;
+			continue;
+		}
+
+		/*
+		 * NOTE(review): the 'continue' is only taken when
+		 * flags.verbose is set; without -v a filesystem lacking
+		 * fromsnap falls through to the rename check below.
+		 * Looks unintended -- confirm.
+		 */
+		if (fromguid == 0 && flags.verbose) {
+			(void) printf("local fs %s does not have fromsnap "
+			    "(%s in stream); must have been deleted locally; "
+			    "ignoring\n", fsname, fromsnap);
+			continue;
+		}
+
+		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
+		    "name", &stream_fsname));
+		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
+		    "parentfromsnap", &stream_parent_fromsnap_guid));
+
+		/* check for rename */
+		/*
+		 * NOTE(review): strrchr() returns NULL for a name with no
+		 * '/'; presumably every name reaching this point has one --
+		 * confirm.
+		 */
+		if ((stream_parent_fromsnap_guid != 0 &&
+		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
+		    strcmp(strrchr(fsname, '/'),
+		    strrchr(stream_fsname, '/')) != 0) {
+			nvlist_t *parent;
+			char tryname[ZFS_MAXNAMELEN];
+
+			parent = fsavl_find(local_avl,
+			    stream_parent_fromsnap_guid, NULL);
+			/*
+			 * NB: parent might not be found if we used the
+			 * tosnap for stream_parent_fromsnap_guid,
+			 * because the parent is a newly-created fs;
+			 * we'll be able to rename it after we recv the
+			 * new fs.
+			 */
+			if (parent != NULL) {
+				char *pname;
+
+				VERIFY(0 == nvlist_lookup_string(parent, "name",
+				    &pname));
+				(void) snprintf(tryname, sizeof (tryname),
+				    "%s%s", pname, strrchr(stream_fsname, '/'));
+			} else {
+				tryname[0] = '\0';
+				if (flags.verbose) {
+					(void) printf("local fs %s new parent "
+					    "not found\n", fsname);
+				}
+			}
+
+			error = recv_rename(hdl, fsname, tryname,
+			    strlen(tofs)+1, newname, flags);
+			if (error)
+				needagain = B_TRUE;
+			else
+				progress = B_TRUE;
+		}
+	}
+
+	fsavl_destroy(local_avl);
+	nvlist_free(local_nv);
+
+	if (needagain && progress) {
+		/* do another pass to fix up temporary names */
+		if (flags.verbose)
+			(void) printf("another pass:\n");
+		goto again;
+	}
+
+	return (needagain);
+}
+
+/*
+ * Receive a stream package whose BEGIN record 'drr' has already been
+ * read by the caller; 'zc' is the running header checksum.
+ *
+ * Reads the optional header nvlist (present when drr_payloadlen != 0),
+ * then the END record, and verifies the header checksum.  When the header
+ * names a "fromsnap", recv_incremental_replication() is run both before
+ * and after the contained streams are received.  The contained streams
+ * are received by calling zfs_receive_impl() repeatedly until it returns
+ * an error; ENODATA ends the loop without counting as an error.
+ *
+ * 'destname' must not contain '@'.  Returns -1 if any contained receive
+ * reported an error, -2 if only the last recv_incremental_replication()
+ * call reported unfinished work, otherwise the most recent error value
+ * (0 on success).
+ */
+static int
+zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
+    recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
+    char **top_zfs)
+{
+	nvlist_t *stream_nv = NULL;
+	avl_tree_t *stream_avl = NULL;
+	char *fromsnap = NULL;
+	char tofs[ZFS_MAXNAMELEN];
+	char errbuf[1024];
+	dmu_replay_record_t drre;
+	int error;
+	boolean_t anyerr = B_FALSE;
+	boolean_t softerr = B_FALSE;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (strchr(destname, '@')) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can not specify snapshot name for multi-snapshot stream"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	assert(drr->drr_type == DRR_BEGIN);
+	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
+	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
+
+	/*
+	 * Read in the nvlist from the stream.
+	 */
+	if (drr->drr_payloadlen != 0) {
+		if (!flags.isprefix) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "must use -d to receive replication "
+			    "(send -R) stream"));
+			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+		}
+
+		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
+		    &stream_nv, flags.byteswap, zc);
+		if (error) {
+			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			goto out;
+		}
+	}
+
+	/*
+	 * Read in the end record and verify checksum.
+	 */
+	/* the END record itself is not folded into the checksum (zc NULL) */
+	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
+	    flags.byteswap, NULL)))
+		goto out;
+	if (flags.byteswap) {
+		drre.drr_type = BSWAP_32(drre.drr_type);
+		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
+	}
+	if (drre.drr_type != DRR_END) {
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "incorrect header checksum"));
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): stream_nv is NULL when there was no payload; this
+	 * relies on nvlist_lookup_string() tolerating a NULL nvlist --
+	 * confirm.
+	 */
+	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
+
+	if (drr->drr_payloadlen != 0) {
+		nvlist_t *stream_fss;
+
+		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
+		    &stream_fss));
+		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "couldn't allocate avl tree"));
+			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
+			goto out;
+		}
+
+		if (fromsnap != NULL) {
+			/*
+			 * Build the local top-level fs name: destname plus
+			 * the stream's name from its first '/' or '@' on,
+			 * with the snapshot part cut off.
+			 */
+			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
+			if (flags.isprefix) {
+				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
+				    "/@");
+				/* zfs_receive_one() will create_parents() */
+				(void) strlcat(tofs,
+				    &drr->drr_u.drr_begin.drr_toname[i],
+				    ZFS_MAXNAMELEN);
+				/*
+				 * NOTE(review): assumes drr_toname contains
+				 * an '@' and that it survived strlcat()
+				 * truncation; otherwise strchr() returns
+				 * NULL here.
+				 */
+				*strchr(tofs, '@') = '\0';
+			}
+			softerr = recv_incremental_replication(hdl, tofs,
+			    flags, stream_nv, stream_avl);
+		}
+	}
+
+
+	/* Finally, receive each contained stream */
+	do {
+		/*
+		 * we should figure out if it has a recoverable
+		 * error, in which case do a recv_skip() and drive on.
+		 * Note, if we fail due to already having this guid,
+		 * zfs_receive_one() will take care of it (ie,
+		 * recv_skip() and return 0).
+		 */
+		error = zfs_receive_impl(hdl, destname, flags, fd,
+		    stream_avl, top_zfs);
+		if (error == ENODATA) {
+			error = 0;
+			break;
+		}
+		anyerr |= error;
+	} while (error == 0);
+
+	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
+		/*
+		 * Now that we have the fs's they sent us, try the
+		 * renames again.
+		 */
+		softerr = recv_incremental_replication(hdl, tofs, flags,
+		    stream_nv, stream_avl);
+	}
+
+out:
+	fsavl_destroy(stream_avl);
+	if (stream_nv)
+		nvlist_free(stream_nv);
+	/* a hard error (-1) takes precedence over a soft one (-2) */
+	if (softerr)
+		error = -2;
+	if (anyerr)
+		error = -1;
+	return (error);
+}
+
+/*
+ * Read and discard records from 'fd' up to and including the next
+ * DRR_END record.  Payloads of DRR_OBJECT and DRR_WRITE records are read
+ * into a scratch buffer and thrown away.
+ *
+ * Returns 0 once a DRR_END record has been consumed, and -1 if the
+ * scratch buffer cannot be allocated, a record header cannot be read, or
+ * a record announces a payload larger than the scratch buffer.
+ */
+static int
+recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
+{
+	const size_t bufsize = 1<<20;
+	dmu_replay_record_t *drr;
+	void *buf = malloc(bufsize);
+	uint64_t paylen;
+
+	if (buf == NULL)
+		return (-1);
+
+	/* XXX would be great to use lseek if possible... */
+	drr = buf;
+
+	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
+	    byteswap, NULL) == 0) {
+		if (byteswap)
+			drr->drr_type = BSWAP_32(drr->drr_type);
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+			/* NB: not to be used on v2 stream packages */
+			assert(drr->drr_payloadlen == 0);
+			break;
+
+		case DRR_END:
+			free(buf);
+			return (0);
+
+		case DRR_OBJECT:
+			if (byteswap) {
+				drr->drr_u.drr_object.drr_bonuslen =
+				    BSWAP_32(drr->drr_u.drr_object.
+				    drr_bonuslen);
+			}
+			/*
+			 * The length comes from the stream; never read
+			 * more than the scratch buffer can hold.
+			 */
+			paylen = drr->drr_u.drr_object.drr_bonuslen;
+			paylen = P2ROUNDUP(paylen, 8);
+			if (paylen > bufsize) {
+				free(buf);
+				return (-1);
+			}
+			/* the payload overwrites the header in 'buf' */
+			(void) recv_read(hdl, fd, buf, paylen,
+			    B_FALSE, NULL);
+			break;
+
+		case DRR_WRITE:
+			if (byteswap) {
+				drr->drr_u.drr_write.drr_length =
+				    BSWAP_64(drr->drr_u.drr_write.drr_length);
+			}
+			paylen = drr->drr_u.drr_write.drr_length;
+			if (paylen > bufsize) {
+				free(buf);
+				return (-1);
+			}
+			(void) recv_read(hdl, fd, buf, paylen, B_FALSE, NULL);
+			break;
+
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+			break;
+
+		default:
+			assert(!"invalid record type");
+		}
+	}
+
+	free(buf);
+	return (-1);
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd.
+ */
+static int
+zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
+    recvflags_t flags, dmu_replay_record_t *drr,
+    dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
+    char **top_zfs)
+{
+	zfs_cmd_t zc = { 0 };
+	time_t begin_time;
+	int ioctl_err, ioctl_errno, err, choplen;
+	char *cp;
+	struct drr_begin *drrb = &drr->drr_u.drr_begin;
+	char errbuf[1024];
+	char chopprefix[ZFS_MAXNAMELEN];
+	boolean_t newfs = B_FALSE;
+	boolean_t stream_wantsnewfs;
+	uint64_t parent_snapguid = 0;
+	prop_changelist_t *clp = NULL;
+	nvlist_t *snapprops_nvlist = NULL;
+
+	begin_time = time(NULL);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (stream_avl != NULL) {
+		char *snapname;
+		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
+		    &snapname);
+		nvlist_t *props;
+		int ret;
+
+		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
+		    &parent_snapguid);
+		err = nvlist_lookup_nvlist(fs, "props", &props);
+		if (err)
+			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+
+		if (flags.canmountoff) {
+			VERIFY(0 == nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
+		}
+		ret = zcmd_write_src_nvlist(hdl, &zc, props);
+		if (err)
+			nvlist_free(props);
+
+		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
+			VERIFY(0 == nvlist_lookup_nvlist(props,
+			    snapname, &snapprops_nvlist));
+		}
+
+		if (ret != 0)
+			return (-1);
+	}
+
+	/*
+	 * Determine how much of the snapshot name stored in the stream
+	 * we are going to tack on to the name they specified on the
+	 * command line, and how much we are going to chop off.
+	 *
+	 * If they specified a snapshot, chop the entire name stored in
+	 * the stream.
+	 */
+	(void) strcpy(chopprefix, drrb->drr_toname);
+	if (flags.isprefix) {
+		/*
+		 * They specified a fs with -d, we want to tack on
+		 * everything but the pool name stored in the stream
+		 */
+		if (strchr(tosnap, '@')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "argument - snapshot not allowed with -d"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+		cp = strchr(chopprefix, '/');
+		if (cp == NULL)
+			cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	} else if (strchr(tosnap, '@') == NULL) {
+		/*
+		 * If they specified a filesystem without -d, we want to
+		 * tack on everything after the fs specified in the
+		 * first name from the stream.
+		 */
+		cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	}
+	choplen = strlen(chopprefix);
+
+	/*
+	 * Determine name of destination snapshot, store in zc_value.
+	 */
+	(void) strcpy(zc.zc_value, tosnap);
+	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
+	    sizeof (zc.zc_value));
+	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
+		zcmd_free_nvlists(&zc);
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	}
+
+	/*
+	 * Determine the name of the origin snapshot, store in zc_string.
+	 */
+	if (drrb->drr_flags & DRR_FLAG_CLONE) {
+		if (guid_to_name(hdl, tosnap,
+		    drrb->drr_fromguid, zc.zc_string) != 0) {
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "local origin for clone %s does not exist"),
+			    zc.zc_value);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+		if (flags.verbose)
+			(void) printf("found clone origin %s\n", zc.zc_string);
+	}
+
+	stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
+	    (drrb->drr_flags & DRR_FLAG_CLONE));
+
+	if (stream_wantsnewfs) {
+		/*
+		 * if the parent fs does not exist, look for it based on
+		 * the parent snap GUID
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot receive new filesystem stream"));
+
+		(void) strcpy(zc.zc_name, zc.zc_value);
+		cp = strrchr(zc.zc_name, '/');
+		if (cp)
+			*cp = '\0';
+		if (cp &&
+		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+			char suffix[ZFS_MAXNAMELEN];
+			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
+			if (guid_to_name(hdl, tosnap, parent_snapguid,
+			    zc.zc_value) == 0) {
+				*strchr(zc.zc_value, '@') = '\0';
+				(void) strcat(zc.zc_value, suffix);
+			}
+		}
+	} else {
+		/*
+		 * if the fs does not exist, look for it based on the
+		 * fromsnap GUID
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot receive incremental stream"));
+
+		(void) strcpy(zc.zc_name, zc.zc_value);
+		*strchr(zc.zc_name, '@') = '\0';
+
+		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+			char snap[ZFS_MAXNAMELEN];
+			(void) strcpy(snap, strchr(zc.zc_value, '@'));
+			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
+			    zc.zc_value) == 0) {
+				*strchr(zc.zc_value, '@') = '\0';
+				(void) strcat(zc.zc_value, snap);
+			}
+		}
+	}
+
+	(void) strcpy(zc.zc_name, zc.zc_value);
+	*strchr(zc.zc_name, '@') = '\0';
+
+	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+		zfs_handle_t *zhp;
+		/*
+		 * Destination fs exists.  Therefore this should either
+		 * be an incremental, or the stream specifies a new fs
+		 * (full stream or clone) and they want us to blow it
+		 * away (and have therefore specified -F and removed any
+		 * snapshots).
+		 */
+
+		if (stream_wantsnewfs) {
+			if (!flags.force) {
+				zcmd_free_nvlists(&zc);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "destination '%s' exists\n"
+				    "must specify -F to overwrite it"),
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+			}
+			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+			    &zc) == 0) {
+				zcmd_free_nvlists(&zc);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "destination has snapshots (eg. %s)\n"
+				    "must destroy them to overwrite it"),
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+			}
+		}
+
+		if ((zhp = zfs_open(hdl, zc.zc_name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+
+		if (stream_wantsnewfs &&
+		    zhp->zfs_dmustats.dds_origin[0]) {
+			zcmd_free_nvlists(&zc);
+			zfs_close(zhp);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' is a clone\n"
+			    "must destroy it to overwrite it"),
+			    zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+		}
+
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
+		    stream_wantsnewfs) {
+			/* We can't do online recv in this case */
+			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
+			if (clp == NULL) {
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+			if (changelist_prefix(clp) != 0) {
+				changelist_free(clp);
+				zcmd_free_nvlists(&zc);
+				return (-1);
+			}
+		}
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
+		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
+			zfs_close(zhp);
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+		zfs_close(zhp);
+	} else {
+		/*
+		 * Destination filesystem does not exist.  Therefore we better
+		 * be creating a new filesystem (either from a full backup, or
+		 * a clone).  It would therefore be invalid if the user
+		 * specified only the pool name (i.e. if the destination name
+		 * contained no slash character).
+		 */
+		if (!stream_wantsnewfs ||
+		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
+			zcmd_free_nvlists(&zc);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' does not exist"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+
+		/*
+		 * Trim off the final dataset component so we perform the
+		 * recvbackup ioctl to the filesystems's parent.
+		 */
+		*cp = '\0';
+
+		if (flags.isprefix && !flags.dryrun &&
+		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
+			zcmd_free_nvlists(&zc);
+			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
+		}
+
+		newfs = B_TRUE;
+	}
+
+	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
+	zc.zc_cookie = infd;
+	zc.zc_guid = flags.force;
+	if (flags.verbose) {
+		(void) printf("%s %s stream of %s into %s\n",
+		    flags.dryrun ? "would receive" : "receiving",
+		    drrb->drr_fromguid ? "incremental" : "full",
+		    drrb->drr_toname, zc.zc_value);
+		(void) fflush(stdout);
+	}
+
+	if (flags.dryrun) {
+		zcmd_free_nvlists(&zc);
+		return (recv_skip(hdl, infd, flags.byteswap));
+	}
+
+	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
+	ioctl_errno = errno;
+	zcmd_free_nvlists(&zc);
+
+	if (err == 0 && snapprops_nvlist) {
+		zfs_cmd_t zc2 = { 0 };
+
+		(void) strcpy(zc2.zc_name, zc.zc_value);
+		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
+			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
+			zcmd_free_nvlists(&zc2);
+		}
+	}
+
+	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
+		/*
+		 * It may be that this snapshot already exists,
+		 * in which case we want to consume & ignore it
+		 * rather than failing.
+		 */
+		avl_tree_t *local_avl;
+		nvlist_t *local_nv, *fs;
+		char *cp = strchr(zc.zc_value, '@');
+
+		/*
+		 * XXX Do this faster by just iterating over snaps in
+		 * this fs.  Also if zc_value does not exist, we will
+		 * get a strange "does not exist" error message.
+		 */
+		*cp = '\0';
+		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
+		    &local_nv, &local_avl) == 0) {
+			*cp = '@';
+			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
+			fsavl_destroy(local_avl);
+			nvlist_free(local_nv);
+
+			if (fs != NULL) {
+				if (flags.verbose) {
+					(void) printf("snap %s already exists; "
+					    "ignoring\n", zc.zc_value);
+				}
+				ioctl_err = recv_skip(hdl, infd,
+				    flags.byteswap);
+			}
+		}
+		*cp = '@';
+	}
+
+
+	if (ioctl_err != 0) {
+		switch (ioctl_errno) {
+		case ENODEV:
+			cp = strchr(zc.zc_value, '@');
+			*cp = '\0';
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "most recent snapshot of %s does not\n"
+			    "match incremental source"), zc.zc_value);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			*cp = '@';
+			break;
+		case ETXTBSY:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination %s has been modified\n"
+			    "since most recent snapshot"), zc.zc_name);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			break;
+		case EEXIST:
+			cp = strchr(zc.zc_value, '@');
+			if (newfs) {
+				/* it's the containing fs that exists */
+				*cp = '\0';
+			}
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination already exists"));
+			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
+			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
+			    zc.zc_value);
+			*cp = '@';
+			break;
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		case ECKSUM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid stream (checksum mismatch)"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		default:
+			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
+		}
+	}
+
+	/*
+	 * Mount or recreate the /dev links for the target filesystem
+	 * (if created, or if we tore them down to do an incremental
+	 * restore), and the /dev links for the new snapshot (if
+	 * created). Also mount any children of the target filesystem
+	 * if we did an incremental receive.
+	 */
+	cp = strchr(zc.zc_value, '@');
+	if (cp && (ioctl_err == 0 || !newfs)) {
+		zfs_handle_t *h;
+
+		*cp = '\0';
+		h = zfs_open(hdl, zc.zc_value,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (h != NULL) {
+			if (h->zfs_type == ZFS_TYPE_VOLUME) {
+				*cp = '@';
+				err = zvol_create_link(hdl, h->zfs_name);
+				if (err == 0 && ioctl_err == 0)
+					err = zvol_create_link(hdl,
+					    zc.zc_value);
+			} else if (newfs) {
+				/*
+				 * Track the first/top of hierarchy fs,
+				 * for mounting and sharing later.
+				 */
+				if (top_zfs && *top_zfs == NULL)
+					*top_zfs = zfs_strdup(hdl, zc.zc_value);
+			}
+			zfs_close(h);
+		}
+		*cp = '@';
+	}
+
+	if (clp) {
+		err |= changelist_postfix(clp);
+		changelist_free(clp);
+	}
+
+	if (err || ioctl_err)
+		return (-1);
+
+	if (flags.verbose) {
+		char buf1[64];
+		char buf2[64];
+		uint64_t bytes = zc.zc_cookie;
+		time_t delta = time(NULL) - begin_time;
+		if (delta == 0)
+			delta = 1;
+		zfs_nicenum(bytes, buf1, sizeof (buf1));
+		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
+
+		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
+		    buf1, delta, buf2);
+	}
+
+	return (0);
+}
+
+/*
+ * Read and validate the BEGIN record of a send stream on infd, then hand the
+ * stream to the receiver matching its version: zfs_receive_one() for
+ * DMU_BACKUP_STREAM_VERSION or zfs_receive_package() for
+ * DMU_BACKUP_HEADER_VERSION.
+ *
+ * Returns ENODATA if the first record is an END record, the recv_read()
+ * error if the record cannot be read, the zfs_error() result for an invalid
+ * stream or a missing prefix filesystem, and otherwise the result of the
+ * chosen receiver.
+ */
+static int
+zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
+    int infd, avl_tree_t *stream_avl, char **top_zfs)
+{
+	int err;
+	dmu_replay_record_t drr, drr_noswap;
+	struct drr_begin *drrb = &drr.drr_u.drr_begin;
+	char errbuf[1024];
+	zio_cksum_t zcksum = { 0 };
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	/* with isprefix set, the dataset named by tosnap must already exist */
+	if (flags.isprefix &&
+	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
+		    "(%s) does not exist"), tosnap);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	/* read in the BEGIN record */
+	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
+	    &zcksum)))
+		return (err);
+
+	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
+		/* It's the double end record at the end of a package */
+		return (ENODATA);
+	}
+
+	/* the kernel needs the non-byteswapped begin record */
+	drr_noswap = drr;
+
+	/* a byteswapped magic number means the stream has the other endianness */
+	flags.byteswap = B_FALSE;
+	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+		/*
+		 * We computed the checksum in the wrong byteorder in
+		 * recv_read() above; do it again correctly.
+		 */
+		bzero(&zcksum, sizeof (zio_cksum_t));
+		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
+		flags.byteswap = B_TRUE;
+
+		/* swap the header fields in place; drr_noswap keeps the raw copy */
+		drr.drr_type = BSWAP_32(drr.drr_type);
+		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
+		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+		drrb->drr_version = BSWAP_64(drrb->drr_version);
+		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
+		drrb->drr_type = BSWAP_32(drrb->drr_type);
+		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
+		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
+	}
+
+	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad magic number)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	/* the name carried in the stream must contain a '@' */
+	if (strchr(drrb->drr_toname, '@') == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad snapshot name)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	/* dispatch on the stream version; anything else is rejected */
+	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
+		return (zfs_receive_one(hdl, infd, tosnap, flags,
+		    &drr, &drr_noswap, stream_avl, top_zfs));
+	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
+		return (zfs_receive_package(hdl, infd, tosnap, flags,
+		    &drr, &zcksum, top_zfs));
+	} else {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "stream is unsupported version %llu"),
+		    drrb->drr_version);
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd,
+ * then mounts and shares the top-level filesystem reported by the receive
+ * (if any).
+ *
+ * Return 0 on total success, -2 if some things couldn't be
+ * destroyed/renamed/promoted, -1 if some things couldn't be received.
+ * (-1 will override -2).  After an otherwise successful receive, failing to
+ * open the top-level filesystem, gather its changelist, or apply that
+ * changelist also yields -1.
+ */
+int
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
+    int infd, avl_tree_t *stream_avl)
+{
+	char *top_zfs = NULL;
+	int err;
+
+	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
+
+	if (err == 0 && top_zfs != NULL) {
+		prop_changelist_t *clp = NULL;
+		zfs_handle_t *zhp;
+
+		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
+		if (zhp != NULL) {
+			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
+			    CL_GATHER_MOUNT_ALWAYS, 0);
+			zfs_close(zhp);
+		}
+
+		if (clp == NULL) {
+			/* could not open the fs or gather its changelist */
+			err = -1;
+		} else {
+			/* mount and share received datasets */
+			if (changelist_postfix(clp) != 0)
+				err = -1;
+			changelist_free(clp);
+		}
+	}
+
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(top_zfs);
+
+	return (err);
+}
diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
new file mode 100644
index 000000000..c7eb04e74
--- /dev/null
+++ b/lib/libzfs/libzfs_status.c
@@ -0,0 +1,317 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * This file contains the functions which analyze the status of a pool.  This
+ * includes both the status of an active pool and the status of exported
+ * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
+ * the pool.  This status is independent (to a certain degree) from the state of
+ * the pool.  A pool's state describes only whether or not it is capable of
+ * providing the necessary fault tolerance for data.  The status describes the
+ * overall status of devices.  A pool that is online can still have a device
+ * that is experiencing errors.
+ *
+ * Only a subset of the possible faults can be detected using 'zpool status',
+ * and not all possible errors correspond to a FMA message ID.  The explanation
+ * is left up to the caller, depending on whether it is a live pool or an
+ * import.
+ */
+
+#include <libzfs.h>
+#include <string.h>
+#include <unistd.h>
+#include "libzfs_impl.h"
+
+/*
+ * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
+ * in libzfs.h.  Note that there are some status results which go past the end
+ * of this table, and hence have no associated message ID.
+ *
+ * The table is indexed directly by the zpool_status_t value returned from
+ * check_status().
+ */
+static char *zfs_msgid_table[] = {
+	"ZFS-8000-14",
+	"ZFS-8000-2Q",
+	"ZFS-8000-3C",
+	"ZFS-8000-4J",
+	"ZFS-8000-5E",
+	"ZFS-8000-6X",
+	"ZFS-8000-72",
+	"ZFS-8000-8A",
+	"ZFS-8000-9P",
+	"ZFS-8000-A5",
+	"ZFS-8000-EY",
+	"ZFS-8000-HC",
+	"ZFS-8000-JQ",
+	"ZFS-8000-K4",
+};
+
+/* Number of entries in zfs_msgid_table; statuses >= NMSGID have no ID. */
+#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
+
+/*
+ * Predicate for find_vdev_problem(): nonzero when the vdev state is
+ * VDEV_STATE_CANT_OPEN and its aux state is VDEV_AUX_OPEN_FAILED.
+ */
+/* ARGSUSED */
+static int
+vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (state != VDEV_STATE_CANT_OPEN)
+		return (0);
+
+	return (aux == VDEV_AUX_OPEN_FAILED);
+}
+
+/*
+ * Predicate for find_vdev_problem(): nonzero when the vdev state is
+ * VDEV_STATE_FAULTED.
+ */
+/* ARGSUSED */
+static int
+vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (state == VDEV_STATE_FAULTED)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Predicate for find_vdev_problem(): nonzero when the vdev has a nonzero
+ * error count or its state is VDEV_STATE_DEGRADED.
+ */
+/* ARGSUSED */
+static int
+vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (errs != 0)
+		return (1);
+
+	return (state == VDEV_STATE_DEGRADED);
+}
+
+/*
+ * Predicate for find_vdev_problem(): nonzero when the vdev state is
+ * VDEV_STATE_CANT_OPEN, whatever the aux state.
+ */
+/* ARGSUSED */
+static int
+vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (state == VDEV_STATE_CANT_OPEN)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Predicate for find_vdev_problem(): nonzero when the vdev state is
+ * VDEV_STATE_OFFLINE.
+ */
+/* ARGSUSED */
+static int
+vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (state == VDEV_STATE_OFFLINE)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Walk the vdev tree rooted at 'vdev' and report whether any leaf device
+ * satisfies the predicate 'func'.  The predicate is handed the leaf's state,
+ * its aux state, and the sum of its read, write and checksum error counts.
+ */
+static boolean_t
+find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
+{
+	nvlist_t **kids;
+	vdev_stat_t *stats;
+	uint_t i, nkids, nstats;
+	uint64_t errs;
+	char *vtype;
+
+	/*
+	 * Problems underneath a 'replacing' vdev are not reported: we are
+	 * presumably already repairing them, and the resilver itself is
+	 * picked up later.
+	 */
+	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &vtype) == 0);
+	if (strcmp(vtype, VDEV_TYPE_REPLACING) == 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &kids,
+	    &nkids) != 0) {
+		/* No child array: this is a leaf, so test its own stats. */
+		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&stats, &nstats) == 0);
+
+		errs = stats->vs_read_errors + stats->vs_write_errors +
+		    stats->vs_checksum_errors;
+		if (func(stats->vs_state, stats->vs_aux, errs))
+			return (B_TRUE);
+		return (B_FALSE);
+	}
+
+	/* Interior vdev: the first child subtree with a problem decides. */
+	for (i = 0; i < nkids; i++) {
+		if (find_vdev_problem(kids[i], func))
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Active pool health status.
+ *
+ * To determine the status for a pool, we make several passes over the config,
+ * picking the most egregious error we find.  In order of importance, we do the
+ * following:
+ *
+ *	- Check for a complete and valid configuration
+ *	- Look for any faulted or missing devices in a non-replicated config
+ *	- Check for any data errors
+ *	- Check for any faulted or missing devices in a replicated config
+ *	- Look for any devices showing errors
+ *	- Check for any resilvering devices
+ *
+ * There can obviously be multiple errors within a single pool, so this routine
+ * only picks the most damaging of all the current errors to report.
+ *
+ * 'config' is the pool configuration nvlist; it must contain the version,
+ * vdev tree (with stats on the root vdev) and pool state entries, which are
+ * looked up under verify().  When 'isimport' is B_TRUE the persistent data
+ * error check and the failing-device check are skipped.
+ *
+ * Returns the first ZPOOL_STATUS_* value whose condition matches, or
+ * ZPOOL_STATUS_OK if none does.
+ */
+static zpool_status_t
+check_status(nvlist_t *config, boolean_t isimport)
+{
+	nvlist_t *nvroot;
+	vdev_stat_t *vs;
+	uint_t vsc;
+	uint64_t nerr;
+	uint64_t version;
+	uint64_t stateval;
+	uint64_t suspended;
+	uint64_t hostid = 0;
+
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	    (uint64_t **)&vs, &vsc) == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &stateval) == 0);
+	/* the hostid entry is optional; hostid stays 0 when it is absent */
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+	/*
+	 * Pool last accessed by another system.
+	 */
+	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
+	    stateval == POOL_STATE_ACTIVE)
+		return (ZPOOL_STATUS_HOSTID_MISMATCH);
+
+	/*
+	 * Newer on-disk version.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
+		return (ZPOOL_STATUS_VERSION_NEWER);
+
+	/*
+	 * Check that the config is complete.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
+		return (ZPOOL_STATUS_BAD_GUID_SUM);
+
+	/*
+	 * Check whether the pool has suspended due to failed I/O.  The mere
+	 * presence of the entry selects this branch; its value only chooses
+	 * between the two I/O failure statuses.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
+	    &suspended) == 0) {
+		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
+			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
+		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+	}
+
+	/*
+	 * Could not read a log.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
+		return (ZPOOL_STATUS_BAD_LOG);
+	}
+
+	/*
+	 * Bad devices in non-replicated config.  These three checks only
+	 * apply while the root vdev itself cannot be opened.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    find_vdev_problem(nvroot, vdev_faulted))
+		return (ZPOOL_STATUS_FAULTED_DEV_NR);
+
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    find_vdev_problem(nvroot, vdev_missing))
+		return (ZPOOL_STATUS_MISSING_DEV_NR);
+
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    find_vdev_problem(nvroot, vdev_broken))
+		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
+
+	/*
+	 * Corrupted pool metadata
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
+		return (ZPOOL_STATUS_CORRUPT_POOL);
+
+	/*
+	 * Persistent data errors.
+	 */
+	if (!isimport) {
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
+		    &nerr) == 0 && nerr != 0)
+			return (ZPOOL_STATUS_CORRUPT_DATA);
+	}
+
+	/*
+	 * Missing devices in a replicated config.
+	 */
+	if (find_vdev_problem(nvroot, vdev_faulted))
+		return (ZPOOL_STATUS_FAULTED_DEV_R);
+	if (find_vdev_problem(nvroot, vdev_missing))
+		return (ZPOOL_STATUS_MISSING_DEV_R);
+	if (find_vdev_problem(nvroot, vdev_broken))
+		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
+
+	/*
+	 * Devices with errors
+	 */
+	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
+		return (ZPOOL_STATUS_FAILING_DEV);
+
+	/*
+	 * Offlined devices
+	 */
+	if (find_vdev_problem(nvroot, vdev_offlined))
+		return (ZPOOL_STATUS_OFFLINE_DEV);
+
+	/*
+	 * Currently resilvering
+	 */
+	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
+		return (ZPOOL_STATUS_RESILVERING);
+
+	/*
+	 * Outdated, but usable, version
+	 */
+	if (version < SPA_VERSION)
+		return (ZPOOL_STATUS_VERSION_OLDER);
+
+	return (ZPOOL_STATUS_OK);
+}
+
+/*
+ * Compute the status of an open pool from the config held in its handle.
+ * On return *msgid points at the matching zfs_msgid_table entry, or is NULL
+ * when the status lies past the end of that table.
+ */
+zpool_status_t
+zpool_get_status(zpool_handle_t *zhp, char **msgid)
+{
+	zpool_status_t status;
+
+	status = check_status(zhp->zpool_config, B_FALSE);
+	*msgid = (status < NMSGID) ? zfs_msgid_table[status] : NULL;
+
+	return (status);
+}
+
+/*
+ * Compute the status of a pool described by 'config' as an import candidate
+ * (check_status() is called with isimport set).  On return *msgid points at
+ * the matching zfs_msgid_table entry, or is NULL when the status lies past
+ * the end of that table.
+ */
+zpool_status_t
+zpool_import_status(nvlist_t *config, char **msgid)
+{
+	zpool_status_t status;
+
+	status = check_status(config, B_TRUE);
+	*msgid = (status < NMSGID) ? zfs_msgid_table[status] : NULL;
+
+	return (status);
+}
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
new file mode 100644
index 000000000..54de0f4b5
--- /dev/null
+++ b/lib/libzfs/libzfs_util.c
@@ -0,0 +1,1403 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Internal utility routines for the ZFS library.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <math.h>
+#include <sys/mnttab.h>
+#include <sys/mntent.h>
+#include <sys/types.h>
+
+#include <libzfs.h>
+
+#include "libzfs_impl.h"
+#include "zfs_prop.h"
+
+/*
+ * Return the error code currently stored in the handle (the value last
+ * recorded by zfs_verror()).
+ */
+int
+libzfs_errno(libzfs_handle_t *hdl)
+{
+	return (hdl->libzfs_error);
+}
+
+/*
+ * Return the action string currently stored in the handle (the message last
+ * formatted by zfs_verror()).  The pointer refers to storage inside hdl.
+ */
+const char *
+libzfs_error_action(libzfs_handle_t *hdl)
+{
+	return (hdl->libzfs_action);
+}
+
+/*
+ * Return a description of the error currently stored in the handle.
+ *
+ * If a description has been stored in hdl->libzfs_desc (non-empty string),
+ * that text is returned as is.  Otherwise the generic, localized message for
+ * hdl->libzfs_error is returned.  An error code not listed below is expected
+ * to be 0 and yields "no error".
+ */
+const char *
+libzfs_error_description(libzfs_handle_t *hdl)
+{
+	/* a specific description takes precedence over the generic text */
+	if (hdl->libzfs_desc[0] != '\0')
+		return (hdl->libzfs_desc);
+
+	switch (hdl->libzfs_error) {
+	case EZFS_NOMEM:
+		return (dgettext(TEXT_DOMAIN, "out of memory"));
+	case EZFS_BADPROP:
+		return (dgettext(TEXT_DOMAIN, "invalid property value"));
+	case EZFS_PROPREADONLY:
+		return (dgettext(TEXT_DOMAIN, "read only property"));
+	case EZFS_PROPTYPE:
+		return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
+		    "datasets of this type"));
+	case EZFS_PROPNONINHERIT:
+		return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
+	case EZFS_PROPSPACE:
+		return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+	case EZFS_BADTYPE:
+		return (dgettext(TEXT_DOMAIN, "operation not applicable to "
+		    "datasets of this type"));
+	case EZFS_BUSY:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
+	case EZFS_EXISTS:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
+	case EZFS_NOENT:
+		return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+	case EZFS_BADSTREAM:
+		return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
+	case EZFS_DSREADONLY:
+		return (dgettext(TEXT_DOMAIN, "dataset is read only"));
+	case EZFS_VOLTOOBIG:
+		return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
+		    "this system"));
+	case EZFS_VOLHASDATA:
+		return (dgettext(TEXT_DOMAIN, "volume has data"));
+	case EZFS_INVALIDNAME:
+		return (dgettext(TEXT_DOMAIN, "invalid name"));
+	case EZFS_BADRESTORE:
+		return (dgettext(TEXT_DOMAIN, "unable to restore to "
+		    "destination"));
+	case EZFS_BADBACKUP:
+		return (dgettext(TEXT_DOMAIN, "backup failed"));
+	case EZFS_BADTARGET:
+		return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
+	case EZFS_NODEVICE:
+		return (dgettext(TEXT_DOMAIN, "no such device in pool"));
+	case EZFS_BADDEV:
+		return (dgettext(TEXT_DOMAIN, "invalid device"));
+	case EZFS_NOREPLICAS:
+		return (dgettext(TEXT_DOMAIN, "no valid replicas"));
+	case EZFS_RESILVERING:
+		return (dgettext(TEXT_DOMAIN, "currently resilvering"));
+	case EZFS_BADVERSION:
+		return (dgettext(TEXT_DOMAIN, "unsupported version"));
+	case EZFS_POOLUNAVAIL:
+		return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
+	case EZFS_DEVOVERFLOW:
+		return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
+	case EZFS_BADPATH:
+		return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
+	case EZFS_CROSSTARGET:
+		return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
+		    "pools"));
+	case EZFS_ZONED:
+		return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
+	case EZFS_MOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "mount failed"));
+	case EZFS_UMOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "umount failed"));
+	case EZFS_UNSHARENFSFAILED:
+		return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
+	case EZFS_SHARENFSFAILED:
+		return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+	case EZFS_UNSHARESMBFAILED:
+		return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
+	case EZFS_SHARESMBFAILED:
+		return (dgettext(TEXT_DOMAIN, "smb add share failed"));
+	case EZFS_ISCSISVCUNAVAIL:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgt service need to be enabled by "
+		    "a privileged user"));
+	case EZFS_DEVLINKS:
+		return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
+	case EZFS_PERM:
+		return (dgettext(TEXT_DOMAIN, "permission denied"));
+	case EZFS_NOSPC:
+		return (dgettext(TEXT_DOMAIN, "out of space"));
+	case EZFS_IO:
+		return (dgettext(TEXT_DOMAIN, "I/O error"));
+	case EZFS_INTR:
+		return (dgettext(TEXT_DOMAIN, "signal received"));
+	case EZFS_ISSPARE:
+		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
+		    "spare"));
+	case EZFS_INVALCONFIG:
+		return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
+	case EZFS_RECURSIVE:
+		return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
+	case EZFS_NOHISTORY:
+		return (dgettext(TEXT_DOMAIN, "no history available"));
+	case EZFS_UNSHAREISCSIFAILED:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgtd failed request to unshare"));
+	case EZFS_SHAREISCSIFAILED:
+		return (dgettext(TEXT_DOMAIN,
+		    "iscsitgtd failed request to share"));
+	case EZFS_POOLPROPS:
+		return (dgettext(TEXT_DOMAIN, "failed to retrieve "
+		    "pool properties"));
+	case EZFS_POOL_NOTSUP:
+		return (dgettext(TEXT_DOMAIN, "operation not supported "
+		    "on this type of pool"));
+	case EZFS_POOL_INVALARG:
+		return (dgettext(TEXT_DOMAIN, "invalid argument for "
+		    "this pool operation"));
+	case EZFS_NAMETOOLONG:
+		return (dgettext(TEXT_DOMAIN, "dataset name is too long"));
+	case EZFS_OPENFAILED:
+		return (dgettext(TEXT_DOMAIN, "open failed"));
+	case EZFS_NOCAP:
+		return (dgettext(TEXT_DOMAIN,
+		    "disk capacity information could not be retrieved"));
+	case EZFS_LABELFAILED:
+		return (dgettext(TEXT_DOMAIN, "write of label failed"));
+	case EZFS_BADWHO:
+		return (dgettext(TEXT_DOMAIN, "invalid user/group"));
+	case EZFS_BADPERM:
+		return (dgettext(TEXT_DOMAIN, "invalid permission"));
+	case EZFS_BADPERMSET:
+		return (dgettext(TEXT_DOMAIN, "invalid permission set name"));
+	case EZFS_NODELEGATION:
+		return (dgettext(TEXT_DOMAIN, "delegated administration is "
+		    "disabled on pool"));
+	case EZFS_PERMRDONLY:
+		return (dgettext(TEXT_DOMAIN, "snapshot permissions cannot be"
+		    " modified"));
+	case EZFS_BADCACHE:
+		return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
+	case EZFS_ISL2CACHE:
+		return (dgettext(TEXT_DOMAIN, "device is in use as a cache"));
+	case EZFS_VDEVNOTSUP:
+		return (dgettext(TEXT_DOMAIN, "vdev specification is not "
+		    "supported"));
+	case EZFS_NOTSUP:
+		return (dgettext(TEXT_DOMAIN, "operation not supported "
+		    "on this dataset"));
+	case EZFS_ACTIVE_SPARE:
+		return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
+		    "device"));
+	case EZFS_UNKNOWN:
+		return (dgettext(TEXT_DOMAIN, "unknown error"));
+	default:
+		/* every EZFS_* code should be handled above */
+		assert(hdl->libzfs_error == 0);
+		return (dgettext(TEXT_DOMAIN, "no error"));
+	}
+}
+
+/*
+ * Format an auxiliary error description into hdl->libzfs_desc (truncated to
+ * the size of that buffer) and mark it active, so that the next zfs_verror()
+ * call keeps it instead of clearing it.
+ */
+/*PRINTFLIKE2*/
+void
+zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	(void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc), fmt,
+	    args);
+	va_end(args);
+
+	hdl->libzfs_desc_active = 1;
+}
+
+/*
+ * Record an error on the handle: format the action message into
+ * hdl->libzfs_action, store the error code, and settle the auxiliary
+ * description.  When hdl->libzfs_printerr is set the error is also written
+ * to stderr; EZFS_UNKNOWN then aborts the program and EZFS_NOMEM exits with
+ * status 1.
+ */
+static void
+zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
+{
+	(void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
+	    fmt, ap);
+	hdl->libzfs_error = error;
+
+	/*
+	 * A description set through zfs_error_aux() is kept for this one
+	 * error only; without one, discard whatever text is left over.
+	 */
+	if (!hdl->libzfs_desc_active)
+		hdl->libzfs_desc[0] = '\0';
+	hdl->libzfs_desc_active = 0;
+
+	if (!hdl->libzfs_printerr)
+		return;
+
+	if (error == EZFS_UNKNOWN) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
+		    "error: %s\n"), libzfs_error_description(hdl));
+		abort();
+	}
+
+	(void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
+	    libzfs_error_description(hdl));
+
+	if (error == EZFS_NOMEM)
+		exit(1);
+}
+
+/*
+ * Record 'error' on the handle with the fixed action message 'msg'.  The
+ * message is passed through "%s", so it is never interpreted as a format
+ * string.  Returns the result of zfs_error_fmt(), which is always -1.
+ */
+int
+zfs_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	return (zfs_error_fmt(hdl, error, "%s", msg));
+}
+
+/*
+ * Record 'error' on the handle with a printf-style action message; the
+ * work is done by zfs_verror().  Always returns -1 so callers can write
+ * "return (zfs_error_fmt(...))".
+ */
+/*PRINTFLIKE3*/
+int
+zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	zfs_verror(hdl, error, fmt, ap);
+
+	va_end(ap);
+
+	return (-1);
+}
+
+/*
+ * Handle the errno values that are translated identically for pools and
+ * datasets.  If 'error' is one of them, the matching EZFS_* code is recorded
+ * through zfs_verror() and -1 is returned; otherwise nothing is recorded and
+ * 0 is returned so the caller can apply its own mapping.
+ */
+static int
+zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
+    va_list ap)
+{
+	int ezfs;
+
+	switch (error) {
+	case EPERM:
+	case EACCES:
+		ezfs = EZFS_PERM;
+		break;
+
+	case ECANCELED:
+		ezfs = EZFS_NODELEGATION;
+		break;
+
+	case EIO:
+		ezfs = EZFS_IO;
+		break;
+
+	case EINTR:
+		ezfs = EZFS_INTR;
+		break;
+
+	default:
+		/* not a common error; leave it to the caller */
+		return (0);
+	}
+
+	zfs_verror(hdl, ezfs, fmt, ap);
+	return (-1);
+}
+
+/*
+ * Translate the errno value 'error' from a dataset operation and record it
+ * on the handle with the fixed action message 'msg'.  Thin wrapper around
+ * zfs_standard_error_fmt(); 'msg' is passed through "%s".
+ */
+int
+zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	return (zfs_standard_error_fmt(hdl, error, "%s", msg));
+}
+
+/*
+ * Translate the errno value 'error' from a dataset operation into the
+ * matching EZFS_* code and record it on the handle together with the
+ * printf-style action message 'fmt'.  Errors shared with pool operations
+ * are handled by zfs_common_error() first; anything without a specific
+ * mapping is recorded as EZFS_UNKNOWN with strerror(error) as description.
+ *
+ * Always returns -1.
+ */
+/*PRINTFLIKE3*/
+int
+zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENXIO:
+	case ENODEV:
+		zfs_verror(hdl, EZFS_IO, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset does not exist"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case ENOSPC:
+	case EDQUOT:
+		/* break rather than return, so va_end() below always runs */
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+	case EROFS:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot permissions cannot be modified"));
+		zfs_verror(hdl, EZFS_PERMRDONLY, fmt, ap);
+		break;
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
+		break;
+	case ENOTSUP:
+		zfs_verror(hdl, EZFS_BADVERSION, fmt, ap);
+		break;
+	default:
+		/*
+		 * Describe the error we were handed, not the global errno,
+		 * which may have been overwritten since the failure.  The
+		 * text goes through "%s" so it is never parsed as a format.
+		 */
+		zfs_error_aux(hdl, "%s", strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+		break;
+	}
+
+	va_end(ap);
+	return (-1);
+}
+
+/*
+ * Translate the errno value 'error' from a pool operation and record it on
+ * the handle with the fixed action message 'msg'.  Thin wrapper around
+ * zpool_standard_error_fmt(); 'msg' is passed through "%s".
+ */
+int
+zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
+{
+	return (zpool_standard_error_fmt(hdl, error, "%s", msg));
+}
+
+/*
+ * Translate the errno value 'error' from a pool operation into the matching
+ * EZFS_* code and record it on the handle together with the printf-style
+ * action message 'fmt'.  Errors shared with dataset operations are handled
+ * by zfs_common_error() first; anything without a specific mapping is
+ * recorded as EZFS_UNKNOWN with strerror(error) as description.
+ *
+ * Always returns -1.
+ */
+/*PRINTFLIKE3*/
+int
+zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENODEV:
+		zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+
+	case ENXIO:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "one or more devices is currently unavailable"));
+		zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
+		break;
+
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
+		break;
+
+	case ENOTSUP:
+		zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap);
+		break;
+
+	case EINVAL:
+		zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap);
+		break;
+
+	case ENOSPC:
+	case EDQUOT:
+		/* break rather than return, so va_end() below always runs */
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		break;
+
+	default:
+		/* pass the text through "%s" so it is never used as a format */
+		zfs_error_aux(hdl, "%s", strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+		break;
+	}
+
+	va_end(ap);
+	return (-1);
+}
+
+/*
+ * Report an out-of-memory condition on 'hdl' as EZFS_NOMEM with the
+ * message "internal error", and hand back whatever zfs_error() returns.
+ *
+ * NOTE(review): this was historically described as aborting the program;
+ * whether it does is decided inside zfs_error(), which is not shown here.
+ * Callers in this file still handle the case where it returns.
+ */
+int
+no_memory(libzfs_handle_t *hdl)
+{
+	int rc = zfs_error(hdl, EZFS_NOMEM, "internal error");
+
+	return (rc);
+}
+
+/*
+ * Allocate 'size' bytes of zero-filled memory with calloc().  A failed
+ * allocation is reported through no_memory(); if that call returns, NULL
+ * is handed back to the caller.
+ */
+void *
+zfs_alloc(libzfs_handle_t *hdl, size_t size)
+{
+	void *buf = calloc(1, size);
+
+	if (buf == NULL)
+		(void) no_memory(hdl);
+
+	return (buf);
+}
+
+/*
+ * A safe form of realloc(), which also zeroes newly allocated space.
+ *
+ * 'ptr' currently holds 'oldsize' bytes and is resized to 'newsize'.  When
+ * the buffer grows, the bytes past 'oldsize' are cleared; when it shrinks
+ * or keeps its size there is nothing to clear.  On failure the condition
+ * is reported through no_memory(), the original buffer is freed and NULL
+ * is returned, so the caller must not use 'ptr' again.
+ */
+void *
+zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize)
+{
+	void *ret;
+
+	if ((ret = realloc(ptr, newsize)) == NULL) {
+		(void) no_memory(hdl);
+		free(ptr);
+		return (NULL);
+	}
+
+	/*
+	 * Only clear when growing: 'newsize - oldsize' is unsigned and would
+	 * wrap to a huge length if the buffer were being shrunk.
+	 */
+	if (newsize > oldsize)
+		bzero((char *)ret + oldsize, (newsize - oldsize));
+
+	return (ret);
+}
+
+/*
+ * Duplicate 'str' with strdup().  A failed allocation is reported through
+ * no_memory(); if that call returns, NULL is handed back to the caller.
+ */
+char *
+zfs_strdup(libzfs_handle_t *hdl, const char *str)
+{
+	char *copy = strdup(str);
+
+	if (copy == NULL)
+		(void) no_memory(hdl);
+
+	return (copy);
+}
+
+/*
+ * Convert a number to an appropriately human-readable output.
+ *
+ * 'num' is scaled down by powers of 1024 and written to 'buf' (bounded by
+ * 'buflen', as with snprintf()) followed by the matching unit letter from
+ * " KMGTPE".  Values below 1024 are printed without a suffix.
+ */
+void
+zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+	uint64_t n = num;
+	int index = 0;
+	char u;
+
+	while (n >= 1024) {
+		n /= 1024;
+		index++;
+	}
+
+	u = " KMGTPE"[index];
+
+	/*
+	 * uint64_t is not 'unsigned long long' on every platform, so the
+	 * value is cast to the exact type that "%llu" expects.
+	 */
+	if (index == 0) {
+		(void) snprintf(buf, buflen, "%llu", (unsigned long long)n);
+	} else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
+		/*
+		 * If this is an even multiple of the base, always display
+		 * without any decimal precision.
+		 */
+		(void) snprintf(buf, buflen, "%llu%c",
+		    (unsigned long long)n, u);
+	} else {
+		/*
+		 * We want to choose a precision that reflects the best choice
+		 * for fitting in 5 characters.  This can get rather tricky when
+		 * we have numbers that are very close to an order of magnitude.
+		 * For example, when displaying 10239 (which is really 9.999K),
+		 * we want only a single place of precision for 10.0K.  We could
+		 * develop some complex heuristics for this, but it's much
+		 * easier just to try each combination in turn.
+		 */
+		int i;
+		for (i = 2; i >= 0; i--) {
+			if (snprintf(buf, buflen, "%.*f%c", i,
+			    (double)num / (1ULL << 10 * index), u) <= 5)
+				break;
+		}
+	}
+}
+
+/*
+ * Store 'printerr' in the handle's libzfs_printerr flag.
+ * NOTE(review): presumably consulted by the error-reporting code to decide
+ * whether messages are printed; that code is not part of this section.
+ */
+void
+libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
+{
+	hdl->libzfs_printerr = printerr;
+}
+
+/*
+ * Create a library handle: allocate it zero-filled, open ZFS_DEV
+ * read/write and open MNTTAB for reading.  If any of those steps fails,
+ * everything acquired so far is released and NULL is returned.  The share
+ * table is opened as well, but a failure to open it is tolerated and just
+ * leaves libzfs_sharetab NULL.  The property tables are initialised before
+ * the handle is returned.
+ */
+libzfs_handle_t *
+libzfs_init(void)
+{
+	libzfs_handle_t *hdl = calloc(sizeof (libzfs_handle_t), 1);
+
+	if (hdl == NULL)
+		return (NULL);
+
+	hdl->libzfs_fd = open(ZFS_DEV, O_RDWR);
+	if (hdl->libzfs_fd < 0)
+		goto fail_free;
+
+	hdl->libzfs_mnttab = fopen(MNTTAB, "r");
+	if (hdl->libzfs_mnttab == NULL)
+		goto fail_close;
+
+	hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r");
+
+	zfs_prop_init();
+	zpool_prop_init();
+
+	return (hdl);
+
+fail_close:
+	(void) close(hdl->libzfs_fd);
+fail_free:
+	free(hdl);
+	return (NULL);
+}
+
+/*
+ * Tear down a handle created by libzfs_init(): close the device descriptor
+ * and any open table streams, shut down libshare state, drop the pending
+ * log string, release the cached pool handles and namespace, and finally
+ * free the handle itself.
+ */
+void
+libzfs_fini(libzfs_handle_t *hdl)
+{
+	(void) close(hdl->libzfs_fd);
+
+	if (hdl->libzfs_mnttab != NULL)
+		(void) fclose(hdl->libzfs_mnttab);
+
+	if (hdl->libzfs_sharetab != NULL)
+		(void) fclose(hdl->libzfs_sharetab);
+
+	zfs_uninit_libshare(hdl);
+
+	/* free(NULL) is a no-op, so the pointer needs no test first. */
+	free(hdl->libzfs_log_str);
+
+	zpool_free_handles(hdl);
+	namespace_clear(hdl);
+	free(hdl);
+}
+
+/*
+ * Return the library handle stored in the pool handle 'zhp'.
+ */
+libzfs_handle_t *
+zpool_get_handle(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
+/*
+ * Return the library handle stored in the dataset handle 'zhp'.
+ */
+libzfs_handle_t *
+zfs_get_handle(zfs_handle_t *zhp)
+{
+	return (zhp->zfs_hdl);
+}
+
+/*
+ * Return the pool handle recorded in the dataset handle 'zhp'.
+ */
+zpool_handle_t *
+zfs_get_pool_handle(const zfs_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
+/*
+ * Open a dataset given either a file system path or a dataset name.
+ *
+ * A 'path' that begins with '/' or "./" is treated as a path: it is
+ * stat'ed and the mnttab is scanned for the entry whose device number
+ * matches.  If that entry is a ZFS mount, the dataset named by its
+ * mnt_special field is opened as a file system.  Anything else is taken to
+ * be a dataset name of type 'argtype' and opened directly.
+ *
+ * Returns NULL if the path cannot be stat'ed, no mnttab entry matches, or
+ * the matching entry is not ZFS; the first and last cases also print a
+ * message on stderr.
+ */
+zfs_handle_t *
+zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
+{
+	struct stat64 statbuf;
+	struct extmnttab entry;
+	int ispath;
+
+	ispath = (path[0] == '/' ||
+	    strncmp(path, "./", strlen("./")) == 0);
+
+	/* Not a path, so assume it is a name of type 'argtype'. */
+	if (!ispath)
+		return (zfs_open(hdl, path, argtype));
+
+	if (stat64(path, &statbuf) != 0) {
+		(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
+		return (NULL);
+	}
+
+	rewind(hdl->libzfs_mnttab);
+	for (;;) {
+		/* Running off the end of the table means no match. */
+		if (getextmntent(hdl->libzfs_mnttab, &entry, 0) != 0)
+			return (NULL);
+
+		if (makedevice(entry.mnt_major, entry.mnt_minor) ==
+		    statbuf.st_dev)
+			break;
+	}
+
+	if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
+		    path);
+		return (NULL);
+	}
+
+	return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM));
+}
+
+/*
+ * Initialize the zc_nvlist_dst member to prepare for receiving an nvlist from
+ * an ioctl().
+ *
+ * A 'len' of 0 selects a default buffer size of 2048 bytes.  The buffer is
+ * obtained from zfs_alloc() and its address stored in the command as a
+ * 64-bit integer.  Returns 0 on success, or -1 if the allocation failed
+ * (in which case zc_nvlist_dst holds 0).
+ */
+int
+zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
+{
+	void *buf;
+
+	if (len == 0)
+		len = 2048;
+	zc->zc_nvlist_dst_size = len;
+
+	/*
+	 * Test the pointer itself: zc_nvlist_dst is an integer field and
+	 * must not be compared against the pointer constant NULL.
+	 */
+	buf = zfs_alloc(hdl, zc->zc_nvlist_dst_size);
+	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)buf;
+	if (buf == NULL)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Called when an ioctl() which returns an nvlist fails with ENOMEM.  This will
+ * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was
+ * filled in by the kernel to indicate the actual required size.
+ *
+ * The old buffer is freed and replaced rather than resized, so its
+ * contents are not preserved.  Returns 0 on success, or -1 if the new
+ * allocation failed (in which case zc_nvlist_dst holds 0).
+ */
+int
+zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+	void *buf;
+
+	free((void *)(uintptr_t)zc->zc_nvlist_dst);
+
+	/*
+	 * Test the pointer itself: zc_nvlist_dst is an integer field and
+	 * must not be compared against the pointer constant NULL.
+	 */
+	buf = zfs_alloc(hdl, zc->zc_nvlist_dst_size);
+	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)buf;
+	if (buf == NULL)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Called to free the conf, src and dst nvlists stored in the command
+ * structure.
+ *
+ * The stored addresses are cleared afterwards so that calling this again
+ * on the same command, or reusing the command, cannot free a buffer twice.
+ */
+void
+zcmd_free_nvlists(zfs_cmd_t *zc)
+{
+	free((void *)(uintptr_t)zc->zc_nvlist_conf);
+	free((void *)(uintptr_t)zc->zc_nvlist_src);
+	free((void *)(uintptr_t)zc->zc_nvlist_dst);
+
+	zc->zc_nvlist_conf = 0;
+	zc->zc_nvlist_src = 0;
+	zc->zc_nvlist_dst = 0;
+}
+
+/*
+ * Pack 'nvl' in native encoding into a buffer obtained from zfs_alloc()
+ * and store the buffer's address in '*outnv' and its length in '*outlen'.
+ * Returns 0 on success, or -1 if the buffer could not be allocated.
+ * Failures of the sizing or packing calls trip verify().
+ */
+static int
+zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen,
+    nvlist_t *nvl)
+{
+	size_t len;
+	char *packed;
+
+	verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0);
+
+	packed = zfs_alloc(hdl, len);
+	if (packed == NULL)
+		return (-1);
+
+	verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
+
+	*outlen = len;
+	*outnv = (uint64_t)(uintptr_t)packed;
+
+	return (0);
+}
+
+/*
+ * Pack 'nvl' into the command's zc_nvlist_conf / zc_nvlist_conf_size
+ * fields.  Returns 0 on success or -1 on allocation failure.
+ */
+int
+zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	uint64_t *bufp = &zc->zc_nvlist_conf;
+	uint64_t *lenp = &zc->zc_nvlist_conf_size;
+
+	return (zcmd_write_nvlist_com(hdl, bufp, lenp, nvl));
+}
+
+/*
+ * Pack 'nvl' into the command's zc_nvlist_src / zc_nvlist_src_size
+ * fields.  Returns 0 on success or -1 on allocation failure.
+ */
+int
+zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	uint64_t *bufp = &zc->zc_nvlist_src;
+	uint64_t *lenp = &zc->zc_nvlist_src_size;
+
+	return (zcmd_write_nvlist_com(hdl, bufp, lenp, nvl));
+}
+
+/*
+ * Unpack the packed nvlist held in the command's zc_nvlist_dst buffer
+ * (zc_nvlist_dst_size bytes) into '*nvlp'.  Returns 0 on success; any
+ * unpack failure is reported through no_memory() and its result returned.
+ */
+int
+zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
+{
+	void *packed = (void *)(uintptr_t)zc->zc_nvlist_dst;
+
+	if (nvlist_unpack(packed, zc->zc_nvlist_dst_size, nvlp, 0) == 0)
+		return (0);
+
+	return (no_memory(hdl));
+}
+
+/*
+ * Issue 'request' on the handle's device descriptor with 'zc' as the
+ * argument.  The handle's pending log string, if any, is attached to the
+ * command as zc_history for the duration of the call, then freed and
+ * detached.  Returns the ioctl() result.
+ */
+int
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
+{
+	int rc;
+
+	zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str;
+	rc = ioctl(hdl->libzfs_fd, request, zc);
+
+	/* free(NULL) is a no-op, so the pointer needs no test first. */
+	free(hdl->libzfs_log_str);
+	hdl->libzfs_log_str = NULL;
+
+	zc->zc_history = 0;
+
+	return (rc);
+}
+
+/*
+ * ================================================================
+ * API shared by zfs and zpool property management
+ * ================================================================
+ */
+
+/*
+ * Compute the column widths for 'get'-style output and print the header
+ * line.  Clears cb_first so that the headers are produced only once.  In
+ * scripted mode nothing else is done: no widths are computed and no
+ * header is printed.
+ *
+ * 'type' selects whether property numbers in the list are looked up as
+ * pool properties or as dataset properties.
+ */
+static void
+zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
+{
+	zprop_list_t *pl = cbp->cb_proplist;
+	int i;
+	char *title;
+	size_t len;
+
+	cbp->cb_first = B_FALSE;
+	if (cbp->cb_scripted)
+		return;
+
+	/*
+	 * Start with the length of the column headers.
+	 */
+	cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME"));
+	cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN,
+	    "PROPERTY"));
+	cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
+	    "VALUE"));
+	cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
+	    "SOURCE"));
+
+	/*
+	 * Go through and calculate the widths for each column.  For the
+	 * 'source' column, we kludge it up by taking the worst-case scenario of
+	 * inheriting from the longest name.  This is acceptable because in the
+	 * majority of cases 'SOURCE' is the last column displayed, and we don't
+	 * use the width anyway.  Note that the 'VALUE' column can be oversized,
+	 * if the name of the property is much longer than any values we find.
+	 */
+	for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
+		/*
+		 * 'PROPERTY' column
+		 */
+		if (pl->pl_prop != ZPROP_INVAL) {
+			const char *propname = (type == ZFS_TYPE_POOL) ?
+			    zpool_prop_to_name(pl->pl_prop) :
+			    zfs_prop_to_name(pl->pl_prop);
+
+			len = strlen(propname);
+			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+		} else {
+			/* Entries without a property number carry a name. */
+			len = strlen(pl->pl_user_prop);
+			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+		}
+
+		/*
+		 * 'VALUE' column
+		 */
+		if ((pl->pl_prop != ZFS_PROP_NAME || !pl->pl_all) &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
+			cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
+
+		/*
+		 * 'NAME' and 'SOURCE' columns
+		 */
+		if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME :
+		    ZFS_PROP_NAME) &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) {
+			cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width;
+			cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width +
+			    strlen(dgettext(TEXT_DOMAIN, "inherited from"));
+		}
+	}
+
+	/*
+	 * Now go through and print the headers.
+	 */
+	for (i = 0; i < 4; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			title = dgettext(TEXT_DOMAIN, "NAME");
+			break;
+		case GET_COL_PROPERTY:
+			title = dgettext(TEXT_DOMAIN, "PROPERTY");
+			break;
+		case GET_COL_VALUE:
+			title = dgettext(TEXT_DOMAIN, "VALUE");
+			break;
+		case GET_COL_SOURCE:
+			title = dgettext(TEXT_DOMAIN, "SOURCE");
+			break;
+		default:
+			title = NULL;
+		}
+
+		if (title != NULL) {
+			/*
+			 * The last column (slot 3, or the one followed by an
+			 * empty slot) is printed without padding.
+			 */
+			if (i == 3 || cbp->cb_columns[i + 1] == 0)
+				(void) printf("%s", title);
+			else
+				(void) printf("%-*s  ",
+				    cbp->cb_colwidths[cbp->cb_columns[i]],
+				    title);
+		}
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Display a single line of output, according to the settings in the callback
+ * structure.
+ *
+ * 'name', 'propname' and 'value' fill the NAME, PROPERTY and VALUE columns.
+ * The SOURCE column is derived from 'sourcetype'; 'source' is used only
+ * for inherited properties.  Nothing is printed when 'sourcetype' is not
+ * among the sources selected in cb_sources.  The header line is emitted
+ * first if it has not been printed yet.
+ */
+void
+zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
+    const char *propname, const char *value, zprop_source_t sourcetype,
+    const char *source)
+{
+	int i;
+	const char *str;
+	char buf[128];
+
+	/*
+	 * Ignore those source types that the user has chosen to ignore.
+	 */
+	if ((sourcetype & cbp->cb_sources) == 0)
+		return;
+
+	if (cbp->cb_first)
+		zprop_print_headers(cbp, cbp->cb_type);
+
+	for (i = 0; i < 4; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			str = name;
+			break;
+
+		case GET_COL_PROPERTY:
+			str = propname;
+			break;
+
+		case GET_COL_VALUE:
+			str = value;
+			break;
+
+		case GET_COL_SOURCE:
+			switch (sourcetype) {
+			case ZPROP_SRC_NONE:
+				str = "-";
+				break;
+
+			case ZPROP_SRC_DEFAULT:
+				str = "default";
+				break;
+
+			case ZPROP_SRC_LOCAL:
+				str = "local";
+				break;
+
+			case ZPROP_SRC_TEMPORARY:
+				str = "temporary";
+				break;
+
+			case ZPROP_SRC_INHERITED:
+				(void) snprintf(buf, sizeof (buf),
+				    "inherited from %s", source);
+				str = buf;
+				break;
+
+			default:
+				/*
+				 * Unhandled source type: print a placeholder
+				 * instead of an uninitialized or stale
+				 * pointer.
+				 */
+				str = "-";
+				break;
+			}
+			break;
+
+		default:
+			continue;
+		}
+
+		/*
+		 * Slot 3 is the last one, so there is no cb_columns[i + 1]
+		 * to look at; this matches the test in
+		 * zprop_print_headers().
+		 */
+		if (i == 3 || cbp->cb_columns[i + 1] == 0)
+			(void) printf("%s", str);
+		else if (cbp->cb_scripted)
+			(void) printf("%s\t", str);
+		else
+			(void) printf("%-*s  ",
+			    cbp->cb_colwidths[cbp->cb_columns[i]],
+			    str);
+
+	}
+
+	(void) printf("\n");
+}
+
+/*
+ * Given a numeric suffix, convert the value into a number of bits that the
+ * resulting value must be shifted.
+ *
+ * An empty suffix yields 0.  Otherwise the first character, compared
+ * case-insensitively, must be one of "BKMGTPEZ", giving a shift of 10
+ * times its position in that string.  Returns -1 for anything else; an
+ * auxiliary error is recorded only when 'hdl' is non-NULL, since the
+ * caller accepts a NULL handle.
+ */
+static int
+str2shift(libzfs_handle_t *hdl, const char *buf)
+{
+	const char *ends = "BKMGTPEZ";
+	size_t nends = strlen(ends);
+	size_t i;
+
+	if (buf[0] == '\0')
+		return (0);
+
+	/* <ctype.h> functions require an unsigned char value. */
+	for (i = 0; i < nends; i++) {
+		if (toupper((unsigned char)buf[0]) == ends[i])
+			break;
+	}
+	if (i == nends) {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid numeric suffix '%s'"), buf);
+		return (-1);
+	}
+
+	/*
+	 * We want to allow trailing 'b' characters for 'GB' or 'Mb'.  But don't
+	 * allow 'BB' - that's just weird.
+	 */
+	if (buf[1] == '\0' ||
+	    (toupper((unsigned char)buf[1]) == 'B' && buf[2] == '\0' &&
+	    toupper((unsigned char)buf[0]) != 'B'))
+		return ((int)(10 * i));
+
+	if (hdl)
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid numeric suffix '%s'"), buf);
+	return (-1);
+}
+
+/*
+ * Convert a string of the form '100G' into a real number.  Used when setting
+ * properties or creating a volume.
+ *
+ * The result is stored in '*num'.  Returns 0 on success and -1 on failure;
+ * on failure an explanation is recorded on 'hdl' as an auxiliary error
+ * when 'hdl' is non-NULL.  The value must start with a digit or '.', and
+ * may carry a decimal fraction and a size suffix understood by
+ * str2shift().
+ */
+int
+zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
+{
+	char *end;
+	int shift;
+
+	*num = 0;
+
+	/* Check to see if this looks like a number.  */
+	if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "bad numeric value '%s'"), value);
+		return (-1);
+	}
+
+	/*
+	 * Rely on strtoull() to process the numeric portion.  The result is
+	 * an unsigned 64-bit quantity, so the unsigned conversion is used to
+	 * cover its full range.
+	 */
+	errno = 0;
+	*num = strtoull(value, &end, 10);
+
+	/*
+	 * Check for ERANGE, which indicates that the value is too large to fit
+	 * in a 64-bit value.
+	 */
+	if (errno == ERANGE) {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "numeric value is too large"));
+		return (-1);
+	}
+
+	/*
+	 * If we have a decimal value, then do the computation with floating
+	 * point arithmetic.  Otherwise, use standard arithmetic.
+	 */
+	if (*end == '.') {
+		double fval = strtod(value, &end);
+
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		fval *= pow(2, shift);
+
+		/*
+		 * UINT64_MAX rounds up to 2^64 as a double, which itself is
+		 * out of range for the cast below, so reject it as well.
+		 */
+		if (fval >= (double)UINT64_MAX) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num = (uint64_t)fval;
+	} else {
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		/* Check for overflow */
+		if (shift >= 64 || (*num << shift) >> shift != *num) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num <<= shift;
+	}
+
+	return (0);
+}
+
+/*
+ * Given a propname=value nvpair to set, parse any numeric properties
+ * (index, boolean, etc) if they are specified as strings and add the
+ * resulting nvpair to the returned nvlist.
+ *
+ * At the DSL layer, all properties are either 64-bit numbers or strings.
+ * We want the user to be able to ignore this fact and specify properties
+ * as native values (numbers, for example) or as strings (to simplify
+ * command line utilities).  This also handles converting index types
+ * (compression, checksum, etc) from strings to their on-disk index.
+ *
+ * 'elem' is the pair to parse, 'prop' its property number and 'type'
+ * selects pool or dataset property tables.  The parsed value is added to
+ * 'ret' under the property's canonical name and is also returned through
+ * '*svalp' (string properties) or '*ivalp' (number and index properties);
+ * the one not used is left NULL / 0.  Returns 0 on success.  On a parse
+ * failure EZFS_BADPROP is reported with 'errbuf' as the message and -1 is
+ * returned; a failure to add to 'ret' is reported through no_memory().
+ */
+int
+zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
+    zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp,
+    const char *errbuf)
+{
+	data_type_t datatype = nvpair_type(elem);
+	zprop_type_t proptype;
+	const char *propname;
+	char *value;
+	boolean_t isnone = B_FALSE;
+
+	if (type == ZFS_TYPE_POOL) {
+		proptype = zpool_prop_get_type(prop);
+		propname = zpool_prop_to_name(prop);
+	} else {
+		proptype = zfs_prop_get_type(prop);
+		propname = zfs_prop_to_name(prop);
+	}
+
+	/*
+	 * Convert any properties to the internal DSL value types.
+	 */
+	*svalp = NULL;
+	*ivalp = 0;
+
+	switch (proptype) {
+	case PROP_TYPE_STRING:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+		(void) nvpair_value_string(elem, svalp);
+		if (strlen(*svalp) >= ZFS_MAXPROPLEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' is too long"), nvpair_name(elem));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_NUMBER:
+		if (datatype == DATA_TYPE_STRING) {
+			(void) nvpair_value_string(elem, &value);
+			/* "none" leaves *ivalp at 0 and is remembered. */
+			if (strcmp(value, "none") == 0) {
+				isnone = B_TRUE;
+			} else if (zfs_nicestrtonum(hdl, value, ivalp)
+			    != 0) {
+				goto error;
+			}
+		} else if (datatype == DATA_TYPE_UINT64) {
+			(void) nvpair_value_uint64(elem, ivalp);
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a number"), nvpair_name(elem));
+			goto error;
+		}
+
+		/*
+		 * Quota special: force 'none' and don't allow 0.
+		 */
+		if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone &&
+		    (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "use 'none' to disable quota/refquota"));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_INDEX:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+
+		(void) nvpair_value_string(elem, &value);
+
+		if (zprop_string_to_index(prop, value, ivalp, type) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be one of '%s'"), propname,
+			    zprop_values(prop, type));
+			goto error;
+		}
+		break;
+
+	default:
+		abort();
+	}
+
+	/*
+	 * Add the result to our return set of properties.
+	 */
+	if (*svalp != NULL) {
+		if (nvlist_add_string(ret, propname, *svalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	} else {
+		if (nvlist_add_uint64(ret, propname, *ivalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	}
+
+	return (0);
+error:
+	/* Every parse failure above has already set an auxiliary message. */
+	(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+	return (-1);
+}
+
+/*
+ * Create a property-list entry for 'propname' and store it in '*listp'.
+ *
+ * A name that resolves to a property valid for 'type' yields an entry for
+ * that property with its display width taken from zprop_width().  Any
+ * other name is accepted only as a user-defined dataset property, in which
+ * case the entry keeps a private copy of the name and uses its length as
+ * the width.  Returns 0 on success; a rejected name is reported as
+ * EZFS_BADPROP and an allocation failure yields -1.
+ */
+static int
+addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	zprop_list_t *entry;
+	int prop = zprop_name_to_prop(propname, type);
+
+	if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type))
+		prop = ZPROP_INVAL;
+
+	/*
+	 * When no property table entry can be found, return failure if
+	 * this is a pool property or if this isn't a user-defined
+	 * dataset property.
+	 */
+	if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL ||
+	    !zfs_prop_user(propname))) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid property '%s'"), propname);
+		return (zfs_error(hdl, EZFS_BADPROP,
+		    dgettext(TEXT_DOMAIN, "bad property list")));
+	}
+
+	entry = zfs_alloc(hdl, sizeof (zprop_list_t));
+	if (entry == NULL)
+		return (-1);
+
+	entry->pl_prop = prop;
+	if (prop != ZPROP_INVAL) {
+		entry->pl_width = zprop_width(prop, &entry->pl_fixed,
+		    type);
+	} else {
+		entry->pl_user_prop = zfs_strdup(hdl, propname);
+		if (entry->pl_user_prop == NULL) {
+			free(entry);
+			return (-1);
+		}
+		entry->pl_width = strlen(propname);
+	}
+
+	*listp = entry;
+
+	return (0);
+}
+
+/*
+ * Given a comma-separated list of properties, construct a property list
+ * containing both user-defined and native properties.  This function will
+ * return a NULL list if 'all' is specified, which can later be expanded
+ * by zprop_expand_list().
+ *
+ * Returns 0 on success.  An empty list or an empty name within the list
+ * is reported as EZFS_BADPROP; a failure from addlist() yields -1.
+ *
+ * Note that 'props' is modified in place: the character ending each name
+ * is overwritten with '\0' and is not restored.  When a failure occurs
+ * part-way through, entries already linked into '*listp' are left there.
+ */
+int
+zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	*listp = NULL;
+
+	/*
+	 * If 'all' is specified, return a NULL list.
+	 */
+	if (strcmp(props, "all") == 0)
+		return (0);
+
+	/*
+	 * If no props were specified, return an error.
+	 */
+	if (props[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no properties specified"));
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad property list")));
+	}
+
+	/*
+	 * It would be nice to use getsubopt() here, but the inclusion of column
+	 * aliases makes this more effort than it's worth.
+	 */
+	while (*props != '\0') {
+		size_t len;
+		char *p;
+		char c;
+
+		/* 'p' ends up at the ',' or at the final '\0'. */
+		if ((p = strchr(props, ',')) == NULL) {
+			len = strlen(props);
+			p = props + len;
+		} else {
+			len = p - props;
+		}
+
+		/*
+		 * Check for empty options.
+		 */
+		if (len == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty property name"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+
+		/*
+		 * Check all regular property names.
+		 */
+		c = props[len];
+		props[len] = '\0';
+
+		/* "space" is shorthand for a fixed set of properties. */
+		if (strcmp(props, "space") == 0) {
+			static char *spaceprops[] = {
+				"name", "avail", "used", "usedbysnapshots",
+				"usedbydataset", "usedbyrefreservation",
+				"usedbychildren", NULL
+			};
+			int i;
+
+			for (i = 0; spaceprops[i]; i++) {
+				if (addlist(hdl, spaceprops[i], listp, type))
+					return (-1);
+				listp = &(*listp)->pl_next;
+			}
+		} else {
+			if (addlist(hdl, props, listp, type))
+				return (-1);
+			listp = &(*listp)->pl_next;
+		}
+
+		/* Step past the separator, if this name ended in one. */
+		props = p;
+		if (c == ',')
+			props++;
+	}
+
+	return (0);
+}
+
+/*
+ * Free every entry of the property list starting at 'pl', including each
+ * entry's copy of a user property name.  A NULL list is a no-op.
+ */
+void
+zprop_free_list(zprop_list_t *pl)
+{
+	zprop_list_t *cur;
+	zprop_list_t *next;
+
+	for (cur = pl; cur != NULL; cur = next) {
+		/* Grab the link before the entry goes away. */
+		next = cur->pl_next;
+		free(cur->pl_user_prop);
+		free(cur);
+	}
+}
+
+/*
+ * State handed to zprop_expand_list_cb() through its opaque 'cb' argument.
+ */
+typedef struct expand_data {
+	zprop_list_t	**last;	/* link that receives the next new entry */
+	libzfs_handle_t	*hdl;	/* handle passed to zfs_alloc() */
+	zfs_type_t type;	/* type passed to zprop_width() */
+} expand_data_t;
+
+/*
+ * Property-iteration callback used by zprop_expand_list(): append an
+ * entry for 'prop', flagged as part of an 'all' expansion, to the list
+ * tracked by the expand_data_t passed in 'cb'.  Returns ZPROP_CONT to keep
+ * iterating, or ZPROP_INVAL if the entry could not be allocated.
+ */
+int
+zprop_expand_list_cb(int prop, void *cb)
+{
+	expand_data_t *state = cb;
+	zprop_list_t *entry;
+
+	entry = zfs_alloc(state->hdl, sizeof (zprop_list_t));
+	if (entry == NULL)
+		return (ZPROP_INVAL);
+
+	entry->pl_prop = prop;
+	entry->pl_width = zprop_width(prop, &entry->pl_fixed, state->type);
+	entry->pl_all = B_TRUE;
+
+	/* Link the entry in and advance the tail to its 'next' field. */
+	*(state->last) = entry;
+	state->last = &entry->pl_next;
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Expand an 'all' property specification.  If '*plp' is NULL, it is filled
+ * with an entry for every property visited by zprop_iter_common() for
+ * 'type', preceded by an entry for the 'name' property.  A list that is
+ * already populated is left untouched.  Returns 0 on success and -1 if an
+ * allocation failed.
+ */
+int
+zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type)
+{
+	zprop_list_t *head;
+	expand_data_t exp;
+
+	/* Only the very first call for an 'all' list has work to do. */
+	if (*plp != NULL)
+		return (0);
+
+	exp.last = plp;
+	exp.hdl = hdl;
+	exp.type = type;
+
+	if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE,
+	    B_FALSE, type) == ZPROP_INVAL)
+		return (-1);
+
+	/*
+	 * 'name' is handled specially: it always goes at the front of the
+	 * list.
+	 */
+	head = zfs_alloc(hdl, sizeof (zprop_list_t));
+	if (head == NULL)
+		return (-1);
+
+	if (type == ZFS_TYPE_POOL)
+		head->pl_prop = ZPOOL_PROP_NAME;
+	else
+		head->pl_prop = ZFS_PROP_NAME;
+	head->pl_width = zprop_width(head->pl_prop, &head->pl_fixed, type);
+	head->pl_all = B_TRUE;
+	head->pl_next = *plp;
+	*plp = head;
+
+	return (0);
+}
+
+/*
+ * Public entry point for property iteration: forwards all arguments
+ * unchanged to zprop_iter_common() and returns its result.
+ */
+int
+zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered,
+    zfs_type_t type)
+{
+	int rc = zprop_iter_common(func, cb, show_all, ordered, type);
+
+	return (rc);
+}
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
new file mode 100644
index 000000000..fe817cc64
--- /dev/null
+++ b/lib/libzpool/kernel.c
@@ -0,0 +1,885 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <sys/spa.h>
+#include <sys/stat.h>
+#include <sys/processor.h>
+#include <sys/zfs_context.h>
+#include <sys/zmod.h>
+#include <sys/utsname.h>
+
+/*
+ * Emulation of kernel services in userland.
+ */
+
+uint64_t physmem;
+/* Dummy non-NULL value; vn_openat() asserts that its startvp equals it. */
+vnode_t *rootdir = (vnode_t *)0xabcd1234;
+char hw_serial[11];
+
+/*
+ * Fixed placeholder identity for the emulated system.
+ * NOTE(review): the initializers are positional, so which string lands in
+ * which field depends on the platform's struct utsname layout.
+ */
+struct utsname utsname = {
+	"userland", "libzpool", "1", "1", "na"
+};
+
+/*
+ * =========================================================================
+ * threads
+ * =========================================================================
+ */
+/*
+ * Start a detached thread running 'func(arg)' and return its thread id
+ * cast to a kthread_t pointer.  A failure to create the thread trips
+ * VERIFY().
+ */
+/*ARGSUSED*/
+kthread_t *
+zk_thread_create(void (*func)(), void *arg)
+{
+	thread_t tid;
+
+	/* 'func' is cast to the start-routine signature thr_create() takes. */
+	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
+	    &tid) == 0);
+
+	return ((void *)(uintptr_t)tid);
+}
+
+/*
+ * =========================================================================
+ * kstats
+ * =========================================================================
+ */
+/*
+ * Stub: no kstat is created here; all arguments are ignored and NULL is
+ * always returned.
+ */
+/*ARGSUSED*/
+kstat_t *
+kstat_create(char *module, int instance, char *name, char *class,
+    uchar_t type, ulong_t ndata, uchar_t ks_flag)
+{
+	return (NULL);
+}
+
+/*
+ * Stub: does nothing.
+ */
+/*ARGSUSED*/
+void
+kstat_install(kstat_t *ksp)
+{}
+
+/*
+ * Stub: does nothing.
+ */
+/*ARGSUSED*/
+void
+kstat_delete(kstat_t *ksp)
+{}
+
+/*
+ * =========================================================================
+ * mutexes
+ * =========================================================================
+ */
+/*
+ * Initialize 'mp': no owner, marked initialized, and the underlying lock
+ * set up with _mutex_init() (whose result is ignored).
+ */
+void
+zmutex_init(kmutex_t *mp)
+{
+	mp->m_owner = NULL;
+	mp->initialized = B_TRUE;
+	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
+}
+
+/*
+ * Destroy 'mp'.  The mutex must be initialized and unowned.  Afterwards
+ * the owner field holds (void *)-1UL, the value the other mutex routines
+ * assert against to catch use of a destroyed mutex.
+ */
+void
+zmutex_destroy(kmutex_t *mp)
+{
+	ASSERT(mp->initialized == B_TRUE);
+	ASSERT(mp->m_owner == NULL);
+	(void) _mutex_destroy(&(mp)->m_lock);
+	mp->m_owner = (void *)-1UL;
+	mp->initialized = B_FALSE;
+}
+
+/*
+ * Acquire 'mp', blocking until it is available, and record the calling
+ * thread as its owner.  Asserts that the mutex is initialized, has not
+ * been destroyed, and is not already held by the caller.
+ */
+void
+mutex_enter(kmutex_t *mp)
+{
+	ASSERT(mp->initialized == B_TRUE);
+	ASSERT(mp->m_owner != (void *)-1UL);
+	ASSERT(mp->m_owner != curthread);
+	VERIFY(mutex_lock(&mp->m_lock) == 0);
+	/* Nobody may be recorded as owner once the lock is ours. */
+	ASSERT(mp->m_owner == NULL);
+	mp->m_owner = curthread;
+}
+
+/*
+ * Try to acquire 'mp' without blocking.  Returns 1 and records the caller
+ * as owner if the lock was obtained, 0 otherwise.
+ */
+int
+mutex_tryenter(kmutex_t *mp)
+{
+	ASSERT(mp->initialized == B_TRUE);
+	ASSERT(mp->m_owner != (void *)-1UL);
+
+	if (mutex_trylock(&mp->m_lock) != 0)
+		return (0);
+
+	ASSERT(mp->m_owner == NULL);
+	mp->m_owner = curthread;
+
+	return (1);
+}
+
+/*
+ * Release 'mp'.  The caller must be the recorded owner.  The owner field
+ * is cleared before the underlying lock is dropped.
+ */
+void
+mutex_exit(kmutex_t *mp)
+{
+	ASSERT(mp->initialized == B_TRUE);
+	ASSERT(mutex_owner(mp) == curthread);
+	mp->m_owner = NULL;
+	VERIFY(mutex_unlock(&mp->m_lock) == 0);
+}
+
+/*
+ * Return the recorded owner of 'mp' (NULL when nobody holds it).
+ */
+void *
+mutex_owner(kmutex_t *mp)
+{
+	ASSERT(mp->initialized == B_TRUE);
+	return (mp->m_owner);
+}
+
+/*
+ * =========================================================================
+ * rwlocks
+ * =========================================================================
+ */
+/*
+ * Initialize 'rwlp' with no owner and mark it initialized.  'name',
+ * 'type' and 'arg' are not used.
+ */
+/*ARGSUSED*/
+void
+rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
+{
+	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
+	rwlp->rw_owner = NULL;
+	rwlp->initialized = B_TRUE;
+}
+
+/*
+ * Destroy 'rwlp'.  Afterwards the owner field holds (void *)-1UL, the
+ * value the other rwlock routines assert against to catch use of a
+ * destroyed lock.
+ */
+void
+rw_destroy(krwlock_t *rwlp)
+{
+	rwlock_destroy(&rwlp->rw_lock);
+	rwlp->rw_owner = (void *)-1UL;
+	rwlp->initialized = B_FALSE;
+}
+
+/*
+ * Acquire 'rwlp' as a reader (rw == RW_READER) or as a writer (any other
+ * value), blocking until it is available.  The calling thread is recorded
+ * in rw_owner in both cases.
+ */
+void
+rw_enter(krwlock_t *rwlp, krw_t rw)
+{
+	ASSERT(!RW_LOCK_HELD(rwlp));
+	ASSERT(rwlp->initialized == B_TRUE);
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+	ASSERT(rwlp->rw_owner != curthread);
+
+	if (rw == RW_READER)
+		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
+	else
+		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
+
+	rwlp->rw_owner = curthread;
+}
+
+/*
+ * Release 'rwlp'.  The owner field is cleared before the underlying lock
+ * is dropped.
+ */
+void
+rw_exit(krwlock_t *rwlp)
+{
+	ASSERT(rwlp->initialized == B_TRUE);
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+
+	rwlp->rw_owner = NULL;
+	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
+}
+
+/*
+ * Try to acquire 'rwlp' as a reader (rw == RW_READER) or as a writer (any
+ * other value) without blocking.  Returns 1 and records the caller as
+ * owner if the lock was obtained, 0 otherwise.
+ */
+int
+rw_tryenter(krwlock_t *rwlp, krw_t rw)
+{
+	int rv;
+
+	ASSERT(rwlp->initialized == B_TRUE);
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+
+	rv = (rw == RW_READER) ?
+	    rw_tryrdlock(&rwlp->rw_lock) :
+	    rw_trywrlock(&rwlp->rw_lock);
+
+	if (rv != 0)
+		return (0);
+
+	rwlp->rw_owner = curthread;
+
+	return (1);
+}
+
+/*
+ * Upgrading a read lock to a write lock is not emulated: after the sanity
+ * checks this always returns 0, i.e. the upgrade never succeeds.
+ */
+/*ARGSUSED*/
+int
+rw_tryupgrade(krwlock_t *rwlp)
+{
+	ASSERT(rwlp->initialized == B_TRUE);
+	ASSERT(rwlp->rw_owner != (void *)-1UL);
+
+	return (0);
+}
+
+/*
+ * =========================================================================
+ * condition variables
+ * =========================================================================
+ */
+/*
+ * Initialize the condition variable 'cv'.  'type' is passed straight to
+ * cond_init(); 'name' and 'arg' are not used.
+ */
+/*ARGSUSED*/
+void
+cv_init(kcondvar_t *cv, char *name, int type, void *arg)
+{
+	VERIFY(cond_init(cv, type, NULL) == 0);
+}
+
+/*
+ * Destroy the condition variable 'cv'; a failure trips VERIFY().
+ */
+void
+cv_destroy(kcondvar_t *cv)
+{
+	VERIFY(cond_destroy(cv) == 0);
+}
+
+/*
+ * Wait on 'cv'.  The caller must own 'mp'.  The recorded owner is cleared
+ * for the duration of the wait and restored afterwards.  An interrupted
+ * wait (EINTR) is treated like a normal wakeup.
+ */
+void
+cv_wait(kcondvar_t *cv, kmutex_t *mp)
+{
+	int ret;
+
+	ASSERT(mutex_owner(mp) == curthread);
+
+	mp->m_owner = NULL;
+	ret = cond_wait(cv, &mp->m_lock);
+	VERIFY(ret == 0 || ret == EINTR);
+	mp->m_owner = curthread;
+}
+
+/*
+ * Wait on 'cv' until it is signalled or until the tick count 'abstime'
+ * (compared against lbolt) is reached.  The caller must own 'mp'.
+ * Returns 1 if the wait completed normally, or -1 if the deadline had
+ * already passed or the wait timed out.  An interrupted wait is retried
+ * with the remaining time recomputed.
+ */
+clock_t
+cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
+{
+	int error;
+	timestruc_t ts;
+	clock_t delta;
+
+	for (;;) {
+		delta = abstime - lbolt;
+		if (delta <= 0)
+			return (-1);
+
+		/* Convert the remaining ticks into a relative timespec. */
+		ts.tv_sec = delta / hz;
+		ts.tv_nsec = (delta % hz) * (NANOSEC / hz);
+
+		ASSERT(mutex_owner(mp) == curthread);
+		mp->m_owner = NULL;
+		error = cond_reltimedwait(cv, &mp->m_lock, &ts);
+		mp->m_owner = curthread;
+
+		if (error == ETIME)
+			return (-1);
+
+		if (error != EINTR)
+			break;
+	}
+
+	ASSERT(error == 0);
+
+	return (1);
+}
+
+/*
+ * Signal the condition variable 'cv' via cond_signal(); a failure trips
+ * VERIFY().
+ */
+void
+cv_signal(kcondvar_t *cv)
+{
+	VERIFY(cond_signal(cv) == 0);
+}
+
+/*
+ * Broadcast on the condition variable 'cv' via cond_broadcast(); a
+ * failure trips VERIFY().
+ */
+void
+cv_broadcast(kcondvar_t *cv)
+{
+	VERIFY(cond_broadcast(cv) == 0);
+}
+
+/*
+ * =========================================================================
+ * vnode operations
+ * =========================================================================
+ */
+/*
+ * Note: for the xxxat() versions of these functions, we assume that the
+ * starting vp is always rootdir (which is true for spa_directory.c, the only
+ * ZFS consumer of these interfaces).  We assert this is true, and then emulate
+ * them by adding '/' in front of the path.
+ */
+
+/*
+ * Open 'path' with open64() and return a newly allocated vnode for it in
+ * '*vpp'.  The vnode records the descriptor, the file size and a copy of
+ * 'path'.  'flags' and 'mode' are passed on to open64(); x1, x2 and x3
+ * are ignored.
+ *
+ * Returns 0 on success or an errno value on failure.  ENAMETOOLONG is
+ * returned when the (possibly rewritten) path would not fit in the local
+ * MAXPATHLEN buffer.
+ */
+/*ARGSUSED*/
+int
+vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
+{
+	int fd;
+	int err;
+	vnode_t *vp;
+	int old_umask = 0;
+	char realpath[MAXPATHLEN];
+	struct stat64 st;
+	size_t pathlen = strlen(path);
+
+	/* The copies into realpath[] below must never overflow it. */
+	if (pathlen >= sizeof (realpath))
+		return (ENAMETOOLONG);
+
+	/*
+	 * If we're accessing a real disk from userland, we need to use
+	 * the character interface to avoid caching.  This is particularly
+	 * important if we're trying to look at a real in-kernel storage
+	 * pool from userland, e.g. via zdb, because otherwise we won't
+	 * see the changes occurring under the segmap cache.
+	 * On the other hand, the stupid character device returns zero
+	 * for its size.  So -- gag -- we open the block device to get
+	 * its size, and remember it for subsequent VOP_GETATTR().
+	 */
+	if (strncmp(path, "/dev/", 5) == 0) {
+		char *dsk;
+		fd = open64(path, O_RDONLY);
+		if (fd == -1)
+			return (errno);
+		if (fstat64(fd, &st) == -1) {
+			/* close() may overwrite errno; save it first. */
+			err = errno;
+			(void) close(fd);
+			return (err);
+		}
+		(void) close(fd);
+		(void) snprintf(realpath, sizeof (realpath), "%s", path);
+		dsk = strstr(path, "/dsk/");
+		if (dsk != NULL) {
+			size_t off = (size_t)(dsk - path) + 1;
+
+			/* Turning "dsk" into "rdsk" adds one byte. */
+			if (pathlen + 1 >= sizeof (realpath))
+				return (ENAMETOOLONG);
+			(void) snprintf(realpath + off,
+			    sizeof (realpath) - off, "r%s", dsk + 1);
+		}
+	} else {
+		(void) snprintf(realpath, sizeof (realpath), "%s", path);
+		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
+			return (errno);
+	}
+
+	/* When creating, clear the umask for the duration of the open. */
+	if (flags & FCREAT)
+		old_umask = umask(0);
+
+	/*
+	 * The construct 'flags - FREAD' conveniently maps combinations of
+	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
+	 */
+	fd = open64(realpath, flags - FREAD, mode);
+
+	if (flags & FCREAT)
+		(void) umask(old_umask);
+
+	if (fd == -1)
+		return (errno);
+
+	if (fstat64(fd, &st) == -1) {
+		/* close() may overwrite errno; save it first. */
+		err = errno;
+		(void) close(fd);
+		return (err);
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
+
+	vp->v_fd = fd;
+	vp->v_size = st.st_size;
+	vp->v_path = spa_strdup(path);
+
+	return (0);
+}
+
+/*
+ * Emulate vn_openat() by prefixing 'path' with '/' and calling vn_open().
+ * startvp is asserted to be rootdir; fd is accepted but not used.
+ * Returns whatever vn_open() returns.
+ */
+/*ARGSUSED*/
+int
+vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
+    int x3, vnode_t *startvp, int fd)
+{
+	size_t bufsz = strlen(path) + 2;	/* leading '/' plus NUL */
+	char *abspath = umem_alloc(bufsz, UMEM_NOFAIL);
+	int error;
+
+	ASSERT(startvp == rootdir);
+	(void) sprintf(abspath, "/%s", path);
+
+	/* fd ignored for now, need if want to simulate nbmand support */
+	error = vn_open(abspath, x1, flags, mode, vpp, x2, x3);
+
+	umem_free(abspath, bufsz);
+
+	return (error);
+}
+
+/*
+ * Read or write 'len' bytes at 'offset' on the descriptor held in vp.
+ *
+ *	uio	UIO_READ selects pread64(); anything else writes
+ *	residp	if non-NULL, receives the number of bytes NOT transferred
+ *	x1..x4	unused
+ *
+ * Returns 0 on success, the errno of a failing pread64()/pwrite64(), or
+ * EIO when the transfer was short and the caller passed no residp.
+ */
+/*ARGSUSED*/
+int
+vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
+	int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
+{
+	ssize_t iolen, split;
+
+	if (uio == UIO_READ) {
+		iolen = pread64(vp->v_fd, addr, len, offset);
+		if (iolen == -1)
+			return (errno);
+	} else {
+		ssize_t tail;
+
+		/*
+		 * To simulate partial disk writes, we split writes into two
+		 * system calls so that the process can be killed in between.
+		 */
+		split = (len > 0 ? rand() % len : 0);
+		iolen = pwrite64(vp->v_fd, addr, split, offset);
+		if (iolen == -1)
+			return (errno);
+
+		/*
+		 * Only issue the second half when the first landed in full;
+		 * each result is checked separately so that a -1 is never
+		 * folded into the byte count.
+		 */
+		if (iolen == split) {
+			tail = pwrite64(vp->v_fd, (char *)addr + split,
+			    len - split, offset + split);
+			if (tail == -1)
+				return (errno);
+			iolen += tail;
+		}
+	}
+
+	if (residp)
+		*residp = len - iolen;
+	else if (iolen != len)
+		return (EIO);
+	return (0);
+}
+
+/*
+ * Release a vnode: close its descriptor, free the saved path string with
+ * spa_strfree(), then free the vnode_t itself.
+ */
+void
+vn_close(vnode_t *vp)
+{
+	close(vp->v_fd);
+	spa_strfree(vp->v_path);
+	umem_free(vp, sizeof (vnode_t));
+}
+
+#ifdef ZFS_DEBUG
+
+/*
+ * =========================================================================
+ * Figure out which debugging statements to print
+ * =========================================================================
+ */
+
+static char *dprintf_string;
+static int dprintf_print_all;
+
+/*
+ * Report whether 'string' is a complete element of the comma-separated
+ * list held in dprintf_string, for example:
+ *	file1.c,function_name1,file2.c,file3.c
+ * Returns 1 on a match and 0 otherwise (including when no list is set).
+ */
+int
+dprintf_find_string(const char *string)
+{
+	const char *elem;
+	int len = strlen(string);
+
+	for (elem = dprintf_string; elem != NULL; ) {
+		const char *comma;
+
+		/* A hit requires the element to end right after 'string'. */
+		if (strncmp(elem, string, len) == 0 &&
+		    (elem[len] == ',' || elem[len] == '\0'))
+			return (1);
+
+		/* Step past the next comma, or stop when there is none. */
+		comma = strchr(elem, ',');
+		elem = (comma != NULL) ? comma + 1 : NULL;
+	}
+	return (0);
+}
+
+/*
+ * Select which debug statements are printed.
+ *
+ * Every "debug=..." argument is removed from argv (and *argc is reduced
+ * to match); the text after "debug=" of the last such argument becomes
+ * the selection list.  When none is given, the ZFS_DEBUG environment
+ * variable is used instead.  A list element of "on" enables everything.
+ */
+void
+dprintf_setup(int *argc, char **argv)
+{
+	int i, j;
+	int len = strlen("debug=");
+
+	/*
+	 * Debugging can be specified two ways: by setting the
+	 * environment variable ZFS_DEBUG, or by including a
+	 * "debug=..."  argument on the command line.  The command
+	 * line setting overrides the environment variable.
+	 */
+
+	for (i = 1; i < *argc; i++) {
+		/* First look for a command line argument */
+		if (strncmp("debug=", argv[i], len) != 0)
+			continue;
+
+		dprintf_string = argv[i] + len;
+		/* Remove from args */
+		for (j = i; j < *argc; j++)
+			argv[j] = argv[j+1];
+		argv[j] = NULL;
+		(*argc)--;
+		/*
+		 * The next argument now occupies slot i; step back so the
+		 * loop increment examines it rather than skipping it.
+		 */
+		i--;
+	}
+
+	if (dprintf_string == NULL) {
+		/* Look for ZFS_DEBUG environment variable */
+		dprintf_string = getenv("ZFS_DEBUG");
+	}
+
+	/*
+	 * Are we just turning on all debugging?
+	 */
+	if (dprintf_find_string("on"))
+		dprintf_print_all = 1;
+}
+
+/*
+ * =========================================================================
+ * debug printfs
+ * =========================================================================
+ */
+/*
+ * Emit one debug message on stdout if the calling file or function has
+ * been selected (or everything has).  Optional fields -- pid, tid, cpu,
+ * time, and "file, line" -- are printed ahead of the function name when
+ * their keywords appear in the selection list.
+ */
+void
+__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
+{
+	/*
+	 * Get rid of annoying "../common/" prefix to filename.
+	 */
+	const char *slash = strrchr(file, '/');
+	const char *newfile = (slash != NULL) ? slash + 1 : file;
+	va_list adx;
+
+	if (!dprintf_print_all &&
+	    !dprintf_find_string(newfile) &&
+	    !dprintf_find_string(func))
+		return;
+
+	/* Hold the stdout lock across all of the printf() calls below. */
+	flockfile(stdout);
+
+	if (dprintf_find_string("pid"))
+		(void) printf("%d ", getpid());
+	if (dprintf_find_string("tid"))
+		(void) printf("%u ", thr_self());
+	if (dprintf_find_string("cpu"))
+		(void) printf("%u ", getcpuid());
+	if (dprintf_find_string("time"))
+		(void) printf("%llu ", gethrtime());
+	if (dprintf_find_string("long"))
+		(void) printf("%s, line %d: ", newfile, line);
+
+	(void) printf("%s: ", func);
+
+	va_start(adx, fmt);
+	(void) vprintf(fmt, adx);
+	va_end(adx);
+
+	funlockfile(stdout);
+}
+
+#endif /* ZFS_DEBUG */
+
+/*
+ * =========================================================================
+ * cmn_err() and panic()
+ * =========================================================================
+ */
+/*
+ * Text printed by vcmn_err() before (ce_prefix) and after (ce_suffix) a
+ * message, indexed by the 'ce' level.  Both tables hold CE_IGNORE entries,
+ * so valid indices are 0 .. CE_IGNORE - 1.
+ */
+static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
+static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
+
+/*
+ * Write "error: ", the formatted message, and a newline to stderr, then
+ * terminate the process with abort().
+ */
+void
+vpanic(const char *fmt, va_list adx)
+{
+	(void) fputs("error: ", stderr);
+	(void) vfprintf(stderr, fmt, adx);
+	(void) fputc('\n', stderr);
+
+	abort();	/* think of it as a "user-level crash dump" */
+}
+
+/*
+ * printf-style front end to vpanic(): collects the variable arguments
+ * and passes them on.
+ */
+void
+panic(const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vpanic(fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * Report a message at level 'ce' on stderr.
+ *
+ * CE_PANIC is handed to vpanic().  CE_NOTE is suppressed.  Levels outside
+ * the range covered by ce_prefix[]/ce_suffix[] (CE_IGNORE included) print
+ * nothing.  Every other level prints its prefix, the formatted message,
+ * and its suffix.
+ */
+void
+vcmn_err(int ce, const char *fmt, va_list adx)
+{
+	if (ce == CE_PANIC)
+		vpanic(fmt, adx);
+
+	/*
+	 * The tables have CE_IGNORE entries, so CE_IGNORE itself (or any
+	 * other out-of-range level) would index past their end.
+	 */
+	if (ce < 0 || ce >= CE_IGNORE)
+		return;
+
+	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
+		(void) fprintf(stderr, "%s", ce_prefix[ce]);
+		(void) vfprintf(stderr, fmt, adx);
+		(void) fprintf(stderr, "%s", ce_suffix[ce]);
+	}
+}
+
+/*
+ * printf-style front end to vcmn_err(): collects the variable arguments
+ * and passes them on together with the level 'ce'.
+ */
+/*PRINTFLIKE2*/
+void
+cmn_err(int ce, const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vcmn_err(ce, fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * =========================================================================
+ * kobj interfaces
+ * =========================================================================
+ */
+/*
+ * Open 'name' (relative to rootdir) read-only through vn_openat() and wrap
+ * the resulting vnode in a newly allocated struct _buf.
+ * Returns the buffer, or (void *)-1UL if the open fails.
+ */
+struct _buf *
+kobj_open_file(char *name)
+{
+	vnode_t *vp;
+	struct _buf *file;
+	int error;
+
+	error = vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0,
+	    rootdir, -1);
+	if (error != 0)
+		return ((void *)-1UL);
+
+	/* set vp as the _fd field of the file */
+	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
+	file->_fd = (intptr_t)vp;
+	return (file);
+}
+
+/*
+ * Read up to 'size' bytes at offset 'off' from the vnode stored in
+ * file->_fd into 'buf'.
+ * Returns the number of bytes read, or -1 if vn_rdwr() reports an error
+ * (in which case the residual count is not meaningful).
+ */
+int
+kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
+{
+	ssize_t resid;
+
+	if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
+	    UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
+		return (-1);
+
+	return (size - resid);
+}
+
+/*
+ * Close the vnode stored in file->_fd with vn_close() and free the
+ * struct _buf wrapper.
+ */
+void
+kobj_close_file(struct _buf *file)
+{
+	vn_close((vnode_t *)file->_fd);
+	umem_free(file, sizeof (struct _buf));
+}
+
+/*
+ * Store the current size of the file behind 'file' in *size.
+ * Returns 0 on success.  If fstat64() fails the vnode is closed and the
+ * errno from fstat64() is returned.
+ */
+int
+kobj_get_filesize(struct _buf *file, uint64_t *size)
+{
+	struct stat64 st;
+	vnode_t *vp = (vnode_t *)file->_fd;
+
+	if (fstat64(vp->v_fd, &st) == -1) {
+		/* vn_close() calls close(), which may overwrite errno. */
+		int err = errno;
+
+		/*
+		 * NOTE(review): this frees the vnode but leaves file->_fd
+		 * pointing at it; confirm callers do not go on to call
+		 * kobj_close_file() on the same file after a failure.
+		 */
+		vn_close(vp);
+		return (err);
+	}
+	*size = st.st_size;
+	return (0);
+}
+
+/*
+ * =========================================================================
+ * misc routines
+ * =========================================================================
+ */
+
+/*
+ * Sleep for 'ticks' clock ticks by calling poll() with no descriptors
+ * and a timeout of ticks * (1000 / hz).
+ */
+void
+delay(clock_t ticks)
+{
+	int timeout = ticks * (1000 / hz);
+
+	poll(NULL, 0, timeout);
+}
+
+/*
+ * Locate the most significant set bit of i.
+ *	Returns that bit's number plus one, or 0 when no bit is set.
+ * The top bit is bit 31, or bit 63 when _LP64 is defined.
+ */
+int
+highbit(ulong_t i)
+{
+	/* Binary search: test the upper part, and shift it down if occupied. */
+	static const struct {
+		ulong_t	mask;
+		int	shift;
+	} halves[] = {
+#ifdef _LP64
+		{ 0xffffffff00000000ul, 32 },
+#endif
+		{ 0xffff0000, 16 },
+		{ 0xff00, 8 },
+		{ 0xf0, 4 },
+		{ 0xc, 2 },
+		{ 0x2, 1 }
+	};
+	int pos = 1;
+	size_t k;
+
+	if (i == 0)
+		return (0);
+
+	for (k = 0; k < sizeof (halves) / sizeof (halves[0]); k++) {
+		if (i & halves[k].mask) {
+			pos += halves[k].shift;
+			i >>= halves[k].shift;
+		}
+	}
+	return (pos);
+}
+
+static int random_fd = -1, urandom_fd = -1;
+
+/*
+ * Fill ptr[0 .. len) with bytes read from descriptor fd, looping until
+ * the whole request has been satisfied.
+ *
+ * Returns 0 on success, the errno of a failing read(), or EIO if the
+ * descriptor reports end-of-file before 'len' bytes were delivered.
+ */
+static int
+random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
+{
+	size_t resid = len;
+	ssize_t bytes;
+
+	ASSERT(fd != -1);
+
+	while (resid != 0) {
+		bytes = read(fd, ptr, resid);
+		if (bytes == -1 && errno == EINTR)
+			continue;	/* interrupted before any data: retry */
+		ASSERT3S(bytes, >=, 0);
+		/*
+		 * When assertions are compiled out, do not let a failed or
+		 * empty read move ptr backwards or spin forever.
+		 */
+		if (bytes <= 0)
+			return (bytes == 0 ? EIO : errno);
+		ptr += bytes;
+		resid -= bytes;
+	}
+
+	return (0);
+}
+
+/*
+ * Fill ptr[0 .. len) from random_fd, which kernel_init() opens on
+ * /dev/random.  Returns the result of random_get_bytes_common().
+ */
+int
+random_get_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, random_fd));
+}
+
+/*
+ * Fill ptr[0 .. len) from urandom_fd, which kernel_init() opens on
+ * /dev/urandom.  Returns the result of random_get_bytes_common().
+ */
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, urandom_fd));
+}
+
+/*
+ * Convert the leading digits of 'hw_serial' to an unsigned long using
+ * strtoul().
+ *
+ *	nptr	if non-NULL, receives a pointer to the first unparsed char
+ *	base	conversion base, passed through to strtoul()
+ *	result	receives the converted value
+ *
+ * Returns 0 on success, the errno set by strtoul() (e.g. ERANGE), or
+ * EINVAL when no digits could be converted.
+ */
+int
+ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
+{
+	char *end;
+
+	/* strtoul() only sets errno on failure, so clear any stale value. */
+	errno = 0;
+	*result = strtoul(hw_serial, &end, base);
+	if (nptr != NULL)
+		*nptr = end;
+	if (errno != 0)
+		return (errno);
+	if (end == hw_serial)
+		return (EINVAL);
+	return (0);
+}
+
+/*
+ * =========================================================================
+ * kernel emulation setup & teardown
+ * =========================================================================
+ */
+/*
+ * Out-of-memory handler registered through umem_nofail_callback() in
+ * kernel_init(): write a fixed message to stderr and abort().
+ * The return statement is never reached.
+ */
+static int
+umem_out_of_memory(void)
+{
+	char errmsg[] = "out of memory -- generating core dump\n";
+
+	/* sizeof counts the terminating NUL, which must not be written. */
+	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
+	abort();
+	return (0);
+}
+
+/*
+ * Set up the userland kernel emulation.  In order: install the
+ * out-of-memory callback, record the physical page count in physmem,
+ * format gethostid() into hw_serial, open /dev/random and /dev/urandom,
+ * create the system taskq, and call spa_init(mode).
+ */
+void
+kernel_init(int mode)
+{
+	umem_nofail_callback(umem_out_of_memory);
+
+	physmem = sysconf(_SC_PHYS_PAGES);
+
+	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
+	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
+
+	snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
+
+	/* Descriptors consumed by random_get_bytes*(); closed in kernel_fini */
+	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
+	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
+
+	system_taskq_init();
+
+	spa_init(mode);
+}
+
+/*
+ * Tear down the emulation: call spa_fini(), then close both random
+ * descriptors and mark them unopened (-1).
+ */
+void
+kernel_fini(void)
+{
+	spa_fini();
+
+	close(random_fd);
+	random_fd = -1;
+
+	close(urandom_fd);
+	urandom_fd = -1;
+}
+
+/*
+ * Wrapper around zlib's uncompress() that takes and returns the
+ * destination length as a size_t.  *dstlen is updated only when
+ * uncompress() returns Z_OK.  Returns the uncompress() result.
+ */
+int
+z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
+{
+	uLongf outlen = *dstlen;
+	int rc = uncompress(dst, &outlen, src, srclen);
+
+	if (rc != Z_OK)
+		return (rc);
+
+	*dstlen = (size_t)outlen;
+	return (rc);
+}
+
+/*
+ * Wrapper around zlib's compress2() that takes and returns the
+ * destination length as a size_t.  *dstlen is updated only when
+ * compress2() returns Z_OK.  Returns the compress2() result.
+ */
+int
+z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
+    int level)
+{
+	uLongf outlen = *dstlen;
+	int rc = compress2(dst, &outlen, src, srclen, level);
+
+	if (rc != Z_OK)
+		return (rc);
+
+	*dstlen = (size_t)outlen;
+	return (rc);
+}
+
+/*
+ * Credential accessor stubs.  Each ignores 'cr' and returns a constant:
+ * uid 0, gid 0, zero supplementary groups, and a NULL group list.
+ */
+uid_t
+crgetuid(cred_t *cr)
+{
+	return (0);
+}
+
+gid_t
+crgetgid(cred_t *cr)
+{
+	return (0);
+}
+
+int
+crgetngroups(cred_t *cr)
+{
+	return (0);
+}
+
+gid_t *
+crgetgroups(cred_t *cr)
+{
+	return (NULL);
+}
+
+/*
+ * Security-policy stubs.  Each ignores its arguments and returns 0.
+ */
+int
+zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
+{
+	return (0);
+}
+
+int
+zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
+{
+	return (0);
+}
+
+int
+zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
+{
+	return (0);
+}
+
+/*
+ * Allocate a zeroed ksiddomain_t whose kd_name is a spa_strdup() copy of
+ * 'dom'.  Release it with ksiddomain_rele().
+ */
+ksiddomain_t *
+ksid_lookupdomain(const char *dom)
+{
+	ksiddomain_t *domain = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
+
+	domain->kd_name = spa_strdup(dom);
+	return (domain);
+}
+
+/*
+ * Free a ksiddomain_t produced by ksid_lookupdomain(): first its kd_name
+ * string, then the structure itself.
+ */
+void
+ksiddomain_rele(ksiddomain_t *ksid)
+{
+	spa_strfree(ksid->kd_name);
+	umem_free(ksid, sizeof (ksiddomain_t));
+}
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
new file mode 100644
index 000000000..93acdcf8e
--- /dev/null
+++ b/lib/libzpool/taskq.c
@@ -0,0 +1,261 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+
+/*
+ * When nonzero, taskq_dispatch() runs the function in the caller instead
+ * of queueing it, and taskq_member() always returns 1.
+ */
+int taskq_now;
+/* Created by system_taskq_init(). */
+taskq_t *system_taskq;
+
+/*
+ * One queued unit of work.  Pending tasks sit on a circular doubly linked
+ * list anchored at taskq.tq_task; free tasks are chained through
+ * task_next on taskq.tq_freelist.
+ */
+typedef struct task {
+	struct task	*task_next;
+	struct task	*task_prev;
+	task_func_t	*task_func;	/* function to call */
+	void		*task_arg;	/* argument passed to task_func */
+} task_t;
+
+/* Set in tq_flags by taskq_create(); cleared by taskq_destroy(). */
+#define	TASKQ_ACTIVE	0x00010000
+
+struct taskq {
+	kmutex_t	tq_lock;	/* held around queue/counter updates */
+	krwlock_t	tq_threadlock;	/* held as reader while a task runs */
+	kcondvar_t	tq_dispatch_cv;	/* signalled when work is queued */
+	kcondvar_t	tq_wait_cv;	/* broadcast on idle / thread exit */
+	thread_t	*tq_threadlist;	/* ids of the worker threads */
+	int		tq_flags;	/* creation flags | TASKQ_ACTIVE */
+	int		tq_active;	/* workers not waiting for work */
+	int		tq_nthreads;	/* workers that have not exited */
+	int		tq_nalloc;	/* task_t structures allocated */
+	int		tq_minalloc;	/* task_free() caches up to this */
+	int		tq_maxalloc;	/* task_alloc() throttles at this */
+	task_t		*tq_freelist;	/* cached free task_t structures */
+	task_t		tq_task;	/* anchor of the pending-task list */
+};
+
+/*
+ * Obtain a task_t for tq, from the free list when possible, otherwise
+ * from kmem_alloc().
+ *
+ * The function releases and re-takes tq_lock on the allocation path, and
+ * every return path leaves tq_lock held.
+ *
+ * Returns the task, or NULL when the limit has been reached and tqflags
+ * lacks KM_SLEEP, or when kmem_alloc() itself returns NULL.
+ */
+static task_t *
+task_alloc(taskq_t *tq, int tqflags)
+{
+	task_t *t;
+
+	/* Reuse a cached task only once tq_minalloc have been allocated. */
+	if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
+		tq->tq_freelist = t->task_next;
+	} else {
+		/* Drop the lock across the delay and the allocation. */
+		mutex_exit(&tq->tq_lock);
+		if (tq->tq_nalloc >= tq->tq_maxalloc) {
+			if (!(tqflags & KM_SLEEP)) {
+				mutex_enter(&tq->tq_lock);
+				return (NULL);
+			}
+			/*
+			 * We don't want to exceed tq_maxalloc, but we can't
+			 * wait for other tasks to complete (and thus free up
+			 * task structures) without risking deadlock with
+			 * the caller.  So, we just delay for one second
+			 * to throttle the allocation rate.
+			 */
+			delay(hz);
+		}
+		t = kmem_alloc(sizeof (task_t), tqflags);
+		mutex_enter(&tq->tq_lock);
+		if (t != NULL)
+			tq->tq_nalloc++;
+	}
+	return (t);
+}
+
+/*
+ * Dispose of task t.  While no more than tq_minalloc tasks are allocated
+ * the task is pushed onto the free list; otherwise it is handed back to
+ * kmem_free() with tq_lock dropped around the call.
+ */
+static void
+task_free(taskq_t *tq, task_t *t)
+{
+	if (tq->tq_nalloc > tq->tq_minalloc) {
+		/* Above the cache limit: really free it. */
+		tq->tq_nalloc--;
+		mutex_exit(&tq->tq_lock);
+		kmem_free(t, sizeof (task_t));
+		mutex_enter(&tq->tq_lock);
+		return;
+	}
+
+	/* Keep it cached on the free list. */
+	t->task_next = tq->tq_freelist;
+	tq->tq_freelist = t;
+}
+
+/*
+ * Queue func(arg) on tq and signal tq_dispatch_cv.  When taskq_now is
+ * set the function is instead called directly in the caller.
+ * Returns 1 on success, or 0 if no task structure could be allocated.
+ */
+taskqid_t
+taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
+{
+	task_t *head;
+	task_t *tail;
+	task_t *t;
+
+	if (taskq_now) {
+		func(arg);
+		return (1);
+	}
+
+	mutex_enter(&tq->tq_lock);
+	ASSERT(tq->tq_flags & TASKQ_ACTIVE);
+	t = task_alloc(tq, tqflags);
+	if (t == NULL) {
+		mutex_exit(&tq->tq_lock);
+		return (0);
+	}
+
+	/* Append at the tail of the circular list anchored at tq_task. */
+	head = &tq->tq_task;
+	tail = head->task_prev;
+	t->task_next = head;
+	t->task_prev = tail;
+	head->task_prev = t;
+	tail->task_next = t;
+	t->task_func = func;
+	t->task_arg = arg;
+
+	cv_signal(&tq->tq_dispatch_cv);
+	mutex_exit(&tq->tq_lock);
+	return (1);
+}
+
+/*
+ * Block until the pending list of tq is empty and tq_active is zero,
+ * sleeping on tq_wait_cv in between checks.
+ */
+void
+taskq_wait(taskq_t *tq)
+{
+	mutex_enter(&tq->tq_lock);
+	for (;;) {
+		int queue_empty = (tq->tq_task.task_next == &tq->tq_task);
+
+		if (queue_empty && tq->tq_active == 0)
+			break;
+		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
+	}
+	mutex_exit(&tq->tq_lock);
+}
+
+/*
+ * Body of each worker thread; 'arg' is the owning taskq_t.
+ *
+ * While TASKQ_ACTIVE is set the thread repeatedly takes the task at the
+ * head of the pending list and runs it with tq_lock dropped and
+ * tq_threadlock held as reader.  With nothing pending it sleeps on
+ * tq_dispatch_cv.  On exit it decrements tq_nthreads and broadcasts
+ * tq_wait_cv.  Always returns NULL.
+ */
+static void *
+taskq_thread(void *arg)
+{
+	taskq_t *tq = arg;
+	task_t *t;
+
+	mutex_enter(&tq->tq_lock);
+	while (tq->tq_flags & TASKQ_ACTIVE) {
+		if ((t = tq->tq_task.task_next) == &tq->tq_task) {
+			/*
+			 * Nothing queued: stop counting as active, and wake
+			 * waiters if this was the last active thread.
+			 */
+			if (--tq->tq_active == 0)
+				cv_broadcast(&tq->tq_wait_cv);
+			cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
+			tq->tq_active++;
+			continue;
+		}
+		/* Unlink t from the pending list. */
+		t->task_prev->task_next = t->task_next;
+		t->task_next->task_prev = t->task_prev;
+		mutex_exit(&tq->tq_lock);
+
+		rw_enter(&tq->tq_threadlock, RW_READER);
+		t->task_func(t->task_arg);
+		rw_exit(&tq->tq_threadlock);
+
+		mutex_enter(&tq->tq_lock);
+		task_free(tq, t);
+	}
+	tq->tq_nthreads--;
+	cv_broadcast(&tq->tq_wait_cv);
+	mutex_exit(&tq->tq_lock);
+	return (NULL);
+}
+
+/*
+ * Build a task queue served by 'nthreads' worker threads.
+ *
+ *	minalloc	number of task structures kept cached
+ *	maxalloc	allocation count at which task_alloc() throttles
+ *	flags		stored in tq_flags; TASKQ_PREPOPULATE fills the
+ *			free list up front
+ *	name, pri	unused
+ *
+ * Returns the new queue.
+ */
+/*ARGSUSED*/
+taskq_t *
+taskq_create(const char *name, int nthreads, pri_t pri,
+	int minalloc, int maxalloc, uint_t flags)
+{
+	taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
+	int i;
+
+	rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
+	mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
+
+	tq->tq_flags = flags | TASKQ_ACTIVE;
+	tq->tq_nthreads = nthreads;
+	tq->tq_active = nthreads;
+	tq->tq_minalloc = minalloc;
+	tq->tq_maxalloc = maxalloc;
+
+	/* An empty pending list is the anchor linked to itself. */
+	tq->tq_task.task_prev = &tq->tq_task;
+	tq->tq_task.task_next = &tq->tq_task;
+
+	tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP);
+
+	if (flags & TASKQ_PREPOPULATE) {
+		int n;
+
+		/* Allocate-then-free parks each task on the free list. */
+		mutex_enter(&tq->tq_lock);
+		for (n = minalloc; n > 0; n--)
+			task_free(tq, task_alloc(tq, KM_SLEEP));
+		mutex_exit(&tq->tq_lock);
+	}
+
+	for (i = 0; i < nthreads; i++) {
+		(void) thr_create(0, 0, taskq_thread,
+		    tq, THR_BOUND, &tq->tq_threadlist[i]);
+	}
+
+	return (tq);
+}
+
+/*
+ * Shut down and free tq.  Waits for queued work to drain, clears
+ * TASKQ_ACTIVE and wakes the workers so they exit, frees every cached
+ * task structure, joins the worker threads, and finally destroys the
+ * locks and condition variables and frees the queue itself.
+ */
+void
+taskq_destroy(taskq_t *tq)
+{
+	int t;
+	/* Saved now because the workers decrement tq_nthreads as they exit. */
+	int nthreads = tq->tq_nthreads;
+
+	taskq_wait(tq);
+
+	mutex_enter(&tq->tq_lock);
+
+	tq->tq_flags &= ~TASKQ_ACTIVE;
+	cv_broadcast(&tq->tq_dispatch_cv);
+
+	while (tq->tq_nthreads != 0)
+		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
+
+	/*
+	 * With tq_minalloc at 0, each alloc/free pair pulls one task off
+	 * the free list and returns it to kmem_free().
+	 */
+	tq->tq_minalloc = 0;
+	while (tq->tq_nalloc != 0) {
+		ASSERT(tq->tq_freelist != NULL);
+		task_free(tq, task_alloc(tq, KM_SLEEP));
+	}
+
+	mutex_exit(&tq->tq_lock);
+
+	for (t = 0; t < nthreads; t++)
+		(void) thr_join(tq->tq_threadlist[t], NULL, NULL);
+
+	kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t));
+
+	rw_destroy(&tq->tq_threadlock);
+	mutex_destroy(&tq->tq_lock);
+	cv_destroy(&tq->tq_dispatch_cv);
+	cv_destroy(&tq->tq_wait_cv);
+
+	kmem_free(tq, sizeof (taskq_t));
+}
+
+/*
+ * Report whether thread 't' is one of the first tq_nthreads entries of
+ * tq's thread list.  Always returns 1 when taskq_now is set.
+ * Returns 1 for a member and 0 otherwise.
+ */
+int
+taskq_member(taskq_t *tq, void *t)
+{
+	thread_t who = (thread_t)(uintptr_t)t;
+	int i = 0;
+
+	if (taskq_now)
+		return (1);
+
+	while (i < tq->tq_nthreads) {
+		if (tq->tq_threadlist[i] == who)
+			return (1);
+		i++;
+	}
+
+	return (0);
+}
+
+/*
+ * Create the global system_taskq: 64 threads at minclsyspri, minalloc 4,
+ * maxalloc 512, with the TASKQ_DYNAMIC and TASKQ_PREPOPULATE flags.
+ */
+void
+system_taskq_init(void)
+{
+	system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
+	    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
+}
diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c
new file mode 100644
index 000000000..781edb6e8
--- /dev/null
+++ b/lib/libzpool/util.c
@@ -0,0 +1,156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <sys/zfs_context.h>
+#include <sys/avl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/spa.h>
+#include <sys/fs/zfs.h>
+#include <sys/refcount.h>
+
+/*
+ * Routines needed by more than one client of libzpool.
+ */
+
+/*
+ * Format 'num' into 'buf' as a short human-readable string with a binary
+ * unit suffix (K, M, G, T, P, E), e.g. "1023", "1.50K", "16.0E".
+ *
+ * Values below 1024 are printed as plain integers.  Larger values that
+ * are not exact powers of two get two decimals when the scaled value is
+ * below 10 and one decimal when it is below 100.  The result is at most
+ * five characters plus the terminating NUL.
+ */
+void
+nicenum(uint64_t num, char *buf)
+{
+	uint64_t n = num;
+	int index = 0;
+	char u;
+
+	while (n >= 1024) {
+		/*
+		 * Round to nearest.  Written as quotient plus carry because
+		 * "n + 512" would wrap for values close to UINT64_MAX.
+		 */
+		n = n / 1024 + ((n % 1024) >= (1024 / 2) ? 1 : 0);
+		index++;
+	}
+
+	u = " KMGTPE"[index];
+
+	if (index == 0) {
+		(void) sprintf(buf, "%llu", (unsigned long long)n);
+	} else if (n < 10 && (num & (num - 1)) != 0) {
+		(void) sprintf(buf, "%.2f%c",
+		    (double)num / (1ULL << (10 * index)), u);
+	} else if (n < 100 && (num & (num - 1)) != 0) {
+		(void) sprintf(buf, "%.1f%c",
+		    (double)num / (1ULL << (10 * index)), u);
+	} else {
+		(void) sprintf(buf, "%llu%c", (unsigned long long)n, u);
+	}
+}
+
+/*
+ * Print one line of statistics for the vdev described by 'nv', then
+ * recurse into the children found under the nvlist array named 'ctype'.
+ *
+ *	desc	label for this vdev; NULL prints no line for 'nv' itself
+ *	ctype	nvlist key of the child array to descend into
+ *	indent	number of leading spaces; 0 also prints the column header
+ *		when desc is non-NULL
+ */
+static void
+show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
+{
+	vdev_stat_t *vs;
+	vdev_stat_t v0 = { 0 };
+	uint64_t sec;
+	uint64_t is_log = 0;
+	nvlist_t **child;
+	uint_t c, children;
+	char used[6], avail[6];
+	char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6];
+	char *prefix = "";
+
+	if (indent == 0 && desc != NULL) {
+		(void) printf("                           "
+		    " capacity   operations   bandwidth  ---- errors ----\n");
+		(void) printf("description                "
+		    "used avail  read write  read write  read write cksum\n");
+	}
+
+	if (desc != NULL) {
+		int width;
+
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
+
+		if (is_log)
+			prefix = "log ";
+
+		/* Fall back to all-zero statistics when none are recorded. */
+		if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&vs, &c) != 0)
+			vs = &v0;
+
+		sec = MAX(1, vs->vs_timestamp / NANOSEC);
+
+		nicenum(vs->vs_alloc, used);
+		nicenum(vs->vs_space - vs->vs_alloc, avail);
+		nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops);
+		nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops);
+		nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes);
+		nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes);
+		nicenum(vs->vs_read_errors, rerr);
+		nicenum(vs->vs_write_errors, werr);
+		nicenum(vs->vs_checksum_errors, cerr);
+
+		/*
+		 * A '*' field width must be an int.  It is computed in int
+		 * arithmetic so the (normally negative, i.e. left-justified)
+		 * value is passed as such rather than as a wrapped size_t.
+		 */
+		width = indent + (int)strlen(prefix) - 25 -
+		    (vs->vs_space ? 0 : 12);
+
+		(void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n",
+		    indent, "",
+		    prefix,
+		    width,
+		    desc,
+		    vs->vs_space ? 6 : 0, vs->vs_space ? used : "",
+		    vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
+		    rops, wops, rbytes, wbytes, rerr, werr, cerr);
+	}
+
+	if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0)
+		return;
+
+	for (c = 0; c < children; c++) {
+		nvlist_t *cnv = child[c];
+		char *cname, *tname;
+		uint64_t np;
+
+		/* Label by path, else by type, else "<unknown>". */
+		if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) &&
+		    nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname))
+			cname = "<unknown>";
+
+		/* One spare byte for the parity digit, one for the NUL. */
+		tname = calloc(1, strlen(cname) + 2);
+		VERIFY(tname != NULL);
+		(void) strcpy(tname, cname);
+		if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
+			tname[strlen(tname)] = '0' + np;
+		show_vdev_stats(tname, ctype, cnv, indent + 2);
+		free(tname);
+	}
+}
+
+/*
+ * Print statistics for the pool 'spa'.  Fetches the pool configuration
+ * with spa_get_stats(), then prints the vdev tree under the pool name
+ * followed by the L2 cache devices and the spares.  The configuration
+ * nvlist is freed before returning.
+ */
+void
+show_pool_stats(spa_t *spa)
+{
+	nvlist_t *config, *nvroot;
+	char *name;
+
+	VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0);
+
+	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &name) == 0);
+
+	/* A NULL description prints only the children of nvroot. */
+	show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0);
+	show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0);
+	show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0);
+
+	nvlist_free(config);
+}
author	Brian Behlendorf <[email protected]>	2008-12-11 11:08:09 -0800
committer	Brian Behlendorf <[email protected]>	2008-12-11 11:08:09 -0800
commit	172bb4bd5e4afef721dd4d2972d8680d983f144b (patch)
tree	18ab1e97e5e409150066c529b5a981ecf600ef80 /lib
parent	9e8b1e836caa454586797f771a7ad1817ebae315 (diff)