12 files changed, 997 insertions, 450 deletions
diff --git a/lib/libefi/include/sys/uuid.h b/lib/libefi/include/sys/uuid.h
index 9ce872e34..eab4622a6 100644
--- a/lib/libefi/include/sys/uuid.h
+++ b/lib/libefi/include/sys/uuid.h
@@ -74,12 +74,8 @@ struct uuid {
 	uint8_t		node_addr[6];
 };
 
-#define	UUID_LEN	16
-
 #define	UUID_PRINTABLE_STRING_LENGTH 37
 
-typedef uchar_t		uuid_t[UUID_LEN];
-
 /*
  * Convert a uuid to/from little-endian format
  */
diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
index e682b840a..da71e3486 100644
--- a/lib/libefi/rdwr_efi.c
+++ b/lib/libefi/rdwr_efi.c
@@ -29,6 +29,7 @@
 #include <strings.h>
 #include <unistd.h>
 #include <uuid/uuid.h>
+#include <zlib.h>
 #include <libintl.h>
 #include <sys/types.h>
 #include <sys/dkio.h>
@@ -38,7 +39,9 @@
 #include <sys/dktp/fdisk.h>
 #include <sys/efi_partition.h>
 #include <sys/byteorder.h>
-#include <sys/ddi.h>
+#if defined(__linux__)
+#include <linux/fs.h>
+#endif
 
 static struct uuid_to_ptag {
 	struct uuid	uuid;
@@ -49,11 +52,11 @@ static struct uuid_to_ptag {
 	{ EFI_SWAP },
 	{ EFI_USR },
 	{ EFI_BACKUP },
-	{ 0 },			/* STAND is never used */
+	{ EFI_UNUSED },		/* STAND is never used */
 	{ EFI_VAR },
 	{ EFI_HOME },
 	{ EFI_ALTSCTR },
-	{ 0 },			/* CACHE (cachefs) is never used */
+	{ EFI_UNUSED },		/* CACHE (cachefs) is never used */
 	{ EFI_RESERVED },
 	{ EFI_SYSTEM },
 	{ EFI_LEGACY_MBR },
@@ -107,19 +110,142 @@ int efi_debug = 1;
 int efi_debug = 0;
 #endif
 
-extern unsigned int	efi_crc32(const unsigned char *, unsigned int);
-static int		efi_read(int, struct dk_gpt *);
+static int efi_read(int, struct dk_gpt *);
+
+/*
+ * Return the 32-bit CRC of the 'size' bytes at 'buf', computed with zlib.
+ * The one's-complement pre/post conditioning is done inside crc32().
+ */
+static uint32_t
+efi_crc32(const unsigned char *buf, unsigned int size)
+{
+	uint32_t crc = crc32(0, Z_NULL, 0);	/* zlib's initial CRC value */
+
+	crc = crc32(crc, buf, size);
+
+	return (crc);
+}
 
 static int
 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
 {
-	struct dk_minfo		disk_info;
+	int sector_size;			/* logical sector size, in bytes */
+	unsigned long long capacity_size;	/* whole-device size, in bytes */
+
+	if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
+		return (-1);
+
+	if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
+		return (-1);
+
+	*lbsize = (uint_t)sector_size;
+	*capacity = (diskaddr_t)(capacity_size / sector_size);
+
+	return (0);
+}
 
-	if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1)
-		return (errno);
-	*capacity = disk_info.dki_capacity;
-	*lbsize = disk_info.dki_lbsize;
+static int
+efi_get_info(int fd, struct dk_cinfo *dki_info)
+{
+#if defined(__linux__)
+	char *path;
+	char *dev_path;
+	int rval = 0;
+
+	memset(dki_info, 0, sizeof(*dki_info));
+
+	path = calloc(PATH_MAX, 1);
+	if (path == NULL)
+		goto error;
+
+	/*
+	 * The simplest way to get the partition number under linux is
+	 * to parse it out of the /dev/<disk><partition> block device name.
+	 * The kernel creates this using the partition number when it
+	 * populates /dev/ so it may be trusted.  The tricky bit here is
+	 * that the naming convention is based on the block device type.
+	 * So we need to take this into account when parsing out the
+	 * partition information.  Another issue is that the libefi
+	 * API only provides the open fd and not the file path.  To handle
+	 * this realpath(3) is used to resolve the block device name from
+	 * /proc/self/fd/<fd>.  Aside from the partition number we collect
+	 * some additional device info.
+	 */
+	(void) sprintf(path, "/proc/self/fd/%d", fd);
+	dev_path = realpath(path, NULL);
+	free(path);
+
+	if (dev_path == NULL)
+		goto error;
+
+	if ((strncmp(dev_path, "/dev/sd", 7) == 0)) {
+		strcpy(dki_info->dki_cname, "sd");
+		dki_info->dki_ctype = DKC_SCSI_CCS;
+		rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
+			      dki_info->dki_dname,
+			      &dki_info->dki_partition);
+	} else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) {
+		strcpy(dki_info->dki_cname, "hd");
+		dki_info->dki_ctype = DKC_DIRECT;
+		rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
+			      dki_info->dki_dname,
+			      &dki_info->dki_partition);
+	} else if ((strncmp(dev_path, "/dev/md", 7) == 0)) {
+		strcpy(dki_info->dki_cname, "pseudo");
+		dki_info->dki_ctype = DKC_MD;
+		rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+			      dki_info->dki_dname,
+			      &dki_info->dki_partition);
+	} else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) {
+		strcpy(dki_info->dki_cname, "pseudo");
+		dki_info->dki_ctype = DKC_VBD;
+		rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9-]p%hu",
+			      dki_info->dki_dname,
+			      &dki_info->dki_partition);
+	} else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) {
+		strcpy(dki_info->dki_cname, "pseudo");
+		dki_info->dki_ctype = DKC_PCMCIA_MEM;
+		rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+			      dki_info->dki_dname,
+			      &dki_info->dki_partition);
+	} else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) {
+		strcpy(dki_info->dki_cname, "pseudo");
+		dki_info->dki_ctype = DKC_VBD;
+		rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+			      dki_info->dki_dname,
+			      &dki_info->dki_partition);
+	} else {
+		strcpy(dki_info->dki_dname, "unknown");
+		strcpy(dki_info->dki_cname, "unknown");
+		dki_info->dki_ctype = DKC_UNKNOWN;
+	}
+
+	/* dev_path is fully parsed; free it before any error exit (no leak) */
+	free(dev_path);
+	switch (rval) {
+	case 0:		/* nothing parsed, or an unrecognized device name */
+		errno = EINVAL;
+		goto error;
+	case 1:		/* device name only, no partition number present */
+		dki_info->dki_partition = 0;
+	}
+#else
+	if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1)
+		goto error;
+#endif
 	return (0);
+error:
+	if (efi_debug)
+		(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
+
+	switch (errno) {
+	case EIO:
+		return (VT_EIO);
+	case EINVAL:
+		return (VT_EINVAL);
+	default:
+		return (VT_ERROR);
+	}
 }
 
 /*
@@ -135,12 +261,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
 int
 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
 {
-	diskaddr_t	capacity;
-	uint_t		lbsize;
+	diskaddr_t	capacity = 0;
+	uint_t		lbsize = 0;
 	uint_t		nblocks;
 	size_t		length;
 	struct dk_gpt	*vptr;
 	struct uuid	uuid;
+	struct dk_cinfo	dki_info;
 
 	if (read_disk_info(fd, &capacity, &lbsize) != 0) {
 		if (efi_debug)
@@ -148,6 +275,22 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
 			    "couldn't read disk information\n");
 		return (-1);
 	}
+#if defined(__linux__)
+	if (efi_get_info(fd, &dki_info) != 0) {
+		if (efi_debug)
+			(void) fprintf(stderr,
+			    "couldn't read disk information\n");
+		return (-1);
+	}
+
+	if (dki_info.dki_partition != 0)
+		return (-1);
+
+	if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
+	    (dki_info.dki_ctype == DKC_VBD) ||
+	    (dki_info.dki_ctype == DKC_UNKNOWN))
+		return (-1);
+#endif
 
 	nblocks = NBLOCKS(nparts, lbsize);
 	if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
@@ -243,14 +386,138 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
 {
 	void *data = dk_ioc->dki_data;
 	int error;
+#if defined(__linux__)
+	diskaddr_t capacity;
+	uint_t lbsize;
+
+	/*
+	 * When the IO is not being performed in kernel as an ioctl we need
+	 * to know the sector size so we can seek to the proper byte offset.
+	 */
+	if (read_disk_info(fd, &capacity, &lbsize) == -1) {
+		if (efi_debug)
+			fprintf(stderr,"unable to read disk info: %d",errno);
+
+		errno = EIO;
+		return -1;
+	}
+
+	switch (cmd) {
+	case DKIOCGETEFI:
+		if (lbsize == 0) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCGETEFI assuming "
+					       "LBA %d bytes\n", DEV_BSIZE);
+
+			lbsize = DEV_BSIZE;
+		}
+
+		error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
+		if (error == -1) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCGETEFI lseek "
+				               "error: %d\n", errno);
+			return error;
+		}
+
+		error = read(fd, data, dk_ioc->dki_length);
+		if (error == -1) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCGETEFI read "
+				               "error: %d\n", errno);
+			return error;
+		}
 
+		if (error != dk_ioc->dki_length) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCGETEFI short "
+					       "read of %d bytes\n", error);
+			errno = EIO;
+			return -1;
+		}
+		error = 0;
+		break;
+
+	case DKIOCSETEFI:
+		if (lbsize == 0) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCSETEFI unknown "
+					       "LBA size\n");
+			errno = EIO;
+			return -1;
+		}
+
+		error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
+		if (error == -1) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCSETEFI lseek "
+				               "error: %d\n", errno);
+			return error;
+		}
+
+		error = write(fd, data, dk_ioc->dki_length);
+		if (error == -1) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCSETEFI write "
+				               "error: %d\n", errno);
+			return error;
+		}
+
+		if (error != dk_ioc->dki_length) {
+			if (efi_debug)
+				(void) fprintf(stderr, "DKIOCSETEFI short "
+					       "write of %d bytes\n", error);
+			errno = EIO;
+			return -1;
+		}
+
+		/* Sync the new EFI table to disk */
+		error = fsync(fd);
+		if (error == -1)
+			return error;
+
+		/* Ensure any local disk cache is also flushed */
+		if (ioctl(fd, BLKFLSBUF, 0) == -1)
+			return -1;	/* 'error' is 0 here (fsync succeeded) */
+
+		error = 0;
+		break;
+
+	default:
+		if (efi_debug)
+			(void) fprintf(stderr, "unsupported ioctl()\n");
+
+		errno = EIO;
+		return -1;
+	}
+#else
 	dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
 	error = ioctl(fd, cmd, (void *)dk_ioc);
 	dk_ioc->dki_data = data;
-
+#endif
 	return (error);
 }
 
+#if defined(__linux__)
+static int
+efi_rescan(int fd)
+{
+	int retry = 5;		/* maximum number of BLKRRPART attempts */
+	int error;
+
+	/* Notify the kernel that a device's partition table has been updated */
+	while ((error = ioctl(fd, BLKRRPART)) != 0) {
+		if (--retry == 0) {
+			(void) fprintf(stderr, "the kernel failed to rescan "
+				       "the partition table: %d\n", errno);
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+#endif
+
 static int
 check_label(int fd, dk_efi_t *dk_ioc)
 {
@@ -305,6 +572,8 @@ efi_read(int fd, struct dk_gpt *vtoc)
 	int			rval = 0;
 	int			md_flag = 0;
 	int			vdc_flag = 0;
+	diskaddr_t		capacity = 0;
+	uint_t			lbsize = 0;
 	struct dk_minfo		disk_info;
 	dk_efi_t		dk_ioc;
 	efi_gpt_t		*efi;
@@ -316,19 +585,9 @@ efi_read(int fd, struct dk_gpt *vtoc)
 	/*
 	 * get the partition number for this file descriptor.
 	 */
-	if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
-		if (efi_debug) {
-			(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
-		}
-		switch (errno) {
-		case EIO:
-			return (VT_EIO);
-		case EINVAL:
-			return (VT_EINVAL);
-		default:
-			return (VT_ERROR);
-		}
-	}
+	if ((rval = efi_get_info(fd, &dki_info)) != 0)
+		return rval;
+
 	if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
 	    (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
 		md_flag++;
@@ -342,14 +601,18 @@ efi_read(int fd, struct dk_gpt *vtoc)
 	}
 
 	/* get the LBA size */
-	if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) {
+	if (read_disk_info(fd, &capacity, &lbsize) == -1) {
 		if (efi_debug) {
 			(void) fprintf(stderr,
-			    "assuming LBA 512 bytes %d\n",
-			    errno);
+				       "unable to read disk info: %d",
+				       errno);
 		}
-		disk_info.dki_lbsize = DEV_BSIZE;
+		return (VT_EINVAL);
 	}
+
+	disk_info.dki_lbsize = lbsize;
+	disk_info.dki_capacity = capacity;
+
 	if (disk_info.dki_lbsize == 0) {
 		if (efi_debug) {
 			(void) fprintf(stderr,
@@ -374,9 +637,11 @@ efi_read(int fd, struct dk_gpt *vtoc)
 		}
 	}
 
-	if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL)
+	if (posix_memalign((void **)&dk_ioc.dki_data,
+		           disk_info.dki_lbsize, label_len))
 		return (VT_ERROR);
 
+	memset(dk_ioc.dki_data, 0, label_len);
 	dk_ioc.dki_length = disk_info.dki_lbsize;
 	user_length = vtoc->efi_nparts;
 	efi = dk_ioc.dki_data;
@@ -572,12 +837,14 @@ write_pmbr(int fd, struct dk_gpt *vtoc)
 	int		len;
 
 	len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
-	buf = calloc(len, 1);
+	if (posix_memalign((void **)&buf, len, len))
+		return (VT_ERROR);
 
 	/*
 	 * Preserve any boot code and disk signature if the first block is
 	 * already an MBR.
 	 */
+	memset(buf, 0, len);
 	dk_ioc.dki_lba = 0;
 	dk_ioc.dki_length = len;
 	/* LINTED -- always longlong aligned */
@@ -663,10 +930,9 @@ check_input(struct dk_gpt *vtoc)
 		if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
 		    (vtoc->efi_parts[i].p_size != 0)) {
 			if (efi_debug) {
-				(void) fprintf(stderr,
-"partition %d is \"unassigned\" but has a size of %llu",
-				    i,
-				    vtoc->efi_parts[i].p_size);
+				(void) fprintf(stderr, "partition %d is "
+				    "\"unassigned\" but has a size of %llu",
+				    i, vtoc->efi_parts[i].p_size);
 			}
 			return (VT_EINVAL);
 		}
@@ -679,9 +945,9 @@ check_input(struct dk_gpt *vtoc)
 		if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
 			if (resv_part != -1) {
 				if (efi_debug) {
-					(void) fprintf(stderr,
-"found duplicate reserved partition at %d\n",
-					    i);
+					(void) fprintf(stderr, "found "
+					    "duplicate reserved partition "
+					    "at %d\n", i);
 				}
 				return (VT_EINVAL);
 			}
@@ -732,8 +998,8 @@ check_input(struct dk_gpt *vtoc)
 				    (istart <= endsect)) {
 					if (efi_debug) {
 						(void) fprintf(stderr,
-"Partition %d overlaps partition %d.",
-						    i, j);
+						    "Partition %d overlaps "
+						    "partition %d.", i, j);
 					}
 					return (VT_EINVAL);
 				}
@@ -839,22 +1105,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
 	efi_gpe_t		*efi_parts;
 	int			i, j;
 	struct dk_cinfo		dki_info;
+	int			rval;
 	int			md_flag = 0;
 	int			nblocks;
 	diskaddr_t		lba_backup_gpt_hdr;
 
-	if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
-		if (efi_debug)
-			(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
-		switch (errno) {
-		case EIO:
-			return (VT_EIO);
-		case EINVAL:
-			return (VT_EINVAL);
-		default:
-			return (VT_ERROR);
-		}
-	}
+	if ((rval = efi_get_info(fd, &dki_info)) != 0)
+		return rval;
 
 	/* check if we are dealing wih a metadevice */
 	if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
@@ -892,9 +1149,11 @@ efi_write(int fd, struct dk_gpt *vtoc)
 	 * for backup GPT header.
 	 */
 	lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
-	if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL)
+	if (posix_memalign((void **)&dk_ioc.dki_data,
+		           vtoc->efi_lbasize, dk_ioc.dki_length))
 		return (VT_ERROR);
 
+	memset(dk_ioc.dki_data, 0, dk_ioc.dki_length);
 	efi = dk_ioc.dki_data;
 
 	/* stuff user's input into EFI struct */
@@ -941,6 +1200,10 @@ efi_write(int fd, struct dk_gpt *vtoc)
 			return (VT_EINVAL);
 		}
 
+		/* Zeros should be written for empty partitions */
+		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
+			continue;
+
 		efi_parts[i].efi_gpe_StartingLBA =
 		    LE_64(vtoc->efi_parts[i].p_start);
 		efi_parts[i].efi_gpe_EndingLBA =
@@ -1032,6 +1295,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
 	/* write the PMBR */
 	(void) write_pmbr(fd, vtoc);
 	free(dk_ioc.dki_data);
+
+#if defined(__linux__)
+	rval = efi_rescan(fd);
+	if (rval)
+		return (VT_ERROR);
+#endif
+
 	return (0);
 }
 
@@ -1049,6 +1319,7 @@ efi_free(struct dk_gpt *ptr)
 int
 efi_type(int fd)
 {
+#if 0
 	struct vtoc vtoc;
 	struct extvtoc extvtoc;
 
@@ -1062,6 +1333,9 @@ efi_type(int fd)
 		}
 	}
 	return (0);
+#else
+	return (ENOSYS);
+#endif
 }
 
 void
@@ -1175,7 +1449,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc)
 		return (-1);
 	}
 
-	for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) {
+	for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) {
 		(*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag;
 		(*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag;
 		(*vtoc)->efi_parts[i].p_start = 0;
diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h
index 197e2eefc..dcbd283ac 100644
--- a/lib/libzfs/include/libzfs.h
+++ b/lib/libzfs/include/libzfs.h
@@ -49,6 +49,26 @@ extern "C" {
 #define	ZPOOL_MAXPROPLEN	MAXPATHLEN
 
 /*
+ * Default device paths
+ */
+
+#if defined(__sun__) || defined(__sun)
+#define	DISK_ROOT	"/dev/dsk"
+#define	RDISK_ROOT	"/dev/rdsk"
+#define	UDISK_ROOT	RDISK_ROOT
+#define	FIRST_SLICE	"s0"
+#define	BACKUP_SLICE	"s2"
+#endif
+
+#ifdef __linux__
+#define	DISK_ROOT	"/dev"
+#define	RDISK_ROOT	DISK_ROOT
+#define	UDISK_ROOT	"/dev/disk"
+#define	FIRST_SLICE	"1"
+#define	BACKUP_SLICE	""
+#endif
+
+/*
  * libzfs errors
  */
 enum {
@@ -248,6 +268,7 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
     boolean_t *, boolean_t *);
 extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
     boolean_t *, boolean_t *, boolean_t *);
+extern int zpool_label_disk_wait(char *, int);
 extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
 
 /*
@@ -661,9 +682,6 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
 extern int zpool_read_label(int, nvlist_t **);
 extern int zpool_clear_label(int);
 
-/* is this zvol valid for use as a dump device? */
-extern int zvol_check_dump_config(char *);
-
 /*
  * Management interfaces for SMB ACL files
  */
diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h
index 3d001df07..2389b7823 100644
--- a/lib/libzfs/include/libzfs_impl.h
+++ b/lib/libzfs/include/libzfs_impl.h
@@ -191,6 +191,8 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
 
 int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
 
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
 boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
 
 int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
index 0bcfc0423..6f067d563 100644
--- a/lib/libzfs/libzfs_changelist.c
+++ b/lib/libzfs/libzfs_changelist.c
@@ -93,6 +93,7 @@ struct prop_changelist {
 int
 changelist_prefix(prop_changelist_t *clp)
 {
+#ifdef HAVE_ZPL
 	prop_changenode_t *cn;
 	int ret = 0;
 
@@ -141,6 +142,9 @@ changelist_prefix(prop_changelist_t *clp)
 		(void) changelist_postfix(clp);
 
 	return (ret);
+#else
+	return 0;
+#endif  /* HAVE_ZPL */
 }
 
 /*
@@ -155,6 +159,7 @@ changelist_prefix(prop_changelist_t *clp)
 int
 changelist_postfix(prop_changelist_t *clp)
 {
+#ifdef HAVE_ZPL
 	prop_changenode_t *cn;
 	char shareopts[ZFS_MAXPROPLEN];
 	int errors = 0;
@@ -255,6 +260,9 @@ changelist_postfix(prop_changelist_t *clp)
 	}
 
 	return (errors ? -1 : 0);
+#else
+	return 0;
+#endif  /* HAVE_ZPL */
 }
 
 /*
@@ -317,6 +325,7 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
 int
 changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
 {
+#ifdef HAVE_ZPL
 	prop_changenode_t *cn;
 	int ret = 0;
 
@@ -331,6 +340,9 @@ changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
 	}
 
 	return (ret);
+#else
+	return 0;
+#endif
 }
 
 /*
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index baf289b64..d876e5d1f 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -57,6 +57,7 @@
 #include "libzfs_impl.h"
 #include "zfs_deleg.h"
 
+static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
 static int userquota_propname_decode(const char *propname, boolean_t zoned,
     zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
 
@@ -994,6 +995,7 @@ badlabel:
 
 			/*FALLTHRU*/
 
+#ifdef HAVE_ZPL
 		case ZFS_PROP_SHARESMB:
 		case ZFS_PROP_SHARENFS:
 			/*
@@ -1104,6 +1106,7 @@ badlabel:
 			}
 
 			break;
+#endif /* HAVE_ZPL */
 		case ZFS_PROP_UTF8ONLY:
 			chosen_utf = (int)intval;
 			break;
@@ -2742,6 +2745,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
 			goto ancestorerr;
 		}
 
+#ifdef HAVE_ZPL
 		if (zfs_mount(h, NULL, 0) != 0) {
 			opname = dgettext(TEXT_DOMAIN, "mount");
 			goto ancestorerr;
@@ -2751,6 +2755,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
 			opname = dgettext(TEXT_DOMAIN, "share");
 			goto ancestorerr;
 		}
+#endif /* HAVE_ZPL */
 
 		zfs_close(h);
 	}
@@ -2887,6 +2892,18 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
 	/* create the dataset */
 	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
 
+	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(hdl, path);
+		if (ret) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully created, but device links "
+			    "were not created"));
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+	}
+
 	zcmd_free_nvlists(&zc);
 
 	/* check for failure */
@@ -2949,6 +2966,9 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer)
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
 	if (ZFS_IS_VOLUME(zhp)) {
+		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+			return (-1);
+
 		zc.zc_objset_type = DMU_OST_ZVOL;
 	} else {
 		zc.zc_objset_type = DMU_OST_ZFS;
@@ -2991,9 +3011,17 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
 		zfs_close(szhp);
 	}
 
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		(void) zvol_remove_link(zhp->zfs_hdl, name);
+		/*
+		 * NB: this is simply a best-effort.  We don't want to
+		 * return an error, because then we wouldn't visit all
+		 * the volumes.
+		 */
+	}
+
 	dd->closezhp = B_TRUE;
-	if (!dd->gotone)
-		rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg);
+	rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg);
 	if (closezhp)
 		zfs_close(zhp);
 	return (rv);
@@ -3128,11 +3156,70 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
 			return (zfs_standard_error(zhp->zfs_hdl, errno,
 			    errbuf));
 		}
+	} else if (ZFS_IS_VOLUME(zhp)) {
+		ret = zvol_create_link(zhp->zfs_hdl, target);
 	}
 
 	return (ret);
 }
 
+typedef struct promote_data {
+	char cb_mountpoint[MAXPATHLEN];	/* mountpoint of the origin's dataset */
+	const char *cb_target;		/* name of the clone being promoted */
+	const char *cb_errbuf;		/* error prefix used by zfs_promote() */
+	uint64_t cb_pivot_txg;		/* createtxg of the origin snapshot */
+} promote_data_t;
+
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	zfs_handle_t *szhp;
+	char snapname[MAXPATHLEN];
+	int rv = 0;
+
+	/* Skip (and close) snapshots created after the pivot point */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/* Remove the device link if it's a zvol; failures are ignored. */
+	if (ZFS_IS_VOLUME(zhp))
+		(void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
+
+	/* Conflict check: does <target>@<snapshot> already exist? */
+	(void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
+	(void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
+	szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
+	if (szhp != NULL) {
+		zfs_close(szhp);
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot name '%s' from origin \n"
+		    "conflicts with '%s' from target"),
+		    zhp->zfs_name, snapname);
+		rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
+	}
+	zfs_close(zhp);
+	return (rv);
+}
+
+static int
+promote_snap_done_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+
+	/* Only snapshots at or before the pivot point are handled here */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
+		/* Create the device link if it's a zvol; failures are ignored. */
+		if (ZFS_IS_VOLUME(zhp))
+			(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
 /*
  * Promotes the given clone fs to be the clone parent.
  */
@@ -3142,7 +3229,10 @@ zfs_promote(zfs_handle_t *zhp)
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
 	char parent[MAXPATHLEN];
+	char *cp;
 	int ret;
+	zfs_handle_t *pzhp;
+	promote_data_t pd;
 	char errbuf[1024];
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -3160,7 +3250,29 @@ zfs_promote(zfs_handle_t *zhp)
 		    "not a cloned filesystem"));
 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 	}
+	cp = strchr(parent, '@');
+	*cp = '\0';
+
+	/* Walk the snapshots we will be moving */
+	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+	if (pzhp == NULL)
+		return (-1);
+	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+	zfs_close(pzhp);
+	pd.cb_target = zhp->zfs_name;
+	pd.cb_errbuf = errbuf;
+	pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
+	if (pzhp == NULL)
+		return (-1);
+	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+	if (ret != 0) {
+		zfs_close(pzhp);
+		return (-1);
+	}
 
+	/* issue the ioctl */
 	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
 	    sizeof (zc.zc_value));
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -3169,9 +3281,16 @@ zfs_promote(zfs_handle_t *zhp)
 	if (ret != 0) {
 		int save_errno = errno;
 
+		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
+		zfs_close(pzhp);
+
 		switch (save_errno) {
 		case EEXIST:
-			/* There is a conflicting snapshot name. */
+			/*
+			 * There is a conflicting snapshot name.  We
+			 * should have caught this above, but they could
+			 * have renamed something in the mean time.
+			 */
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "conflicting snapshot '%s' from parent '%s'"),
 			    zc.zc_string, parent);
@@ -3180,7 +3299,44 @@ zfs_promote(zfs_handle_t *zhp)
 		default:
 			return (zfs_standard_error(hdl, save_errno, errbuf));
 		}
+	} else {
+		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
 	}
+
+	zfs_close(pzhp);
+	return (ret);
+}
+
+struct createdata {
+	const char *cd_snapname;	/* snapshot name, the part after '@' */
+	int cd_ifexists;	/* passed as 'ifexists' to zvol_create_link_common */
+};
+
+static int
+zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
+{
+	struct createdata *cd = arg;
+	int ret;
+
+	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		char name[MAXPATHLEN];	/* "<dataset>@<snapname>" */
+
+		(void) strlcpy(name, zhp->zfs_name, sizeof (name));
+		(void) strlcat(name, "@", sizeof (name));
+		(void) strlcat(name, cd->cd_snapname, sizeof (name));
+		(void) zvol_create_link_common(zhp->zfs_hdl, name,
+		    cd->cd_ifexists);
+		/*
+		 * NB: this is simply a best-effort.  We don't want to
+		 * return an error, because then we wouldn't visit all
+		 * the volumes.
+		 */
+	}
+
+	ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
+
+	zfs_close(zhp);		/* this callback closes the handle it was given */
+
 	return (ret);
 }
 
@@ -3244,12 +3400,32 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
 	 * if it was recursive, the one that actually failed will be in
 	 * zc.zc_name.
 	 */
-	if (ret != 0) {
+	if (ret != 0)
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
-		(void) zfs_standard_error(hdl, errno, errbuf);
+
+	if (ret == 0 && recursive) {
+		struct createdata cd;
+
+		cd.cd_snapname = delim + 1;
+		cd.cd_ifexists = B_FALSE;
+		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
+	}
+	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		ret = zvol_create_link(zhp->zfs_hdl, path);
+		if (ret != 0) {
+			(void) zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN,
+			    "Volume successfully snapshotted, but device links "
+			    "were not created"));
+			zfs_close(zhp);
+			return (-1);
+		}
 	}
 
+	if (ret != 0)
+		(void) zfs_standard_error(hdl, errno, errbuf);
+
 	zfs_close(zhp);
 
 	return (ret);
@@ -3351,6 +3527,8 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
 	 */
 
 	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+			return (-1);
 		if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
 			return (-1);
 		old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
@@ -3388,6 +3566,10 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
 	 */
 	if ((zhp->zfs_type == ZFS_TYPE_VOLUME) &&
 	    (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) {
+		if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) {
+			zfs_close(zhp);
+			return (err);
+		}
 		if (restore_resv) {
 			new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
 			if (old_volsize != new_volsize)
@@ -3536,6 +3718,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 	}
 
 	if (recursive) {
+		struct destroydata dd;
 
 		parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
 		if (parentname == NULL) {
@@ -3550,6 +3733,15 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 			goto error;
 		}
 
+		dd.snapname = delim + 1;
+		dd.gotone = B_FALSE;
+		dd.closezhp = B_TRUE;
+
+		/* We remove any zvol links prior to renaming them */
+		ret = zfs_iter_filesystems(zhrp, zfs_check_snap_cb, &dd);
+		if (ret) {
+			goto error;
+		}
 	} else {
 		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL)
 			return (-1);
@@ -3598,10 +3790,27 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 		 * On failure, we still want to remount any filesystems that
 		 * were previously mounted, so we don't alter the system state.
 		 */
-		if (!recursive)
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = delim + 1;
+			cd.cd_ifexists = B_TRUE;
+			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
 			(void) changelist_postfix(cl);
+		}
 	} else {
-		if (!recursive) {
+		if (recursive) {
+			struct createdata cd;
+
+			/* only create links for datasets that had existed */
+			cd.cd_snapname = strchr(target, '@') + 1;
+			cd.cd_ifexists = B_TRUE;
+			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+			    &cd);
+		} else {
 			changelist_rename(cl, zfs_get_name(zhp), target);
 			ret = changelist_postfix(cl);
 		}
@@ -3620,19 +3829,103 @@ error:
 	return (ret);
 }
 
-nvlist_t *
-zfs_get_user_props(zfs_handle_t *zhp)
+/*
+ * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
+ * and wait briefly for udev to create the /dev link.
+ */
+int
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
 {
-	return (zhp->zfs_user_props);
+	return (zvol_create_link_common(hdl, dataset, B_FALSE));
+}
+
+static int
+zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
+{
+	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+	char path[MAXPATHLEN];
+	int error;
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	/*
+	 * Issue ZFS_IOC_CREATE_MINOR for the named dataset.
+	 */
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+		switch (errno) {
+		case EEXIST:
+			/*
+			 * Silently ignore the case where the link already
+			 * exists.  This allows 'zfs volinit' to be run multiple
+			 * times without errors.
+			 */
+			return (0);
+
+		case ENOENT:
+			/*
+			 * Dataset does not exist in the kernel.  If we
+			 * don't care (see zfs_rename), then ignore the
+			 * error quietly.
+			 */
+			if (ifexists) {
+				return (0);
+			}
+
+			/* FALLTHROUGH */
+
+		default:
+			return (zfs_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset));
+		}
+	}
+
+	/*
+	 * Wait up to 10s for udev to create the device; a timeout is not fatal.
+	 */
+	(void) snprintf(path, sizeof (path), "%s/%s", ZVOL_DIR, dataset);
+	error = zpool_label_disk_wait(path, 10000);
+	if (error)
+		(void) printf(gettext("%s may not be immediately "
+		    "available\n"), path);
+
+	return (0);
+}
+
+/*
+ * Remove the minor node (and associated /dev links) for the given zvol.
+ */
+int
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+{
+	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
+		switch (errno) {
+		case ENXIO:
+			/*
+			 * Silently ignore the case where the link no longer
+			 * exists, so that 'zfs volfini' can be run multiple
+			 * times without errors.
+			 */
+			return (0);
+
+		default:
+			return (zfs_standard_error_fmt(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot remove device "
+			    "links for '%s'"), dataset));
+		}
+	}
+
+	return (0);
+}
 
 nvlist_t *
-zfs_get_recvd_props(zfs_handle_t *zhp)
+zfs_get_user_props(zfs_handle_t *zhp)
 {
-	if (zhp->zfs_recvd_props == NULL)
-		if (get_recvd_props_ioctl(zhp) != 0)
-			return (NULL);
-	return (zhp->zfs_recvd_props);
+	return (zhp->zfs_user_props);
 }
 
 /*
@@ -3744,6 +4037,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received)
 	return (0);
 }
 
+#ifdef HAVE_ZPL
 int
 zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
     char *resource, void *export, void *sharetab,
@@ -3763,6 +4057,7 @@ zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
 	error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc);
 	return (error);
 }
+#endif /* HAVE_ZPL */
 
 void
 zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 386ab002f..ee0064892 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -52,9 +52,11 @@
 #include <sys/vtoc.h>
 #include <sys/dktp/fdisk.h>
 #include <sys/efi_partition.h>
-#include <thread_pool.h>
 
 #include <sys/vdev_impl.h>
+#ifdef HAVE_LIBBLKID
+#include <blkid/blkid.h>
+#endif
 
 #include "libzfs.h"
 #include "libzfs_impl.h"
@@ -904,211 +906,76 @@ zpool_read_label(int fd, nvlist_t **config)
 	return (0);
 }
 
-typedef struct rdsk_node {
-	char *rn_name;
-	int rn_dfd;
-	libzfs_handle_t *rn_hdl;
-	nvlist_t *rn_config;
-	avl_tree_t *rn_avl;
-	avl_node_t rn_node;
-	boolean_t rn_nozpool;
-} rdsk_node_t;
-
+#ifdef HAVE_LIBBLKID
+/*
+ * Use libblkid to quickly search for zfs devices
+ */
 static int
-slice_cache_compare(const void *arg1, const void *arg2)
-{
-	const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
-	const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
-	char *nm1slice, *nm2slice;
-	int rv;
-
-	/*
-	 * slices zero and two are the most likely to provide results,
-	 * so put those first
-	 */
-	nm1slice = strstr(nm1, "s0");
-	nm2slice = strstr(nm2, "s0");
-	if (nm1slice && !nm2slice) {
-		return (-1);
-	}
-	if (!nm1slice && nm2slice) {
-		return (1);
-	}
-	nm1slice = strstr(nm1, "s2");
-	nm2slice = strstr(nm2, "s2");
-	if (nm1slice && !nm2slice) {
-		return (-1);
-	}
-	if (!nm1slice && nm2slice) {
-		return (1);
-	}
-
-	rv = strcmp(nm1, nm2);
-	if (rv == 0)
-		return (0);
-	return (rv > 0 ? 1 : -1);
-}
-
-static void
-check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
-    diskaddr_t size, uint_t blksz)
-{
-	rdsk_node_t tmpnode;
-	rdsk_node_t *node;
-	char sname[MAXNAMELEN];
-
-	tmpnode.rn_name = &sname[0];
-	(void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
-	    diskname, partno);
-	/*
-	 * protect against division by zero for disk labels that
-	 * contain a bogus sector size
-	 */
-	if (blksz == 0)
-		blksz = DEV_BSIZE;
-	/* too small to contain a zpool? */
-	if ((size < (SPA_MINDEVSIZE / blksz)) &&
-	    (node = avl_find(r, &tmpnode, NULL)))
-		node->rn_nozpool = B_TRUE;
-}
-
-static void
-nozpool_all_slices(avl_tree_t *r, const char *sname)
-{
-	char diskname[MAXNAMELEN];
-	char *ptr;
-	int i;
-
-	(void) strncpy(diskname, sname, MAXNAMELEN);
-	if (((ptr = strrchr(diskname, 's')) == NULL) &&
-	    ((ptr = strrchr(diskname, 'p')) == NULL))
-		return;
-	ptr[0] = 's';
-	ptr[1] = '\0';
-	for (i = 0; i < NDKMAP; i++)
-		check_one_slice(r, diskname, i, 0, 1);
-	ptr[0] = 'p';
-	for (i = 0; i <= FD_NUMPART; i++)
-		check_one_slice(r, diskname, i, 0, 1);
-}
-
-static void
-check_slices(avl_tree_t *r, int fd, const char *sname)
+zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
 {
-	struct extvtoc vtoc;
-	struct dk_gpt *gpt;
-	char diskname[MAXNAMELEN];
-	char *ptr;
-	int i;
-
-	(void) strncpy(diskname, sname, MAXNAMELEN);
-	if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
-		return;
-	ptr[1] = '\0';
-
-	if (read_extvtoc(fd, &vtoc) >= 0) {
-		for (i = 0; i < NDKMAP; i++)
-			check_one_slice(r, diskname, i,
-			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
-	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
-		/*
-		 * on x86 we'll still have leftover links that point
-		 * to slices s[9-15], so use NDKMAP instead
-		 */
-		for (i = 0; i < NDKMAP; i++)
-			check_one_slice(r, diskname, i,
-			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
-		/* nodes p[1-4] are never used with EFI labels */
-		ptr[0] = 'p';
-		for (i = 1; i <= FD_NUMPART; i++)
-			check_one_slice(r, diskname, i, 0, 1);
-		efi_free(gpt);
-	}
-}
-
-static void
-zpool_open_func(void *arg)
-{
-	rdsk_node_t *rn = arg;
-	struct stat64 statbuf;
+	blkid_cache cache;
+	blkid_dev_iterate iter;
+	blkid_dev dev;
+	const char *devname;
 	nvlist_t *config;
-	int fd;
+	int fd, err;
 
-	if (rn->rn_nozpool)
-		return;
-	if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
-		/* symlink to a device that's no longer there */
-		if (errno == ENOENT)
-			nozpool_all_slices(rn->rn_avl, rn->rn_name);
-		return;
-	}
-	/*
-	 * Ignore failed stats.  We only want regular
-	 * files, character devs and block devs.
-	 */
-	if (fstat64(fd, &statbuf) != 0 ||
-	    (!S_ISREG(statbuf.st_mode) &&
-	    !S_ISCHR(statbuf.st_mode) &&
-	    !S_ISBLK(statbuf.st_mode))) {
-		(void) close(fd);
-		return;
-	}
-	/* this file is too small to hold a zpool */
-	if (S_ISREG(statbuf.st_mode) &&
-	    statbuf.st_size < SPA_MINDEVSIZE) {
-		(void) close(fd);
-		return;
-	} else if (!S_ISREG(statbuf.st_mode)) {
-		/*
-		 * Try to read the disk label first so we don't have to
-		 * open a bunch of minor nodes that can't have a zpool.
-		 */
-		check_slices(rn->rn_avl, fd, rn->rn_name);
+	err = blkid_get_cache(&cache, NULL);
+	if (err != 0) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err);
+		goto err_blkid1;
 	}
 
-	if ((zpool_read_label(fd, &config)) != 0) {
-		(void) close(fd);
-		(void) no_memory(rn->rn_hdl);
-		return;
+	err = blkid_probe_all(cache);
+	if (err != 0) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err);
+		goto err_blkid2;
 	}
-	(void) close(fd);
 
+	if ((iter = blkid_dev_iterate_begin(cache)) == NULL) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()"));
+		err = -1;	/* err was still 0 from blkid_probe_all() */
+		goto err_blkid2;
+	}
 
-	rn->rn_config = config;
-	if (config != NULL) {
-		assert(rn->rn_nozpool == B_FALSE);
+	err = blkid_dev_set_search(iter, "TYPE", "zfs");
+	if (err != 0) {
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err);
+		goto err_blkid3;
 	}
-}
 
-/*
- * Given a file descriptor, clear (zero) the label information.  This function
- * is currently only used in the appliance stack as part of the ZFS sysevent
- * module.
- */
-int
-zpool_clear_label(int fd)
-{
-	struct stat64 statbuf;
-	int l;
-	vdev_label_t *label;
-	uint64_t size;
+	while (blkid_dev_next(iter, &dev) == 0) {
+		devname = blkid_dev_devname(dev);
+		if ((fd = open64(devname, O_RDONLY)) < 0)
+			continue;
 
-	if (fstat64(fd, &statbuf) == -1)
-		return (0);
-	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+		err = zpool_read_label(fd, &config);
+		(void) close(fd);
 
-	if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
-		return (-1);
+		if (err != 0) {
+			(void) no_memory(hdl);
+			goto err_blkid3;
+		}
 
-	for (l = 0; l < VDEV_LABELS; l++) {
-		if (pwrite64(fd, label, sizeof (vdev_label_t),
-		    label_offset(size, l)) != sizeof (vdev_label_t))
-			return (-1);
+		if (config != NULL) {
+			err = add_config(hdl, pools, devname, config);
+			if (err != 0)
+				goto err_blkid3;
+		}
 	}
 
-	free(label);
-	return (0);
+err_blkid3:
+	blkid_dev_iterate_end(iter);
+err_blkid2:
+	blkid_put_cache(cache);
+err_blkid1:
+	return err;
 }
+#endif /* HAVE_LIBBLKID */
 
 /*
  * Given a list of directories to search, find all pools stored on disk.  This
@@ -1126,18 +993,28 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 	char path[MAXPATHLEN];
 	char *end, **dir = iarg->path;
 	size_t pathleft;
-	nvlist_t *ret = NULL;
-	static char *default_dir = "/dev/dsk";
+	struct stat64 statbuf;
+	nvlist_t *ret = NULL, *config;
+	static char *default_dir = DISK_ROOT;
+	int fd;
 	pool_list_t pools = { 0 };
 	pool_entry_t *pe, *penext;
 	vdev_entry_t *ve, *venext;
 	config_entry_t *ce, *cenext;
 	name_entry_t *ne, *nenext;
-	avl_tree_t slice_cache;
-	rdsk_node_t *slice;
-	void *cookie;
+
+	verify(iarg->poolname == NULL || iarg->guid == 0);
 
 	if (dirs == 0) {
+#ifdef HAVE_LIBBLKID
+		/* Use libblkid to scan all devices for their type */
+		if (zpool_find_import_blkid(hdl, &pools) == 0)
+			goto skip_scanning;
+
+		(void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+		    dgettext(TEXT_DOMAIN, "blkid failure falling back "
+		    "to manual probing"));
+#endif /* HAVE_LIBBLKID */
 		dirs = 1;
 		dir = &default_dir;
 	}
@@ -1148,7 +1025,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 	 * and toplevel GUID.
 	 */
 	for (i = 0; i < dirs; i++) {
-		tpool_t *t;
 		char *rdsk;
 		int dfd;
 
@@ -1182,8 +1058,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 			goto error;
 		}
 
-		avl_create(&slice_cache, slice_cache_compare,
-		    sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
 		/*
 		 * This is not MT-safe, but we have no MT consumers of libzfs
 		 */
@@ -1193,37 +1067,51 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
 				continue;
 
-			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
-			slice->rn_name = zfs_strdup(hdl, name);
-			slice->rn_avl = &slice_cache;
-			slice->rn_dfd = dfd;
-			slice->rn_hdl = hdl;
-			slice->rn_nozpool = B_FALSE;
-			avl_add(&slice_cache, slice);
-		}
-		/*
-		 * create a thread pool to do all of this in parallel;
-		 * rn_nozpool is not protected, so this is racy in that
-		 * multiple tasks could decide that the same slice can
-		 * not hold a zpool, which is benign.  Also choose
-		 * double the number of processors; we hold a lot of
-		 * locks in the kernel, so going beyond this doesn't
-		 * buy us much.
-		 */
-		t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
-		    0, NULL);
-		for (slice = avl_first(&slice_cache); slice;
-		    (slice = avl_walk(&slice_cache, slice,
-		    AVL_AFTER)))
-			(void) tpool_dispatch(t, zpool_open_func, slice);
-		tpool_wait(t);
-		tpool_destroy(t);
-
-		cookie = NULL;
-		while ((slice = avl_destroy_nodes(&slice_cache,
-		    &cookie)) != NULL) {
-			if (slice->rn_config != NULL) {
-				nvlist_t *config = slice->rn_config;
+			/*
+			 * Skip checking devices with well known prefixes:
+			 * watchdog - A special close is required to avoid
+			 *            triggering it and resetting the system.
+			 * fuse     - Fuse control device.
+			 * ppp      - Generic PPP driver.
+			 * tty*     - Generic serial interface.
+			 * vcs*     - Virtual console memory.
+			 * parport* - Parallel port interface.
+			 * lp*      - Printer interface.
+			 * fd*      - Floppy interface.
+			 */
+			if ((strncmp(name, "watchdog", 8) == 0) ||
+			    (strncmp(name, "fuse", 4) == 0)     ||
+			    (strncmp(name, "ppp", 3) == 0)      ||
+			    (strncmp(name, "tty", 3) == 0)      ||
+			    (strncmp(name, "vcs", 3) == 0)      ||
+			    (strncmp(name, "parport", 7) == 0)  ||
+			    (strncmp(name, "lp", 2) == 0)       ||
+			    (strncmp(name, "fd", 2) == 0))
+				continue;
+
+			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
+				continue;
+
+			/*
+			 * Ignore failed stats.  We only want regular
+			 * files and block devs.
+			 */
+			if (fstat64(fd, &statbuf) != 0 ||
+			    (!S_ISREG(statbuf.st_mode) &&
+			    !S_ISBLK(statbuf.st_mode))) {
+				(void) close(fd);
+				continue;
+			}
+
+			if ((zpool_read_label(fd, &config)) != 0) {
+				(void) close(fd);
+				(void) no_memory(hdl);
+				goto error;
+			}
+
+			(void) close(fd);
+
+			if (config != NULL) {
 				boolean_t matched = B_TRUE;
 
 				if (iarg->poolname != NULL) {
@@ -1247,19 +1135,19 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 					continue;
 				}
 				/* use the non-raw path for the config */
-				(void) strlcpy(end, slice->rn_name, pathleft);
+				(void) strlcpy(end, name, pathleft);
 				if (add_config(hdl, &pools, path, config) != 0)
 					goto error;
 			}
-			free(slice->rn_name);
-			free(slice);
 		}
-		avl_destroy(&slice_cache);
 
 		(void) closedir(dirp);
 		dirp = NULL;
 	}
 
+#ifdef HAVE_LIBBLKID
+skip_scanning:
+#endif
 	ret = get_configs(hdl, &pools, iarg->can_be_active);
 
 error:
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
index c31a12371..4b9038de8 100644
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -81,6 +81,7 @@
 #include <sys/systeminfo.h>
 #define	MAXISALEN	257	/* based on sysinfo(2) man page */
 
+#ifdef HAVE_ZPL
 static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
     zfs_share_proto_t);
@@ -1268,3 +1269,53 @@ out:
 
 	return (ret);
 }
+
+#else  /* HAVE_ZPL */
+
+int
+zfs_unshare_iscsi(zfs_handle_t *zhp)
+{
+	return (0);	/* stub for builds without HAVE_ZPL */
+}
+
+int
+zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
+{
+	return (0);	/* stub for builds without HAVE_ZPL */
+}
+
+void	/* no-op stub for builds without HAVE_ZPL */
+remove_mountpoint(zfs_handle_t *zhp)
+{
+}
+
+boolean_t
+is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
+{
+	return (B_FALSE);	/* stub: 'where' is left untouched */
+}
+
+boolean_t
+zfs_is_mounted(zfs_handle_t *zhp, char **where)
+{
+	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
+}
+
+boolean_t
+zfs_is_shared(zfs_handle_t *zhp)
+{
+	return (B_FALSE);	/* stub for builds without HAVE_ZPL */
+}
+
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+	return (0);	/* stub for builds without HAVE_ZPL */
+}
+
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+	return (0);	/* stub for builds without HAVE_ZPL */
+}
+#endif /* HAVE_ZPL */
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 42f303894..ec27b5756 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -32,6 +32,8 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <zone.h>
+#include <sys/stat.h>
 #include <sys/efi_partition.h>
 #include <sys/vtoc.h>
 #include <sys/zfs_ioctl.h>
@@ -44,10 +46,6 @@
 
 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
 
-#define	DISK_ROOT	"/dev/dsk"
-#define	RDISK_ROOT	"/dev/rdsk"
-#define	BACKUP_SLICE	"s2"
-
 typedef struct prop_flags {
 	int create:1;	/* Validate property on creation */
 	int import:1;	/* Validate property on import */
@@ -651,9 +649,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
 
 /*
  * Don't start the slice at the default block of 34; many storage
- * devices will use a stripe width of 128k, so start there instead.
+ * devices will use a stripe width of 128k, other vendors prefer a 1m
+ * alignment.  It is best to play it safe and ensure a 1m alignment
+ * given 512b blocks.  When the block size is larger by a power of 2
+ * we will still be 1m aligned.
  */
-#define	NEW_START_BLOCK	256
+#define	NEW_START_BLOCK	2048
 
 /*
  * Validate the given pool name, optionally putting an extended error message in
@@ -948,10 +949,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
 			 * This can happen if the user has specified the same
 			 * device multiple times.  We can't reliably detect this
 			 * until we try to add it and see we already have a
-			 * label.
+			 * label.  This can also happen if the device is
+			 * part of an active md or lvm device.
 			 */
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "one or more vdevs refer to the same device"));
+			    "one or more vdevs refer to the same device, or one of\n"
+			    "the devices is part of an active md or lvm device"));
 			return (zfs_error(hdl, EZFS_BADDEV, msg));
 
 		case EOVERFLOW:
@@ -1928,7 +1931,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
 	} else if (zpool_vdev_is_interior(path)) {
 		verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
 	} else if (path[0] != '/') {
-		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
+		(void) snprintf(buf, sizeof (buf), "%s/%s", DISK_ROOT, path);
 		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
 	} else {
 		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
@@ -2101,22 +2104,14 @@ zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
  * the disk to use the new unallocated space.
  */
 static int
-zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
 {
-	char path[MAXPATHLEN];
 	char errbuf[1024];
 	int fd, error;
-	int (*_efi_use_whole_disk)(int);
-
-	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
-	    "efi_use_whole_disk")) == NULL)
-		return (-1);
 
-	(void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
-
-	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-		    "relabel '%s': unable to open device"), name);
+		    "relabel '%s': unable to open device"), path);
 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
 	}
 
@@ -2125,11 +2120,11 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
 	 * does not have any unallocated space left. If so, we simply
 	 * ignore that error and continue on.
 	 */
-	error = _efi_use_whole_disk(fd);
+	error = efi_use_whole_disk(fd);
 	(void) close(fd);
 	if (error && error != VT_ENOSPC) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-		    "relabel '%s': unable to read disk capacity"), name);
+		    "relabel '%s': unable to read disk capacity"), path);
 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
 	}
 	return (0);
@@ -3071,7 +3066,7 @@ char *
 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
     boolean_t verbose)
 {
-	char *path, *devid;
+	char *path, *devid, *type;
 	uint64_t value;
 	char buf[64];
 	vdev_stat_t *vs;
@@ -3085,7 +3080,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
 		    (u_longlong_t)value);
 		path = buf;
 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
-
 		/*
 		 * If the device is dead (faulted, offline, etc) then don't
 		 * bother opening it.  Otherwise we may be forcing the user to
@@ -3124,9 +3118,19 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
 				devid_str_free(newdevid);
 		}
 
-		if (strncmp(path, "/dev/dsk/", 9) == 0)
-			path += 9;
+		/*
+		 * For a block device only use the name.  A path without any
+		 * '/' is already a bare name and is left untouched.
+		 */
+		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+		if (strcmp(type, VDEV_TYPE_DISK) == 0 &&
+		    strrchr(path, '/') != NULL)
+			path = strrchr(path, '/') + 1;
 
+#if defined(__sun__) || defined(__sun)
+		/*
+		 * The following code strips the slice from the device path.
+		 */
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 		    &value) == 0 && value) {
 			int pathlen = strlen(path);
@@ -3148,6 +3152,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
 			}
 			return (tmp);
 		}
+#endif
 	} else {
 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
 
@@ -3629,7 +3634,7 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb)
 
 	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
 	    strrchr(path, '/'));
-	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
+	if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
 		struct dk_gpt *vtoc;
 
 		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
@@ -3675,6 +3680,54 @@ find_start_block(nvlist_t *config)
 	return (MAXOFFSET_T);
 }
 
+int
+zpool_label_disk_wait(char *path, int timeout)
+{
+	struct stat64 statbuf;
+	int remaining;
+
+	/*
+	 * Make at most 'timeout' attempts, sleeping one millisecond before
+	 * each, to stat 'path'.  A newly created /dev/ device can exist
+	 * before its udev symlink does, and depending on the udev rules the
+	 * link may take a few seconds to appear.
+	 */
+	for (remaining = timeout; remaining > 0; remaining--) {
+		usleep(1000);
+		errno = 0;
+		if (stat64(path, &statbuf) != 0 || errno != 0)
+			continue;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Re-read the EFI label on 'path'.  Returns 0 if it reads back cleanly,
+ * EIDRM if the primary GPT is flagged corrupt, or the open()/read error.
+ */
+int
+zpool_label_disk_check(char *path)
+{
+	struct dk_gpt *vtoc;
+	int fd, err;
+
+	fd = open(path, O_RDWR|O_DIRECT);
+	if (fd < 0)
+		return (errno);
+
+	err = efi_alloc_and_read(fd, &vtoc);
+	if (err == 0) {
+		if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT)
+			err = EIDRM;
+		efi_free(vtoc);
+	}
+
+	(void) close(fd);
+	return (err);
+}
+
 /*
  * Label an individual disk.  The name provided is the short name,
  * stripped of any leading /dev path.
@@ -3684,7 +3737,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
 {
 	char path[MAXPATHLEN];
 	struct dk_gpt *vtoc;
-	int fd;
+	int rval, fd;
 	size_t resv = EFI_MIN_RESV_SIZE;
 	uint64_t slice_size;
 	diskaddr_t start_block;
@@ -3720,13 +3773,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
 	    BACKUP_SLICE);
 
-	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
 		/*
 		 * This shouldn't happen.  We've long since verified that this
 		 * is a valid device.
 		 */
-		zfs_error_aux(hdl,
-		    dgettext(TEXT_DOMAIN, "unable to open device"));
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "unable to open device '%s': %d"), path, errno);
 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
 	}
 
@@ -3769,7 +3822,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
 	vtoc->efi_parts[8].p_size = resv;
 	vtoc->efi_parts[8].p_tag = V_RESERVED;
 
-	if (efi_write(fd, vtoc) != 0) {
+	if ((rval = efi_write(fd, vtoc)) != 0) {
 		/*
 		 * Some block drivers (like pcata) may not support EFI
 		 * GPT labels.  Print out a helpful error message dir-
@@ -3779,123 +3832,34 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
 		(void) close(fd);
 		efi_free(vtoc);
 
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "try using fdisk(1M) and then provide a specific slice"));
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
+		    "parted(8) and then provide a specific slice: %d"), rval);
 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
 	}
 
 	(void) close(fd);
 	efi_free(vtoc);
-	return (0);
-}
-
-static boolean_t
-supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
-{
-	char *type;
-	nvlist_t **child;
-	uint_t children, c;
-
-	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
-	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
-	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
-	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
-	    strcmp(type, VDEV_TYPE_HOLE) == 0 ||
-	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "vdev type '%s' is not supported"), type);
-		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
-		return (B_FALSE);
-	}
-	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0) {
-		for (c = 0; c < children; c++) {
-			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
-				return (B_FALSE);
-		}
-	}
-	return (B_TRUE);
-}
-
-/*
- * check if this zvol is allowable for use as a dump device; zero if
- * it is, > 0 if it isn't, < 0 if it isn't a zvol
- */
-int
-zvol_check_dump_config(char *arg)
-{
-	zpool_handle_t *zhp = NULL;
-	nvlist_t *config, *nvroot;
-	char *p, *volname;
-	nvlist_t **top;
-	uint_t toplevels;
-	libzfs_handle_t *hdl;
-	char errbuf[1024];
-	char poolname[ZPOOL_MAXNAMELEN];
-	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
-	int ret = 1;
-
-	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
-		return (-1);
-	}
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "dump is not supported on device '%s'"), arg);
 
-	if ((hdl = libzfs_init()) == NULL)
-		return (1);
-	libzfs_print_on_error(hdl, B_TRUE);
-
-	volname = arg + pathlen;
-
-	/* check the configuration of the pool */
-	if ((p = strchr(volname, '/')) == NULL) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "malformed dataset name"));
-		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
-		return (1);
-	} else if (p - volname >= ZFS_MAXNAMELEN) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset name is too long"));
-		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
-		return (1);
-	} else {
-		(void) strncpy(poolname, volname, p - volname);
-		poolname[p - volname] = '\0';
-	}
-
-	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "could not open pool '%s'"), poolname);
-		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
-		goto out;
-	}
-	config = zpool_get_config(zhp, NULL);
-	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) != 0) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "could not obtain vdev configuration for  '%s'"), poolname);
-		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
-		goto out;
-	}
-
-	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
-	    &top, &toplevels) == 0);
-	if (toplevels != 1) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "'%s' has multiple top level vdevs"), poolname);
-		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
-		goto out;
+	/* Wait for the first expected slice to appear. */
+	(void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
+	    isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
+	rval = zpool_label_disk_wait(path, 3000);
+	if (rval) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
+		    "detect device partitions on '%s': %d"), path, rval);
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
 	}
 
-	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
-		goto out;
+	/* We can't be too paranoid.  Read the label back and verify it. */
+	(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+	rval = zpool_label_disk_check(path);
+	if (rval) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
+		    "EFI label on '%s' is damaged.  Ensure\nthis device "
+		    "is not in use, and is functioning properly: %d"),
+		    path, rval);
+		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+	}
-	ret = 0;
 
-out:
-	if (zhp)
-		zpool_close(zhp);
-	libzfs_fini(hdl);
-	return (ret);
+	return 0;
 }
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 87ffd124f..40d1d2e53 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -2608,6 +2608,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 				return (-1);
 			}
 		}
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
+		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
+			zfs_close(zhp);
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
 		zfs_close(zhp);
 	} else {
 		/*
@@ -2813,6 +2819,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		if (h != NULL) {
 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
 				*cp = '@';
+				err = zvol_create_link(hdl, h->zfs_name);
+				if (err == 0 && ioctl_err == 0)
+					err = zvol_create_link(hdl,
+					    zc.zc_value);
 			} else if (newfs || stream_avl) {
 				/*
 				 * Track the first/top of hierarchy fs,
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index cb7d87cb2..71f81831b 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -36,6 +36,7 @@
 #include <unistd.h>
 #include <ctype.h>
 #include <math.h>
+#include <sys/stat.h>
 #include <sys/mnttab.h>
 #include <sys/mntent.h>
 #include <sys/types.h>
@@ -648,7 +649,9 @@ libzfs_fini(libzfs_handle_t *hdl)
 #endif
 	if (hdl->libzfs_sharetab)
 		(void) fclose(hdl->libzfs_sharetab);
+#ifdef HAVE_ZPL
 	zfs_uninit_libshare(hdl);
+#endif
 	if (hdl->libzfs_log_str)
 		(void) free(hdl->libzfs_log_str);
 	zpool_free_handles(hdl);
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index 494e544ea..6f06f4001 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -35,6 +35,8 @@
 #include <sys/processor.h>
 #include <sys/zfs_context.h>
 #include <sys/utsname.h>
+#include <sys/time.h>
+#include <sys/mount.h> /* for BLKGETSIZE64 */
 #include <sys/systeminfo.h>
 
 /*
@@ -533,7 +535,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
 	 * for its size.  So -- gag -- we open the block device to get
 	 * its size, and remember it for subsequent VOP_GETATTR().
 	 */
+#if defined(__sun__) || defined(__sun)
 	if (strncmp(path, "/dev/", 5) == 0) {
+#else
+	if (0) {
+#endif
 		char *dsk;
 		fd = open64(path, O_RDONLY);
 		if (fd == -1) {
@@ -562,6 +568,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
 		}
 	}
 
+	if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
+#ifdef __linux__
+		flags |= O_DIRECT;
+#endif
+		/* We shouldn't be writing to block devices in userspace */
+		VERIFY(!(flags & FWRITE));
+	}
+
 	if (flags & FCREAT)
 		old_umask = umask(0);
 
@@ -584,6 +598,16 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
 		return (err);
 	}
 
+#ifdef __linux__
+	/* In Linux, use an ioctl to get the size of a block device. */
+	if (S_ISBLK(st.st_mode)) {
+		if (ioctl(fd, BLKGETSIZE64, &st.st_size) != 0) {
+			err = errno;
+			close(fd);
+			return (err);
+		}
+	}
+#endif
 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
 
 	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
@@ -637,6 +661,16 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
 		}
 	}
 
+#ifdef __linux__
+	if (rc == -1 && errno == EINVAL) {
+		/*
+		 * Under Linux, this most likely means an alignment issue
+		 * (memory or disk) due to O_DIRECT, so we abort() in order to
+		 * catch the offender.
+		 */
+		 abort();
+	}
+#endif
 	if (rc == -1)
 		return (errno);