aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cmd/zed/agents/zfs_mod.c10
-rw-r--r--include/libzutil.h13
-rw-r--r--lib/libzutil/Makefile.am9
-rw-r--r--lib/libzutil/os/linux/zutil_device_path_os.c493
-rw-r--r--lib/libzutil/os/linux/zutil_import_os.c856
-rw-r--r--lib/libzutil/zutil_device_path.c451
-rw-r--r--lib/libzutil/zutil_import.c937
-rw-r--r--lib/libzutil/zutil_import.h76
8 files changed, 1508 insertions, 1337 deletions
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index 959ed24cd..39ddd8140 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -191,8 +191,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
char rawpath[PATH_MAX], fullpath[PATH_MAX];
char devpath[PATH_MAX];
int ret;
- int is_dm = 0;
- int is_sd = 0;
+ boolean_t is_dm = B_FALSE;
+ boolean_t is_sd = B_FALSE;
uint_t c;
vdev_stat_t *vs;
@@ -220,8 +220,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
is_dm = zfs_dev_is_dm(path);
zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
- " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
- physpath ? physpath : "NULL", wholedisk, is_dm,
+ " wholedisk %d, %s dm (guid %llu)", zpool_get_name(zhp), path,
+ physpath ? physpath : "NULL", wholedisk, is_dm ? "is" : "not",
(long long unsigned int)guid);
/*
@@ -266,7 +266,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* testing)
*/
if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
- is_sd = 1;
+ is_sd = B_TRUE;
/*
* If the pool doesn't have the autoreplace property set, then use
diff --git a/include/libzutil.h b/include/libzutil.h
index 7c483f0d9..4a8df0132 100644
--- a/include/libzutil.h
+++ b/include/libzutil.h
@@ -79,15 +79,10 @@ extern const char * const * zpool_default_search_paths(size_t *count);
extern int zpool_read_label(int, nvlist_t **, int *);
extern int zpool_label_disk_wait(const char *, int);
-#ifdef HAVE_LIBUDEV
struct udev_device;
extern int zfs_device_get_devid(struct udev_device *, char *, size_t);
extern int zfs_device_get_physical(struct udev_device *, char *, size_t);
-#else
-#define zfs_device_get_devid(dev, bufptr, buflen) (ENODATA)
-#define zfs_device_get_physical(dev, bufptr, buflen) (ENODATA)
-#endif
extern void update_vdev_config_dev_strs(nvlist_t *);
@@ -106,16 +101,12 @@ extern char *zfs_strip_partition_path(char *);
extern int zfs_strcmp_pathname(const char *, const char *, int);
-extern int zfs_dev_is_dm(const char *);
-extern int zfs_dev_is_whole_disk(const char *);
+extern boolean_t zfs_dev_is_dm(const char *);
+extern boolean_t zfs_dev_is_whole_disk(const char *);
extern char *zfs_get_underlying_path(const char *);
extern char *zfs_get_enclosure_sysfs_path(const char *);
-#ifdef HAVE_LIBUDEV
extern boolean_t is_mpath_whole_disk(const char *);
-#else
-#define is_mpath_whole_disk(path) (B_FALSE)
-#endif
extern boolean_t zfs_isnumber(const char *);
diff --git a/lib/libzutil/Makefile.am b/lib/libzutil/Makefile.am
index 62b0114ed..8b53c374e 100644
--- a/lib/libzutil/Makefile.am
+++ b/lib/libzutil/Makefile.am
@@ -3,14 +3,23 @@ include $(top_srcdir)/config/Rules.am
# Suppress unused but set variable warnings often due to ASSERTs
AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
+DEFAULT_INCLUDES += -I.
+
noinst_LTLIBRARIES = libzutil.la
USER_C = \
zutil_device_path.c \
zutil_import.c \
+ zutil_import.h \
zutil_nicenum.c \
zutil_pool.c
+if BUILD_LINUX
+USER_C += \
+ os/linux/zutil_device_path_os.c \
+ os/linux/zutil_import_os.c
+endif
+
nodist_libzutil_la_SOURCES = $(USER_C)
libzutil_la_LIBADD = \
diff --git a/lib/libzutil/os/linux/zutil_device_path_os.c b/lib/libzutil/os/linux/zutil_device_path_os.c
new file mode 100644
index 000000000..8ca4f8570
--- /dev/null
+++ b/lib/libzutil/os/linux/zutil_device_path_os.c
@@ -0,0 +1,493 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/efi_partition.h>
+
+#ifdef HAVE_LIBUDEV
+#include <libudev.h>
+#endif
+
+#include <libzutil.h>
+
+/*
+ * Append partition suffix to an otherwise fully qualified device path.
+ * This is used to generate the name the full path as its stored in
+ * ZPOOL_CONFIG_PATH for whole disk devices. On success the new length
+ * of 'path' will be returned on error a negative value is returned.
+ */
+int
+zfs_append_partition(char *path, size_t max_len)
+{
+ int len = strlen(path);
+
+ if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
+ (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
+ if (len + 6 >= max_len)
+ return (-1);
+
+ (void) strcat(path, "-part1");
+ len += 6;
+ } else {
+ if (len + 2 >= max_len)
+ return (-1);
+
+ if (isdigit(path[len-1])) {
+ (void) strcat(path, "p1");
+ len += 2;
+ } else {
+ (void) strcat(path, "1");
+ len += 1;
+ }
+ }
+
+ return (len);
+}
+
+/*
+ * Allocate and return the underlying device name for a device mapper device.
+ * If a device mapper device maps to multiple devices, return the first device.
+ *
+ * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
+ * DM device (like /dev/disk/by-vdev/A0) are also allowed.
+ *
+ * Returns device name, or NULL on error or no match. If dm_name is not a DM
+ * device then return NULL.
+ *
+ * NOTE: The returned name string must be *freed*.
+ */
+static char *
+dm_get_underlying_path(const char *dm_name)
+{
+ DIR *dp = NULL;
+ struct dirent *ep;
+ char *realp;
+ char *tmp = NULL;
+ char *path = NULL;
+ char *dev_str;
+ int size;
+
+ if (dm_name == NULL)
+ return (NULL);
+
+ /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
+ realp = realpath(dm_name, NULL);
+ if (realp == NULL)
+ return (NULL);
+
+ /*
+ * If they preface 'dev' with a path (like "/dev") then strip it off.
+ * We just want the 'dm-N' part.
+ */
+ tmp = strrchr(realp, '/');
+ if (tmp != NULL)
+ dev_str = tmp + 1; /* +1 since we want the chr after '/' */
+ else
+ dev_str = tmp;
+
+ size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
+ if (size == -1 || !tmp)
+ goto end;
+
+ dp = opendir(tmp);
+ if (dp == NULL)
+ goto end;
+
+ /* Return first sd* entry in /sys/block/dm-N/slaves/ */
+ while ((ep = readdir(dp))) {
+ if (ep->d_type != DT_DIR) { /* skip "." and ".." dirs */
+ size = asprintf(&path, "/dev/%s", ep->d_name);
+ break;
+ }
+ }
+
+end:
+ if (dp != NULL)
+ closedir(dp);
+ free(tmp);
+ free(realp);
+ return (path);
+}
+
+/*
+ * Return B_TRUE if device is a device mapper or multipath device.
+ * Return B_FALSE if not.
+ */
+boolean_t
+zfs_dev_is_dm(const char *dev_name)
+{
+
+ char *tmp;
+ tmp = dm_get_underlying_path(dev_name);
+ if (tmp == NULL)
+ return (B_FALSE);
+
+ free(tmp);
+ return (B_TRUE);
+}
+
+/*
+ * By "whole disk" we mean an entire physical disk (something we can
+ * label, toggle the write cache on, etc.) as opposed to the full
+ * capacity of a pseudo-device such as lofi or did. We act as if we
+ * are labeling the disk, which should be a pretty good test of whether
+ * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
+ * it isn't.
+ */
+boolean_t
+zfs_dev_is_whole_disk(const char *dev_name)
+{
+ struct dk_gpt *label;
+ int fd;
+
+ if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
+ return (B_FALSE);
+
+ if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+
+ efi_free(label);
+ (void) close(fd);
+
+ return (B_TRUE);
+}
+
+/*
+ * Lookup the underlying device for a device name
+ *
+ * Often you'll have a symlink to a device, a partition device,
+ * or a multipath device, and want to look up the underlying device.
+ * This function returns the underlying device name. If the device
+ * name is already the underlying device, then just return the same
+ * name. If the device is a DM device with multiple underlying devices
+ * then return the first one.
+ *
+ * For example:
+ *
+ * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
+ * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
+ * returns: /dev/sda
+ *
+ * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
+ * dev_name: /dev/mapper/mpatha
+ * returns: /dev/sda (first device)
+ *
+ * 3. /dev/sda (already the underlying device)
+ * dev_name: /dev/sda
+ * returns: /dev/sda
+ *
+ * 4. /dev/dm-3 (mapped to /dev/sda)
+ * dev_name: /dev/dm-3
+ * returns: /dev/sda
+ *
+ * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
+ * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
+ * returns: /dev/sdb
+ *
+ * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
+ * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
+ * returns: /dev/sda
+ *
+ * Returns underlying device name, or NULL on error or no match.
+ *
+ * NOTE: The returned name string must be *freed*.
+ */
+char *
+zfs_get_underlying_path(const char *dev_name)
+{
+ char *name = NULL;
+ char *tmp;
+
+ if (dev_name == NULL)
+ return (NULL);
+
+ tmp = dm_get_underlying_path(dev_name);
+
+ /* dev_name not a DM device, so just un-symlinkize it */
+ if (tmp == NULL)
+ tmp = realpath(dev_name, NULL);
+
+ if (tmp != NULL) {
+ name = zfs_strip_partition_path(tmp);
+ free(tmp);
+ }
+
+ return (name);
+}
+
+/*
+ * Given a dev name like "sda", return the full enclosure sysfs path to
+ * the disk. You can also pass in the name with "/dev" prepended
+ * to it (like /dev/sda).
+ *
+ * For example, disk "sda" in enclosure slot 1:
+ * dev: "sda"
+ * returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
+ *
+ * 'dev' must be a non-devicemapper device.
+ *
+ * Returned string must be freed.
+ */
+char *
+zfs_get_enclosure_sysfs_path(const char *dev_name)
+{
+ DIR *dp = NULL;
+ struct dirent *ep;
+ char buf[MAXPATHLEN];
+ char *tmp1 = NULL;
+ char *tmp2 = NULL;
+ char *tmp3 = NULL;
+ char *path = NULL;
+ size_t size;
+ int tmpsize;
+
+ if (dev_name == NULL)
+ return (NULL);
+
+ /* If they preface 'dev' with a path (like "/dev") then strip it off */
+ tmp1 = strrchr(dev_name, '/');
+ if (tmp1 != NULL)
+ dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */
+
+ tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
+ if (tmpsize == -1 || tmp1 == NULL) {
+ tmp1 = NULL;
+ goto end;
+ }
+
+ dp = opendir(tmp1);
+ if (dp == NULL) {
+ tmp1 = NULL; /* To make free() at the end a NOP */
+ goto end;
+ }
+
+ /*
+ * Look though all sysfs entries in /sys/block/<dev>/device for
+ * the enclosure symlink.
+ */
+ while ((ep = readdir(dp))) {
+ /* Ignore everything that's not our enclosure_device link */
+ if (strstr(ep->d_name, "enclosure_device") == NULL)
+ continue;
+
+ if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
+ tmp2 == NULL)
+ break;
+
+ size = readlink(tmp2, buf, sizeof (buf));
+
+ /* Did readlink fail or crop the link name? */
+ if (size == -1 || size >= sizeof (buf)) {
+ free(tmp2);
+ tmp2 = NULL; /* To make free() at the end a NOP */
+ break;
+ }
+
+ /*
+ * We got a valid link. readlink() doesn't terminate strings
+ * so we have to do it.
+ */
+ buf[size] = '\0';
+
+ /*
+ * Our link will look like:
+ *
+ * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
+ *
+ * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
+ */
+ tmp3 = strstr(buf, "enclosure");
+ if (tmp3 == NULL)
+ break;
+
+ if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
+ /* If asprintf() fails, 'path' is undefined */
+ path = NULL;
+ break;
+ }
+
+ if (path == NULL)
+ break;
+ }
+
+end:
+ free(tmp2);
+ free(tmp1);
+
+ if (dp != NULL)
+ closedir(dp);
+
+ return (path);
+}
+
+/*
+ * Remove partition suffix from a vdev path. Partition suffixes may take three
+ * forms: "-partX", "pX", or "X", where X is a string of digits. The second
+ * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
+ * third case only occurs when preceded by a string matching the regular
+ * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
+ *
+ * caller must free the returned string
+ */
+char *
+zfs_strip_partition(char *path)
+{
+ char *tmp = strdup(path);
+ char *part = NULL, *d = NULL;
+ if (!tmp)
+ return (NULL);
+
+ if ((part = strstr(tmp, "-part")) && part != tmp) {
+ d = part + 5;
+ } else if ((part = strrchr(tmp, 'p')) &&
+ part > tmp + 1 && isdigit(*(part-1))) {
+ d = part + 1;
+ } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
+ tmp[1] == 'd') {
+ for (d = &tmp[2]; isalpha(*d); part = ++d) { }
+ } else if (strncmp("xvd", tmp, 3) == 0) {
+ for (d = &tmp[3]; isalpha(*d); part = ++d) { }
+ }
+ if (part && d && *d != '\0') {
+ for (; isdigit(*d); d++) { }
+ if (*d == '\0')
+ *part = '\0';
+ }
+
+ return (tmp);
+}
+
+/*
+ * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
+ *
+ * path: /dev/sda1
+ * returns: /dev/sda
+ *
+ * Returned string must be freed.
+ */
+char *
+zfs_strip_partition_path(char *path)
+{
+ char *newpath = strdup(path);
+ char *sd_offset;
+ char *new_sd;
+
+ if (!newpath)
+ return (NULL);
+
+ /* Point to "sda1" part of "/dev/sda1" */
+ sd_offset = strrchr(newpath, '/') + 1;
+
+ /* Get our new name "sda" */
+ new_sd = zfs_strip_partition(sd_offset);
+ if (!new_sd) {
+ free(newpath);
+ return (NULL);
+ }
+
+ /* Paste the "sda" where "sda1" was */
+ strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);
+
+ /* Free temporary "sda" */
+ free(new_sd);
+
+ return (newpath);
+}
+
+#ifdef HAVE_LIBUDEV
+
+/*
+ * A disk is considered a multipath whole disk when:
+ * DEVNAME key value has "dm-"
+ * DM_NAME key value has "mpath" prefix
+ * DM_UUID key exists
+ * ID_PART_TABLE_TYPE key does not exist or is not gpt
+ */
+static boolean_t
+udev_mpath_whole_disk(struct udev_device *dev)
+{
+ const char *devname, *type, *uuid;
+
+ devname = udev_device_get_property_value(dev, "DEVNAME");
+ type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
+ uuid = udev_device_get_property_value(dev, "DM_UUID");
+
+ if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
+ ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
+ (uuid != NULL)) {
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Check if a disk is effectively a multipath whole disk
+ */
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+ struct udev *udev;
+ struct udev_device *dev = NULL;
+ char nodepath[MAXPATHLEN];
+ char *sysname;
+ boolean_t wholedisk = B_FALSE;
+
+ if (realpath(path, nodepath) == NULL)
+ return (B_FALSE);
+ sysname = strrchr(nodepath, '/') + 1;
+ if (strncmp(sysname, "dm-", 3) != 0)
+ return (B_FALSE);
+ if ((udev = udev_new()) == NULL)
+ return (B_FALSE);
+ if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
+ sysname)) == NULL) {
+ udev_device_unref(dev);
+ return (B_FALSE);
+ }
+
+ wholedisk = udev_mpath_whole_disk(dev);
+
+ udev_device_unref(dev);
+ return (wholedisk);
+}
+
+#else /* HAVE_LIBUDEV */
+
+/* ARGSUSED */
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+ return (B_FALSE);
+}
+
+#endif /* HAVE_LIBUDEV */
diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c
new file mode 100644
index 000000000..811eae397
--- /dev/null
+++ b/lib/libzutil/os/linux/zutil_import_os.c
@@ -0,0 +1,856 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+/*
+ * Pool import support functions.
+ *
+ * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
+ * these commands are expected to run in the global zone, we can assume
+ * that the devices are all readable when called.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device. If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ * pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded. Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev. We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed. Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <libgen.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/dktp/fdisk.h>
+#include <sys/vdev_impl.h>
+#include <sys/fs/zfs.h>
+#include <sys/vdev_impl.h>
+
+#include <thread_pool.h>
+#include <libzutil.h>
+#include <libnvpair.h>
+
+#include "zutil_import.h"
+
+#ifdef HAVE_LIBUDEV
+#include <libudev.h>
+#include <sched.h>
+#endif
+#include <blkid/blkid.h>
+
+#define DEFAULT_IMPORT_PATH_SIZE 9
+#define DEV_BYID_PATH "/dev/disk/by-id/"
+
+static boolean_t
+is_watchdog_dev(char *dev)
+{
+ /* For 'watchdog' dev */
+ if (strcmp(dev, "watchdog") == 0)
+ return (B_TRUE);
+
+ /* For 'watchdog<digit><whatever> */
+ if (strstr(dev, "watchdog") == dev && isdigit(dev[8]))
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+void
+zpool_open_func(void *arg)
+{
+ rdsk_node_t *rn = arg;
+ libpc_handle_t *hdl = rn->rn_hdl;
+ struct stat64 statbuf;
+ nvlist_t *config;
+ char *bname, *dupname;
+ uint64_t vdev_guid = 0;
+ int error;
+ int num_labels = 0;
+ int fd;
+
+ /*
+ * Skip devices with well known prefixes there can be side effects
+ * when opening devices which need to be avoided.
+ *
+ * hpet - High Precision Event Timer
+ * watchdog - Watchdog must be closed in a special way.
+ */
+ dupname = zutil_strdup(hdl, rn->rn_name);
+ bname = basename(dupname);
+ error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
+ free(dupname);
+ if (error)
+ return;
+
+ /*
+ * Ignore failed stats. We only want regular files and block devices.
+ */
+ if (stat64(rn->rn_name, &statbuf) != 0 ||
+ (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
+ return;
+
+ /*
+ * Preferentially open using O_DIRECT to bypass the block device
+ * cache which may be stale for multipath devices. An EINVAL errno
+ * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
+ */
+ fd = open(rn->rn_name, O_RDONLY | O_DIRECT);
+ if ((fd < 0) && (errno == EINVAL))
+ fd = open(rn->rn_name, O_RDONLY);
+ if ((fd < 0) && (errno == EACCES))
+ hdl->lpc_open_access_error = B_TRUE;
+ if (fd < 0)
+ return;
+
+ /*
+ * This file is too small to hold a zpool
+ */
+ if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
+ (void) close(fd);
+ return;
+ }
+
+ error = zpool_read_label(fd, &config, &num_labels);
+ if (error != 0) {
+ (void) close(fd);
+ return;
+ }
+
+ if (num_labels == 0) {
+ (void) close(fd);
+ nvlist_free(config);
+ return;
+ }
+
+ /*
+ * Check that the vdev is for the expected guid. Additional entries
+ * are speculatively added based on the paths stored in the labels.
+ * Entries with valid paths but incorrect guids must be removed.
+ */
+ error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
+ if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
+ (void) close(fd);
+ nvlist_free(config);
+ return;
+ }
+
+ (void) close(fd);
+
+ rn->rn_config = config;
+ rn->rn_num_labels = num_labels;
+
+ /*
+ * Add additional entries for paths described by this label.
+ */
+ if (rn->rn_labelpaths) {
+ char *path = NULL;
+ char *devid = NULL;
+ rdsk_node_t *slice;
+ avl_index_t where;
+ int error;
+
+ if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
+ return;
+
+ /*
+ * Allow devlinks to stabilize so all paths are available.
+ */
+ zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT);
+
+ if (path != NULL) {
+ slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+ slice->rn_name = zutil_strdup(hdl, path);
+ slice->rn_vdev_guid = vdev_guid;
+ slice->rn_avl = rn->rn_avl;
+ slice->rn_hdl = hdl;
+ slice->rn_order = IMPORT_ORDER_PREFERRED_1;
+ slice->rn_labelpaths = B_FALSE;
+ pthread_mutex_lock(rn->rn_lock);
+ if (avl_find(rn->rn_avl, slice, &where)) {
+ pthread_mutex_unlock(rn->rn_lock);
+ free(slice->rn_name);
+ free(slice);
+ } else {
+ avl_insert(rn->rn_avl, slice, where);
+ pthread_mutex_unlock(rn->rn_lock);
+ zpool_open_func(slice);
+ }
+ }
+
+ if (devid != NULL) {
+ slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+ error = asprintf(&slice->rn_name, "%s%s",
+ DEV_BYID_PATH, devid);
+ if (error == -1) {
+ free(slice);
+ return;
+ }
+
+ slice->rn_vdev_guid = vdev_guid;
+ slice->rn_avl = rn->rn_avl;
+ slice->rn_hdl = hdl;
+ slice->rn_order = IMPORT_ORDER_PREFERRED_2;
+ slice->rn_labelpaths = B_FALSE;
+ pthread_mutex_lock(rn->rn_lock);
+ if (avl_find(rn->rn_avl, slice, &where)) {
+ pthread_mutex_unlock(rn->rn_lock);
+ free(slice->rn_name);
+ free(slice);
+ } else {
+ avl_insert(rn->rn_avl, slice, where);
+ pthread_mutex_unlock(rn->rn_lock);
+ zpool_open_func(slice);
+ }
+ }
+ }
+}
+
+static char *
+zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
+ "/dev/disk/by-vdev", /* Custom rules, use first if they exist */
+ "/dev/mapper", /* Use multipath devices before components */
+ "/dev/disk/by-partlabel", /* Single unique entry set by user */
+ "/dev/disk/by-partuuid", /* Generated partition uuid */
+ "/dev/disk/by-label", /* Custom persistent labels */
+ "/dev/disk/by-uuid", /* Single unique entry and persistent */
+ "/dev/disk/by-id", /* May be multiple entries and persistent */
+ "/dev/disk/by-path", /* Encodes physical location and persistent */
+ "/dev" /* UNSAFE device names will change */
+};
+
+const char * const *
+zpool_default_search_paths(size_t *count)
+{
+ *count = DEFAULT_IMPORT_PATH_SIZE;
+ return ((const char * const *)zpool_default_import_path);
+}
+
+/*
+ * Given a full path to a device determine if that device appears in the
+ * import search path. If it does return the first match and store the
+ * index in the passed 'order' variable, otherwise return an error.
+ */
+static int
+zfs_path_order(char *name, int *order)
+{
+ int i = 0, error = ENOENT;
+ char *dir, *env, *envdup;
+
+ env = getenv("ZPOOL_IMPORT_PATH");
+ if (env) {
+ envdup = strdup(env);
+ dir = strtok(envdup, ":");
+ while (dir) {
+ if (strncmp(name, dir, strlen(dir)) == 0) {
+ *order = i;
+ error = 0;
+ break;
+ }
+ dir = strtok(NULL, ":");
+ i++;
+ }
+ free(envdup);
+ } else {
+ for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
+ if (strncmp(name, zpool_default_import_path[i],
+ strlen(zpool_default_import_path[i])) == 0) {
+ *order = i;
+ error = 0;
+ break;
+ }
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Use libblkid to quickly enumerate all known zfs devices.
+ */
+int
+zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
+ avl_tree_t **slice_cache)
+{
+ rdsk_node_t *slice;
+ blkid_cache cache;
+ blkid_dev_iterate iter;
+ blkid_dev dev;
+ avl_index_t where;
+ int error;
+
+ *slice_cache = NULL;
+
+ error = blkid_get_cache(&cache, NULL);
+ if (error != 0)
+ return (error);
+
+ error = blkid_probe_all_new(cache);
+ if (error != 0) {
+ blkid_put_cache(cache);
+ return (error);
+ }
+
+ iter = blkid_dev_iterate_begin(cache);
+ if (iter == NULL) {
+ blkid_put_cache(cache);
+ return (EINVAL);
+ }
+
+ error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
+ if (error != 0) {
+ blkid_dev_iterate_end(iter);
+ blkid_put_cache(cache);
+ return (error);
+ }
+
+ *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
+ avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
+ offsetof(rdsk_node_t, rn_node));
+
+ while (blkid_dev_next(iter, &dev) == 0) {
+ slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+ slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
+ slice->rn_vdev_guid = 0;
+ slice->rn_lock = lock;
+ slice->rn_avl = *slice_cache;
+ slice->rn_hdl = hdl;
+ slice->rn_labelpaths = B_TRUE;
+
+ error = zfs_path_order(slice->rn_name, &slice->rn_order);
+ if (error == 0)
+ slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
+ else
+ slice->rn_order = IMPORT_ORDER_DEFAULT;
+
+ pthread_mutex_lock(lock);
+ if (avl_find(*slice_cache, slice, &where)) {
+ free(slice->rn_name);
+ free(slice);
+ } else {
+ avl_insert(*slice_cache, slice, where);
+ }
+ pthread_mutex_unlock(lock);
+ }
+
+ blkid_dev_iterate_end(iter);
+ blkid_put_cache(cache);
+
+ return (0);
+}
+
+/*
+ * Linux persistent device strings for vdev labels
+ *
+ * based on libudev for consistency with libudev disk add/remove events
+ */
+
+typedef struct vdev_dev_strs {
+ char vds_devid[128];
+ char vds_devphys[128];
+} vdev_dev_strs_t;
+
+#ifdef HAVE_LIBUDEV
+
+/*
+ * Obtain the persistent device id string (describes what)
+ *
+ * used by ZED vdev matching for auto-{online,expand,replace}
+ */
+int
+zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+ struct udev_list_entry *entry;
+ const char *bus;
+ char devbyid[MAXPATHLEN];
+
+ /* The bus based by-id path is preferred */
+ bus = udev_device_get_property_value(dev, "ID_BUS");
+
+ if (bus == NULL) {
+ const char *dm_uuid;
+
+ /*
+ * For multipath nodes use the persistent uuid based identifier
+ *
+ * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
+ */
+ dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
+ if (dm_uuid != NULL) {
+ (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
+ return (0);
+ }
+
+ /*
+ * For volumes use the persistent /dev/zvol/dataset identifier
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ const char *name;
+
+ name = udev_list_entry_get_name(entry);
+ if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
+ (void) strlcpy(bufptr, name, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
+
+ /*
+ * NVME 'by-id' symlinks are similar to bus case
+ */
+ struct udev_device *parent;
+
+ parent = udev_device_get_parent_with_subsystem_devtype(dev,
+ "nvme", NULL);
+ if (parent != NULL)
+ bus = "nvme"; /* continue with bus symlink search */
+ else
+ return (ENODATA);
+ }
+
+ /*
+ * locate the bus specific by-id link
+ */
+ (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ const char *name;
+
+ name = udev_list_entry_get_name(entry);
+ if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
+ name += strlen(DEV_BYID_PATH);
+ (void) strlcpy(bufptr, name, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
+
+ return (ENODATA);
+}
+
+/*
+ * Obtain the persistent physical location string (describes where)
+ *
+ * used by ZED vdev matching for auto-{online,expand,replace}
+ */
+int
+zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+ const char *physpath = NULL;
+ struct udev_list_entry *entry;
+
+ /*
+ * Normal disks use ID_PATH for their physical path.
+ */
+ physpath = udev_device_get_property_value(dev, "ID_PATH");
+ if (physpath != NULL && strlen(physpath) > 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+
+ /*
+ * Device mapper devices are virtual and don't have a physical
+ * path. For them we use ID_VDEV instead, which is setup via the
+ * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
+ * to a virtual device. If you don't have vdev_id.conf setup,
+ * you cannot use multipath autoreplace with device mapper.
+ */
+ physpath = udev_device_get_property_value(dev, "ID_VDEV");
+ if (physpath != NULL && strlen(physpath) > 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+
+ /*
+ * For ZFS volumes use the persistent /dev/zvol/dataset identifier
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ physpath = udev_list_entry_get_name(entry);
+ if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
+
+ /*
+ * For all other devices fallback to using the by-uuid name.
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ physpath = udev_list_entry_get_name(entry);
+ if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
+
+ return (ENODATA);
+}
+
+/*
+ * A disk is considered a multipath whole disk when:
+ * DEVNAME key value has "dm-"
+ * DM_NAME key value has "mpath" prefix
+ * DM_UUID key exists
+ * ID_PART_TABLE_TYPE key does not exist or is not gpt
+ */
+static boolean_t
+udev_mpath_whole_disk(struct udev_device *dev)
+{
+ const char *devname, *type, *uuid;
+
+ devname = udev_device_get_property_value(dev, "DEVNAME");
+ type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
+ uuid = udev_device_get_property_value(dev, "DM_UUID");
+
+ if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
+ ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
+ (uuid != NULL)) {
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static int
+udev_device_is_ready(struct udev_device *dev)
+{
+#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
+ return (udev_device_get_is_initialized(dev));
+#else
+ /* wait for DEVLINKS property to be initialized */
+ return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
+#endif
+}
+
+#else
+
+/* ARGSUSED */
+int
+zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+ return (ENODATA);
+}
+
+/* ARGSUSED */
+int
+zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+ return (ENODATA);
+}
+
+#endif /* HAVE_LIBUDEV */
+
+/*
+ * Wait up to timeout_ms for udev to set up the device node. The device is
+ * considered ready when libudev determines it has been initialized, all of
+ * the device links have been verified to exist, and it has been allowed to
+ * settle. At this point the device the device can be accessed reliably.
+ * Depending on the complexity of the udev rules this process could take
+ * several seconds.
+ */
+int
+zpool_label_disk_wait(const char *path, int timeout_ms)
+{
+#ifdef HAVE_LIBUDEV
+ struct udev *udev;
+ struct udev_device *dev = NULL;
+ char nodepath[MAXPATHLEN];
+ char *sysname = NULL;
+ int ret = ENODEV;
+ int settle_ms = 50;
+ long sleep_ms = 10;
+ hrtime_t start, settle;
+
+ if ((udev = udev_new()) == NULL)
+ return (ENXIO);
+
+ start = gethrtime();
+ settle = 0;
+
+ do {
+ if (sysname == NULL) {
+ if (realpath(path, nodepath) != NULL) {
+ sysname = strrchr(nodepath, '/') + 1;
+ } else {
+ (void) usleep(sleep_ms * MILLISEC);
+ continue;
+ }
+ }
+
+ dev = udev_device_new_from_subsystem_sysname(udev,
+ "block", sysname);
+ if ((dev != NULL) && udev_device_is_ready(dev)) {
+ struct udev_list_entry *links, *link = NULL;
+
+ ret = 0;
+ links = udev_device_get_devlinks_list_entry(dev);
+
+ udev_list_entry_foreach(link, links) {
+ struct stat64 statbuf;
+ const char *name;
+
+ name = udev_list_entry_get_name(link);
+ errno = 0;
+ if (stat64(name, &statbuf) == 0 && errno == 0)
+ continue;
+
+ settle = 0;
+ ret = ENODEV;
+ break;
+ }
+
+ if (ret == 0) {
+ if (settle == 0) {
+ settle = gethrtime();
+ } else if (NSEC2MSEC(gethrtime() - settle) >=
+ settle_ms) {
+ udev_device_unref(dev);
+ break;
+ }
+ }
+ }
+
+ udev_device_unref(dev);
+ (void) usleep(sleep_ms * MILLISEC);
+
+ } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+ udev_unref(udev);
+
+ return (ret);
+#else
+ int settle_ms = 50;
+ long sleep_ms = 10;
+ hrtime_t start, settle;
+ struct stat64 statbuf;
+
+ start = gethrtime();
+ settle = 0;
+
+ do {
+ errno = 0;
+ if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
+ if (settle == 0)
+ settle = gethrtime();
+ else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
+ return (0);
+ } else if (errno != ENOENT) {
+ return (errno);
+ }
+
+ usleep(sleep_ms * MILLISEC);
+ } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+ return (ENODEV);
+#endif /* HAVE_LIBUDEV */
+}
+
+/*
+ * Encode the persistent devices strings
+ * used for the vdev disk label
+ */
+static int
+encode_device_strings(const char *path, vdev_dev_strs_t *ds,
+ boolean_t wholedisk)
+{
+#ifdef HAVE_LIBUDEV
+ struct udev *udev;
+ struct udev_device *dev = NULL;
+ char nodepath[MAXPATHLEN];
+ char *sysname;
+ int ret = ENODEV;
+ hrtime_t start;
+
+ if ((udev = udev_new()) == NULL)
+ return (ENXIO);
+
+ /* resolve path to a runtime device node instance */
+ if (realpath(path, nodepath) == NULL)
+ goto no_dev;
+
+ sysname = strrchr(nodepath, '/') + 1;
+
+ /*
+ * Wait up to 3 seconds for udev to set up the device node context
+ */
+ start = gethrtime();
+ do {
+ dev = udev_device_new_from_subsystem_sysname(udev, "block",
+ sysname);
+ if (dev == NULL)
+ goto no_dev;
+ if (udev_device_is_ready(dev))
+ break; /* udev ready */
+
+ udev_device_unref(dev);
+ dev = NULL;
+
+ if (NSEC2MSEC(gethrtime() - start) < 10)
+ (void) sched_yield(); /* yield/busy wait up to 10ms */
+ else
+ (void) usleep(10 * MILLISEC);
+
+ } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
+
+ if (dev == NULL)
+ goto no_dev;
+
+ /*
+ * Only whole disks require extra device strings
+ */
+ if (!wholedisk && !udev_mpath_whole_disk(dev))
+ goto no_dev;
+
+ ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
+ if (ret != 0)
+ goto no_dev_ref;
+
+ /* physical location string (optional) */
+ if (zfs_device_get_physical(dev, ds->vds_devphys,
+ sizeof (ds->vds_devphys)) != 0) {
+ ds->vds_devphys[0] = '\0'; /* empty string --> not available */
+ }
+
+no_dev_ref:
+ udev_device_unref(dev);
+no_dev:
+ udev_unref(udev);
+
+ return (ret);
+#else
+ return (ENOENT);
+#endif
+}
+
+/*
+ * Update a leaf vdev's persistent device strings
+ *
+ * - only applies for a dedicated leaf vdev (aka whole disk)
+ * - updated during pool create|add|attach|import
+ * - used for matching device matching during auto-{online,expand,replace}
+ * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
+ * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
+ *
+ * single device node example:
+ * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1'
+ * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
+ *
+ * multipath device node example:
+ * devid: 'dm-uuid-mpath-35000c5006304de3f'
+ *
+ * We also store the enclosure sysfs path for turning on enclosure LEDs
+ * (if applicable):
+ * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
+ */
+void
+update_vdev_config_dev_strs(nvlist_t *nv)
+{
+ vdev_dev_strs_t vds;
+ char *env, *type, *path;
+ uint64_t wholedisk = 0;
+ char *upath, *spath;
+
+ /*
+ * For the benefit of legacy ZFS implementations, allow
+ * for opting out of devid strings in the vdev label.
+ *
+ * example use:
+ * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
+ *
+ * explanation:
+ * Older ZFS on Linux implementations had issues when attempting to
+ * display pool config VDEV names if a "devid" NVP value is present
+ * in the pool's config.
+ *
+ * For example, a pool that originated on illumos platform would
+ * have a devid value in the config and "zpool status" would fail
+ * when listing the config.
+ *
+ * A pool can be stripped of any "devid" values on import or
+ * prevented from adding them on zpool create|add by setting
+ * ZFS_VDEV_DEVID_OPT_OUT.
+ */
+ env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
+ if (env && (strtoul(env, NULL, 0) > 0 ||
+ !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
+ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+ return;
+ }
+
+ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
+ strcmp(type, VDEV_TYPE_DISK) != 0) {
+ return;
+ }
+ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
+ return;
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
+
+ /*
+ * Update device string values in the config nvlist.
+ */
+ if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
+ (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
+ if (vds.vds_devphys[0] != '\0') {
+ (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
+ vds.vds_devphys);
+ }
+
+ /* Add enclosure sysfs path (if disk is in an enclosure). */
+ upath = zfs_get_underlying_path(path);
+ spath = zfs_get_enclosure_sysfs_path(upath);
+ if (spath)
+ nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+ spath);
+ else
+ nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+
+ free(upath);
+ free(spath);
+ } else {
+ /* Clear out any stale entries. */
+ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+ }
+}
diff --git a/lib/libzutil/zutil_device_path.c b/lib/libzutil/zutil_device_path.c
index 1dc0d4d1d..27ca80e50 100644
--- a/lib/libzutil/zutil_device_path.c
+++ b/lib/libzutil/zutil_device_path.c
@@ -23,54 +23,13 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#include <ctype.h>
#include <errno.h>
-#include <dirent.h>
-#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <sys/efi_partition.h>
#include <libzutil.h>
-#ifdef HAVE_LIBUDEV
-#include <libudev.h>
-#endif
-
-/*
- * Append partition suffix to an otherwise fully qualified device path.
- * This is used to generate the name the full path as its stored in
- * ZPOOL_CONFIG_PATH for whole disk devices. On success the new length
- * of 'path' will be returned on error a negative value is returned.
- */
-int
-zfs_append_partition(char *path, size_t max_len)
-{
- int len = strlen(path);
-
- if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
- (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
- if (len + 6 >= max_len)
- return (-1);
-
- (void) strcat(path, "-part1");
- len += 6;
- } else {
- if (len + 2 >= max_len)
- return (-1);
-
- if (isdigit(path[len-1])) {
- (void) strcat(path, "p1");
- len += 2;
- } else {
- (void) strcat(path, "1");
- len += 1;
- }
- }
-
- return (len);
-}
/*
* Given a shorthand device name check if a file by that name exists in any
@@ -213,413 +172,3 @@ zfs_strcmp_pathname(const char *name, const char *cmp, int wholedisk)
return (0);
}
-
-/*
- * Allocate and return the underlying device name for a device mapper device.
- * If a device mapper device maps to multiple devices, return the first device.
- *
- * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
- * DM device (like /dev/disk/by-vdev/A0) are also allowed.
- *
- * Returns device name, or NULL on error or no match. If dm_name is not a DM
- * device then return NULL.
- *
- * NOTE: The returned name string must be *freed*.
- */
-static char *
-dm_get_underlying_path(const char *dm_name)
-{
- DIR *dp = NULL;
- struct dirent *ep;
- char *realp;
- char *tmp = NULL;
- char *path = NULL;
- char *dev_str;
- int size;
-
- if (dm_name == NULL)
- return (NULL);
-
- /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
- realp = realpath(dm_name, NULL);
- if (realp == NULL)
- return (NULL);
-
- /*
- * If they preface 'dev' with a path (like "/dev") then strip it off.
- * We just want the 'dm-N' part.
- */
- tmp = strrchr(realp, '/');
- if (tmp != NULL)
- dev_str = tmp + 1; /* +1 since we want the chr after '/' */
- else
- dev_str = tmp;
-
- size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
- if (size == -1 || !tmp)
- goto end;
-
- dp = opendir(tmp);
- if (dp == NULL)
- goto end;
-
- /* Return first sd* entry in /sys/block/dm-N/slaves/ */
- while ((ep = readdir(dp))) {
- if (ep->d_type != DT_DIR) { /* skip "." and ".." dirs */
- size = asprintf(&path, "/dev/%s", ep->d_name);
- break;
- }
- }
-
-end:
- if (dp != NULL)
- closedir(dp);
- free(tmp);
- free(realp);
- return (path);
-}
-
-/*
- * Return 1 if device is a device mapper or multipath device.
- * Return 0 if not.
- */
-int
-zfs_dev_is_dm(const char *dev_name)
-{
-
- char *tmp;
- tmp = dm_get_underlying_path(dev_name);
- if (tmp == NULL)
- return (0);
-
- free(tmp);
- return (1);
-}
-
-/*
- * By "whole disk" we mean an entire physical disk (something we can
- * label, toggle the write cache on, etc.) as opposed to the full
- * capacity of a pseudo-device such as lofi or did. We act as if we
- * are labeling the disk, which should be a pretty good test of whether
- * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
- * it isn't.
- */
-int
-zfs_dev_is_whole_disk(const char *dev_name)
-{
- struct dk_gpt *label;
- int fd;
-
- if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
- return (0);
-
- if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
- (void) close(fd);
- return (0);
- }
-
- efi_free(label);
- (void) close(fd);
-
- return (1);
-}
-
-/*
- * Lookup the underlying device for a device name
- *
- * Often you'll have a symlink to a device, a partition device,
- * or a multipath device, and want to look up the underlying device.
- * This function returns the underlying device name. If the device
- * name is already the underlying device, then just return the same
- * name. If the device is a DM device with multiple underlying devices
- * then return the first one.
- *
- * For example:
- *
- * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
- * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
- * returns: /dev/sda
- *
- * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
- * dev_name: /dev/mapper/mpatha
- * returns: /dev/sda (first device)
- *
- * 3. /dev/sda (already the underlying device)
- * dev_name: /dev/sda
- * returns: /dev/sda
- *
- * 4. /dev/dm-3 (mapped to /dev/sda)
- * dev_name: /dev/dm-3
- * returns: /dev/sda
- *
- * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
- * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
- * returns: /dev/sdb
- *
- * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
- * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
- * returns: /dev/sda
- *
- * Returns underlying device name, or NULL on error or no match.
- *
- * NOTE: The returned name string must be *freed*.
- */
-char *
-zfs_get_underlying_path(const char *dev_name)
-{
- char *name = NULL;
- char *tmp;
-
- if (dev_name == NULL)
- return (NULL);
-
- tmp = dm_get_underlying_path(dev_name);
-
- /* dev_name not a DM device, so just un-symlinkize it */
- if (tmp == NULL)
- tmp = realpath(dev_name, NULL);
-
- if (tmp != NULL) {
- name = zfs_strip_partition_path(tmp);
- free(tmp);
- }
-
- return (name);
-}
-
-/*
- * Given a dev name like "sda", return the full enclosure sysfs path to
- * the disk. You can also pass in the name with "/dev" prepended
- * to it (like /dev/sda).
- *
- * For example, disk "sda" in enclosure slot 1:
- * dev: "sda"
- * returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
- *
- * 'dev' must be a non-devicemapper device.
- *
- * Returned string must be freed.
- */
-char *
-zfs_get_enclosure_sysfs_path(const char *dev_name)
-{
- DIR *dp = NULL;
- struct dirent *ep;
- char buf[MAXPATHLEN];
- char *tmp1 = NULL;
- char *tmp2 = NULL;
- char *tmp3 = NULL;
- char *path = NULL;
- size_t size;
- int tmpsize;
-
- if (dev_name == NULL)
- return (NULL);
-
- /* If they preface 'dev' with a path (like "/dev") then strip it off */
- tmp1 = strrchr(dev_name, '/');
- if (tmp1 != NULL)
- dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */
-
- tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
- if (tmpsize == -1 || tmp1 == NULL) {
- tmp1 = NULL;
- goto end;
- }
-
- dp = opendir(tmp1);
- if (dp == NULL) {
- tmp1 = NULL; /* To make free() at the end a NOP */
- goto end;
- }
-
- /*
- * Look though all sysfs entries in /sys/block/<dev>/device for
- * the enclosure symlink.
- */
- while ((ep = readdir(dp))) {
- /* Ignore everything that's not our enclosure_device link */
- if (strstr(ep->d_name, "enclosure_device") == NULL)
- continue;
-
- if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
- tmp2 == NULL)
- break;
-
- size = readlink(tmp2, buf, sizeof (buf));
-
- /* Did readlink fail or crop the link name? */
- if (size == -1 || size >= sizeof (buf)) {
- free(tmp2);
- tmp2 = NULL; /* To make free() at the end a NOP */
- break;
- }
-
- /*
- * We got a valid link. readlink() doesn't terminate strings
- * so we have to do it.
- */
- buf[size] = '\0';
-
- /*
- * Our link will look like:
- *
- * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
- *
- * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
- */
- tmp3 = strstr(buf, "enclosure");
- if (tmp3 == NULL)
- break;
-
- if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
- /* If asprintf() fails, 'path' is undefined */
- path = NULL;
- break;
- }
-
- if (path == NULL)
- break;
- }
-
-end:
- free(tmp2);
- free(tmp1);
-
- if (dp != NULL)
- closedir(dp);
-
- return (path);
-}
-
-/*
- * Remove partition suffix from a vdev path. Partition suffixes may take three
- * forms: "-partX", "pX", or "X", where X is a string of digits. The second
- * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
- * third case only occurs when preceded by a string matching the regular
- * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
- *
- * caller must free the returned string
- */
-char *
-zfs_strip_partition(char *path)
-{
- char *tmp = strdup(path);
- char *part = NULL, *d = NULL;
- if (!tmp)
- return (NULL);
-
- if ((part = strstr(tmp, "-part")) && part != tmp) {
- d = part + 5;
- } else if ((part = strrchr(tmp, 'p')) &&
- part > tmp + 1 && isdigit(*(part-1))) {
- d = part + 1;
- } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
- tmp[1] == 'd') {
- for (d = &tmp[2]; isalpha(*d); part = ++d) { }
- } else if (strncmp("xvd", tmp, 3) == 0) {
- for (d = &tmp[3]; isalpha(*d); part = ++d) { }
- }
- if (part && d && *d != '\0') {
- for (; isdigit(*d); d++) { }
- if (*d == '\0')
- *part = '\0';
- }
-
- return (tmp);
-}
-
-/*
- * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
- *
- * path: /dev/sda1
- * returns: /dev/sda
- *
- * Returned string must be freed.
- */
-char *
-zfs_strip_partition_path(char *path)
-{
- char *newpath = strdup(path);
- char *sd_offset;
- char *new_sd;
-
- if (!newpath)
- return (NULL);
-
- /* Point to "sda1" part of "/dev/sda1" */
- sd_offset = strrchr(newpath, '/') + 1;
-
- /* Get our new name "sda" */
- new_sd = zfs_strip_partition(sd_offset);
- if (!new_sd) {
- free(newpath);
- return (NULL);
- }
-
- /* Paste the "sda" where "sda1" was */
- strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);
-
- /* Free temporary "sda" */
- free(new_sd);
-
- return (newpath);
-}
-
-#ifdef HAVE_LIBUDEV
-/*
- * A disk is considered a multipath whole disk when:
- * DEVNAME key value has "dm-"
- * DM_NAME key value has "mpath" prefix
- * DM_UUID key exists
- * ID_PART_TABLE_TYPE key does not exist or is not gpt
- */
-static boolean_t
-udev_mpath_whole_disk(struct udev_device *dev)
-{
- const char *devname, *type, *uuid;
-
- devname = udev_device_get_property_value(dev, "DEVNAME");
- type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
- uuid = udev_device_get_property_value(dev, "DM_UUID");
-
- if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
- ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
- (uuid != NULL)) {
- return (B_TRUE);
- }
-
- return (B_FALSE);
-}
-
-/*
- * Check if a disk is effectively a multipath whole disk
- */
-boolean_t
-is_mpath_whole_disk(const char *path)
-{
- struct udev *udev;
- struct udev_device *dev = NULL;
- char nodepath[MAXPATHLEN];
- char *sysname;
- boolean_t wholedisk = B_FALSE;
-
- if (realpath(path, nodepath) == NULL)
- return (B_FALSE);
- sysname = strrchr(nodepath, '/') + 1;
- if (strncmp(sysname, "dm-", 3) != 0)
- return (B_FALSE);
- if ((udev = udev_new()) == NULL)
- return (B_FALSE);
- if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
- sysname)) == NULL) {
- udev_device_unref(dev);
- return (B_FALSE);
- }
-
- wholedisk = udev_mpath_whole_disk(dev);
-
- udev_device_unref(dev);
- return (wholedisk);
-}
-#endif
diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c
index bb7db259c..e85ce4594 100644
--- a/lib/libzutil/zutil_import.c
+++ b/lib/libzutil/zutil_import.c
@@ -51,10 +51,6 @@
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
-#ifdef HAVE_LIBUDEV
-#include <libudev.h>
-#include <sched.h>
-#endif
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
@@ -66,34 +62,15 @@
#include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>
-#include <blkid/blkid.h>
#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
-#define IMPORT_ORDER_PREFERRED_1 1
-#define IMPORT_ORDER_PREFERRED_2 2
-#define IMPORT_ORDER_SCAN_OFFSET 10
-#define IMPORT_ORDER_DEFAULT 100
-#define DEFAULT_IMPORT_PATH_SIZE 9
-
-#define EZFS_BADCACHE "invalid or missing cache file"
-#define EZFS_BADPATH "must be an absolute path"
-#define EZFS_NOMEM "out of memory"
-#define EZFS_EACESS "some devices require root privileges"
-
-typedef struct libpc_handle {
- boolean_t lpc_printerr;
- boolean_t lpc_open_access_error;
- boolean_t lpc_desc_active;
- char lpc_desc[1024];
- const pool_config_ops_t *lpc_ops;
- void *lpc_lib_handle;
-} libpc_handle_t;
+#include "zutil_import.h"
/*PRINTFLIKE2*/
static void
-zfs_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
+zutil_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
{
va_list ap;
@@ -106,7 +83,8 @@ zfs_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
}
static void
-zfs_verror(libpc_handle_t *hdl, const char *error, const char *fmt, va_list ap)
+zutil_verror(libpc_handle_t *hdl, const char *error, const char *fmt,
+ va_list ap)
{
char action[1024];
@@ -127,13 +105,13 @@ zfs_verror(libpc_handle_t *hdl, const char *error, const char *fmt, va_list ap)
/*PRINTFLIKE3*/
static int
-zfs_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...)
+zutil_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- zfs_verror(hdl, error, fmt, ap);
+ zutil_verror(hdl, error, fmt, ap);
va_end(ap);
@@ -141,36 +119,36 @@ zfs_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...)
}
static int
-zfs_error(libpc_handle_t *hdl, const char *error, const char *msg)
+zutil_error(libpc_handle_t *hdl, const char *error, const char *msg)
{
- return (zfs_error_fmt(hdl, error, "%s", msg));
+ return (zutil_error_fmt(hdl, error, "%s", msg));
}
static int
-no_memory(libpc_handle_t *hdl)
+zutil_no_memory(libpc_handle_t *hdl)
{
- zfs_error(hdl, EZFS_NOMEM, "internal error");
+ zutil_error(hdl, EZFS_NOMEM, "internal error");
exit(1);
}
-static void *
-zfs_alloc(libpc_handle_t *hdl, size_t size)
+void *
+zutil_alloc(libpc_handle_t *hdl, size_t size)
{
void *data;
if ((data = calloc(1, size)) == NULL)
- (void) no_memory(hdl);
+ (void) zutil_no_memory(hdl);
return (data);
}
-static char *
-zfs_strdup(libpc_handle_t *hdl, const char *str)
+char *
+zutil_strdup(libpc_handle_t *hdl, const char *str)
{
char *ret;
if ((ret = strdup(str)) == NULL)
- (void) no_memory(hdl);
+ (void) zutil_no_memory(hdl);
return (ret);
}
@@ -209,472 +187,6 @@ typedef struct pool_list {
name_entry_t *names;
} pool_list_t;
-#define ZVOL_ROOT "/dev/zvol"
-#define DEV_BYID_PATH "/dev/disk/by-id/"
-
-/*
- * Linux persistent device strings for vdev labels
- *
- * based on libudev for consistency with libudev disk add/remove events
- */
-
-typedef struct vdev_dev_strs {
- char vds_devid[128];
- char vds_devphys[128];
-} vdev_dev_strs_t;
-
-#ifdef HAVE_LIBUDEV
-/*
- * Obtain the persistent device id string (describes what)
- *
- * used by ZED vdev matching for auto-{online,expand,replace}
- */
-int
-zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
-{
- struct udev_list_entry *entry;
- const char *bus;
- char devbyid[MAXPATHLEN];
-
- /* The bus based by-id path is preferred */
- bus = udev_device_get_property_value(dev, "ID_BUS");
-
- if (bus == NULL) {
- const char *dm_uuid;
-
- /*
- * For multipath nodes use the persistent uuid based identifier
- *
- * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
- */
- dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
- if (dm_uuid != NULL) {
- (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
- return (0);
- }
-
- /*
- * For volumes use the persistent /dev/zvol/dataset identifier
- */
- entry = udev_device_get_devlinks_list_entry(dev);
- while (entry != NULL) {
- const char *name;
-
- name = udev_list_entry_get_name(entry);
- if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
- (void) strlcpy(bufptr, name, buflen);
- return (0);
- }
- entry = udev_list_entry_get_next(entry);
- }
-
- /*
- * NVME 'by-id' symlinks are similar to bus case
- */
- struct udev_device *parent;
-
- parent = udev_device_get_parent_with_subsystem_devtype(dev,
- "nvme", NULL);
- if (parent != NULL)
- bus = "nvme"; /* continue with bus symlink search */
- else
- return (ENODATA);
- }
-
- /*
- * locate the bus specific by-id link
- */
- (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
- entry = udev_device_get_devlinks_list_entry(dev);
- while (entry != NULL) {
- const char *name;
-
- name = udev_list_entry_get_name(entry);
- if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
- name += strlen(DEV_BYID_PATH);
- (void) strlcpy(bufptr, name, buflen);
- return (0);
- }
- entry = udev_list_entry_get_next(entry);
- }
-
- return (ENODATA);
-}
-
-/*
- * Obtain the persistent physical location string (describes where)
- *
- * used by ZED vdev matching for auto-{online,expand,replace}
- */
-int
-zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
-{
- const char *physpath = NULL;
- struct udev_list_entry *entry;
-
- /*
- * Normal disks use ID_PATH for their physical path.
- */
- physpath = udev_device_get_property_value(dev, "ID_PATH");
- if (physpath != NULL && strlen(physpath) > 0) {
- (void) strlcpy(bufptr, physpath, buflen);
- return (0);
- }
-
- /*
- * Device mapper devices are virtual and don't have a physical
- * path. For them we use ID_VDEV instead, which is setup via the
- * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
- * to a virtual device. If you don't have vdev_id.conf setup,
- * you cannot use multipath autoreplace with device mapper.
- */
- physpath = udev_device_get_property_value(dev, "ID_VDEV");
- if (physpath != NULL && strlen(physpath) > 0) {
- (void) strlcpy(bufptr, physpath, buflen);
- return (0);
- }
-
- /*
- * For ZFS volumes use the persistent /dev/zvol/dataset identifier
- */
- entry = udev_device_get_devlinks_list_entry(dev);
- while (entry != NULL) {
- physpath = udev_list_entry_get_name(entry);
- if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
- (void) strlcpy(bufptr, physpath, buflen);
- return (0);
- }
- entry = udev_list_entry_get_next(entry);
- }
-
- /*
- * For all other devices fallback to using the by-uuid name.
- */
- entry = udev_device_get_devlinks_list_entry(dev);
- while (entry != NULL) {
- physpath = udev_list_entry_get_name(entry);
- if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
- (void) strlcpy(bufptr, physpath, buflen);
- return (0);
- }
- entry = udev_list_entry_get_next(entry);
- }
-
- return (ENODATA);
-}
-
-/*
- * A disk is considered a multipath whole disk when:
- * DEVNAME key value has "dm-"
- * DM_NAME key value has "mpath" prefix
- * DM_UUID key exists
- * ID_PART_TABLE_TYPE key does not exist or is not gpt
- */
-static boolean_t
-udev_mpath_whole_disk(struct udev_device *dev)
-{
- const char *devname, *type, *uuid;
-
- devname = udev_device_get_property_value(dev, "DEVNAME");
- type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
- uuid = udev_device_get_property_value(dev, "DM_UUID");
-
- if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
- ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
- (uuid != NULL)) {
- return (B_TRUE);
- }
-
- return (B_FALSE);
-}
-
-static int
-udev_device_is_ready(struct udev_device *dev)
-{
-#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
- return (udev_device_get_is_initialized(dev));
-#else
- /* wait for DEVLINKS property to be initialized */
- return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
-#endif
-}
-#endif /* HAVE_LIBUDEV */
-
-/*
- * Wait up to timeout_ms for udev to set up the device node. The device is
- * considered ready when libudev determines it has been initialized, all of
- * the device links have been verified to exist, and it has been allowed to
- * settle. At this point the device the device can be accessed reliably.
- * Depending on the complexity of the udev rules this process could take
- * several seconds.
- */
-int
-zpool_label_disk_wait(const char *path, int timeout_ms)
-{
-#ifdef HAVE_LIBUDEV
- struct udev *udev;
- struct udev_device *dev = NULL;
- char nodepath[MAXPATHLEN];
- char *sysname = NULL;
- int ret = ENODEV;
- int settle_ms = 50;
- long sleep_ms = 10;
- hrtime_t start, settle;
-
- if ((udev = udev_new()) == NULL)
- return (ENXIO);
-
- start = gethrtime();
- settle = 0;
-
- do {
- if (sysname == NULL) {
- if (realpath(path, nodepath) != NULL) {
- sysname = strrchr(nodepath, '/') + 1;
- } else {
- (void) usleep(sleep_ms * MILLISEC);
- continue;
- }
- }
-
- dev = udev_device_new_from_subsystem_sysname(udev,
- "block", sysname);
- if ((dev != NULL) && udev_device_is_ready(dev)) {
- struct udev_list_entry *links, *link = NULL;
-
- ret = 0;
- links = udev_device_get_devlinks_list_entry(dev);
-
- udev_list_entry_foreach(link, links) {
- struct stat64 statbuf;
- const char *name;
-
- name = udev_list_entry_get_name(link);
- errno = 0;
- if (stat64(name, &statbuf) == 0 && errno == 0)
- continue;
-
- settle = 0;
- ret = ENODEV;
- break;
- }
-
- if (ret == 0) {
- if (settle == 0) {
- settle = gethrtime();
- } else if (NSEC2MSEC(gethrtime() - settle) >=
- settle_ms) {
- udev_device_unref(dev);
- break;
- }
- }
- }
-
- udev_device_unref(dev);
- (void) usleep(sleep_ms * MILLISEC);
-
- } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
-
- udev_unref(udev);
-
- return (ret);
-#else
- int settle_ms = 50;
- long sleep_ms = 10;
- hrtime_t start, settle;
- struct stat64 statbuf;
-
- start = gethrtime();
- settle = 0;
-
- do {
- errno = 0;
- if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
- if (settle == 0)
- settle = gethrtime();
- else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
- return (0);
- } else if (errno != ENOENT) {
- return (errno);
- }
-
- usleep(sleep_ms * MILLISEC);
- } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
-
- return (ENODEV);
-#endif /* HAVE_LIBUDEV */
-}
-
-/*
- * Encode the persistent devices strings
- * used for the vdev disk label
- */
-static int
-encode_device_strings(const char *path, vdev_dev_strs_t *ds,
- boolean_t wholedisk)
-{
-#ifdef HAVE_LIBUDEV
- struct udev *udev;
- struct udev_device *dev = NULL;
- char nodepath[MAXPATHLEN];
- char *sysname;
- int ret = ENODEV;
- hrtime_t start;
-
- if ((udev = udev_new()) == NULL)
- return (ENXIO);
-
- /* resolve path to a runtime device node instance */
- if (realpath(path, nodepath) == NULL)
- goto no_dev;
-
- sysname = strrchr(nodepath, '/') + 1;
-
- /*
- * Wait up to 3 seconds for udev to set up the device node context
- */
- start = gethrtime();
- do {
- dev = udev_device_new_from_subsystem_sysname(udev, "block",
- sysname);
- if (dev == NULL)
- goto no_dev;
- if (udev_device_is_ready(dev))
- break; /* udev ready */
-
- udev_device_unref(dev);
- dev = NULL;
-
- if (NSEC2MSEC(gethrtime() - start) < 10)
- (void) sched_yield(); /* yield/busy wait up to 10ms */
- else
- (void) usleep(10 * MILLISEC);
-
- } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
-
- if (dev == NULL)
- goto no_dev;
-
- /*
- * Only whole disks require extra device strings
- */
- if (!wholedisk && !udev_mpath_whole_disk(dev))
- goto no_dev;
-
- ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
- if (ret != 0)
- goto no_dev_ref;
-
- /* physical location string (optional) */
- if (zfs_device_get_physical(dev, ds->vds_devphys,
- sizeof (ds->vds_devphys)) != 0) {
- ds->vds_devphys[0] = '\0'; /* empty string --> not available */
- }
-
-no_dev_ref:
- udev_device_unref(dev);
-no_dev:
- udev_unref(udev);
-
- return (ret);
-#else
- return (ENOENT);
-#endif
-}
-
-/*
- * Update a leaf vdev's persistent device strings (Linux only)
- *
- * - only applies for a dedicated leaf vdev (aka whole disk)
- * - updated during pool create|add|attach|import
- * - used for matching device matching during auto-{online,expand,replace}
- * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
- * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
- *
- * single device node example:
- * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1'
- * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
- *
- * multipath device node example:
- * devid: 'dm-uuid-mpath-35000c5006304de3f'
- *
- * We also store the enclosure sysfs path for turning on enclosure LEDs
- * (if applicable):
- * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
- */
-void
-update_vdev_config_dev_strs(nvlist_t *nv)
-{
- vdev_dev_strs_t vds;
- char *env, *type, *path;
- uint64_t wholedisk = 0;
- char *upath, *spath;
-
- /*
- * For the benefit of legacy ZFS implementations, allow
- * for opting out of devid strings in the vdev label.
- *
- * example use:
- * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
- *
- * explanation:
- * Older ZFS on Linux implementations had issues when attempting to
- * display pool config VDEV names if a "devid" NVP value is present
- * in the pool's config.
- *
- * For example, a pool that originated on illumos platform would
- * have a devid value in the config and "zpool status" would fail
- * when listing the config.
- *
- * A pool can be stripped of any "devid" values on import or
- * prevented from adding them on zpool create|add by setting
- * ZFS_VDEV_DEVID_OPT_OUT.
- */
- env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
- if (env && (strtoul(env, NULL, 0) > 0 ||
- !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
- return;
- }
-
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
- strcmp(type, VDEV_TYPE_DISK) != 0) {
- return;
- }
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
- return;
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
-
- /*
- * Update device string values in config nvlist
- */
- if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
- (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
- if (vds.vds_devphys[0] != '\0') {
- (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
- vds.vds_devphys);
- }
-
- /* Add enclosure sysfs path (if disk is in an enclosure) */
- upath = zfs_get_underlying_path(path);
- spath = zfs_get_enclosure_sysfs_path(upath);
- if (spath)
- nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
- spath);
- else
- nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
-
- free(upath);
- free(spath);
- } else {
- /* clear out any stale entries */
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
- }
-}
-
/*
* Go through and fix up any path and/or devid information for the given vdev
* configuration.
@@ -752,7 +264,6 @@ fix_paths(libpc_handle_t *hdl, nvlist_t *nv, name_entry_t *names)
if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
return (-1);
- /* Linux only - update ZPOOL_CONFIG_DEVID and ZPOOL_CONFIG_PHYS_PATH */
update_vdev_config_dev_strs(nv);
return (0);
@@ -780,10 +291,10 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
&state) == 0 &&
(state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
- if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+ if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
return (-1);
- if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+ if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
free(ne);
return (-1);
}
@@ -825,7 +336,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
}
if (pe == NULL) {
- if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+ if ((pe = zutil_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
return (-1);
}
pe->pe_guid = pool_guid;
@@ -843,7 +354,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
}
if (ve == NULL) {
- if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+ if ((ve = zutil_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
return (-1);
}
ve->ve_guid = top_guid;
@@ -862,7 +373,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
}
if (ce == NULL) {
- if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+ if ((ce = zutil_alloc(hdl, sizeof (config_entry_t))) == NULL) {
return (-1);
}
ce->ce_txg = txg;
@@ -877,10 +388,10 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
* mappings so that we can fix up the configuration as necessary before
* doing the import.
*/
- if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+ if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
return (-1);
- if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+ if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
free(ne);
return (-1);
}
@@ -895,7 +406,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
}
static int
-pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
+zutil_pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
boolean_t *isactive)
{
ASSERT(hdl->lpc_ops->pco_pool_active != NULL);
@@ -907,7 +418,7 @@ pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
}
static nvlist_t *
-refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig)
+zutil_refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig)
{
ASSERT(hdl->lpc_ops->pco_refresh_config != NULL);
@@ -1095,7 +606,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
if (id >= children) {
nvlist_t **newchild;
- newchild = zfs_alloc(hdl, (id + 1) *
+ newchild = zutil_alloc(hdl, (id + 1) *
sizeof (nvlist_t *));
if (newchild == NULL)
goto nomem;
@@ -1127,7 +638,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
} else if (max_id > children) {
nvlist_t **newchild;
- newchild = zfs_alloc(hdl, (max_id) *
+ newchild = zutil_alloc(hdl, (max_id) *
sizeof (nvlist_t *));
if (newchild == NULL)
goto nomem;
@@ -1265,7 +776,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&guid) == 0);
- if (pool_active(hdl, name, guid, &isactive) != 0)
+ if (zutil_pool_active(hdl, name, guid, &isactive) != 0)
goto error;
if (isactive) {
@@ -1280,7 +791,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
goto nomem;
}
- if ((nvl = refresh_config(hdl, config)) == NULL) {
+ if ((nvl = zutil_refresh_config(hdl, config)) == NULL) {
nvlist_free(config);
config = NULL;
continue;
@@ -1345,7 +856,7 @@ add_pool:
return (ret);
nomem:
- (void) no_memory(hdl);
+ (void) zutil_no_memory(hdl);
error:
nvlist_free(config);
nvlist_free(ret);
@@ -1443,19 +954,6 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels)
return (0);
}
-typedef struct rdsk_node {
- char *rn_name; /* Full path to device */
- int rn_order; /* Preferred order (low to high) */
- int rn_num_labels; /* Number of valid labels */
- uint64_t rn_vdev_guid; /* Expected vdev guid when set */
- libpc_handle_t *rn_hdl;
- nvlist_t *rn_config; /* Label config */
- avl_tree_t *rn_avl;
- avl_node_t rn_node;
- pthread_mutex_t *rn_lock;
- boolean_t rn_labelpaths;
-} rdsk_node_t;
-
/*
* Sorted by full path and then vdev guid to allow for multiple entries with
* the same full path name. This is required because it's possible to
@@ -1465,7 +963,7 @@ typedef struct rdsk_node {
* include overwritten pool labels, devices which are visible from multiple
* hosts and multipath devices.
*/
-static int
+int
slice_cache_compare(const void *arg1, const void *arg2)
{
const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
@@ -1481,20 +979,6 @@ slice_cache_compare(const void *arg1, const void *arg2)
return (AVL_CMP(guid1, guid2));
}
-static boolean_t
-is_watchdog_dev(char *dev)
-{
- /* For 'watchdog' dev */
- if (strcmp(dev, "watchdog") == 0)
- return (B_TRUE);
-
- /* For 'watchdog<digit><whatever> */
- if (strstr(dev, "watchdog") == dev && isdigit(dev[8]))
- return (B_TRUE);
-
- return (B_FALSE);
-}
-
static int
label_paths_impl(libpc_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid,
uint64_t vdev_guid, char **path, char **devid)
@@ -1539,7 +1023,7 @@ label_paths_impl(libpc_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid,
* and store these strings as config_path and devid_path respectively.
* The returned pointers are only valid as long as label remains valid.
*/
-static int
+int
label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, char **devid)
{
nvlist_t *nvroot;
@@ -1559,165 +1043,13 @@ label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, char **devid)
}
static void
-zpool_open_func(void *arg)
-{
- rdsk_node_t *rn = arg;
- libpc_handle_t *hdl = rn->rn_hdl;
- struct stat64 statbuf;
- nvlist_t *config;
- char *bname, *dupname;
- uint64_t vdev_guid = 0;
- int error;
- int num_labels = 0;
- int fd;
-
- /*
- * Skip devices with well known prefixes there can be side effects
- * when opening devices which need to be avoided.
- *
- * hpet - High Precision Event Timer
- * watchdog - Watchdog must be closed in a special way.
- */
- dupname = zfs_strdup(hdl, rn->rn_name);
- bname = basename(dupname);
- error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
- free(dupname);
- if (error)
- return;
-
- /*
- * Ignore failed stats. We only want regular files and block devices.
- */
- if (stat64(rn->rn_name, &statbuf) != 0 ||
- (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
- return;
-
- /*
- * Preferentially open using O_DIRECT to bypass the block device
- * cache which may be stale for multipath devices. An EINVAL errno
- * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
- */
- fd = open(rn->rn_name, O_RDONLY | O_DIRECT);
- if ((fd < 0) && (errno == EINVAL))
- fd = open(rn->rn_name, O_RDONLY);
-
- if ((fd < 0) && (errno == EACCES))
- hdl->lpc_open_access_error = B_TRUE;
-
- if (fd < 0)
- return;
-
- /*
- * This file is too small to hold a zpool
- */
- if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
- (void) close(fd);
- return;
- }
-
- error = zpool_read_label(fd, &config, &num_labels);
- if (error != 0) {
- (void) close(fd);
- return;
- }
-
- if (num_labels == 0) {
- (void) close(fd);
- nvlist_free(config);
- return;
- }
-
- /*
- * Check that the vdev is for the expected guid. Additional entries
- * are speculatively added based on the paths stored in the labels.
- * Entries with valid paths but incorrect guids must be removed.
- */
- error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
- if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
- (void) close(fd);
- nvlist_free(config);
- return;
- }
-
- (void) close(fd);
-
- rn->rn_config = config;
- rn->rn_num_labels = num_labels;
-
- /*
- * Add additional entries for paths described by this label.
- */
- if (rn->rn_labelpaths) {
- char *path = NULL;
- char *devid = NULL;
- rdsk_node_t *slice;
- avl_index_t where;
- int error;
-
- if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
- return;
-
- /*
- * Allow devlinks to stabilize so all paths are available.
- */
- zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT);
-
- if (path != NULL) {
- slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
- slice->rn_name = zfs_strdup(hdl, path);
- slice->rn_vdev_guid = vdev_guid;
- slice->rn_avl = rn->rn_avl;
- slice->rn_hdl = hdl;
- slice->rn_order = IMPORT_ORDER_PREFERRED_1;
- slice->rn_labelpaths = B_FALSE;
- pthread_mutex_lock(rn->rn_lock);
- if (avl_find(rn->rn_avl, slice, &where)) {
- pthread_mutex_unlock(rn->rn_lock);
- free(slice->rn_name);
- free(slice);
- } else {
- avl_insert(rn->rn_avl, slice, where);
- pthread_mutex_unlock(rn->rn_lock);
- zpool_open_func(slice);
- }
- }
-
- if (devid != NULL) {
- slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
- error = asprintf(&slice->rn_name, "%s%s",
- DEV_BYID_PATH, devid);
- if (error == -1) {
- free(slice);
- return;
- }
-
- slice->rn_vdev_guid = vdev_guid;
- slice->rn_avl = rn->rn_avl;
- slice->rn_hdl = hdl;
- slice->rn_order = IMPORT_ORDER_PREFERRED_2;
- slice->rn_labelpaths = B_FALSE;
- pthread_mutex_lock(rn->rn_lock);
- if (avl_find(rn->rn_avl, slice, &where)) {
- pthread_mutex_unlock(rn->rn_lock);
- free(slice->rn_name);
- free(slice);
- } else {
- avl_insert(rn->rn_avl, slice, where);
- pthread_mutex_unlock(rn->rn_lock);
- zpool_open_func(slice);
- }
- }
- }
-}
-
-static void
zpool_find_import_scan_add_slice(libpc_handle_t *hdl, pthread_mutex_t *lock,
avl_tree_t *cache, const char *path, const char *name, int order)
{
avl_index_t where;
rdsk_node_t *slice;
- slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+ slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
if (asprintf(&slice->rn_name, "%s/%s", path, name) == -1) {
free(slice);
return;
@@ -1753,8 +1085,8 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock,
if (error == ENOENT)
return (0);
- zfs_error_aux(hdl, strerror(error));
- (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+ zutil_error_aux(hdl, strerror(error));
+ (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(
TEXT_DOMAIN, "cannot resolve path '%s'"), dir);
return (error);
}
@@ -1762,8 +1094,8 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock,
dirp = opendir(path);
if (dirp == NULL) {
error = errno;
- zfs_error_aux(hdl, strerror(error));
- (void) zfs_error_fmt(hdl, EZFS_BADPATH,
+ zutil_error_aux(hdl, strerror(error));
+ (void) zutil_error_fmt(hdl, EZFS_BADPATH,
dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
return (error);
}
@@ -1798,8 +1130,8 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock,
* whole path because if it's a symlink, we want the
* path of the symlink not where it points to.
*/
- d = zfs_strdup(hdl, dir);
- b = zfs_strdup(hdl, dir);
+ d = zutil_strdup(hdl, dir);
+ b = zutil_strdup(hdl, dir);
dpath = dirname(d);
name = basename(b);
@@ -1810,8 +1142,8 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock,
goto out;
}
- zfs_error_aux(hdl, strerror(error));
- (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+ zutil_error_aux(hdl, strerror(error));
+ (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(
TEXT_DOMAIN, "cannot resolve path '%s'"), dir);
goto out;
}
@@ -1829,7 +1161,7 @@ out:
*/
static int
zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock,
- avl_tree_t **slice_cache, char **dir, int dirs)
+ avl_tree_t **slice_cache, const char * const *dir, size_t dirs)
{
avl_tree_t *cache;
rdsk_node_t *slice;
@@ -1837,7 +1169,7 @@ zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock,
int i, error;
*slice_cache = NULL;
- cache = zfs_alloc(hdl, sizeof (avl_tree_t));
+ cache = zutil_alloc(hdl, sizeof (avl_tree_t));
avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t),
offsetof(rdsk_node_t, rn_node));
@@ -1849,15 +1181,15 @@ zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock,
if (error == ENOENT)
continue;
- zfs_error_aux(hdl, strerror(error));
- (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+ zutil_error_aux(hdl, strerror(error));
+ (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(
TEXT_DOMAIN, "cannot resolve path '%s'"), dir[i]);
goto error;
}
/*
* If dir[i] is a directory, we walk through it and add all
- * the entry to the cache. If it's not a directory, we just
+ * the entries to the cache. If it's not a directory, we just
* add it to the cache.
*/
if (S_ISDIR(sbuf.st_mode)) {
@@ -1885,139 +1217,6 @@ error:
return (error);
}
-static char *
-zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
- "/dev/disk/by-vdev", /* Custom rules, use first if they exist */
- "/dev/mapper", /* Use multipath devices before components */
- "/dev/disk/by-partlabel", /* Single unique entry set by user */
- "/dev/disk/by-partuuid", /* Generated partition uuid */
- "/dev/disk/by-label", /* Custom persistent labels */
- "/dev/disk/by-uuid", /* Single unique entry and persistent */
- "/dev/disk/by-id", /* May be multiple entries and persistent */
- "/dev/disk/by-path", /* Encodes physical location and persistent */
- "/dev" /* UNSAFE device names will change */
-};
-
-const char * const *
-zpool_default_search_paths(size_t *count)
-{
- *count = DEFAULT_IMPORT_PATH_SIZE;
- return ((const char * const *)zpool_default_import_path);
-}
-
-/*
- * Given a full path to a device determine if that device appears in the
- * import search path. If it does return the first match and store the
- * index in the passed 'order' variable, otherwise return an error.
- */
-static int
-zfs_path_order(char *name, int *order)
-{
- int i = 0, error = ENOENT;
- char *dir, *env, *envdup;
-
- env = getenv("ZPOOL_IMPORT_PATH");
- if (env) {
- envdup = strdup(env);
- dir = strtok(envdup, ":");
- while (dir) {
- if (strncmp(name, dir, strlen(dir)) == 0) {
- *order = i;
- error = 0;
- break;
- }
- dir = strtok(NULL, ":");
- i++;
- }
- free(envdup);
- } else {
- for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
- if (strncmp(name, zpool_default_import_path[i],
- strlen(zpool_default_import_path[i])) == 0) {
- *order = i;
- error = 0;
- break;
- }
- }
- }
-
- return (error);
-}
-
-/*
- * Use libblkid to quickly enumerate all known zfs devices.
- */
-static int
-zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
- avl_tree_t **slice_cache)
-{
- rdsk_node_t *slice;
- blkid_cache cache;
- blkid_dev_iterate iter;
- blkid_dev dev;
- avl_index_t where;
- int error;
-
- *slice_cache = NULL;
-
- error = blkid_get_cache(&cache, NULL);
- if (error != 0)
- return (error);
-
- error = blkid_probe_all_new(cache);
- if (error != 0) {
- blkid_put_cache(cache);
- return (error);
- }
-
- iter = blkid_dev_iterate_begin(cache);
- if (iter == NULL) {
- blkid_put_cache(cache);
- return (EINVAL);
- }
-
- error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
- if (error != 0) {
- blkid_dev_iterate_end(iter);
- blkid_put_cache(cache);
- return (error);
- }
-
- *slice_cache = zfs_alloc(hdl, sizeof (avl_tree_t));
- avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
- offsetof(rdsk_node_t, rn_node));
-
- while (blkid_dev_next(iter, &dev) == 0) {
- slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
- slice->rn_name = zfs_strdup(hdl, blkid_dev_devname(dev));
- slice->rn_vdev_guid = 0;
- slice->rn_lock = lock;
- slice->rn_avl = *slice_cache;
- slice->rn_hdl = hdl;
- slice->rn_labelpaths = B_TRUE;
-
- error = zfs_path_order(slice->rn_name, &slice->rn_order);
- if (error == 0)
- slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
- else
- slice->rn_order = IMPORT_ORDER_DEFAULT;
-
- pthread_mutex_lock(lock);
- if (avl_find(*slice_cache, slice, &where)) {
- free(slice->rn_name);
- free(slice);
- } else {
- avl_insert(*slice_cache, slice, where);
- }
- pthread_mutex_unlock(lock);
- }
-
- blkid_dev_iterate_end(iter);
- blkid_put_cache(cache);
-
- return (0);
-}
-
/*
* Given a list of directories to search, find all pools stored on disk. This
* includes partial pools which are not available to import. If no args are
@@ -2044,21 +1243,19 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg)
pthread_mutex_init(&lock, NULL);
/*
- * Locate pool member vdevs using libblkid or by directory scanning.
+ * Locate pool member vdevs by blkid or by directory scanning.
* On success a newly allocated AVL tree which is populated with an
- * entry for each discovered vdev will be returned as the cache.
- * It's the callers responsibility to consume and destroy this tree.
+ * entry for each discovered vdev will be returned in the cache.
+ * It's the caller's responsibility to consume and destroy this tree.
*/
if (iarg->scan || iarg->paths != 0) {
- int dirs = iarg->paths;
- char **dir = iarg->path;
+ size_t dirs = iarg->paths;
+ const char * const *dir = (const char * const *)iarg->path;
- if (dirs == 0) {
- dir = zpool_default_import_path;
- dirs = DEFAULT_IMPORT_PATH_SIZE;
- }
+ if (dirs == 0)
+ dir = zpool_default_search_paths(&dirs);
- if (zpool_find_import_scan(hdl, &lock, &cache, dir, dirs) != 0)
+ if (zpool_find_import_scan(hdl, &lock, &cache, dir, dirs) != 0)
return (NULL);
} else {
if (zpool_find_import_blkid(hdl, &lock, &cache) != 0)
@@ -2192,21 +1389,21 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
verify(poolname == NULL || guid == 0);
if ((fd = open(cachefile, O_RDONLY)) < 0) {
- zfs_error_aux(hdl, "%s", strerror(errno));
- (void) zfs_error(hdl, EZFS_BADCACHE,
+ zutil_error_aux(hdl, "%s", strerror(errno));
+ (void) zutil_error(hdl, EZFS_BADCACHE,
dgettext(TEXT_DOMAIN, "failed to open cache file"));
return (NULL);
}
if (fstat64(fd, &statbuf) != 0) {
- zfs_error_aux(hdl, "%s", strerror(errno));
+ zutil_error_aux(hdl, "%s", strerror(errno));
(void) close(fd);
- (void) zfs_error(hdl, EZFS_BADCACHE,
+ (void) zutil_error(hdl, EZFS_BADCACHE,
dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
return (NULL);
}
- if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
+ if ((buf = zutil_alloc(hdl, statbuf.st_size)) == NULL) {
(void) close(fd);
return (NULL);
}
@@ -2214,7 +1411,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
(void) close(fd);
free(buf);
- (void) zfs_error(hdl, EZFS_BADCACHE,
+ (void) zutil_error(hdl, EZFS_BADCACHE,
dgettext(TEXT_DOMAIN,
"failed to read cache file contents"));
return (NULL);
@@ -2224,7 +1421,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
free(buf);
- (void) zfs_error(hdl, EZFS_BADCACHE,
+ (void) zutil_error(hdl, EZFS_BADCACHE,
dgettext(TEXT_DOMAIN,
"invalid or corrupt cache file contents"));
return (NULL);
@@ -2237,7 +1434,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
* state.
*/
if (nvlist_alloc(&pools, 0, 0) != 0) {
- (void) no_memory(hdl);
+ (void) zutil_no_memory(hdl);
nvlist_free(raw);
return (NULL);
}
@@ -2254,7 +1451,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
if (guid != 0 && guid != this_guid)
continue;
- if (pool_active(hdl, name, this_guid, &active) != 0) {
+ if (zutil_pool_active(hdl, name, this_guid, &active) != 0) {
nvlist_free(raw);
nvlist_free(pools);
return (NULL);
@@ -2265,20 +1462,20 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE,
cachefile) != 0) {
- (void) no_memory(hdl);
+ (void) zutil_no_memory(hdl);
nvlist_free(raw);
nvlist_free(pools);
return (NULL);
}
- if ((dst = refresh_config(hdl, src)) == NULL) {
+ if ((dst = zutil_refresh_config(hdl, src)) == NULL) {
nvlist_free(raw);
nvlist_free(pools);
return (NULL);
}
if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
- (void) no_memory(hdl);
+ (void) zutil_no_memory(hdl);
nvlist_free(dst);
nvlist_free(raw);
nvlist_free(pools);
@@ -2312,7 +1509,7 @@ zpool_search_import(void *hdl, importargs_t *import,
if ((pools == NULL || nvlist_empty(pools)) &&
handle.lpc_open_access_error && geteuid() != 0) {
- (void) zfs_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN,
+ (void) zutil_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN,
"no pools found"));
}
diff --git a/lib/libzutil/zutil_import.h b/lib/libzutil/zutil_import.h
new file mode 100644
index 000000000..0108eb45c
--- /dev/null
+++ b/lib/libzutil/zutil_import.h
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
+ */
+#ifndef _LIBZUTIL_ZUTIL_IMPORT_H_
+#define _LIBZUTIL_ZUTIL_IMPORT_H_
+
+#define EZFS_BADCACHE "invalid or missing cache file"
+#define EZFS_BADPATH "must be an absolute path"
+#define EZFS_NOMEM "out of memory"
+#define EZFS_EACESS "some devices require root privileges"
+
+#define IMPORT_ORDER_PREFERRED_1 1
+#define IMPORT_ORDER_PREFERRED_2 2
+#define IMPORT_ORDER_SCAN_OFFSET 10
+#define IMPORT_ORDER_DEFAULT 100
+
+typedef struct libpc_handle {
+ boolean_t lpc_printerr;
+ boolean_t lpc_open_access_error;
+ boolean_t lpc_desc_active;
+ char lpc_desc[1024];
+ const pool_config_ops_t *lpc_ops;
+ void *lpc_lib_handle;
+} libpc_handle_t;
+
+
+int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path,
+ char **devid);
+int zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
+ avl_tree_t **slice_cache);
+
+void * zutil_alloc(libpc_handle_t *hdl, size_t size);
+char *zutil_strdup(libpc_handle_t *hdl, const char *str);
+
+typedef struct rdsk_node {
+ char *rn_name; /* Full path to device */
+ int rn_order; /* Preferred order (low to high) */
+ int rn_num_labels; /* Number of valid labels */
+ uint64_t rn_vdev_guid; /* Expected vdev guid when set */
+ libpc_handle_t *rn_hdl;
+ nvlist_t *rn_config; /* Label config */
+ avl_tree_t *rn_avl;
+ avl_node_t rn_node;
+ pthread_mutex_t *rn_lock;
+ boolean_t rn_labelpaths;
+} rdsk_node_t;
+
+int slice_cache_compare(const void *, const void *);
+
+void zpool_open_func(void *);
+
+#endif /* _LIBZUTIL_ZUTIL_IMPORT_H_ */