diff options
author | Matthew Macy <[email protected]> | 2019-10-03 10:20:44 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-10-03 10:20:44 -0700 |
commit | 7c5eff9400d12ed8f2676a6d08f6ae099351397f (patch) | |
tree | a5efa173c07a6c1a962ac68f2af42d9cc31a4b8c | |
parent | e1c216fb0c33209471b51f80ba3a1ce8910913c7 (diff) |
OpenZFS restructuring - libzutil
Factor Linux specific functionality out of libzutil.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matt Macy <[email protected]>
Signed-off-by: Ryan Moeller <[email protected]>
Closes #9356
-rw-r--r-- | cmd/zed/agents/zfs_mod.c | 10 | ||||
-rw-r--r-- | include/libzutil.h | 13 | ||||
-rw-r--r-- | lib/libzutil/Makefile.am | 9 | ||||
-rw-r--r-- | lib/libzutil/os/linux/zutil_device_path_os.c | 493 | ||||
-rw-r--r-- | lib/libzutil/os/linux/zutil_import_os.c | 856 | ||||
-rw-r--r-- | lib/libzutil/zutil_device_path.c | 451 | ||||
-rw-r--r-- | lib/libzutil/zutil_import.c | 937 | ||||
-rw-r--r-- | lib/libzutil/zutil_import.h | 76 |
8 files changed, 1508 insertions, 1337 deletions
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 959ed24cd..39ddd8140 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -191,8 +191,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) char rawpath[PATH_MAX], fullpath[PATH_MAX]; char devpath[PATH_MAX]; int ret; - int is_dm = 0; - int is_sd = 0; + boolean_t is_dm = B_FALSE; + boolean_t is_sd = B_FALSE; uint_t c; vdev_stat_t *vs; @@ -220,8 +220,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) is_dm = zfs_dev_is_dm(path); zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'" - " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path, - physpath ? physpath : "NULL", wholedisk, is_dm, + " wholedisk %d, %s dm (guid %llu)", zpool_get_name(zhp), path, + physpath ? physpath : "NULL", wholedisk, is_dm ? "is" : "not", (long long unsigned int)guid); /* @@ -266,7 +266,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) * testing) */ if (physpath != NULL && strcmp("scsidebug", physpath) == 0) - is_sd = 1; + is_sd = B_TRUE; /* * If the pool doesn't have the autoreplace property set, then use diff --git a/include/libzutil.h b/include/libzutil.h index 7c483f0d9..4a8df0132 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -79,15 +79,10 @@ extern const char * const * zpool_default_search_paths(size_t *count); extern int zpool_read_label(int, nvlist_t **, int *); extern int zpool_label_disk_wait(const char *, int); -#ifdef HAVE_LIBUDEV struct udev_device; extern int zfs_device_get_devid(struct udev_device *, char *, size_t); extern int zfs_device_get_physical(struct udev_device *, char *, size_t); -#else -#define zfs_device_get_devid(dev, bufptr, buflen) (ENODATA) -#define zfs_device_get_physical(dev, bufptr, buflen) (ENODATA) -#endif extern void update_vdev_config_dev_strs(nvlist_t *); @@ -106,16 +101,12 @@ extern char *zfs_strip_partition_path(char *); extern int 
zfs_strcmp_pathname(const char *, const char *, int); -extern int zfs_dev_is_dm(const char *); -extern int zfs_dev_is_whole_disk(const char *); +extern boolean_t zfs_dev_is_dm(const char *); +extern boolean_t zfs_dev_is_whole_disk(const char *); extern char *zfs_get_underlying_path(const char *); extern char *zfs_get_enclosure_sysfs_path(const char *); -#ifdef HAVE_LIBUDEV extern boolean_t is_mpath_whole_disk(const char *); -#else -#define is_mpath_whole_disk(path) (B_FALSE) -#endif extern boolean_t zfs_isnumber(const char *); diff --git a/lib/libzutil/Makefile.am b/lib/libzutil/Makefile.am index 62b0114ed..8b53c374e 100644 --- a/lib/libzutil/Makefile.am +++ b/lib/libzutil/Makefile.am @@ -3,14 +3,23 @@ include $(top_srcdir)/config/Rules.am # Suppress unused but set variable warnings often due to ASSERTs AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE) +DEFAULT_INCLUDES += -I. + noinst_LTLIBRARIES = libzutil.la USER_C = \ zutil_device_path.c \ zutil_import.c \ + zutil_import.h \ zutil_nicenum.c \ zutil_pool.c +if BUILD_LINUX +USER_C += \ + os/linux/zutil_device_path_os.c \ + os/linux/zutil_import_os.c +endif + nodist_libzutil_la_SOURCES = $(USER_C) libzutil_la_LIBADD = \ diff --git a/lib/libzutil/os/linux/zutil_device_path_os.c b/lib/libzutil/os/linux/zutil_device_path_os.c new file mode 100644 index 000000000..8ca4f8570 --- /dev/null +++ b/lib/libzutil/os/linux/zutil_device_path_os.c @@ -0,0 +1,493 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <ctype.h> +#include <dirent.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/efi_partition.h> + +#ifdef HAVE_LIBUDEV +#include <libudev.h> +#endif + +#include <libzutil.h> + +/* + * Append partition suffix to an otherwise fully qualified device path. + * This is used to generate the full path name as it is stored in + * ZPOOL_CONFIG_PATH for whole disk devices. On success the new length + * of 'path' is returned; on error a negative value is returned. + */ +int +zfs_append_partition(char *path, size_t max_len) +{ + int len = strlen(path); + + if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) || + (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) { + if (len + 6 >= max_len) + return (-1); + + (void) strcat(path, "-part1"); + len += 6; + } else { + if (len + 2 >= max_len) + return (-1); + + if (isdigit(path[len-1])) { + (void) strcat(path, "p1"); + len += 2; + } else { + (void) strcat(path, "1"); + len += 1; + } + } + + return (len); +} + +/* + * Allocate and return the underlying device name for a device mapper device. + * If a device mapper device maps to multiple devices, return the first device. + * + * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a + * DM device (like /dev/disk/by-vdev/A0) are also allowed. + * + * Returns device name, or NULL on error or no match. If dm_name is not a DM + * device then return NULL. + * + * NOTE: The returned name string must be *freed*.
+ */ +static char * +dm_get_underlying_path(const char *dm_name) +{ + DIR *dp = NULL; + struct dirent *ep; + char *realp; + char *tmp = NULL; + char *path = NULL; + char *dev_str; + int size; + + if (dm_name == NULL) + return (NULL); + + /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */ + realp = realpath(dm_name, NULL); + if (realp == NULL) + return (NULL); + + /* + * If they preface 'dev' with a path (like "/dev") then strip it off. + * We just want the 'dm-N' part. + */ + tmp = strrchr(realp, '/'); + if (tmp != NULL) + dev_str = tmp + 1; /* +1 since we want the chr after '/' */ + else + dev_str = tmp; + + size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str); + if (size == -1 || !tmp) + goto end; + + dp = opendir(tmp); + if (dp == NULL) + goto end; + + /* Return first sd* entry in /sys/block/dm-N/slaves/ */ + while ((ep = readdir(dp))) { + if (ep->d_type != DT_DIR) { /* skip "." and ".." dirs */ + size = asprintf(&path, "/dev/%s", ep->d_name); + break; + } + } + +end: + if (dp != NULL) + closedir(dp); + free(tmp); + free(realp); + return (path); +} + +/* + * Return B_TRUE if device is a device mapper or multipath device. + * Return B_FALSE if not. + */ +boolean_t +zfs_dev_is_dm(const char *dev_name) +{ + + char *tmp; + tmp = dm_get_underlying_path(dev_name); + if (tmp == NULL) + return (B_FALSE); + + free(tmp); + return (B_TRUE); +} + +/* + * By "whole disk" we mean an entire physical disk (something we can + * label, toggle the write cache on, etc.) as opposed to the full + * capacity of a pseudo-device such as lofi or did. We act as if we + * are labeling the disk, which should be a pretty good test of whether + * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if + * it isn't. 
+ */ +boolean_t +zfs_dev_is_whole_disk(const char *dev_name) +{ + struct dk_gpt *label; + int fd; + + if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0) + return (B_FALSE); + + if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { + (void) close(fd); + return (B_FALSE); + } + + efi_free(label); + (void) close(fd); + + return (B_TRUE); +} + +/* + * Lookup the underlying device for a device name + * + * Often you'll have a symlink to a device, a partition device, + * or a multipath device, and want to look up the underlying device. + * This function returns the underlying device name. If the device + * name is already the underlying device, then just return the same + * name. If the device is a DM device with multiple underlying devices + * then return the first one. + * + * For example: + * + * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda + * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 + * returns: /dev/sda + * + * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb) + * dev_name: /dev/mapper/mpatha + * returns: /dev/sda (first device) + * + * 3. /dev/sda (already the underlying device) + * dev_name: /dev/sda + * returns: /dev/sda + * + * 4. /dev/dm-3 (mapped to /dev/sda) + * dev_name: /dev/dm-3 + * returns: /dev/sda + * + * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9 + * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 + * returns: /dev/sdb + * + * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2 + * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a + * returns: /dev/sda + * + * Returns underlying device name, or NULL on error or no match. + * + * NOTE: The returned name string must be *freed*. 
+ */ +char * +zfs_get_underlying_path(const char *dev_name) +{ + char *name = NULL; + char *tmp; + + if (dev_name == NULL) + return (NULL); + + tmp = dm_get_underlying_path(dev_name); + + /* dev_name not a DM device, so just un-symlinkize it */ + if (tmp == NULL) + tmp = realpath(dev_name, NULL); + + if (tmp != NULL) { + name = zfs_strip_partition_path(tmp); + free(tmp); + } + + return (name); +} + +/* + * Given a dev name like "sda", return the full enclosure sysfs path to + * the disk. You can also pass in the name with "/dev" prepended + * to it (like /dev/sda). + * + * For example, disk "sda" in enclosure slot 1: + * dev: "sda" + * returns: "/sys/class/enclosure/1:0:3:0/Slot 1" + * + * 'dev' must be a non-devicemapper device. + * + * Returned string must be freed. + */ +char * +zfs_get_enclosure_sysfs_path(const char *dev_name) +{ + DIR *dp = NULL; + struct dirent *ep; + char buf[MAXPATHLEN]; + char *tmp1 = NULL; + char *tmp2 = NULL; + char *tmp3 = NULL; + char *path = NULL; + size_t size; + int tmpsize; + + if (dev_name == NULL) + return (NULL); + + /* If they preface 'dev' with a path (like "/dev") then strip it off */ + tmp1 = strrchr(dev_name, '/'); + if (tmp1 != NULL) + dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */ + + tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name); + if (tmpsize == -1 || tmp1 == NULL) { + tmp1 = NULL; + goto end; + } + + dp = opendir(tmp1); + if (dp == NULL) { + tmp1 = NULL; /* To make free() at the end a NOP */ + goto end; + } + + /* + * Look though all sysfs entries in /sys/block/<dev>/device for + * the enclosure symlink. + */ + while ((ep = readdir(dp))) { + /* Ignore everything that's not our enclosure_device link */ + if (strstr(ep->d_name, "enclosure_device") == NULL) + continue; + + if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 || + tmp2 == NULL) + break; + + size = readlink(tmp2, buf, sizeof (buf)); + + /* Did readlink fail or crop the link name? 
*/ + if (size == -1 || size >= sizeof (buf)) { + free(tmp2); + tmp2 = NULL; /* To make free() at the end a NOP */ + break; + } + + /* + * We got a valid link. readlink() doesn't terminate strings + * so we have to do it. + */ + buf[size] = '\0'; + + /* + * Our link will look like: + * + * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1" + * + * We want to grab the "enclosure/1:0:3:0/SLOT 1" part + */ + tmp3 = strstr(buf, "enclosure"); + if (tmp3 == NULL) + break; + + if (asprintf(&path, "/sys/class/%s", tmp3) == -1) { + /* If asprintf() fails, 'path' is undefined */ + path = NULL; + break; + } + + if (path == NULL) + break; + } + +end: + free(tmp2); + free(tmp1); + + if (dp != NULL) + closedir(dp); + + return (path); +} + +/* + * Remove partition suffix from a vdev path. Partition suffixes may take three + * forms: "-partX", "pX", or "X", where X is a string of digits. The second + * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The + * third case only occurs when preceded by a string matching the regular + * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk. 
+ * + * caller must free the returned string + */ +char * +zfs_strip_partition(char *path) +{ + char *tmp = strdup(path); + char *part = NULL, *d = NULL; + if (!tmp) + return (NULL); + + if ((part = strstr(tmp, "-part")) && part != tmp) { + d = part + 5; + } else if ((part = strrchr(tmp, 'p')) && + part > tmp + 1 && isdigit(*(part-1))) { + d = part + 1; + } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') && + tmp[1] == 'd') { + for (d = &tmp[2]; isalpha(*d); part = ++d) { } + } else if (strncmp("xvd", tmp, 3) == 0) { + for (d = &tmp[3]; isalpha(*d); part = ++d) { } + } + if (part && d && *d != '\0') { + for (; isdigit(*d); d++) { } + if (*d == '\0') + *part = '\0'; + } + + return (tmp); +} + +/* + * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname + * + * path: /dev/sda1 + * returns: /dev/sda + * + * Returned string must be freed. + */ +char * +zfs_strip_partition_path(char *path) +{ + char *newpath = strdup(path); + char *sd_offset; + char *new_sd; + + if (!newpath) + return (NULL); + + /* Point to "sda1" part of "/dev/sda1" */ + sd_offset = strrchr(newpath, '/') + 1; + + /* Get our new name "sda" */ + new_sd = zfs_strip_partition(sd_offset); + if (!new_sd) { + free(newpath); + return (NULL); + } + + /* Paste the "sda" where "sda1" was */ + strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1); + + /* Free temporary "sda" */ + free(new_sd); + + return (newpath); +} + +#ifdef HAVE_LIBUDEV + +/* + * A disk is considered a multipath whole disk when: + * DEVNAME key value has "dm-" + * DM_NAME key value has "mpath" prefix + * DM_UUID key exists + * ID_PART_TABLE_TYPE key does not exist or is not gpt + */ +static boolean_t +udev_mpath_whole_disk(struct udev_device *dev) +{ + const char *devname, *type, *uuid; + + devname = udev_device_get_property_value(dev, "DEVNAME"); + type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); + uuid = udev_device_get_property_value(dev, "DM_UUID"); + + if ((devname != NULL && strncmp(devname, 
"/dev/dm-", 8) == 0) && + ((type == NULL) || (strcmp(type, "gpt") != 0)) && + (uuid != NULL)) { + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Check if a disk is effectively a multipath whole disk + */ +boolean_t +is_mpath_whole_disk(const char *path) +{ + struct udev *udev; + struct udev_device *dev = NULL; + char nodepath[MAXPATHLEN]; + char *sysname; + boolean_t wholedisk = B_FALSE; + + if (realpath(path, nodepath) == NULL) + return (B_FALSE); + sysname = strrchr(nodepath, '/') + 1; + if (strncmp(sysname, "dm-", 3) != 0) + return (B_FALSE); + if ((udev = udev_new()) == NULL) + return (B_FALSE); + if ((dev = udev_device_new_from_subsystem_sysname(udev, "block", + sysname)) == NULL) { + udev_device_unref(dev); + return (B_FALSE); + } + + wholedisk = udev_mpath_whole_disk(dev); + + udev_device_unref(dev); + return (wholedisk); +} + +#else /* HAVE_LIBUDEV */ + +/* ARGSUSED */ +boolean_t +is_mpath_whole_disk(const char *path) +{ + return (B_FALSE); +} + +#endif /* HAVE_LIBUDEV */ diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c new file mode 100644 index 000000000..811eae397 --- /dev/null +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -0,0 +1,856 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright 2015 RackTop Systems. + * Copyright (c) 2016, Intel Corporation. + */ + +/* + * Pool import support functions. + * + * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since + * these commands are expected to run in the global zone, we can assume + * that the devices are all readable when called. + * + * To import a pool, we rely on reading the configuration information from the + * ZFS label of each device. If we successfully read the label, then we + * organize the configuration information in the following hierarchy: + * + * pool guid -> toplevel vdev guid -> label txg + * + * Duplicate entries matching this same tuple will be discarded. Once we have + * examined every device, we pick the best label txg config for each toplevel + * vdev. We then arrange these toplevel vdevs into a complete pool config, and + * update any paths that have changed. Finally, we attempt to import the pool + * using our derived config, and record the results. 
+ */ + +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <libintl.h> +#include <libgen.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/dktp/fdisk.h> +#include <sys/vdev_impl.h> +#include <sys/fs/zfs.h> +#include <sys/vdev_impl.h> + +#include <thread_pool.h> +#include <libzutil.h> +#include <libnvpair.h> + +#include "zutil_import.h" + +#ifdef HAVE_LIBUDEV +#include <libudev.h> +#include <sched.h> +#endif +#include <blkid/blkid.h> + +#define DEFAULT_IMPORT_PATH_SIZE 9 +#define DEV_BYID_PATH "/dev/disk/by-id/" + +static boolean_t +is_watchdog_dev(char *dev) +{ + /* For 'watchdog' dev */ + if (strcmp(dev, "watchdog") == 0) + return (B_TRUE); + + /* For 'watchdog<digit><whatever> */ + if (strstr(dev, "watchdog") == dev && isdigit(dev[8])) + return (B_TRUE); + + return (B_FALSE); +} + +void +zpool_open_func(void *arg) +{ + rdsk_node_t *rn = arg; + libpc_handle_t *hdl = rn->rn_hdl; + struct stat64 statbuf; + nvlist_t *config; + char *bname, *dupname; + uint64_t vdev_guid = 0; + int error; + int num_labels = 0; + int fd; + + /* + * Skip devices with well known prefixes there can be side effects + * when opening devices which need to be avoided. + * + * hpet - High Precision Event Timer + * watchdog - Watchdog must be closed in a special way. + */ + dupname = zutil_strdup(hdl, rn->rn_name); + bname = basename(dupname); + error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname)); + free(dupname); + if (error) + return; + + /* + * Ignore failed stats. We only want regular files and block devices. + */ + if (stat64(rn->rn_name, &statbuf) != 0 || + (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) + return; + + /* + * Preferentially open using O_DIRECT to bypass the block device + * cache which may be stale for multipath devices. An EINVAL errno + * indicates O_DIRECT is unsupported so fallback to just O_RDONLY. 
+ */ + fd = open(rn->rn_name, O_RDONLY | O_DIRECT); + if ((fd < 0) && (errno == EINVAL)) + fd = open(rn->rn_name, O_RDONLY); + if ((fd < 0) && (errno == EACCES)) + hdl->lpc_open_access_error = B_TRUE; + if (fd < 0) + return; + + /* + * This file is too small to hold a zpool + */ + if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) { + (void) close(fd); + return; + } + + error = zpool_read_label(fd, &config, &num_labels); + if (error != 0) { + (void) close(fd); + return; + } + + if (num_labels == 0) { + (void) close(fd); + nvlist_free(config); + return; + } + + /* + * Check that the vdev is for the expected guid. Additional entries + * are speculatively added based on the paths stored in the labels. + * Entries with valid paths but incorrect guids must be removed. + */ + error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); + if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) { + (void) close(fd); + nvlist_free(config); + return; + } + + (void) close(fd); + + rn->rn_config = config; + rn->rn_num_labels = num_labels; + + /* + * Add additional entries for paths described by this label. + */ + if (rn->rn_labelpaths) { + char *path = NULL; + char *devid = NULL; + rdsk_node_t *slice; + avl_index_t where; + int error; + + if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) + return; + + /* + * Allow devlinks to stabilize so all paths are available. 
+ */ + zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT); + + if (path != NULL) { + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zutil_strdup(hdl, path); + slice->rn_vdev_guid = vdev_guid; + slice->rn_avl = rn->rn_avl; + slice->rn_hdl = hdl; + slice->rn_order = IMPORT_ORDER_PREFERRED_1; + slice->rn_labelpaths = B_FALSE; + pthread_mutex_lock(rn->rn_lock); + if (avl_find(rn->rn_avl, slice, &where)) { + pthread_mutex_unlock(rn->rn_lock); + free(slice->rn_name); + free(slice); + } else { + avl_insert(rn->rn_avl, slice, where); + pthread_mutex_unlock(rn->rn_lock); + zpool_open_func(slice); + } + } + + if (devid != NULL) { + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); + error = asprintf(&slice->rn_name, "%s%s", + DEV_BYID_PATH, devid); + if (error == -1) { + free(slice); + return; + } + + slice->rn_vdev_guid = vdev_guid; + slice->rn_avl = rn->rn_avl; + slice->rn_hdl = hdl; + slice->rn_order = IMPORT_ORDER_PREFERRED_2; + slice->rn_labelpaths = B_FALSE; + pthread_mutex_lock(rn->rn_lock); + if (avl_find(rn->rn_avl, slice, &where)) { + pthread_mutex_unlock(rn->rn_lock); + free(slice->rn_name); + free(slice); + } else { + avl_insert(rn->rn_avl, slice, where); + pthread_mutex_unlock(rn->rn_lock); + zpool_open_func(slice); + } + } + } +} + +static char * +zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { + "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ + "/dev/mapper", /* Use multipath devices before components */ + "/dev/disk/by-partlabel", /* Single unique entry set by user */ + "/dev/disk/by-partuuid", /* Generated partition uuid */ + "/dev/disk/by-label", /* Custom persistent labels */ + "/dev/disk/by-uuid", /* Single unique entry and persistent */ + "/dev/disk/by-id", /* May be multiple entries and persistent */ + "/dev/disk/by-path", /* Encodes physical location and persistent */ + "/dev" /* UNSAFE device names will change */ +}; + +const char * const * +zpool_default_search_paths(size_t *count) +{ + *count = 
DEFAULT_IMPORT_PATH_SIZE; + return ((const char * const *)zpool_default_import_path); +} + +/* + * Given a full path to a device determine if that device appears in the + * import search path. If it does return the first match and store the + * index in the passed 'order' variable, otherwise return an error. + */ +static int +zfs_path_order(char *name, int *order) +{ + int i = 0, error = ENOENT; + char *dir, *env, *envdup; + + env = getenv("ZPOOL_IMPORT_PATH"); + if (env) { + envdup = strdup(env); + dir = strtok(envdup, ":"); + while (dir) { + if (strncmp(name, dir, strlen(dir)) == 0) { + *order = i; + error = 0; + break; + } + dir = strtok(NULL, ":"); + i++; + } + free(envdup); + } else { + for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) { + if (strncmp(name, zpool_default_import_path[i], + strlen(zpool_default_import_path[i])) == 0) { + *order = i; + error = 0; + break; + } + } + } + + return (error); +} + +/* + * Use libblkid to quickly enumerate all known zfs devices. + */ +int +zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, + avl_tree_t **slice_cache) +{ + rdsk_node_t *slice; + blkid_cache cache; + blkid_dev_iterate iter; + blkid_dev dev; + avl_index_t where; + int error; + + *slice_cache = NULL; + + error = blkid_get_cache(&cache, NULL); + if (error != 0) + return (error); + + error = blkid_probe_all_new(cache); + if (error != 0) { + blkid_put_cache(cache); + return (error); + } + + iter = blkid_dev_iterate_begin(cache); + if (iter == NULL) { + blkid_put_cache(cache); + return (EINVAL); + } + + error = blkid_dev_set_search(iter, "TYPE", "zfs_member"); + if (error != 0) { + blkid_dev_iterate_end(iter); + blkid_put_cache(cache); + return (error); + } + + *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t)); + avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t), + offsetof(rdsk_node_t, rn_node)); + + while (blkid_dev_next(iter, &dev) == 0) { + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zutil_strdup(hdl, 
blkid_dev_devname(dev)); + slice->rn_vdev_guid = 0; + slice->rn_lock = lock; + slice->rn_avl = *slice_cache; + slice->rn_hdl = hdl; + slice->rn_labelpaths = B_TRUE; + + error = zfs_path_order(slice->rn_name, &slice->rn_order); + if (error == 0) + slice->rn_order += IMPORT_ORDER_SCAN_OFFSET; + else + slice->rn_order = IMPORT_ORDER_DEFAULT; + + pthread_mutex_lock(lock); + if (avl_find(*slice_cache, slice, &where)) { + free(slice->rn_name); + free(slice); + } else { + avl_insert(*slice_cache, slice, where); + } + pthread_mutex_unlock(lock); + } + + blkid_dev_iterate_end(iter); + blkid_put_cache(cache); + + return (0); +} + +/* + * Linux persistent device strings for vdev labels + * + * based on libudev for consistency with libudev disk add/remove events + */ + +typedef struct vdev_dev_strs { + char vds_devid[128]; + char vds_devphys[128]; +} vdev_dev_strs_t; + +#ifdef HAVE_LIBUDEV + +/* + * Obtain the persistent device id string (describes what) + * + * used by ZED vdev matching for auto-{online,expand,replace} + */ +int +zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) +{ + struct udev_list_entry *entry; + const char *bus; + char devbyid[MAXPATHLEN]; + + /* The bus based by-id path is preferred */ + bus = udev_device_get_property_value(dev, "ID_BUS"); + + if (bus == NULL) { + const char *dm_uuid; + + /* + * For multipath nodes use the persistent uuid based identifier + * + * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f + */ + dm_uuid = udev_device_get_property_value(dev, "DM_UUID"); + if (dm_uuid != NULL) { + (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid); + return (0); + } + + /* + * For volumes use the persistent /dev/zvol/dataset identifier + */ + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + const char *name; + + name = udev_list_entry_get_name(entry); + if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { + (void) strlcpy(bufptr, name, buflen); + return (0); + } + entry = 
udev_list_entry_get_next(entry); + } + + /* + * NVME 'by-id' symlinks are similar to bus case + */ + struct udev_device *parent; + + parent = udev_device_get_parent_with_subsystem_devtype(dev, + "nvme", NULL); + if (parent != NULL) + bus = "nvme"; /* continue with bus symlink search */ + else + return (ENODATA); + } + + /* + * locate the bus specific by-id link + */ + (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus); + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + const char *name; + + name = udev_list_entry_get_name(entry); + if (strncmp(name, devbyid, strlen(devbyid)) == 0) { + name += strlen(DEV_BYID_PATH); + (void) strlcpy(bufptr, name, buflen); + return (0); + } + entry = udev_list_entry_get_next(entry); + } + + return (ENODATA); +} + +/* + * Obtain the persistent physical location string (describes where) + * + * used by ZED vdev matching for auto-{online,expand,replace} + */ +int +zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) +{ + const char *physpath = NULL; + struct udev_list_entry *entry; + + /* + * Normal disks use ID_PATH for their physical path. + */ + physpath = udev_device_get_property_value(dev, "ID_PATH"); + if (physpath != NULL && strlen(physpath) > 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + + /* + * Device mapper devices are virtual and don't have a physical + * path. For them we use ID_VDEV instead, which is setup via the + * /etc/vdev_id.conf file. ID_VDEV provides a persistent path + * to a virtual device. If you don't have vdev_id.conf setup, + * you cannot use multipath autoreplace with device mapper. 
+ */ + physpath = udev_device_get_property_value(dev, "ID_VDEV"); + if (physpath != NULL && strlen(physpath) > 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + + /* + * For ZFS volumes use the persistent /dev/zvol/dataset identifier + */ + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + physpath = udev_list_entry_get_name(entry); + if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + entry = udev_list_entry_get_next(entry); + } + + /* + * For all other devices fallback to using the by-uuid name. + */ + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + physpath = udev_list_entry_get_name(entry); + if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + entry = udev_list_entry_get_next(entry); + } + + return (ENODATA); +} + +/* + * A disk is considered a multipath whole disk when: + * DEVNAME key value has "dm-" + * DM_NAME key value has "mpath" prefix + * DM_UUID key exists + * ID_PART_TABLE_TYPE key does not exist or is not gpt + */ +static boolean_t +udev_mpath_whole_disk(struct udev_device *dev) +{ + const char *devname, *type, *uuid; + + devname = udev_device_get_property_value(dev, "DEVNAME"); + type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); + uuid = udev_device_get_property_value(dev, "DM_UUID"); + + if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) && + ((type == NULL) || (strcmp(type, "gpt") != 0)) && + (uuid != NULL)) { + return (B_TRUE); + } + + return (B_FALSE); +} + +static int +udev_device_is_ready(struct udev_device *dev) +{ +#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED + return (udev_device_get_is_initialized(dev)); +#else + /* wait for DEVLINKS property to be initialized */ + return (udev_device_get_property_value(dev, "DEVLINKS") != NULL); +#endif +} + +#else + +/* ARGSUSED */ +int 
+zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) +{ + return (ENODATA); +} + +/* ARGSUSED */ +int +zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) +{ + return (ENODATA); +} + +#endif /* HAVE_LIBUDEV */ + +/* + * Wait up to timeout_ms for udev to set up the device node. The device is + * considered ready when libudev determines it has been initialized, all of + * the device links have been verified to exist, and it has been allowed to + * settle. At this point the device can be accessed reliably. + * Depending on the complexity of the udev rules this process could take + * several seconds. + */ +int +zpool_label_disk_wait(const char *path, int timeout_ms) +{ +#ifdef HAVE_LIBUDEV + struct udev *udev; + struct udev_device *dev = NULL; + char nodepath[MAXPATHLEN]; + char *sysname = NULL; + int ret = ENODEV; + int settle_ms = 50; + long sleep_ms = 10; + hrtime_t start, settle; + + if ((udev = udev_new()) == NULL) + return (ENXIO); + + start = gethrtime(); + settle = 0; + + do { + if (sysname == NULL) { + if (realpath(path, nodepath) != NULL) { + sysname = strrchr(nodepath, '/') + 1; + } else { + (void) usleep(sleep_ms * MILLISEC); + continue; + } + } + + dev = udev_device_new_from_subsystem_sysname(udev, + "block", sysname); + if ((dev != NULL) && udev_device_is_ready(dev)) { + struct udev_list_entry *links, *link = NULL; + + ret = 0; + links = udev_device_get_devlinks_list_entry(dev); + + udev_list_entry_foreach(link, links) { + struct stat64 statbuf; + const char *name; + + name = udev_list_entry_get_name(link); + errno = 0; + if (stat64(name, &statbuf) == 0 && errno == 0) + continue; + + settle = 0; + ret = ENODEV; + break; + } + + if (ret == 0) { + if (settle == 0) { + settle = gethrtime(); + } else if (NSEC2MSEC(gethrtime() - settle) >= + settle_ms) { + udev_device_unref(dev); + break; + } + } + } + + udev_device_unref(dev); + (void) usleep(sleep_ms * MILLISEC); + + } while (NSEC2MSEC(gethrtime() -
start) < timeout_ms); + + udev_unref(udev); + + return (ret); +#else + int settle_ms = 50; + long sleep_ms = 10; + hrtime_t start, settle; + struct stat64 statbuf; + + start = gethrtime(); + settle = 0; + + do { + errno = 0; + if ((stat64(path, &statbuf) == 0) && (errno == 0)) { + if (settle == 0) + settle = gethrtime(); + else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) + return (0); + } else if (errno != ENOENT) { + return (errno); + } + + usleep(sleep_ms * MILLISEC); + } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); + + return (ENODEV); +#endif /* HAVE_LIBUDEV */ +} + +/* + * Encode the persistent devices strings + * used for the vdev disk label + */ +static int +encode_device_strings(const char *path, vdev_dev_strs_t *ds, + boolean_t wholedisk) +{ +#ifdef HAVE_LIBUDEV + struct udev *udev; + struct udev_device *dev = NULL; + char nodepath[MAXPATHLEN]; + char *sysname; + int ret = ENODEV; + hrtime_t start; + + if ((udev = udev_new()) == NULL) + return (ENXIO); + + /* resolve path to a runtime device node instance */ + if (realpath(path, nodepath) == NULL) + goto no_dev; + + sysname = strrchr(nodepath, '/') + 1; + + /* + * Wait up to 3 seconds for udev to set up the device node context + */ + start = gethrtime(); + do { + dev = udev_device_new_from_subsystem_sysname(udev, "block", + sysname); + if (dev == NULL) + goto no_dev; + if (udev_device_is_ready(dev)) + break; /* udev ready */ + + udev_device_unref(dev); + dev = NULL; + + if (NSEC2MSEC(gethrtime() - start) < 10) + (void) sched_yield(); /* yield/busy wait up to 10ms */ + else + (void) usleep(10 * MILLISEC); + + } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC)); + + if (dev == NULL) + goto no_dev; + + /* + * Only whole disks require extra device strings + */ + if (!wholedisk && !udev_mpath_whole_disk(dev)) + goto no_dev; + + ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid)); + if (ret != 0) + goto no_dev_ref; + + /* physical location string (optional) */ + if 
(zfs_device_get_physical(dev, ds->vds_devphys, + sizeof (ds->vds_devphys)) != 0) { + ds->vds_devphys[0] = '\0'; /* empty string --> not available */ + } + +no_dev_ref: + udev_device_unref(dev); +no_dev: + udev_unref(udev); + + return (ret); +#else + return (ENOENT); +#endif +} + +/* + * Update a leaf vdev's persistent device strings + * + * - only applies for a dedicated leaf vdev (aka whole disk) + * - updated during pool create|add|attach|import + * - used for matching device matching during auto-{online,expand,replace} + * - stored in a leaf disk config label (i.e. alongside 'path' NVP) + * - these strings are currently not used in kernel (i.e. for vdev_disk_open) + * + * single device node example: + * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' + * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' + * + * multipath device node example: + * devid: 'dm-uuid-mpath-35000c5006304de3f' + * + * We also store the enclosure sysfs path for turning on enclosure LEDs + * (if applicable): + * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' + */ +void +update_vdev_config_dev_strs(nvlist_t *nv) +{ + vdev_dev_strs_t vds; + char *env, *type, *path; + uint64_t wholedisk = 0; + char *upath, *spath; + + /* + * For the benefit of legacy ZFS implementations, allow + * for opting out of devid strings in the vdev label. + * + * example use: + * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer + * + * explanation: + * Older ZFS on Linux implementations had issues when attempting to + * display pool config VDEV names if a "devid" NVP value is present + * in the pool's config. + * + * For example, a pool that originated on illumos platform would + * have a devid value in the config and "zpool status" would fail + * when listing the config. + * + * A pool can be stripped of any "devid" values on import or + * prevented from adding them on zpool create|add by setting + * ZFS_VDEV_DEVID_OPT_OUT. 
+ */ + env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); + if (env && (strtoul(env, NULL, 0) > 0 || + !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); + return; + } + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || + strcmp(type, VDEV_TYPE_DISK) != 0) { + return; + } + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) + return; + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); + + /* + * Update device string values in the config nvlist. + */ + if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { + (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); + if (vds.vds_devphys[0] != '\0') { + (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, + vds.vds_devphys); + } + + /* Add enclosure sysfs path (if disk is in an enclosure). */ + upath = zfs_get_underlying_path(path); + spath = zfs_get_enclosure_sysfs_path(upath); + if (spath) + nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + spath); + else + nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + + free(upath); + free(spath); + } else { + /* Clear out any stale entries. */ + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + } +} diff --git a/lib/libzutil/zutil_device_path.c b/lib/libzutil/zutil_device_path.c index 1dc0d4d1d..27ca80e50 100644 --- a/lib/libzutil/zutil_device_path.c +++ b/lib/libzutil/zutil_device_path.c @@ -23,54 +23,13 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ -#include <ctype.h> #include <errno.h> -#include <dirent.h> -#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <sys/efi_partition.h> #include <libzutil.h> -#ifdef HAVE_LIBUDEV -#include <libudev.h> -#endif - -/* - * Append partition suffix to an otherwise fully qualified device path. - * This is used to generate the name the full path as its stored in - * ZPOOL_CONFIG_PATH for whole disk devices. On success the new length - * of 'path' will be returned on error a negative value is returned. - */ -int -zfs_append_partition(char *path, size_t max_len) -{ - int len = strlen(path); - - if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) || - (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) { - if (len + 6 >= max_len) - return (-1); - - (void) strcat(path, "-part1"); - len += 6; - } else { - if (len + 2 >= max_len) - return (-1); - - if (isdigit(path[len-1])) { - (void) strcat(path, "p1"); - len += 2; - } else { - (void) strcat(path, "1"); - len += 1; - } - } - - return (len); -} /* * Given a shorthand device name check if a file by that name exists in any @@ -213,413 +172,3 @@ zfs_strcmp_pathname(const char *name, const char *cmp, int wholedisk) return (0); } - -/* - * Allocate and return the underlying device name for a device mapper device. - * If a device mapper device maps to multiple devices, return the first device. - * - * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a - * DM device (like /dev/disk/by-vdev/A0) are also allowed. - * - * Returns device name, or NULL on error or no match. If dm_name is not a DM - * device then return NULL. - * - * NOTE: The returned name string must be *freed*. 
- */ -static char * -dm_get_underlying_path(const char *dm_name) -{ - DIR *dp = NULL; - struct dirent *ep; - char *realp; - char *tmp = NULL; - char *path = NULL; - char *dev_str; - int size; - - if (dm_name == NULL) - return (NULL); - - /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */ - realp = realpath(dm_name, NULL); - if (realp == NULL) - return (NULL); - - /* - * If they preface 'dev' with a path (like "/dev") then strip it off. - * We just want the 'dm-N' part. - */ - tmp = strrchr(realp, '/'); - if (tmp != NULL) - dev_str = tmp + 1; /* +1 since we want the chr after '/' */ - else - dev_str = tmp; - - size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str); - if (size == -1 || !tmp) - goto end; - - dp = opendir(tmp); - if (dp == NULL) - goto end; - - /* Return first sd* entry in /sys/block/dm-N/slaves/ */ - while ((ep = readdir(dp))) { - if (ep->d_type != DT_DIR) { /* skip "." and ".." dirs */ - size = asprintf(&path, "/dev/%s", ep->d_name); - break; - } - } - -end: - if (dp != NULL) - closedir(dp); - free(tmp); - free(realp); - return (path); -} - -/* - * Return 1 if device is a device mapper or multipath device. - * Return 0 if not. - */ -int -zfs_dev_is_dm(const char *dev_name) -{ - - char *tmp; - tmp = dm_get_underlying_path(dev_name); - if (tmp == NULL) - return (0); - - free(tmp); - return (1); -} - -/* - * By "whole disk" we mean an entire physical disk (something we can - * label, toggle the write cache on, etc.) as opposed to the full - * capacity of a pseudo-device such as lofi or did. We act as if we - * are labeling the disk, which should be a pretty good test of whether - * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if - * it isn't. 
- */ -int -zfs_dev_is_whole_disk(const char *dev_name) -{ - struct dk_gpt *label; - int fd; - - if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0) - return (0); - - if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { - (void) close(fd); - return (0); - } - - efi_free(label); - (void) close(fd); - - return (1); -} - -/* - * Lookup the underlying device for a device name - * - * Often you'll have a symlink to a device, a partition device, - * or a multipath device, and want to look up the underlying device. - * This function returns the underlying device name. If the device - * name is already the underlying device, then just return the same - * name. If the device is a DM device with multiple underlying devices - * then return the first one. - * - * For example: - * - * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda - * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 - * returns: /dev/sda - * - * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb) - * dev_name: /dev/mapper/mpatha - * returns: /dev/sda (first device) - * - * 3. /dev/sda (already the underlying device) - * dev_name: /dev/sda - * returns: /dev/sda - * - * 4. /dev/dm-3 (mapped to /dev/sda) - * dev_name: /dev/dm-3 - * returns: /dev/sda - * - * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9 - * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 - * returns: /dev/sdb - * - * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2 - * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a - * returns: /dev/sda - * - * Returns underlying device name, or NULL on error or no match. - * - * NOTE: The returned name string must be *freed*. 
- */ -char * -zfs_get_underlying_path(const char *dev_name) -{ - char *name = NULL; - char *tmp; - - if (dev_name == NULL) - return (NULL); - - tmp = dm_get_underlying_path(dev_name); - - /* dev_name not a DM device, so just un-symlinkize it */ - if (tmp == NULL) - tmp = realpath(dev_name, NULL); - - if (tmp != NULL) { - name = zfs_strip_partition_path(tmp); - free(tmp); - } - - return (name); -} - -/* - * Given a dev name like "sda", return the full enclosure sysfs path to - * the disk. You can also pass in the name with "/dev" prepended - * to it (like /dev/sda). - * - * For example, disk "sda" in enclosure slot 1: - * dev: "sda" - * returns: "/sys/class/enclosure/1:0:3:0/Slot 1" - * - * 'dev' must be a non-devicemapper device. - * - * Returned string must be freed. - */ -char * -zfs_get_enclosure_sysfs_path(const char *dev_name) -{ - DIR *dp = NULL; - struct dirent *ep; - char buf[MAXPATHLEN]; - char *tmp1 = NULL; - char *tmp2 = NULL; - char *tmp3 = NULL; - char *path = NULL; - size_t size; - int tmpsize; - - if (dev_name == NULL) - return (NULL); - - /* If they preface 'dev' with a path (like "/dev") then strip it off */ - tmp1 = strrchr(dev_name, '/'); - if (tmp1 != NULL) - dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */ - - tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name); - if (tmpsize == -1 || tmp1 == NULL) { - tmp1 = NULL; - goto end; - } - - dp = opendir(tmp1); - if (dp == NULL) { - tmp1 = NULL; /* To make free() at the end a NOP */ - goto end; - } - - /* - * Look though all sysfs entries in /sys/block/<dev>/device for - * the enclosure symlink. - */ - while ((ep = readdir(dp))) { - /* Ignore everything that's not our enclosure_device link */ - if (strstr(ep->d_name, "enclosure_device") == NULL) - continue; - - if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 || - tmp2 == NULL) - break; - - size = readlink(tmp2, buf, sizeof (buf)); - - /* Did readlink fail or crop the link name? 
*/ - if (size == -1 || size >= sizeof (buf)) { - free(tmp2); - tmp2 = NULL; /* To make free() at the end a NOP */ - break; - } - - /* - * We got a valid link. readlink() doesn't terminate strings - * so we have to do it. - */ - buf[size] = '\0'; - - /* - * Our link will look like: - * - * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1" - * - * We want to grab the "enclosure/1:0:3:0/SLOT 1" part - */ - tmp3 = strstr(buf, "enclosure"); - if (tmp3 == NULL) - break; - - if (asprintf(&path, "/sys/class/%s", tmp3) == -1) { - /* If asprintf() fails, 'path' is undefined */ - path = NULL; - break; - } - - if (path == NULL) - break; - } - -end: - free(tmp2); - free(tmp1); - - if (dp != NULL) - closedir(dp); - - return (path); -} - -/* - * Remove partition suffix from a vdev path. Partition suffixes may take three - * forms: "-partX", "pX", or "X", where X is a string of digits. The second - * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The - * third case only occurs when preceded by a string matching the regular - * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk. 
- * - * caller must free the returned string - */ -char * -zfs_strip_partition(char *path) -{ - char *tmp = strdup(path); - char *part = NULL, *d = NULL; - if (!tmp) - return (NULL); - - if ((part = strstr(tmp, "-part")) && part != tmp) { - d = part + 5; - } else if ((part = strrchr(tmp, 'p')) && - part > tmp + 1 && isdigit(*(part-1))) { - d = part + 1; - } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') && - tmp[1] == 'd') { - for (d = &tmp[2]; isalpha(*d); part = ++d) { } - } else if (strncmp("xvd", tmp, 3) == 0) { - for (d = &tmp[3]; isalpha(*d); part = ++d) { } - } - if (part && d && *d != '\0') { - for (; isdigit(*d); d++) { } - if (*d == '\0') - *part = '\0'; - } - - return (tmp); -} - -/* - * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname - * - * path: /dev/sda1 - * returns: /dev/sda - * - * Returned string must be freed. - */ -char * -zfs_strip_partition_path(char *path) -{ - char *newpath = strdup(path); - char *sd_offset; - char *new_sd; - - if (!newpath) - return (NULL); - - /* Point to "sda1" part of "/dev/sda1" */ - sd_offset = strrchr(newpath, '/') + 1; - - /* Get our new name "sda" */ - new_sd = zfs_strip_partition(sd_offset); - if (!new_sd) { - free(newpath); - return (NULL); - } - - /* Paste the "sda" where "sda1" was */ - strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1); - - /* Free temporary "sda" */ - free(new_sd); - - return (newpath); -} - -#ifdef HAVE_LIBUDEV -/* - * A disk is considered a multipath whole disk when: - * DEVNAME key value has "dm-" - * DM_NAME key value has "mpath" prefix - * DM_UUID key exists - * ID_PART_TABLE_TYPE key does not exist or is not gpt - */ -static boolean_t -udev_mpath_whole_disk(struct udev_device *dev) -{ - const char *devname, *type, *uuid; - - devname = udev_device_get_property_value(dev, "DEVNAME"); - type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); - uuid = udev_device_get_property_value(dev, "DM_UUID"); - - if ((devname != NULL && strncmp(devname, 
"/dev/dm-", 8) == 0) && - ((type == NULL) || (strcmp(type, "gpt") != 0)) && - (uuid != NULL)) { - return (B_TRUE); - } - - return (B_FALSE); -} - -/* - * Check if a disk is effectively a multipath whole disk - */ -boolean_t -is_mpath_whole_disk(const char *path) -{ - struct udev *udev; - struct udev_device *dev = NULL; - char nodepath[MAXPATHLEN]; - char *sysname; - boolean_t wholedisk = B_FALSE; - - if (realpath(path, nodepath) == NULL) - return (B_FALSE); - sysname = strrchr(nodepath, '/') + 1; - if (strncmp(sysname, "dm-", 3) != 0) - return (B_FALSE); - if ((udev = udev_new()) == NULL) - return (B_FALSE); - if ((dev = udev_device_new_from_subsystem_sysname(udev, "block", - sysname)) == NULL) { - udev_device_unref(dev); - return (B_FALSE); - } - - wholedisk = udev_mpath_whole_disk(dev); - - udev_device_unref(dev); - return (wholedisk); -} -#endif diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index bb7db259c..e85ce4594 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -51,10 +51,6 @@ #include <errno.h> #include <libintl.h> #include <libgen.h> -#ifdef HAVE_LIBUDEV -#include <libudev.h> -#include <sched.h> -#endif #include <stddef.h> #include <stdlib.h> #include <string.h> @@ -66,34 +62,15 @@ #include <sys/fs/zfs.h> #include <sys/vdev_impl.h> -#include <blkid/blkid.h> #include <thread_pool.h> #include <libzutil.h> #include <libnvpair.h> -#define IMPORT_ORDER_PREFERRED_1 1 -#define IMPORT_ORDER_PREFERRED_2 2 -#define IMPORT_ORDER_SCAN_OFFSET 10 -#define IMPORT_ORDER_DEFAULT 100 -#define DEFAULT_IMPORT_PATH_SIZE 9 - -#define EZFS_BADCACHE "invalid or missing cache file" -#define EZFS_BADPATH "must be an absolute path" -#define EZFS_NOMEM "out of memory" -#define EZFS_EACESS "some devices require root privileges" - -typedef struct libpc_handle { - boolean_t lpc_printerr; - boolean_t lpc_open_access_error; - boolean_t lpc_desc_active; - char lpc_desc[1024]; - const pool_config_ops_t *lpc_ops; - void *lpc_lib_handle; 
-} libpc_handle_t; +#include "zutil_import.h" /*PRINTFLIKE2*/ static void -zfs_error_aux(libpc_handle_t *hdl, const char *fmt, ...) +zutil_error_aux(libpc_handle_t *hdl, const char *fmt, ...) { va_list ap; @@ -106,7 +83,8 @@ zfs_error_aux(libpc_handle_t *hdl, const char *fmt, ...) } static void -zfs_verror(libpc_handle_t *hdl, const char *error, const char *fmt, va_list ap) +zutil_verror(libpc_handle_t *hdl, const char *error, const char *fmt, + va_list ap) { char action[1024]; @@ -127,13 +105,13 @@ zfs_verror(libpc_handle_t *hdl, const char *error, const char *fmt, va_list ap) /*PRINTFLIKE3*/ static int -zfs_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...) +zutil_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - zfs_verror(hdl, error, fmt, ap); + zutil_verror(hdl, error, fmt, ap); va_end(ap); @@ -141,36 +119,36 @@ zfs_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...) } static int -zfs_error(libpc_handle_t *hdl, const char *error, const char *msg) +zutil_error(libpc_handle_t *hdl, const char *error, const char *msg) { - return (zfs_error_fmt(hdl, error, "%s", msg)); + return (zutil_error_fmt(hdl, error, "%s", msg)); } static int -no_memory(libpc_handle_t *hdl) +zutil_no_memory(libpc_handle_t *hdl) { - zfs_error(hdl, EZFS_NOMEM, "internal error"); + zutil_error(hdl, EZFS_NOMEM, "internal error"); exit(1); } -static void * -zfs_alloc(libpc_handle_t *hdl, size_t size) +void * +zutil_alloc(libpc_handle_t *hdl, size_t size) { void *data; if ((data = calloc(1, size)) == NULL) - (void) no_memory(hdl); + (void) zutil_no_memory(hdl); return (data); } -static char * -zfs_strdup(libpc_handle_t *hdl, const char *str) +char * +zutil_strdup(libpc_handle_t *hdl, const char *str) { char *ret; if ((ret = strdup(str)) == NULL) - (void) no_memory(hdl); + (void) zutil_no_memory(hdl); return (ret); } @@ -209,472 +187,6 @@ typedef struct pool_list { name_entry_t *names; } pool_list_t; 
-#define ZVOL_ROOT "/dev/zvol" -#define DEV_BYID_PATH "/dev/disk/by-id/" - -/* - * Linux persistent device strings for vdev labels - * - * based on libudev for consistency with libudev disk add/remove events - */ - -typedef struct vdev_dev_strs { - char vds_devid[128]; - char vds_devphys[128]; -} vdev_dev_strs_t; - -#ifdef HAVE_LIBUDEV -/* - * Obtain the persistent device id string (describes what) - * - * used by ZED vdev matching for auto-{online,expand,replace} - */ -int -zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) -{ - struct udev_list_entry *entry; - const char *bus; - char devbyid[MAXPATHLEN]; - - /* The bus based by-id path is preferred */ - bus = udev_device_get_property_value(dev, "ID_BUS"); - - if (bus == NULL) { - const char *dm_uuid; - - /* - * For multipath nodes use the persistent uuid based identifier - * - * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f - */ - dm_uuid = udev_device_get_property_value(dev, "DM_UUID"); - if (dm_uuid != NULL) { - (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid); - return (0); - } - - /* - * For volumes use the persistent /dev/zvol/dataset identifier - */ - entry = udev_device_get_devlinks_list_entry(dev); - while (entry != NULL) { - const char *name; - - name = udev_list_entry_get_name(entry); - if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { - (void) strlcpy(bufptr, name, buflen); - return (0); - } - entry = udev_list_entry_get_next(entry); - } - - /* - * NVME 'by-id' symlinks are similar to bus case - */ - struct udev_device *parent; - - parent = udev_device_get_parent_with_subsystem_devtype(dev, - "nvme", NULL); - if (parent != NULL) - bus = "nvme"; /* continue with bus symlink search */ - else - return (ENODATA); - } - - /* - * locate the bus specific by-id link - */ - (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus); - entry = udev_device_get_devlinks_list_entry(dev); - while (entry != NULL) { - const char *name; - - name = 
udev_list_entry_get_name(entry); - if (strncmp(name, devbyid, strlen(devbyid)) == 0) { - name += strlen(DEV_BYID_PATH); - (void) strlcpy(bufptr, name, buflen); - return (0); - } - entry = udev_list_entry_get_next(entry); - } - - return (ENODATA); -} - -/* - * Obtain the persistent physical location string (describes where) - * - * used by ZED vdev matching for auto-{online,expand,replace} - */ -int -zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) -{ - const char *physpath = NULL; - struct udev_list_entry *entry; - - /* - * Normal disks use ID_PATH for their physical path. - */ - physpath = udev_device_get_property_value(dev, "ID_PATH"); - if (physpath != NULL && strlen(physpath) > 0) { - (void) strlcpy(bufptr, physpath, buflen); - return (0); - } - - /* - * Device mapper devices are virtual and don't have a physical - * path. For them we use ID_VDEV instead, which is setup via the - * /etc/vdev_id.conf file. ID_VDEV provides a persistent path - * to a virtual device. If you don't have vdev_id.conf setup, - * you cannot use multipath autoreplace with device mapper. - */ - physpath = udev_device_get_property_value(dev, "ID_VDEV"); - if (physpath != NULL && strlen(physpath) > 0) { - (void) strlcpy(bufptr, physpath, buflen); - return (0); - } - - /* - * For ZFS volumes use the persistent /dev/zvol/dataset identifier - */ - entry = udev_device_get_devlinks_list_entry(dev); - while (entry != NULL) { - physpath = udev_list_entry_get_name(entry); - if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { - (void) strlcpy(bufptr, physpath, buflen); - return (0); - } - entry = udev_list_entry_get_next(entry); - } - - /* - * For all other devices fallback to using the by-uuid name. 
- */ - entry = udev_device_get_devlinks_list_entry(dev); - while (entry != NULL) { - physpath = udev_list_entry_get_name(entry); - if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) { - (void) strlcpy(bufptr, physpath, buflen); - return (0); - } - entry = udev_list_entry_get_next(entry); - } - - return (ENODATA); -} - -/* - * A disk is considered a multipath whole disk when: - * DEVNAME key value has "dm-" - * DM_NAME key value has "mpath" prefix - * DM_UUID key exists - * ID_PART_TABLE_TYPE key does not exist or is not gpt - */ -static boolean_t -udev_mpath_whole_disk(struct udev_device *dev) -{ - const char *devname, *type, *uuid; - - devname = udev_device_get_property_value(dev, "DEVNAME"); - type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE"); - uuid = udev_device_get_property_value(dev, "DM_UUID"); - - if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) && - ((type == NULL) || (strcmp(type, "gpt") != 0)) && - (uuid != NULL)) { - return (B_TRUE); - } - - return (B_FALSE); -} - -static int -udev_device_is_ready(struct udev_device *dev) -{ -#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED - return (udev_device_get_is_initialized(dev)); -#else - /* wait for DEVLINKS property to be initialized */ - return (udev_device_get_property_value(dev, "DEVLINKS") != NULL); -#endif -} -#endif /* HAVE_LIBUDEV */ - -/* - * Wait up to timeout_ms for udev to set up the device node. The device is - * considered ready when libudev determines it has been initialized, all of - * the device links have been verified to exist, and it has been allowed to - * settle. At this point the device the device can be accessed reliably. - * Depending on the complexity of the udev rules this process could take - * several seconds. 
- */ -int -zpool_label_disk_wait(const char *path, int timeout_ms) -{ -#ifdef HAVE_LIBUDEV - struct udev *udev; - struct udev_device *dev = NULL; - char nodepath[MAXPATHLEN]; - char *sysname = NULL; - int ret = ENODEV; - int settle_ms = 50; - long sleep_ms = 10; - hrtime_t start, settle; - - if ((udev = udev_new()) == NULL) - return (ENXIO); - - start = gethrtime(); - settle = 0; - - do { - if (sysname == NULL) { - if (realpath(path, nodepath) != NULL) { - sysname = strrchr(nodepath, '/') + 1; - } else { - (void) usleep(sleep_ms * MILLISEC); - continue; - } - } - - dev = udev_device_new_from_subsystem_sysname(udev, - "block", sysname); - if ((dev != NULL) && udev_device_is_ready(dev)) { - struct udev_list_entry *links, *link = NULL; - - ret = 0; - links = udev_device_get_devlinks_list_entry(dev); - - udev_list_entry_foreach(link, links) { - struct stat64 statbuf; - const char *name; - - name = udev_list_entry_get_name(link); - errno = 0; - if (stat64(name, &statbuf) == 0 && errno == 0) - continue; - - settle = 0; - ret = ENODEV; - break; - } - - if (ret == 0) { - if (settle == 0) { - settle = gethrtime(); - } else if (NSEC2MSEC(gethrtime() - settle) >= - settle_ms) { - udev_device_unref(dev); - break; - } - } - } - - udev_device_unref(dev); - (void) usleep(sleep_ms * MILLISEC); - - } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); - - udev_unref(udev); - - return (ret); -#else - int settle_ms = 50; - long sleep_ms = 10; - hrtime_t start, settle; - struct stat64 statbuf; - - start = gethrtime(); - settle = 0; - - do { - errno = 0; - if ((stat64(path, &statbuf) == 0) && (errno == 0)) { - if (settle == 0) - settle = gethrtime(); - else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) - return (0); - } else if (errno != ENOENT) { - return (errno); - } - - usleep(sleep_ms * MILLISEC); - } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); - - return (ENODEV); -#endif /* HAVE_LIBUDEV */ -} - -/* - * Encode the persistent devices strings - * used for the vdev disk 
label - */ -static int -encode_device_strings(const char *path, vdev_dev_strs_t *ds, - boolean_t wholedisk) -{ -#ifdef HAVE_LIBUDEV - struct udev *udev; - struct udev_device *dev = NULL; - char nodepath[MAXPATHLEN]; - char *sysname; - int ret = ENODEV; - hrtime_t start; - - if ((udev = udev_new()) == NULL) - return (ENXIO); - - /* resolve path to a runtime device node instance */ - if (realpath(path, nodepath) == NULL) - goto no_dev; - - sysname = strrchr(nodepath, '/') + 1; - - /* - * Wait up to 3 seconds for udev to set up the device node context - */ - start = gethrtime(); - do { - dev = udev_device_new_from_subsystem_sysname(udev, "block", - sysname); - if (dev == NULL) - goto no_dev; - if (udev_device_is_ready(dev)) - break; /* udev ready */ - - udev_device_unref(dev); - dev = NULL; - - if (NSEC2MSEC(gethrtime() - start) < 10) - (void) sched_yield(); /* yield/busy wait up to 10ms */ - else - (void) usleep(10 * MILLISEC); - - } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC)); - - if (dev == NULL) - goto no_dev; - - /* - * Only whole disks require extra device strings - */ - if (!wholedisk && !udev_mpath_whole_disk(dev)) - goto no_dev; - - ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid)); - if (ret != 0) - goto no_dev_ref; - - /* physical location string (optional) */ - if (zfs_device_get_physical(dev, ds->vds_devphys, - sizeof (ds->vds_devphys)) != 0) { - ds->vds_devphys[0] = '\0'; /* empty string --> not available */ - } - -no_dev_ref: - udev_device_unref(dev); -no_dev: - udev_unref(udev); - - return (ret); -#else - return (ENOENT); -#endif -} - -/* - * Update a leaf vdev's persistent device strings (Linux only) - * - * - only applies for a dedicated leaf vdev (aka whole disk) - * - updated during pool create|add|attach|import - * - used for matching device matching during auto-{online,expand,replace} - * - stored in a leaf disk config label (i.e. alongside 'path' NVP) - * - these strings are currently not used in kernel (i.e. 
for vdev_disk_open) - * - * single device node example: - * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' - * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' - * - * multipath device node example: - * devid: 'dm-uuid-mpath-35000c5006304de3f' - * - * We also store the enclosure sysfs path for turning on enclosure LEDs - * (if applicable): - * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' - */ -void -update_vdev_config_dev_strs(nvlist_t *nv) -{ - vdev_dev_strs_t vds; - char *env, *type, *path; - uint64_t wholedisk = 0; - char *upath, *spath; - - /* - * For the benefit of legacy ZFS implementations, allow - * for opting out of devid strings in the vdev label. - * - * example use: - * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer - * - * explanation: - * Older ZFS on Linux implementations had issues when attempting to - * display pool config VDEV names if a "devid" NVP value is present - * in the pool's config. - * - * For example, a pool that originated on illumos platform would - * have a devid value in the config and "zpool status" would fail - * when listing the config. - * - * A pool can be stripped of any "devid" values on import or - * prevented from adding them on zpool create|add by setting - * ZFS_VDEV_DEVID_OPT_OUT. 
- */ - env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); - if (env && (strtoul(env, NULL, 0) > 0 || - !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { - (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); - (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); - return; - } - - if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || - strcmp(type, VDEV_TYPE_DISK) != 0) { - return; - } - if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) - return; - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); - - /* - * Update device string values in config nvlist - */ - if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { - (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); - if (vds.vds_devphys[0] != '\0') { - (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, - vds.vds_devphys); - } - - /* Add enclosure sysfs path (if disk is in an enclosure) */ - upath = zfs_get_underlying_path(path); - spath = zfs_get_enclosure_sysfs_path(upath); - if (spath) - nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, - spath); - else - nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); - - free(upath); - free(spath); - } else { - /* clear out any stale entries */ - (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); - (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); - (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); - } -} - /* * Go through and fix up any path and/or devid information for the given vdev * configuration. 
@@ -752,7 +264,6 @@ fix_paths(libpc_handle_t *hdl, nvlist_t *nv, name_entry_t *names) if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) return (-1); - /* Linux only - update ZPOOL_CONFIG_DEVID and ZPOOL_CONFIG_PHYS_PATH */ update_vdev_config_dev_strs(nv); return (0); @@ -780,10 +291,10 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, &state) == 0 && (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { - if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) + if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL) return (-1); - if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { + if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) { free(ne); return (-1); } @@ -825,7 +336,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, } if (pe == NULL) { - if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { + if ((pe = zutil_alloc(hdl, sizeof (pool_entry_t))) == NULL) { return (-1); } pe->pe_guid = pool_guid; @@ -843,7 +354,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, } if (ve == NULL) { - if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { + if ((ve = zutil_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { return (-1); } ve->ve_guid = top_guid; @@ -862,7 +373,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, } if (ce == NULL) { - if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { + if ((ce = zutil_alloc(hdl, sizeof (config_entry_t))) == NULL) { return (-1); } ce->ce_txg = txg; @@ -877,10 +388,10 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, * mappings so that we can fix up the configuration as necessary before * doing the import. 
*/ - if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) + if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL) return (-1); - if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { + if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) { free(ne); return (-1); } @@ -895,7 +406,7 @@ add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path, } static int -pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid, +zutil_pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid, boolean_t *isactive) { ASSERT(hdl->lpc_ops->pco_pool_active != NULL); @@ -907,7 +418,7 @@ pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid, } static nvlist_t * -refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig) +zutil_refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig) { ASSERT(hdl->lpc_ops->pco_refresh_config != NULL); @@ -1095,7 +606,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok, if (id >= children) { nvlist_t **newchild; - newchild = zfs_alloc(hdl, (id + 1) * + newchild = zutil_alloc(hdl, (id + 1) * sizeof (nvlist_t *)); if (newchild == NULL) goto nomem; @@ -1127,7 +638,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok, } else if (max_id > children) { nvlist_t **newchild; - newchild = zfs_alloc(hdl, (max_id) * + newchild = zutil_alloc(hdl, (max_id) * sizeof (nvlist_t *)); if (newchild == NULL) goto nomem; @@ -1265,7 +776,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok, verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); - if (pool_active(hdl, name, guid, &isactive) != 0) + if (zutil_pool_active(hdl, name, guid, &isactive) != 0) goto error; if (isactive) { @@ -1280,7 +791,7 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok, goto nomem; } - if ((nvl = refresh_config(hdl, config)) == NULL) { + if ((nvl = zutil_refresh_config(hdl, config)) == NULL) { nvlist_free(config); config = NULL; continue; @@ -1345,7 
+856,7 @@ add_pool: return (ret); nomem: - (void) no_memory(hdl); + (void) zutil_no_memory(hdl); error: nvlist_free(config); nvlist_free(ret); @@ -1443,19 +954,6 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels) return (0); } -typedef struct rdsk_node { - char *rn_name; /* Full path to device */ - int rn_order; /* Preferred order (low to high) */ - int rn_num_labels; /* Number of valid labels */ - uint64_t rn_vdev_guid; /* Expected vdev guid when set */ - libpc_handle_t *rn_hdl; - nvlist_t *rn_config; /* Label config */ - avl_tree_t *rn_avl; - avl_node_t rn_node; - pthread_mutex_t *rn_lock; - boolean_t rn_labelpaths; -} rdsk_node_t; - /* * Sorted by full path and then vdev guid to allow for multiple entries with * the same full path name. This is required because it's possible to @@ -1465,7 +963,7 @@ typedef struct rdsk_node { * include overwritten pool labels, devices which are visible from multiple * hosts and multipath devices. */ -static int +int slice_cache_compare(const void *arg1, const void *arg2) { const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; @@ -1481,20 +979,6 @@ slice_cache_compare(const void *arg1, const void *arg2) return (AVL_CMP(guid1, guid2)); } -static boolean_t -is_watchdog_dev(char *dev) -{ - /* For 'watchdog' dev */ - if (strcmp(dev, "watchdog") == 0) - return (B_TRUE); - - /* For 'watchdog<digit><whatever> */ - if (strstr(dev, "watchdog") == dev && isdigit(dev[8])) - return (B_TRUE); - - return (B_FALSE); -} - static int label_paths_impl(libpc_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid, uint64_t vdev_guid, char **path, char **devid) @@ -1539,7 +1023,7 @@ label_paths_impl(libpc_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid, * and store these strings as config_path and devid_path respectively. * The returned pointers are only valid as long as label remains valid. 
*/ -static int +int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, char **devid) { nvlist_t *nvroot; @@ -1559,165 +1043,13 @@ label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, char **devid) } static void -zpool_open_func(void *arg) -{ - rdsk_node_t *rn = arg; - libpc_handle_t *hdl = rn->rn_hdl; - struct stat64 statbuf; - nvlist_t *config; - char *bname, *dupname; - uint64_t vdev_guid = 0; - int error; - int num_labels = 0; - int fd; - - /* - * Skip devices with well known prefixes there can be side effects - * when opening devices which need to be avoided. - * - * hpet - High Precision Event Timer - * watchdog - Watchdog must be closed in a special way. - */ - dupname = zfs_strdup(hdl, rn->rn_name); - bname = basename(dupname); - error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname)); - free(dupname); - if (error) - return; - - /* - * Ignore failed stats. We only want regular files and block devices. - */ - if (stat64(rn->rn_name, &statbuf) != 0 || - (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) - return; - - /* - * Preferentially open using O_DIRECT to bypass the block device - * cache which may be stale for multipath devices. An EINVAL errno - * indicates O_DIRECT is unsupported so fallback to just O_RDONLY. - */ - fd = open(rn->rn_name, O_RDONLY | O_DIRECT); - if ((fd < 0) && (errno == EINVAL)) - fd = open(rn->rn_name, O_RDONLY); - - if ((fd < 0) && (errno == EACCES)) - hdl->lpc_open_access_error = B_TRUE; - - if (fd < 0) - return; - - /* - * This file is too small to hold a zpool - */ - if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) { - (void) close(fd); - return; - } - - error = zpool_read_label(fd, &config, &num_labels); - if (error != 0) { - (void) close(fd); - return; - } - - if (num_labels == 0) { - (void) close(fd); - nvlist_free(config); - return; - } - - /* - * Check that the vdev is for the expected guid. 
Additional entries - * are speculatively added based on the paths stored in the labels. - * Entries with valid paths but incorrect guids must be removed. - */ - error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); - if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) { - (void) close(fd); - nvlist_free(config); - return; - } - - (void) close(fd); - - rn->rn_config = config; - rn->rn_num_labels = num_labels; - - /* - * Add additional entries for paths described by this label. - */ - if (rn->rn_labelpaths) { - char *path = NULL; - char *devid = NULL; - rdsk_node_t *slice; - avl_index_t where; - int error; - - if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) - return; - - /* - * Allow devlinks to stabilize so all paths are available. - */ - zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT); - - if (path != NULL) { - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - slice->rn_name = zfs_strdup(hdl, path); - slice->rn_vdev_guid = vdev_guid; - slice->rn_avl = rn->rn_avl; - slice->rn_hdl = hdl; - slice->rn_order = IMPORT_ORDER_PREFERRED_1; - slice->rn_labelpaths = B_FALSE; - pthread_mutex_lock(rn->rn_lock); - if (avl_find(rn->rn_avl, slice, &where)) { - pthread_mutex_unlock(rn->rn_lock); - free(slice->rn_name); - free(slice); - } else { - avl_insert(rn->rn_avl, slice, where); - pthread_mutex_unlock(rn->rn_lock); - zpool_open_func(slice); - } - } - - if (devid != NULL) { - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - error = asprintf(&slice->rn_name, "%s%s", - DEV_BYID_PATH, devid); - if (error == -1) { - free(slice); - return; - } - - slice->rn_vdev_guid = vdev_guid; - slice->rn_avl = rn->rn_avl; - slice->rn_hdl = hdl; - slice->rn_order = IMPORT_ORDER_PREFERRED_2; - slice->rn_labelpaths = B_FALSE; - pthread_mutex_lock(rn->rn_lock); - if (avl_find(rn->rn_avl, slice, &where)) { - pthread_mutex_unlock(rn->rn_lock); - free(slice->rn_name); - free(slice); - } else { - avl_insert(rn->rn_avl, slice, where); - 
pthread_mutex_unlock(rn->rn_lock); - zpool_open_func(slice); - } - } - } -} - -static void zpool_find_import_scan_add_slice(libpc_handle_t *hdl, pthread_mutex_t *lock, avl_tree_t *cache, const char *path, const char *name, int order) { avl_index_t where; rdsk_node_t *slice; - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); if (asprintf(&slice->rn_name, "%s/%s", path, name) == -1) { free(slice); return; @@ -1753,8 +1085,8 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock, if (error == ENOENT) return (0); - zfs_error_aux(hdl, strerror(error)); - (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext( + zutil_error_aux(hdl, strerror(error)); + (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext( TEXT_DOMAIN, "cannot resolve path '%s'"), dir); return (error); } @@ -1762,8 +1094,8 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock, dirp = opendir(path); if (dirp == NULL) { error = errno; - zfs_error_aux(hdl, strerror(error)); - (void) zfs_error_fmt(hdl, EZFS_BADPATH, + zutil_error_aux(hdl, strerror(error)); + (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); return (error); } @@ -1798,8 +1130,8 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, * whole path because if it's a symlink, we want the * path of the symlink not where it points to. 
*/ - d = zfs_strdup(hdl, dir); - b = zfs_strdup(hdl, dir); + d = zutil_strdup(hdl, dir); + b = zutil_strdup(hdl, dir); dpath = dirname(d); name = basename(b); @@ -1810,8 +1142,8 @@ zpool_find_import_scan_path(libpc_handle_t *hdl, pthread_mutex_t *lock, goto out; } - zfs_error_aux(hdl, strerror(error)); - (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext( + zutil_error_aux(hdl, strerror(error)); + (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext( TEXT_DOMAIN, "cannot resolve path '%s'"), dir); goto out; } @@ -1829,7 +1161,7 @@ out: */ static int zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock, - avl_tree_t **slice_cache, char **dir, int dirs) + avl_tree_t **slice_cache, const char * const *dir, size_t dirs) { avl_tree_t *cache; rdsk_node_t *slice; @@ -1837,7 +1169,7 @@ zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock, int i, error; *slice_cache = NULL; - cache = zfs_alloc(hdl, sizeof (avl_tree_t)); + cache = zutil_alloc(hdl, sizeof (avl_tree_t)); avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); @@ -1849,15 +1181,15 @@ zpool_find_import_scan(libpc_handle_t *hdl, pthread_mutex_t *lock, if (error == ENOENT) continue; - zfs_error_aux(hdl, strerror(error)); - (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext( + zutil_error_aux(hdl, strerror(error)); + (void) zutil_error_fmt(hdl, EZFS_BADPATH, dgettext( TEXT_DOMAIN, "cannot resolve path '%s'"), dir[i]); goto error; } /* * If dir[i] is a directory, we walk through it and add all - * the entry to the cache. If it's not a directory, we just + * the entries to the cache. If it's not a directory, we just * add it to the cache. 
*/ if (S_ISDIR(sbuf.st_mode)) { @@ -1885,139 +1217,6 @@ error: return (error); } -static char * -zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { - "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ - "/dev/mapper", /* Use multipath devices before components */ - "/dev/disk/by-partlabel", /* Single unique entry set by user */ - "/dev/disk/by-partuuid", /* Generated partition uuid */ - "/dev/disk/by-label", /* Custom persistent labels */ - "/dev/disk/by-uuid", /* Single unique entry and persistent */ - "/dev/disk/by-id", /* May be multiple entries and persistent */ - "/dev/disk/by-path", /* Encodes physical location and persistent */ - "/dev" /* UNSAFE device names will change */ -}; - -const char * const * -zpool_default_search_paths(size_t *count) -{ - *count = DEFAULT_IMPORT_PATH_SIZE; - return ((const char * const *)zpool_default_import_path); -} - -/* - * Given a full path to a device determine if that device appears in the - * import search path. If it does return the first match and store the - * index in the passed 'order' variable, otherwise return an error. - */ -static int -zfs_path_order(char *name, int *order) -{ - int i = 0, error = ENOENT; - char *dir, *env, *envdup; - - env = getenv("ZPOOL_IMPORT_PATH"); - if (env) { - envdup = strdup(env); - dir = strtok(envdup, ":"); - while (dir) { - if (strncmp(name, dir, strlen(dir)) == 0) { - *order = i; - error = 0; - break; - } - dir = strtok(NULL, ":"); - i++; - } - free(envdup); - } else { - for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) { - if (strncmp(name, zpool_default_import_path[i], - strlen(zpool_default_import_path[i])) == 0) { - *order = i; - error = 0; - break; - } - } - } - - return (error); -} - -/* - * Use libblkid to quickly enumerate all known zfs devices. 
- */ -static int -zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, - avl_tree_t **slice_cache) -{ - rdsk_node_t *slice; - blkid_cache cache; - blkid_dev_iterate iter; - blkid_dev dev; - avl_index_t where; - int error; - - *slice_cache = NULL; - - error = blkid_get_cache(&cache, NULL); - if (error != 0) - return (error); - - error = blkid_probe_all_new(cache); - if (error != 0) { - blkid_put_cache(cache); - return (error); - } - - iter = blkid_dev_iterate_begin(cache); - if (iter == NULL) { - blkid_put_cache(cache); - return (EINVAL); - } - - error = blkid_dev_set_search(iter, "TYPE", "zfs_member"); - if (error != 0) { - blkid_dev_iterate_end(iter); - blkid_put_cache(cache); - return (error); - } - - *slice_cache = zfs_alloc(hdl, sizeof (avl_tree_t)); - avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t), - offsetof(rdsk_node_t, rn_node)); - - while (blkid_dev_next(iter, &dev) == 0) { - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - slice->rn_name = zfs_strdup(hdl, blkid_dev_devname(dev)); - slice->rn_vdev_guid = 0; - slice->rn_lock = lock; - slice->rn_avl = *slice_cache; - slice->rn_hdl = hdl; - slice->rn_labelpaths = B_TRUE; - - error = zfs_path_order(slice->rn_name, &slice->rn_order); - if (error == 0) - slice->rn_order += IMPORT_ORDER_SCAN_OFFSET; - else - slice->rn_order = IMPORT_ORDER_DEFAULT; - - pthread_mutex_lock(lock); - if (avl_find(*slice_cache, slice, &where)) { - free(slice->rn_name); - free(slice); - } else { - avl_insert(*slice_cache, slice, where); - } - pthread_mutex_unlock(lock); - } - - blkid_dev_iterate_end(iter); - blkid_put_cache(cache); - - return (0); -} - /* * Given a list of directories to search, find all pools stored on disk. This * includes partial pools which are not available to import. If no args are @@ -2044,21 +1243,19 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg) pthread_mutex_init(&lock, NULL); /* - * Locate pool member vdevs using libblkid or by directory scanning. 
+ * Locate pool member vdevs by blkid or by directory scanning. * On success a newly allocated AVL tree which is populated with an - * entry for each discovered vdev will be returned as the cache. - * It's the callers responsibility to consume and destroy this tree. + * entry for each discovered vdev will be returned in the cache. + * It's the caller's responsibility to consume and destroy this tree. */ if (iarg->scan || iarg->paths != 0) { - int dirs = iarg->paths; - char **dir = iarg->path; + size_t dirs = iarg->paths; + const char * const *dir = (const char * const *)iarg->path; - if (dirs == 0) { - dir = zpool_default_import_path; - dirs = DEFAULT_IMPORT_PATH_SIZE; - } + if (dirs == 0) + dir = zpool_default_search_paths(&dirs); - if (zpool_find_import_scan(hdl, &lock, &cache, dir, dirs) != 0) + if (zpool_find_import_scan(hdl, &lock, &cache, dir, dirs) != 0) return (NULL); } else { if (zpool_find_import_blkid(hdl, &lock, &cache) != 0) @@ -2192,21 +1389,21 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile, verify(poolname == NULL || guid == 0); if ((fd = open(cachefile, O_RDONLY)) < 0) { - zfs_error_aux(hdl, "%s", strerror(errno)); - (void) zfs_error(hdl, EZFS_BADCACHE, + zutil_error_aux(hdl, "%s", strerror(errno)); + (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to open cache file")); return (NULL); } if (fstat64(fd, &statbuf) != 0) { - zfs_error_aux(hdl, "%s", strerror(errno)); + zutil_error_aux(hdl, "%s", strerror(errno)); (void) close(fd); - (void) zfs_error(hdl, EZFS_BADCACHE, + (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to get size of cache file")); return (NULL); } - if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) { + if ((buf = zutil_alloc(hdl, statbuf.st_size)) == NULL) { (void) close(fd); return (NULL); } @@ -2214,7 +1411,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile, if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { (void) close(fd); free(buf); 
- (void) zfs_error(hdl, EZFS_BADCACHE, + (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to read cache file contents")); return (NULL); @@ -2224,7 +1421,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile, if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { free(buf); - (void) zfs_error(hdl, EZFS_BADCACHE, + (void) zutil_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "invalid or corrupt cache file contents")); return (NULL); @@ -2237,7 +1434,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile, * state. */ if (nvlist_alloc(&pools, 0, 0) != 0) { - (void) no_memory(hdl); + (void) zutil_no_memory(hdl); nvlist_free(raw); return (NULL); } @@ -2254,7 +1451,7 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile, if (guid != 0 && guid != this_guid) continue; - if (pool_active(hdl, name, this_guid, &active) != 0) { + if (zutil_pool_active(hdl, name, this_guid, &active) != 0) { nvlist_free(raw); nvlist_free(pools); return (NULL); @@ -2265,20 +1462,20 @@ zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile, if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE, cachefile) != 0) { - (void) no_memory(hdl); + (void) zutil_no_memory(hdl); nvlist_free(raw); nvlist_free(pools); return (NULL); } - if ((dst = refresh_config(hdl, src)) == NULL) { + if ((dst = zutil_refresh_config(hdl, src)) == NULL) { nvlist_free(raw); nvlist_free(pools); return (NULL); } if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { - (void) no_memory(hdl); + (void) zutil_no_memory(hdl); nvlist_free(dst); nvlist_free(raw); nvlist_free(pools); @@ -2312,7 +1509,7 @@ zpool_search_import(void *hdl, importargs_t *import, if ((pools == NULL || nvlist_empty(pools)) && handle.lpc_open_access_error && geteuid() != 0) { - (void) zfs_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN, + (void) zutil_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN, "no pools found")); } diff --git a/lib/libzutil/zutil_import.h 
b/lib/libzutil/zutil_import.h new file mode 100644 index 000000000..0108eb45c --- /dev/null +++ b/lib/libzutil/zutil_import.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright 2015 RackTop Systems. + * Copyright (c) 2016, Intel Corporation. 
+ */ +#ifndef _LIBZUTIL_ZUTIL_IMPORT_H_ +#define _LIBZUTIL_ZUTIL_IMPORT_H_ + +#define EZFS_BADCACHE "invalid or missing cache file" +#define EZFS_BADPATH "must be an absolute path" +#define EZFS_NOMEM "out of memory" +#define EZFS_EACESS "some devices require root privileges" + +#define IMPORT_ORDER_PREFERRED_1 1 +#define IMPORT_ORDER_PREFERRED_2 2 +#define IMPORT_ORDER_SCAN_OFFSET 10 +#define IMPORT_ORDER_DEFAULT 100 + +typedef struct libpc_handle { + boolean_t lpc_printerr; + boolean_t lpc_open_access_error; + boolean_t lpc_desc_active; + char lpc_desc[1024]; + const pool_config_ops_t *lpc_ops; + void *lpc_lib_handle; +} libpc_handle_t; + + +int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path, + char **devid); +int zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, + avl_tree_t **slice_cache); + +void * zutil_alloc(libpc_handle_t *hdl, size_t size); +char *zutil_strdup(libpc_handle_t *hdl, const char *str); + +typedef struct rdsk_node { + char *rn_name; /* Full path to device */ + int rn_order; /* Preferred order (low to high) */ + int rn_num_labels; /* Number of valid labels */ + uint64_t rn_vdev_guid; /* Expected vdev guid when set */ + libpc_handle_t *rn_hdl; + nvlist_t *rn_config; /* Label config */ + avl_tree_t *rn_avl; + avl_node_t rn_node; + pthread_mutex_t *rn_lock; + boolean_t rn_labelpaths; +} rdsk_node_t; + +int slice_cache_compare(const void *, const void *); + +void zpool_open_func(void *); + +#endif /* _LIBZUTIL_ZUTIL_IMPORT_H_ */ |