summaryrefslogtreecommitdiffstats
path: root/lib/libzfs
diff options
context:
space:
mode:
authorTony Hutter <[email protected]>2016-10-19 12:55:59 -0700
committerBrian Behlendorf <[email protected]>2016-10-19 12:55:59 -0700
commit6078881aa18a45ea065a887e2a8606279cdc0329 (patch)
treed6af96c545969994afdf2bf84ee1484b09cdf76c /lib/libzfs
parent7c502b0b1de8d3d341c026760df5915ad4be794a (diff)
Multipath autoreplace, control enclosure LEDs, event rate limiting
1. Enable multipath autoreplace support for FMA. This extends FMA autoreplace to work with multipath disks. This requires libdevmapper to be installed at build time. 2. Turn on/off fault LEDs when VDEVs become degraded/faulted/online Set ZED_USE_ENCLOSURE_LEDS=1 in zed.rc to have ZED turn on/off the enclosure LED for a drive when a drive becomes FAULTED/DEGRADED. Your enclosure must be supported by the Linux SES driver for this to work. The enclosure LED scripts work for multipath devices as well. The scripts will clear the LED when the fault is cleared. 3. Rate limit ZIO delay and checksum events so as not to flood ZED ZIO delay and checksum events are rate limited to 5/sec in the zfs module. Reviewed-by: Richard Laager <[email protected]> Reviewed by: Don Brady <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Tony Hutter <[email protected]> Closes #2449 Closes #3017 Closes #5159
Diffstat (limited to 'lib/libzfs')
-rw-r--r--lib/libzfs/Makefile.am2
-rw-r--r--lib/libzfs/libzfs_import.c38
-rw-r--r--lib/libzfs/libzfs_pool.c200
3 files changed, 222 insertions, 18 deletions
diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am
index f1260ea71..8e596b001 100644
--- a/lib/libzfs/Makefile.am
+++ b/lib/libzfs/Makefile.am
@@ -35,7 +35,7 @@ libzfs_la_LIBADD = \
$(top_builddir)/lib/libnvpair/libnvpair.la \
$(top_builddir)/lib/libzpool/libzpool.la
-libzfs_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV)
+libzfs_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV) $(LIBDEVMAPPER)
libzfs_la_LDFLAGS = -version-info 2:0:0
EXTRA_DIST = $(libzfs_pc_DATA) $(USER_C)
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index edd4e5d58..e76f7432a 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -172,22 +172,34 @@ zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
- const char *physpath, *value;
+ const char *physpath = NULL;
/*
- * Skip indirect multipath device nodes
+ * Normal disks use ID_PATH for their physical path. Device mapper
+ * devices are virtual and don't have a physical path. For them we
+ * use ID_VDEV instead, which is setup via the /etc/vdev_id.conf file.
+ * ID_VDEV provides a persistent path to a virtual device. If you
+ * don't have vdev_id.conf setup, you cannot use multipath autoreplace.
*/
- value = udev_device_get_property_value(dev, "DM_MULTIPATH_DEVICE_PATH");
- if (value != NULL && strcmp(value, "1") == 0)
- return (ENODATA); /* skip physical for multipath nodes */
-
- physpath = udev_device_get_property_value(dev, "ID_PATH");
- if (physpath != NULL && physpath[0] != '\0') {
- (void) strlcpy(bufptr, physpath, buflen);
- return (0);
+ if (!((physpath = udev_device_get_property_value(dev, "ID_PATH")) &&
+ physpath[0])) {
+ if (!((physpath =
+ udev_device_get_property_value(dev, "ID_VDEV")) &&
+ physpath[0])) {
+ return (ENODATA);
+ }
}
- return (ENODATA);
+ (void) strlcpy(bufptr, physpath, buflen);
+
+ return (0);
+}
+
+boolean_t
+udev_is_mpath(struct udev_device *dev)
+{
+ return udev_device_get_property_value(dev, "DM_UUID") &&
+ udev_device_get_property_value(dev, "MPATH_SBIN_PATH");
}
/*
@@ -200,15 +212,13 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
- const char *devname, *mapname, *type, *uuid;
+ const char *devname, *type, *uuid;
devname = udev_device_get_property_value(dev, "DEVNAME");
- mapname = udev_device_get_property_value(dev, "DM_NAME");
type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
uuid = udev_device_get_property_value(dev, "DM_UUID");
if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
- (mapname != NULL && strncmp(mapname, "mpath", 5) == 0) &&
((type == NULL) || (strcmp(type, "gpt") != 0)) &&
(uuid != NULL)) {
return (B_TRUE);
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 68cc5f3ee..ebca76834 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -41,6 +41,9 @@
#include <sys/vtoc.h>
#include <sys/zfs_ioctl.h>
#include <dlfcn.h>
+#if HAVE_LIBDEVMAPPER
+#include <libdevmapper.h>
+#endif
#include "zfs_namecheck.h"
#include "zfs_prop.h"
@@ -3401,10 +3404,12 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
* caller must free the returned string
*/
char *
-zfs_strip_partition(libzfs_handle_t *hdl, char *path)
+zfs_strip_partition(char *path)
{
- char *tmp = zfs_strdup(hdl, path);
+ char *tmp = strdup(path);
char *part = NULL, *d = NULL;
+ if (!tmp)
+ return (NULL);
if ((part = strstr(tmp, "-part")) && part != tmp) {
d = part + 5;
@@ -3422,6 +3427,7 @@ zfs_strip_partition(libzfs_handle_t *hdl, char *path)
if (*d == '\0')
*part = '\0';
}
+
return (tmp);
}
@@ -3544,7 +3550,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
*/
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
== 0 && value && !(name_flags & VDEV_NAME_PATH)) {
- return (zfs_strip_partition(hdl, path));
+ return (zfs_strip_partition(path));
}
} else {
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
@@ -4310,3 +4316,191 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
return (0);
}
+
+#if HAVE_LIBDEVMAPPER
+static void libdevmapper_dummy_log(int level, const char *file, int line,
+ int dm_errno_or_class, const char *f, ...) {}
+
+/* Disable libdevmapper error logging */
+static void disable_libdevmapper_errors(void) {
+ dm_log_with_errno_init(libdevmapper_dummy_log);
+}
+/* Enable libdevmapper error logging */
+static void enable_libdevmapper_errors(void) {
+ dm_log_with_errno_init(NULL);
+}
+#endif
+
+/*
+ * Allocate and return the underlying device name for a device mapper device.
+ * If a device mapper device maps to multiple devices, return the first device.
+ *
+ * For example, dm_name = "/dev/dm-0" could return "/dev/sda"
+ *
+ * dm_name should include the "/dev[/mapper]" prefix.
+ *
+ * Returns device name, or NULL on error or no match. If dm_name is not a DM
+ * device then return NULL.
+ *
+ * NOTE: The returned name string must be *freed*.
+ */
+static char * dm_get_underlying_path(char *dm_name)
+{
+ char *name = NULL;
+#if HAVE_LIBDEVMAPPER
+ char *tmp;
+ struct dm_task *dmt = NULL;
+ struct dm_tree *dt = NULL;
+ struct dm_tree_node *root, *child;
+ void *handle = NULL;
+ struct dm_info info;
+ const struct dm_info *child_info;
+
+ /*
+ * Disable libdevmapper errors. It's entirely possible user is not
+ * running devmapper, or that dm_name is not a devmapper device.
+ * That's totally ok, we will just harmlessly and silently return NULL.
+ */
+ disable_libdevmapper_errors();
+
+ /*
+ * libdevmapper tutorial
+ *
+ * libdevmapper is basically a fancy wrapper for its ioctls. You
+ * create a "task", fill in the needed info to the task (fill in the
+ * ioctl fields), then run the task (call the ioctl).
+ *
+ * First we need the major/minor number for our DM device.
+ */
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+ goto end;
+
+ /* Lookup the name in libdevmapper */
+ if (!dm_task_set_name(dmt, dm_name)) {
+ enable_libdevmapper_errors();
+ goto end;
+ }
+
+ if (!dm_task_run(dmt))
+ goto end;
+
+ /* Get DM device's major/minor */
+ if (!dm_task_get_info(dmt, &info))
+ goto end;
+
+ /* We have major/minor number. Lookup the dm device's children */
+ if (!(dt = dm_tree_create()))
+ goto end;
+
+ /* We add the device into the tree and its children get populated */
+ if (!dm_tree_add_dev(dt, info.major, info.minor))
+ goto end;
+
+ if (!(root = dm_tree_find_node(dt, 0, 0)))
+ goto end;
+
+ if (!(child = dm_tree_next_child(&handle, root, 1)))
+ goto end;
+
+ /* Get child's major/minor numbers */
+ if (!(child_info = dm_tree_node_get_info(child)))
+ goto end;
+
+ if ((asprintf(&tmp, "/dev/block/%d:%d", child_info->major,
+ child_info->minor) == -1) || !tmp)
+ goto end;
+
+ /* Further translate /dev/block/ name into the normal name */
+ name = realpath(tmp, NULL);
+ free(tmp);
+
+end:
+ if (dmt)
+ dm_task_destroy(dmt);
+ if (dt)
+ dm_tree_free(dt);
+ enable_libdevmapper_errors();
+#endif /* HAVE_LIBDEVMAPPER */
+
+ return (name);
+}
+
+/*
+ * Return 1 if device is a device mapper or multipath device.
+ * Return 0 if not.
+ */
+int
+dev_is_dm(char *devname)
+{
+
+ char *tmp;
+ tmp = dm_get_underlying_path(devname);
+ if (!tmp)
+ return (0);
+
+ free(tmp);
+ return (1);
+}
+
+/*
+ * Lookup the underlying device for a device name
+ *
+ * Often you'll have a symlink to a device, a partition device,
+ * or a multipath device, and want to look up the underlying device.
+ * This function returns the underlying device name. If the device
+ * name is already the underlying device, then just return the same
+ * name. If the device is a DM device with multiple underlying devices
+ * then return the first one.
+ *
+ * For example:
+ *
+ * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
+ * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
+ * returns: /dev/sda
+ *
+ * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
+ * dev_name: /dev/mapper/mpatha
+ * returns: /dev/sda (first device)
+ *
+ * 3. /dev/sda (already the underlying device)
+ * dev_name: /dev/sda
+ * returns: /dev/sda
+ *
+ * 4. /dev/dm-3 (mapped to /dev/sda)
+ * dev_name: /dev/dm-3
+ * returns: /dev/sda
+ *
+ * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
+ * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
+ * returns: /dev/sdb
+ *
+ * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
+ * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
+ * returns: /dev/sda
+ *
+ * Returns underlying device name, or NULL on error or no match.
+ *
+ * NOTE: The returned name string must be *freed*.
+ */
+char *
+get_underlying_path(libzfs_handle_t *hdl, char *dev_name)
+{
+ char *name = NULL;
+ char *tmp;
+
+ if (!dev_name)
+ return (NULL);
+
+ tmp = dm_get_underlying_path(dev_name);
+
+ /* dev_name not a DM device, so just un-symlinkize it */
+ if (!tmp)
+ tmp = realpath(dev_name, NULL);
+
+ if (tmp) {
+ name = zfs_strip_partition(tmp);
+ free(tmp);
+ }
+
+ return (name);
+}