aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony Hutter <[email protected]>2021-11-09 16:50:18 -0800
committerTony Hutter <[email protected]>2021-11-05 07:51:21 -0700
commit1fca9586154cc17637c099112b28dcb5d3950f8b (patch)
tree757c5c4390e765a0f77fef97ea4235f3c98702a0
parent22b0891dbb6af5663201a035ab759d1f51fef3cd (diff)
zed: Control NVMe fault LEDs
The ZED code currently can only turn on the fault LED for a faulted disk in a JBOD enclosure. This extends support for faulted NVMe disks as well. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Tony Hutter <[email protected]> Closes #12648 Closes #12695
-rwxr-xr-xcmd/zed/zed.d/statechange-led.sh73
-rw-r--r--cmd/zed/zed.d/zed.rc4
-rwxr-xr-xcmd/zpool/zpool.d/ses8
-rw-r--r--lib/libzutil/os/linux/zutil_device_path_os.c146
-rw-r--r--lib/libzutil/zutil_nicenum.c9
5 files changed, 229 insertions, 11 deletions
diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh
index 0f9da3204..26e6064fa 100755
--- a/cmd/zed/zed.d/statechange-led.sh
+++ b/cmd/zed/zed.d/statechange-led.sh
@@ -29,7 +29,8 @@
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-if [ ! -d /sys/class/enclosure ] ; then
+if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
+ # No JBOD enclosure or NVMe slots
exit 1
fi
@@ -92,6 +93,29 @@ check_and_set_led()
done
}
+# Fault LEDs for JBODs and NVMe drives are handled a little differently.
+#
+# On JBODs the fault LED is called 'fault' and on a path like this:
+#
+# /sys/class/enclosure/0:0:1:0/SLOT 10/fault
+#
+# On NVMe it's called 'attention' and on a path like this:
+#
+# /sys/bus/pci/slots/0/attention
+#
+# This function returns the full path to the fault LED file for a given
+# enclosure/slot directory.
+#
+path_to_led()
+{
+	# $1 is an enclosure/slot directory.  Print the LED control file it
+	# exposes, preferring 'fault' (JBOD) over 'attention' (NVMe slot).
+	# Nothing is printed when the directory has neither file.
+	dir=$1
+
+	if [ -f "$dir/fault" ] ; then
+		echo "$dir/fault"
+		return
+	fi
+
+	if [ -f "$dir/attention" ] ; then
+		echo "$dir/attention"
+	fi
+}
+
state_to_val()
{
state="$1"
@@ -105,6 +129,38 @@ state_to_val()
esac
}
+#
+# Given a nvme name like 'nvme0n1', pass back its slot directory
+# like "/sys/bus/pci/slots/0"
+#
+nvme_dev_to_slot()
+{
+ dev="$1"
+
+ # Get the address "0000:01:00.0"
+ address=$(cat "/sys/class/block/$dev/device/address")
+
+ # For each /sys/bus/pci/slots subdir that is an actual number
+ # (rather than weird directories like "1-3/").
+ # shellcheck disable=SC2010
+ for i in $(ls /sys/bus/pci/slots/ | grep -E "^[0-9]+$") ; do
+ this_address=$(cat "/sys/bus/pci/slots/$i/address")
+
+ # The format of address is a little different between
+ # /sys/class/block/$dev/device/address and
+ # /sys/bus/pci/slots/
+ #
+ # address= "0000:01:00.0"
+ # this_address = "0000:01:00"
+ #
+ if echo "$address" | grep -Eq ^"$this_address" ; then
+ echo "/sys/bus/pci/slots/$i"
+ break
+ fi
+ done
+}
+
+
# process_pool (pool)
#
# Iterate through a pool and set the vdevs' enclosure slot LEDs to
@@ -134,6 +190,11 @@ process_pool()
# Get dev name (like 'sda')
dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
+ if [ ! -d "$vdev_enc_sysfs_path" ] ; then
+ # This is not a JBOD disk, but it could be a PCI NVMe drive
+ vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
+ fi
+
current_val=$(echo "$therest" | awk '{print $NF}')
if [ "$current_val" != "0" ] ; then
@@ -145,9 +206,10 @@ process_pool()
continue
fi
- if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
+ led_path=$(path_to_led "$vdev_enc_sysfs_path")
+ if [ ! -e "$led_path" ] ; then
rc=3
- zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
+ zed_log_msg "vdev $vdev '$led_path' doesn't exist"
continue
fi
@@ -158,7 +220,7 @@ process_pool()
continue
fi
- if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
+ if ! check_and_set_led "$led_path" "$val"; then
rc=3
fi
done
@@ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the
# Got a statechange for an individual vdev
val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
vdev=$(basename "$ZEVENT_VDEV_PATH")
- check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
+ ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
+ check_and_set_led "$ledpath" "$val"
else
# Process the entire pool
poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index df560f921..1c278b2ef 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -89,8 +89,8 @@
##
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
-# device mapper and multipath devices as well. Your enclosure must be
-# supported by the Linux SES driver for this to work.
+# device mapper and multipath devices as well. This works with JBOD enclosures
+# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
#
ZED_USE_ENCLOSURE_LEDS=1
diff --git a/cmd/zpool/zpool.d/ses b/cmd/zpool/zpool.d/ses
index f6b7520df..b1836d676 100755
--- a/cmd/zpool/zpool.d/ses
+++ b/cmd/zpool/zpool.d/ses
@@ -41,7 +41,13 @@ for i in $scripts ; do
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
;;
fault_led)
- val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
+ # A JBOD's fault LED is called 'fault'; an NVMe drive's fault LED is
+ # called 'attention'.
+ if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
+ val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
+ elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
+ val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
+ fi
;;
locate_led)
val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
diff --git a/lib/libzutil/os/linux/zutil_device_path_os.c b/lib/libzutil/os/linux/zutil_device_path_os.c
index 2a6f4ae2a..13f8bd031 100644
--- a/lib/libzutil/os/linux/zutil_device_path_os.c
+++ b/lib/libzutil/os/linux/zutil_device_path_os.c
@@ -155,17 +155,147 @@ zfs_strip_path(char *path)
}
/*
+ * Read the first line of a sysfs file into an allocated buffer and remove
+ * the trailing newline (if any).
+ *
+ * This is useful for reading sysfs files that return a single string.
+ *
+ * filepath: path of the file to read.
+ *
+ * Returns an allocated string pointer on success.  Returns NULL if the file
+ * could not be opened, nothing could be read from it, or the copy could not
+ * be allocated.  Returned buffer must be freed by the user.
+ */
+static char *
+zfs_read_sysfs_file(const char *filepath)
+{
+	char buf[4096];	/* all sysfs files report 4k size */
+	char *str = NULL;
+	FILE *fp;
+
+	fp = fopen(filepath, "r");
+	if (fp == NULL)
+		return (NULL);
+
+	if (fgets(buf, sizeof (buf), fp) != NULL) {
+		/*
+		 * Remove the last newline (if any).  The length can be zero
+		 * when the data begins with a NUL byte, so check it before
+		 * indexing to avoid reading buf[-1].
+		 */
+		size_t len = strlen(buf);
+		if (len > 0 && buf[len - 1] == '\n')
+			buf[len - 1] = '\0';
+
+		str = strdup(buf);
+	}
+
+	(void) fclose(fp);
+
+	return (str);
+}
+
+/*
+ * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
+ * the drive (in /sys/bus/pci/slots).
+ *
+ * For example:
+ *	dev_name: "nvme0n1"
+ *	returns: "/sys/bus/pci/slots/0"
+ *
+ * 'dev_name' must name an NVMe device (its last path component must begin
+ * with "nvme"); any leading path such as "/dev/" is stripped first.
+ *
+ * Returned string must be freed.  Returns NULL on error or no sysfs path.
+ */
+static char *
+zfs_get_pci_slots_sys_path(const char *dev_name)
+{
+	DIR *dp = NULL;
+	struct dirent *ep;
+	char *address1 = NULL;
+	char *address2 = NULL;
+	char *path = NULL;
+	char buf[MAXPATHLEN];
+	char *tmp;
+
+	/* If they preface 'dev' with a path (like "/dev") then strip it off */
+	tmp = strrchr(dev_name, '/');
+	if (tmp != NULL)
+		dev_name = tmp + 1;    /* +1 since we want the chr after '/' */
+
+	if (strncmp("nvme", dev_name, 4) != 0)
+		return (NULL);
+
+	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
+	    dev_name);
+
+	address1 = zfs_read_sysfs_file(buf);
+	if (!address1)
+		return (NULL);
+
+	/*
+	 * /sys/block/nvme0n1/device/address format will
+	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
+	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
+	 *
+	 * Note that 'tmp' now points into 'address1'; it is not a separate
+	 * allocation and must never be passed to free().
+	 */
+	tmp = strrchr(address1, '.');
+	if (tmp != NULL)
+		*tmp = '\0';
+
+	dp = opendir("/sys/bus/pci/slots/");
+	if (dp == NULL) {
+		free(address1);
+		return (NULL);
+	}
+
+	/*
+	 * Look through all the /sys/bus/pci/slots/ subdirs
+	 */
+	while ((ep = readdir(dp))) {
+		/*
+		 * We only care about directory names that are a single number.
+		 * Sometimes there are other directories like
+		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
+		 */
+		if (!zfs_isnumber(ep->d_name))
+			continue;
+
+		(void) snprintf(buf, sizeof (buf),
+		    "/sys/bus/pci/slots/%s/address", ep->d_name);
+
+		address2 = zfs_read_sysfs_file(buf);
+		if (!address2)
+			continue;
+
+		if (strcmp(address1, address2) != 0) {
+			free(address2);
+			continue;
+		}
+
+		/* Addresses match, we're all done */
+		free(address2);
+		if (asprintf(&path, "/sys/bus/pci/slots/%s",
+		    ep->d_name) == -1) {
+			/*
+			 * The contents of 'path' are undefined after a
+			 * failed asprintf(), so reset it and report the
+			 * error by returning NULL.
+			 */
+			path = NULL;
+		}
+		break;
+	}
+
+	closedir(dp);
+	free(address1);
+
+	return (path);
+}
+
+/*
* Given a dev name like "sda", return the full enclosure sysfs path to
* the disk. You can also pass in the name with "/dev" prepended
- * to it (like /dev/sda).
+ * to it (like /dev/sda). This works for both JBODs and NVMe PCI devices.
*
* For example, disk "sda" in enclosure slot 1:
- * dev: "sda"
+ * dev_name: "sda"
* returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
*
+ * Or:
+ *
+ * dev_name: "nvme0n1"
+ * returns: "/sys/bus/pci/slots/0"
+ *
* 'dev' must be a non-devicemapper device.
*
- * Returned string must be freed.
+ * Returned string must be freed. Returns NULL on error.
*/
char *
zfs_get_enclosure_sysfs_path(const char *dev_name)
@@ -252,6 +382,16 @@ end:
if (dp != NULL)
closedir(dp);
+ if (!path) {
+ /*
+ * This particular disk isn't in a JBOD. It could be an NVMe
+ * drive. If so, look up the NVMe device's path in
+ * /sys/bus/pci/slots/. Within that directory is an 'attention'
+ * file which controls the NVMe fault LED.
+ */
+ path = zfs_get_pci_slots_sys_path(dev_name);
+ }
+
return (path);
}
diff --git a/lib/libzutil/zutil_nicenum.c b/lib/libzutil/zutil_nicenum.c
index 1a19db0df..4dcac1f85 100644
--- a/lib/libzutil/zutil_nicenum.c
+++ b/lib/libzutil/zutil_nicenum.c
@@ -27,6 +27,7 @@
#include <math.h>
#include <stdio.h>
#include <libzutil.h>
+#include <string.h>
/*
* Return B_TRUE if "str" is a number string, B_FALSE otherwise.
@@ -42,6 +43,14 @@ zfs_isnumber(const char *str)
if (!(isdigit(*str) || (*str == '.')))
return (B_FALSE);
+ /*
+ * Numbers should not end with a period ("." ".." or "5." are
+ * not valid)
+ */
+ if (str[strlen(str) - 1] == '.') {
+ return (B_FALSE);
+ }
+
return (B_TRUE);
}