aboutsummaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rwxr-xr-xcmd/zed/zed.d/statechange-slot_off.sh61
-rw-r--r--cmd/zed/zed.d/zed.rc5
2 files changed, 66 insertions, 0 deletions
diff --git a/cmd/zed/zed.d/statechange-slot_off.sh b/cmd/zed/zed.d/statechange-slot_off.sh
new file mode 100755
index 000000000..d6f3c94a4
--- /dev/null
+++ b/cmd/zed/zed.d/statechange-slot_off.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+#
+# Turn off disk's enclosure slot if it becomes FAULTED.
+#
+# Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos
+# as they flip between FAULTED and ONLINE. If
+# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets
+# FAULTED, then power down the slot via sysfs:
+#
+# /sys/class/enclosure/<enclosure>/<slot>/power_status
+#
+# We assume the user will be responsible for turning the slot back on again.
+#
+# Note that this script requires that your enclosure be supported by the
+# Linux SCSI Enclosure services (SES) driver. The script will do nothing
+# if you have no enclosure, or if your enclosure isn't supported.
+#
+# Exit codes:
+# 0: slot successfully powered off
+# 1: enclosure not available
+# 2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled
+# 3: vdev was not FAULTED
+# 4: The enclosure sysfs path passed from ZFS does not exist
+# 5: Enclosure slot didn't actually turn off after we told it to
+
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+if [ ! -d /sys/class/enclosure ] ; then
+ # No JBOD enclosure or NVMe slots
+ exit 1
+fi
+
+if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then
+ exit 2
+fi
+
+if [ "$ZEVENT_VDEV_STATE_STR" != "FAULTED" ] ; then
+ exit 3
+fi
+
+if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then
+ exit 4
+fi
+
+echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"
+
+# Wait for sysfs to report that the slot is off. It can take ~400ms on some
+# enclosures. Compare with '=' ('==' is a bashism that a POSIX [ rejects).
+for i in $(seq 1 20) ; do
+ if [ "$(cat "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status")" = "off" ] ; then
+ break
+ fi
+ sleep 0.1
+done
+
+if [ "$(cat "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status")" != "off" ] ; then
+ exit 5
+fi
+
+zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index c55a70c79..78dc1afc7 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -142,3 +142,8 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
# Disabled by default, 1 to enable and 0 to disable.
#ZED_SYSLOG_DISPLAY_GUIDS=1
+##
+# Power off the drive's slot in the enclosure if it becomes FAULTED. This can
+# help silence misbehaving drives. This assumes your drive enclosure fully
+# supports slot power control via sysfs.
+#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1