summary refs log tree commit diff stats
path: root/cmd/zed/zed.d
diff options
context:
space:
mode:
author Tony Hutter <[email protected]> 2016-10-24 10:45:59 -0700
committer Brian Behlendorf <[email protected]> 2016-10-24 10:45:59 -0700
commit 1bbd8770490f0e5b8c575865ab70f6853bca2a2a (patch)
tree 302dc7e82db3c1b33739ec41998d95cfe0157450 /cmd/zed/zed.d
parent a85cefa35c00ab4999038fbed69a6c28d0244366 (diff)
Turn on/off enclosure slot fault LED even when disk isn't present
Previously, when a drive faulted, the statechange-led.sh script would look up the drive's LED sysfs entry in /sys/block/sd*/device/enclosure_device, and turn it on. During testing we noticed that if you pulled out a drive, or if the drive was so badly broken that it no longer appeared to Linux, the /sys/block/sd* path would be removed, and the script could not look up the LED entry. To fix this, this patch looks up the disk's more persistent "/sys/class/enclosure/X:X:X:X/Slot N" LED sysfs path at pool import. It then passes that path to the statechange-led script to use, rather than having the script look it up on the fly. This allows the script to turn on/off the slot LEDs even when the drive is missing. Closes #5309 Closes #2375
Diffstat (limited to 'cmd/zed/zed.d')
-rwxr-xr-x cmd/zed/zed.d/statechange-led.sh 99
1 files changed, 55 insertions, 44 deletions
diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh
index ca911d2b9..af1a14a9a 100755
--- a/cmd/zed/zed.d/statechange-led.sh
+++ b/cmd/zed/zed.d/statechange-led.sh
@@ -2,8 +2,13 @@
#
# Turn off/on the VDEV's enclosure fault LEDs when the pool's state changes.
#
-# Turn LED on if the VDEV becomes faulted/degraded, and turn it back off when
-# it's healthy again. This requires that your enclosure be supported by the
+# Turn LED on if the VDEV becomes faulted or degraded, and turn it back off
+# when it's online again. It will also turn on the LED (or keep it on) if
+# the drive becomes unavailable, unless the drive was previously in an
+# online state (online->unavail is a normal state transition during an
+# autoreplace).
+#
+# This script requires that your enclosure be supported by the
# Linux SCSI enclosure services (ses) driver. The script will do nothing
# if you have no enclosure, or if your enclosure isn't supported.
#
@@ -13,76 +18,82 @@
# 0: enclosure led successfully set
# 1: enclosure leds not available
# 2: enclosure leds administratively disabled
-# 3: ZED built without libdevmapper
+# 3: ZED didn't pass enclosure sysfs path
+# 4: Enclosure sysfs path doesn't exist
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-# ZEVENT_VDEV_UPATH will not be present if ZFS is not built with libdevmapper
-[ -n "${ZEVENT_VDEV_UPATH}" ] || exit 3
+if [ ! -d /sys/class/enclosure ] ; then
+ exit 1
+fi
if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then
exit 2
fi
-if [ ! -d /sys/class/enclosure ] ; then
- exit 1
-fi
+[ -n "${ZEVENT_VDEV_ENC_SYSFS_PATH}" ] || exit 3
+
+[ -e "${ZEVENT_VDEV_ENC_SYSFS_PATH}/fault" ] || exit 4
# Turn on/off enclosure LEDs
function led
{
- name=$1
+ file="$1/fault"
val=$2
# We want to check the current state first, since writing to the
# 'fault' entry always causes a SES command, even if the
# current state is already what you want.
- if [ -e /sys/block/$name/device/enclosure_device*/fault ] ; then
- # We have to do some monkey business to deal with spaces in
- # enclosure_device names. I've seen horrible things like this:
- #
- # '/sys/block/sdfw/device/enclosure_device:SLOT 43 41 /fault'
- #
- # ...so escape all spaces.
- file=`ls /sys/block/$name/device/enclosure_device*/fault | sed 's/\s/\\ /g'`
-
- current=`cat "$file"`
+ current=$(cat "${file}")
- # On some enclosures if you write 1 to fault, and read it back,
- # it will return 2. Treat all non-zero values as 1 for
- # simplicity.
- if [ "$current" != "0" ] ; then
- current=1
- fi
+ # On some enclosures if you write 1 to fault, and read it back,
+ # it will return 2. Treat all non-zero values as 1 for
+ # simplicity.
+ if [ "$current" != "0" ] ; then
+ current=1
+ fi
- if [ "$current" != "$val" ] ; then
- # Set the value twice. I've seen enclosures that were
- # flakey about setting it the first time.
- echo $val > "$file"
- echo $val > "$file"
- fi
+ if [ "$current" != "$val" ] ; then
+ # Set the value twice. I've seen enclosures that were
+ # flakey about setting it the first time.
+ echo "$val" > "$file"
+ echo "$val" > "$file"
fi
}
# Decide whether to turn on/off an LED based on the state
# Pass in path name and fault string ("ONLINE"/"FAULTED"/"DEGRADED"...etc)
+#
+# We only turn on LEDs when a drive becomes FAULTED, DEGRADED, or UNAVAIL and
+# only turn it off when it comes back ONLINE. All other states are ignored, and
+# keep the previous LED state.
function process {
- # path=/dev/sda, fault=
-
- path=$1
+ path="$1"
fault=$2
- name=`basename $path`
-
- if [ -z "$name" ] ; then
- return
- fi
-
+ prev=$3
if [ "$fault" == "FAULTED" ] || [ "$fault" == "DEGRADED" ] ; then
- led $name 1
- else
- led $name 0
+ led "$path" 1
+ elif [ "$fault" == "UNAVAIL" ] && [ "$prev" != "ONLINE" ] ; then
+ # For the most part, UNAVAIL should turn on the LED. However,
+ # during an autoreplace, we see our new drive go online,
+ # followed by our "old" drive going ONLINE->UNAVAIL. Since the
+ # "old" drive has the same slot information, we want to ignore
+ # the ONLINE->UNAVAIL event.
+ #
+ # NAME STATE READ WRITE CKSUM
+ # mypool3 DEGRADED 0 0 0
+ # mirror-0 DEGRADED 0 0 0
+ # A1 ONLINE 0 0 0
+ # A2 ONLINE 0 880 0
+ # replacing-3 UNAVAIL 0 0 0
+ # old UNAVAIL 0 2.93K 0 corrupted data
+ # A3 ONLINE 0 0 156 (resilvering)
+ led "$path" 1
+ elif [ "$fault" == "ONLINE" ] ; then
+ led "$path" 0
fi
}
-process "$ZEVENT_VDEV_UPATH" "$ZEVENT_VDEV_STATE_STR"
+process "$ZEVENT_VDEV_ENC_SYSFS_PATH" "$ZEVENT_VDEV_STATE_STR" \
+ "$ZEVENT_VDEV_LASTSTATE_STR"