diff options
author | Tony Hutter <[email protected]> | 2016-10-24 10:45:59 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-10-24 10:45:59 -0700 |
commit | 1bbd8770490f0e5b8c575865ab70f6853bca2a2a (patch) | |
tree | 302dc7e82db3c1b33739ec41998d95cfe0157450 /cmd/zed/zed.d | |
parent | a85cefa35c00ab4999038fbed69a6c28d0244366 (diff) |
Turn on/off enclosure slot fault LED even when disk isn't present
Previously when a drive faulted, the statechange-led.sh script would look up
the drive's LED sysfs entry in /sys/block/sd*/device/enclosure_device, and
turn it on. During testing we noticed that if you pulled out a drive, or if
the drive was so badly broken that it no longer appeared to Linux, the
/sys/block/sd* path would be removed, and the script could not look up the
LED entry.
To fix this, this patch looks up the disk's more persistent
"/sys/class/enclosure/X:X:X:X/Slot N" LED sysfs path at pool import. It then
passes that path to the statechange-led script to use, rather than having the
script look it up on the fly. This allows the script to turn on/off the slot
LEDs even when the drive is missing.
Closes #5309
Closes #2375
Diffstat (limited to 'cmd/zed/zed.d')
-rwxr-xr-x | cmd/zed/zed.d/statechange-led.sh | 99 |
1 files changed, 55 insertions, 44 deletions
diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh index ca911d2b9..af1a14a9a 100755 --- a/cmd/zed/zed.d/statechange-led.sh +++ b/cmd/zed/zed.d/statechange-led.sh @@ -2,8 +2,13 @@ # # Turn off/on the VDEV's enclosure fault LEDs when the pool's state changes. # -# Turn LED on if the VDEV becomes faulted/degraded, and turn it back off when -# it's healthy again. This requires that your enclosure be supported by the +# Turn LED on if the VDEV becomes faulted or degraded, and turn it back off +# when it's online again. It will also turn on the LED (or keep it on) if +# the drive becomes unavailable, unless the drive was in was a previously +# online state (online->unavail is a normal state transition during an +# autoreplace). +# +# This script requires that your enclosure be supported by the # Linux SCSI enclosure services (ses) driver. The script will do nothing # if you have no enclosure, or if your enclosure isn't supported. # @@ -13,76 +18,82 @@ # 0: enclosure led successfully set # 1: enclosure leds not not available # 2: enclosure leds administratively disabled -# 3: ZED built without libdevmapper +# 3: ZED didn't pass enclosure sysfs path +# 4: Enclosure sysfs path doesn't exist [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" . "${ZED_ZEDLET_DIR}/zed-functions.sh" -# ZEVENT_VDEV_UPATH will not be present if ZFS is not built with libdevmapper -[ -n "${ZEVENT_VDEV_UPATH}" ] || exit 3 +if [ ! -d /sys/class/enclosure ] ; then + exit 1 +fi if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then exit 2 fi -if [ ! -d /sys/class/enclosure ] ; then - exit 1 -fi +[ -n "${ZEVENT_VDEV_ENC_SYSFS_PATH}" ] || exit 3 + +[ -e "${ZEVENT_VDEV_ENC_SYSFS_PATH}/fault" ] || exit 4 # Turn on/off enclosure LEDs function led { - name=$1 + file="$1/fault" val=$2 # We want to check the current state first, since writing to the # 'fault' entry always always causes a SES command, even if the # current state is already what you want. 
- if [ -e /sys/block/$name/device/enclosure_device*/fault ] ; then - # We have to do some monkey business to deal with spaces in - # enclosure_device names. I've seen horrible things like this: - # - # '/sys/block/sdfw/device/enclosure_device:SLOT 43 41 /fault' - # - # ...so escape all spaces. - file=`ls /sys/block/$name/device/enclosure_device*/fault | sed 's/\s/\\ /g'` - - current=`cat "$file"` + current=$(cat "${file}") - # On some enclosures if you write 1 to fault, and read it back, - # it will return 2. Treat all non-zero values as 1 for - # simplicity. - if [ "$current" != "0" ] ; then - current=1 - fi + # On some enclosures if you write 1 to fault, and read it back, + # it will return 2. Treat all non-zero values as 1 for + # simplicity. + if [ "$current" != "0" ] ; then + current=1 + fi - if [ "$current" != "$val" ] ; then - # Set the value twice. I've seen enclosures that were - # flakey about setting it the first time. - echo $val > "$file" - echo $val > "$file" - fi + if [ "$current" != "$val" ] ; then + # Set the value twice. I've seen enclosures that were + # flakey about setting it the first time. + echo "$val" > "$file" + echo "$val" > "$file" fi } # Decide whether to turn on/off an LED based on the state # Pass in path name and fault string ("ONLINE"/"FAULTED"/"DEGRADED"...etc) +# +# We only turn on LEDs when a drive becomes FAULTED, DEGRADED, or UNAVAIL and +# only turn it on when it comes back ONLINE. All other states are ignored, and +# keep the previous LED state. function process { - # path=/dev/sda, fault= - - path=$1 + path="$1" fault=$2 - name=`basename $path` - - if [ -z "$name" ] ; then - return - fi - + prev=$3 if [ "$fault" == "FAULTED" ] || [ "$fault" == "DEGRADED" ] ; then - led $name 1 - else - led $name 0 + led "$path" 1 + elif [ "$fault" == "UNAVAIL" ] && [ "$prev" != "ONLINE" ] ; then + # For the most part, UNAVAIL should turn on the LED. 
However, + # during an autoreplace, we see our new drive go online, + # followed by our "old" drive going ONLINE->UNAVAIL. Since the + # "old" drive has the same slot information, we want to ignore + # the ONLINE->UNAVAIL event. + # + # NAME STATE READ WRITE CKSUM + # mypool3 DEGRADED 0 0 0 + # mirror-0 DEGRADED 0 0 0 + # A1 ONLINE 0 0 0 + # A2 ONLINE 0 880 0 + # replacing-3 UNAVAIL 0 0 0 + # old UNAVAIL 0 2.93K 0 corrupted data + # A3 ONLINE 0 0 156 (resilvering) + led "$path" 1 + elif [ "$fault" == "ONLINE" ] ; then + led "$path" 0 fi } -process "$ZEVENT_VDEV_UPATH" "$ZEVENT_VDEV_STATE_STR" +process "$ZEVENT_VDEV_ENC_SYSFS_PATH" "$ZEVENT_VDEV_STATE_STR" \ + "$ZEVENT_VDEV_LASTSTATE_STR" |