diff options
-rw-r--r-- | cmd/zed/Makefile.am | 6 | ||||
l--------- | cmd/zed/zed.d/checksum-notify.sh | 1 | ||||
-rwxr-xr-x[l---------] | cmd/zed/zed.d/data-notify.sh | 44 | ||||
-rwxr-xr-x | cmd/zed/zed.d/io-notify.sh | 64 | ||||
-rwxr-xr-x | cmd/zed/zed.d/statechange-notify.sh | 74 | ||||
-rw-r--r-- | cmd/zed/zed.d/zed.rc | 18 |
6 files changed, 125 insertions, 82 deletions
diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 101db83c3..1fe2047c0 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -61,23 +61,21 @@ zedexecdir = $(libexecdir)/zfs/zed.d dist_zedexec_SCRIPTS = \ zed.d/all-debug.sh \ zed.d/all-syslog.sh \ - zed.d/checksum-notify.sh \ zed.d/data-notify.sh \ zed.d/generic-notify.sh \ - zed.d/io-notify.sh \ zed.d/resilver_finish-notify.sh \ zed.d/scrub_finish-notify.sh \ zed.d/statechange-led.sh \ + zed.d/statechange-notify.sh \ zed.d/vdev_clear-led.sh zedconfdefaults = \ all-syslog.sh \ - checksum-notify.sh \ data-notify.sh \ - io-notify.sh \ resilver_finish-notify.sh \ scrub_finish-notify.sh \ statechange-led.sh \ + statechange-notify.sh \ vdev_clear-led.sh install-data-hook: diff --git a/cmd/zed/zed.d/checksum-notify.sh b/cmd/zed/zed.d/checksum-notify.sh deleted file mode 120000 index 900873807..000000000 --- a/cmd/zed/zed.d/checksum-notify.sh +++ /dev/null @@ -1 +0,0 @@ -io-notify.sh
\ No newline at end of file diff --git a/cmd/zed/zed.d/data-notify.sh b/cmd/zed/zed.d/data-notify.sh index 900873807..639b459bd 120000..100755 --- a/cmd/zed/zed.d/data-notify.sh +++ b/cmd/zed/zed.d/data-notify.sh @@ -1 +1,43 @@ -io-notify.sh
\ No newline at end of file +#!/bin/sh +# +# Send notification in response to a DATA error. +# +# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given +# class/pool/[vdev] combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool/[vdev]. +# +# Exit codes: +# 0: notification sent +# 1: notification failed +# 2: notification not configured +# 3: notification suppressed +# 9: internal error + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +[ -n "${ZEVENT_POOL}" ] || exit 9 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 +[ -n "${ZED_NOTIFY_DATA}" ] || exit 3 + +rate_limit_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify" +zed_rate_limit "${rate_limit_tag}" || exit 3 + +umask 077 +note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" +note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" +{ + echo "ZFS has detected a data error:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + echo " error: ${ZEVENT_ZIO_ERR}" + echo " objid: ${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}" + echo " pool: ${ZEVENT_POOL}" +} > "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/io-notify.sh b/cmd/zed/zed.d/io-notify.sh deleted file mode 100755 index 3ce918ad7..000000000 --- a/cmd/zed/zed.d/io-notify.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/sh -# -# Send notification in response to a CHECKSUM, DATA, or IO error. -# -# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given -# class/pool/[vdev] combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool/[vdev]. 
-# -# Exit codes: -# 0: notification sent -# 1: notification failed -# 2: notification not configured -# 3: notification suppressed -# 9: internal error - -[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" -. "${ZED_ZEDLET_DIR}/zed-functions.sh" - -[ -n "${ZEVENT_POOL}" ] || exit 9 -[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 - -if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \ - && [ "${ZEVENT_SUBCLASS}" != "data" ] \ - && [ "${ZEVENT_SUBCLASS}" != "io" ]; then - zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" - exit 9 -fi - -rate_limit_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify" -zed_rate_limit "${rate_limit_tag}" || exit 3 - -umask 077 -note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" -note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" -{ - [ "${ZEVENT_SUBCLASS}" = "io" ] && article="an" || article="a" - - echo "ZFS has detected ${article} ${ZEVENT_SUBCLASS} error:" - echo - echo " eid: ${ZEVENT_EID}" - echo " class: ${ZEVENT_SUBCLASS}" - echo " host: $(hostname)" - echo " time: ${ZEVENT_TIME_STRING}" - - [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" - [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}" - [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" - - [ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ] \ - && echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}" - - [ -n "${ZEVENT_VDEV_READ_ERRORS}" ] \ - && echo " read: ${ZEVENT_VDEV_READ_ERRORS}" - - [ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ] \ - && echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}" - - echo " pool: ${ZEVENT_POOL}" - -} > "${note_pathname}" - -zed_notify "${note_subject}" "${note_pathname}"; rv=$? 
-rm -f "${note_pathname}" -exit "${rv}" diff --git a/cmd/zed/zed.d/statechange-notify.sh b/cmd/zed/zed.d/statechange-notify.sh new file mode 100755 index 000000000..eba4ef9d8 --- /dev/null +++ b/cmd/zed/zed.d/statechange-notify.sh @@ -0,0 +1,74 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License Version 1.0 (CDDL-1.0). +# You can obtain a copy of the license from the top-level file +# "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. +# You may not use this file except in compliance with the license. +# +# CDDL HEADER END +# + +# +# Send notification in response to a fault induced statechange +# +# ZEVENT_SUBCLASS: 'statechange' +# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED' or 'REMOVED' +# +# Exit codes: +# 0: notification sent +# 1: notification failed +# 2: notification not configured +# 3: statechange not relevant +# 4: statechange string missing (unexpected) + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +[ -n "${ZEVENT_VDEV_STATE_STR}" ] || exit 4 + +if [ "${ZEVENT_VDEV_STATE_STR}" != "FAULTED" ] \ + && [ "${ZEVENT_VDEV_STATE_STR}" != "DEGRADED" ] \ + && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ]; then + exit 3 +fi + +umask 077 +note_subject="ZFS device fault for pool ${ZEVENT_POOL_GUID} on $(hostname)" +note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" +{ + if [ "${ZEVENT_VDEV_STATE_STR}" = "FAULTED" ] ; then + echo "The number of I/O errors associated with a ZFS device exceeded" + echo "acceptable levels. ZFS has marked the device as faulted." + elif [ "${ZEVENT_VDEV_STATE_STR}" = "DEGRADED" ] ; then + echo "The number of checksum errors associated with a ZFS device" + echo "exceeded acceptable levels. ZFS has marked the device as" + echo "degraded." + else + echo "ZFS has detected that a device was removed." 
+ fi + + echo + echo " impact: Fault tolerance of the pool may be compromised." + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " state: ${ZEVENT_VDEV_STATE_STR}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + + [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" + [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}" + [ -n "${ZEVENT_VDEV_PHYSPATH}" ] && echo " vphys: ${ZEVENT_VDEV_PHYSPATH}" + [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" + [ -n "${ZEVENT_VDEV_DEVID}" ] && echo " devid: ${ZEVENT_VDEV_DEVID}" + + echo " pool: ${ZEVENT_POOL_GUID}" + +} > "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? + +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 2dce04828..a1dd33704 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -51,6 +51,12 @@ #ZED_NOTIFY_VERBOSE=0 ## +# Send notifications for 'ereport.fs.zfs.data' events. +# Disabled by default +# +#ZED_NOTIFY_DATA=1 + +## # Pushbullet access token. # This grants full access to your account -- protect it accordingly! # <https://www.pushbullet.com/get-started> @@ -74,18 +80,6 @@ #ZED_RUNDIR="/var/run" ## -# Replace a device with a hot spare after N checksum errors are detected. -# Disabled by default; uncomment to enable. -# -#ZED_SPARE_ON_CHECKSUM_ERRORS=10 - -## -# Replace a device with a hot spare after N I/O errors are detected. -# Disabled by default; uncomment to enable. -# -#ZED_SPARE_ON_IO_ERRORS=1 - -## # Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for # device mapper and multipath devices as well. Your enclosure must be # supported by the Linux SES driver for this to work. |