aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2016-11-10 14:52:59 -0700
committerBrian Behlendorf <[email protected]>2016-11-10 13:52:59 -0800
commit0df15db98f185c948239c236c9e51d7ce14adb71 (patch)
treee36ffbd0bece284f47c4784818176d032c04fc83
parent32dec7bd1a0bce6039a65768ed8552164a0130bf (diff)
Add a statechange notify zedlet
Now that ZED has internal fault diagnosis and the statechange event is generated for faulted states, we can replace the io-notify and checksum-notify zedlets with one based on statechange. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Don Brady <[email protected]> Closes #5383
-rw-r--r--cmd/zed/Makefile.am6
l---------cmd/zed/zed.d/checksum-notify.sh1
-rwxr-xr-x[l---------]cmd/zed/zed.d/data-notify.sh44
-rwxr-xr-xcmd/zed/zed.d/io-notify.sh64
-rwxr-xr-xcmd/zed/zed.d/statechange-notify.sh74
-rw-r--r--cmd/zed/zed.d/zed.rc18
6 files changed, 125 insertions, 82 deletions
diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am
index 101db83c3..1fe2047c0 100644
--- a/cmd/zed/Makefile.am
+++ b/cmd/zed/Makefile.am
@@ -61,23 +61,21 @@ zedexecdir = $(libexecdir)/zfs/zed.d
dist_zedexec_SCRIPTS = \
zed.d/all-debug.sh \
zed.d/all-syslog.sh \
- zed.d/checksum-notify.sh \
zed.d/data-notify.sh \
zed.d/generic-notify.sh \
- zed.d/io-notify.sh \
zed.d/resilver_finish-notify.sh \
zed.d/scrub_finish-notify.sh \
zed.d/statechange-led.sh \
+ zed.d/statechange-notify.sh \
zed.d/vdev_clear-led.sh
zedconfdefaults = \
all-syslog.sh \
- checksum-notify.sh \
data-notify.sh \
- io-notify.sh \
resilver_finish-notify.sh \
scrub_finish-notify.sh \
statechange-led.sh \
+ statechange-notify.sh \
vdev_clear-led.sh
install-data-hook:
diff --git a/cmd/zed/zed.d/checksum-notify.sh b/cmd/zed/zed.d/checksum-notify.sh
deleted file mode 120000
index 900873807..000000000
--- a/cmd/zed/zed.d/checksum-notify.sh
+++ /dev/null
@@ -1 +0,0 @@
-io-notify.sh \ No newline at end of file
diff --git a/cmd/zed/zed.d/data-notify.sh b/cmd/zed/zed.d/data-notify.sh
index 900873807..639b459bd 120000..100755
--- a/cmd/zed/zed.d/data-notify.sh
+++ b/cmd/zed/zed.d/data-notify.sh
@@ -1 +1,43 @@
-io-notify.sh \ No newline at end of file
+#!/bin/sh
+#
+# Notify the administrator when ZFS reports a DATA error.
+#
+# Notifications are throttled: for any one class/pool/[vdev] combination only
+# one is sent per ZED_NOTIFY_INTERVAL_SECS, so a burst of events arriving
+# together for the same pool/[vdev] does not spam the recipient.
+#
+# Exit codes:
+#   0: notification sent
+#   1: notification failed
+#   2: notification not configured
+#   3: notification suppressed
+#   9: internal error
+
+if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ]; then . "${ZED_ZEDLET_DIR}/zed.rc"; fi
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+if [ -z "${ZEVENT_POOL}" ]; then exit 9; fi
+if [ -z "${ZEVENT_SUBCLASS}" ]; then exit 9; fi
+if [ -z "${ZED_NOTIFY_DATA}" ]; then exit 3; fi
+
+throttle_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
+if ! zed_rate_limit "${throttle_tag}"; then exit 3; fi
+
+umask 077
+mail_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
+report_file="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+{
+    echo "ZFS has detected a data error:"
+    echo
+    echo " eid: ${ZEVENT_EID}"
+    echo " class: ${ZEVENT_SUBCLASS}"
+    echo " host: $(hostname)"
+    echo " time: ${ZEVENT_TIME_STRING}"
+    echo " error: ${ZEVENT_ZIO_ERR}"
+    echo " objid: ${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}"
+    echo " pool: ${ZEVENT_POOL}"
+} > "${report_file}"
+
+zed_notify "${mail_subject}" "${report_file}"
+notify_rc=$?; rm -f "${report_file}"
+exit "${notify_rc}"
diff --git a/cmd/zed/zed.d/io-notify.sh b/cmd/zed/zed.d/io-notify.sh
deleted file mode 100755
index 3ce918ad7..000000000
--- a/cmd/zed/zed.d/io-notify.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/sh
-#
-# Send notification in response to a CHECKSUM, DATA, or IO error.
-#
-# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given
-# class/pool/[vdev] combination. This protects against spamming the recipient
-# should multiple events occur together in time for the same pool/[vdev].
-#
-# Exit codes:
-# 0: notification sent
-# 1: notification failed
-# 2: notification not configured
-# 3: notification suppressed
-# 9: internal error
-
-[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
-. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-
-[ -n "${ZEVENT_POOL}" ] || exit 9
-[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
-
-if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \
- && [ "${ZEVENT_SUBCLASS}" != "data" ] \
- && [ "${ZEVENT_SUBCLASS}" != "io" ]; then
- zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
- exit 9
-fi
-
-rate_limit_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
-zed_rate_limit "${rate_limit_tag}" || exit 3
-
-umask 077
-note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
-note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
-{
- [ "${ZEVENT_SUBCLASS}" = "io" ] && article="an" || article="a"
-
- echo "ZFS has detected ${article} ${ZEVENT_SUBCLASS} error:"
- echo
- echo " eid: ${ZEVENT_EID}"
- echo " class: ${ZEVENT_SUBCLASS}"
- echo " host: $(hostname)"
- echo " time: ${ZEVENT_TIME_STRING}"
-
- [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
- [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}"
- [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}"
-
- [ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ] \
- && echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}"
-
- [ -n "${ZEVENT_VDEV_READ_ERRORS}" ] \
- && echo " read: ${ZEVENT_VDEV_READ_ERRORS}"
-
- [ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ] \
- && echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}"
-
- echo " pool: ${ZEVENT_POOL}"
-
-} > "${note_pathname}"
-
-zed_notify "${note_subject}" "${note_pathname}"; rv=$?
-rm -f "${note_pathname}"
-exit "${rv}"
diff --git a/cmd/zed/zed.d/statechange-notify.sh b/cmd/zed/zed.d/statechange-notify.sh
new file mode 100755
index 000000000..eba4ef9d8
--- /dev/null
+++ b/cmd/zed/zed.d/statechange-notify.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License Version 1.0 (CDDL-1.0).
+# You can obtain a copy of the license from the top-level file
+# "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
+# You may not use this file except in compliance with the license.
+#
+# CDDL HEADER END
+#
+
+# Send notification in response to a fault induced statechange; any other
+# vdev state is ignored. String tests use '=' because '==' is not POSIX sh.
+#
+# ZEVENT_SUBCLASS: 'statechange'
+# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED' or 'REMOVED'
+#
+# Exit codes:
+#   0: notification sent
+#   1: notification failed
+#   2: notification not configured
+#   3: statechange not relevant
+#   4: statechange string missing (unexpected)
+
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+[ -n "${ZEVENT_VDEV_STATE_STR}" ] || exit 4
+
+if [ "${ZEVENT_VDEV_STATE_STR}" != "FAULTED" ] \
+    && [ "${ZEVENT_VDEV_STATE_STR}" != "DEGRADED" ] \
+    && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ]; then
+    exit 3
+fi
+
+umask 077    # keep the temporary note file private to its owner
+note_subject="ZFS device fault for pool ${ZEVENT_POOL_GUID} on $(hostname)"
+note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+{
+    if [ "${ZEVENT_VDEV_STATE_STR}" = "FAULTED" ] ; then
+        echo "The number of I/O errors associated with a ZFS device exceeded"
+        echo "acceptable levels. ZFS has marked the device as faulted."
+    elif [ "${ZEVENT_VDEV_STATE_STR}" = "DEGRADED" ] ; then
+        echo "The number of checksum errors associated with a ZFS device"
+        echo "exceeded acceptable levels. ZFS has marked the device as"
+        echo "degraded."
+    else
+        echo "ZFS has detected that a device was removed."
+    fi
+
+    echo
+    echo " impact: Fault tolerance of the pool may be compromised."
+    echo " eid: ${ZEVENT_EID}"
+    echo " class: ${ZEVENT_SUBCLASS}"
+    echo " state: ${ZEVENT_VDEV_STATE_STR}"
+    echo " host: $(hostname)"
+    echo " time: ${ZEVENT_TIME_STRING}"
+
+    [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
+    [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}"
+    [ -n "${ZEVENT_VDEV_PHYSPATH}" ] && echo " vphys: ${ZEVENT_VDEV_PHYSPATH}"
+    [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}"
+    [ -n "${ZEVENT_VDEV_DEVID}" ] && echo " devid: ${ZEVENT_VDEV_DEVID}"
+
+    echo " pool: ${ZEVENT_POOL_GUID}"
+
+} > "${note_pathname}"
+
+zed_notify "${note_subject}" "${note_pathname}"; rv=$?
+
+rm -f "${note_pathname}"
+exit "${rv}"
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index 2dce04828..a1dd33704 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -51,6 +51,12 @@
#ZED_NOTIFY_VERBOSE=0
##
+# Send notifications for 'ereport.fs.zfs.data' events.
+# Disabled by default; uncomment to enable.
+#
+#ZED_NOTIFY_DATA=1
+
+##
# Pushbullet access token.
# This grants full access to your account -- protect it accordingly!
# <https://www.pushbullet.com/get-started>
@@ -74,18 +80,6 @@
#ZED_RUNDIR="/var/run"
##
-# Replace a device with a hot spare after N checksum errors are detected.
-# Disabled by default; uncomment to enable.
-#
-#ZED_SPARE_ON_CHECKSUM_ERRORS=10
-
-##
-# Replace a device with a hot spare after N I/O errors are detected.
-# Disabled by default; uncomment to enable.
-#
-#ZED_SPARE_ON_IO_ERRORS=1
-
-##
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
# device mapper and multipath devices as well. Your enclosure must be
# supported by the Linux SES driver for this to work.