aboutsummaryrefslogtreecommitdiffstats
path: root/cmd/zed/zed.d
diff options
context:
space:
mode:
Diffstat (limited to 'cmd/zed/zed.d')
-rw-r--r--cmd/zed/zed.d/README30
-rwxr-xr-xcmd/zed/zed.d/all-debug.sh19
-rwxr-xr-xcmd/zed/zed.d/all-syslog.sh9
-rwxr-xr-xcmd/zed/zed.d/data-email.sh88
-rwxr-xr-xcmd/zed/zed.d/generic-email.sh90
-rwxr-xr-xcmd/zed/zed.d/io-email.sh95
-rwxr-xr-xcmd/zed/zed.d/io-spare.sh267
-rwxr-xr-xcmd/zed/zed.d/scrub.finish-email.sh92
-rw-r--r--cmd/zed/zed.d/zed-functions.sh230
-rw-r--r--cmd/zed/zed.d/zed.rc44
10 files changed, 627 insertions, 337 deletions
diff --git a/cmd/zed/zed.d/README b/cmd/zed/zed.d/README
new file mode 100644
index 000000000..b4cb11514
--- /dev/null
+++ b/cmd/zed/zed.d/README
@@ -0,0 +1,30 @@
+Shell scripts are the recommended choice for ZEDLETs that mostly call
+other utilities and do relatively little data manipulation.
+
+Shell scripts MUST work on both bash and dash.
+
+Shell scripts MUST run cleanly through ShellCheck:
+ http://www.shellcheck.net/
+
+General functions reside in "zed-functions.sh". Use them where applicable.
+
+Additional references that may be of use:
+
+ Google Shell Style Guide
+ https://google-styleguide.googlecode.com/svn/trunk/shell.xml
+
+ Dash as /bin/sh
+ https://wiki.ubuntu.com/DashAsBinSh
+
+ Common shell script mistakes
+ http://www.pixelbeat.org/programming/shell_script_mistakes.html
+
+ Filenames and Pathnames in Shell: How to do it Correctly
+ http://www.dwheeler.com/essays/filenames-in-shell.html
+
+ Autoconf: Portable Shell Programming
+ https://www.gnu.org/software/autoconf/manual/autoconf.html#Portable-Shell
+
+Please BE CONSISTENT with the existing style, check for errors,
+minimize dependencies where possible, try to be portable,
+and comment anything non-obvious. Festina lente.
diff --git a/cmd/zed/zed.d/all-debug.sh b/cmd/zed/zed.d/all-debug.sh
index aa20ef268..057e39b50 100755
--- a/cmd/zed/zed.d/all-debug.sh
+++ b/cmd/zed/zed.d/all-debug.sh
@@ -2,16 +2,23 @@
#
# Log all environment variables to ZED_DEBUG_LOG.
#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
+# This can be a useful aid when developing/debugging ZEDLETs since it shows the
+# environment variables defined for each zevent.
-# Override the default umask to restrict access to a newly-created logfile.
-umask 077
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+: "${ZED_DEBUG_LOG:="${TMPDIR:="/tmp"}/zed.debug.log"}"
-# Append stdout to the logfile after obtaining an advisory lock.
-exec >> "${ZED_DEBUG_LOG:=/tmp/zed.debug.log}"
-flock -x 1
+lockfile="$(basename -- "${ZED_DEBUG_LOG}").lock"
+
+umask 077
+zed_lock "${lockfile}"
+exec >> "${ZED_DEBUG_LOG}"
printenv | sort
echo
+exec >&-
+zed_unlock "${lockfile}"
exit 0
diff --git a/cmd/zed/zed.d/all-syslog.sh b/cmd/zed/zed.d/all-syslog.sh
index acf9e83bd..b34d17cef 100755
--- a/cmd/zed/zed.d/all-syslog.sh
+++ b/cmd/zed/zed.d/all-syslog.sh
@@ -1,11 +1,10 @@
#!/bin/sh
#
# Log the zevent via syslog.
-#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
-logger -t "${ZED_SYSLOG_TAG:=zed}" -p "${ZED_SYSLOG_PRIORITY:=daemon.notice}" \
- eid="${ZEVENT_EID}" class="${ZEVENT_SUBCLASS}" \
- "${ZEVENT_POOL:+pool=$ZEVENT_POOL}"
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \
+ "${ZEVENT_POOL:+"pool=${ZEVENT_POOL}"}"
exit 0
diff --git a/cmd/zed/zed.d/data-email.sh b/cmd/zed/zed.d/data-email.sh
index 543b8fe55..2dae8ff6b 100755
--- a/cmd/zed/zed.d/data-email.sh
+++ b/cmd/zed/zed.d/data-email.sh
@@ -1,81 +1,53 @@
#!/bin/sh
#
-# Send email to ZED_EMAIL in response to a DATA zevent.
-# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given
-# class/pool combination. This protects against spamming the recipient
-# should multiple events occur together in time for the same pool.
+# Send email to ZED_EMAIL in response to a DATA error.
+#
+# Only one email per ZED_EMAIL_INTERVAL_SECS will be sent for a given
+# class/pool combination. This protects against spamming the recipient
+# should multiple events occur together in time for the same pool.
+#
# Exit codes:
# 0: email sent
# 1: email failed
-# 2: email suppressed
-# 3: missing executable
-# 4: unsupported event class
-# 5: internal error
-# State File Format:
-# POOL;TIME_OF_LAST_EMAIL
-#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
+# 2: email not configured
+# 3: email suppressed
+# 9: internal error
-test -n "${ZEVENT_POOL}" || exit 5
-test -n "${ZEVENT_SUBCLASS}" || exit 5
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-if test "${ZEVENT_SUBCLASS}" != "data"; then \
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\"
- exit 4
-fi
+[ -n "${ZED_EMAIL}" ] || exit 2
-# Only send email if ZED_EMAIL has been configured.
-test -n "${ZED_EMAIL}" || exit 2
+[ -n "${ZEVENT_POOL}" ] || exit 9
+[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
-# Ensure requisite executables are installed.
-if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" not installed
- exit 3
+if [ "${ZEVENT_SUBCLASS}" != "data" ]; then \
+ zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
+ exit 9
fi
-NAME="zed.${ZEVENT_SUBCLASS}.email"
-LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock"
-STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state"
+zed_check_cmd "mail" || exit 9
-# Obtain lock to ensure mutual exclusion for accessing state.
-exec 8> "${LOCKFILE}"
-flock -x 8
+zed_rate_limit "${ZEVENT_POOL};${ZEVENT_SUBCLASS};email" || exit 3
-# Query state for last time email was sent for this pool.
-TIME_NOW=`date +%s`
-TIME_LAST=`egrep "^${ZEVENT_POOL};" "${STATEFILE}" 2>/dev/null | cut -d ";" -f2`
-if test -n "${TIME_LAST}"; then
- TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"`
- if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then
- exit 2
- fi
-fi
-
-"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \
- "${ZED_EMAIL}" <<EOF
+umask 077
+email_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
+email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+cat > "${email_pathname}" <<EOF
A ZFS ${ZEVENT_SUBCLASS} error has been detected:
eid: ${ZEVENT_EID}
- host: `hostname`
+ host: $(hostname)
time: ${ZEVENT_TIME_STRING}
pool: ${ZEVENT_POOL}
EOF
-MAIL_STATUS=$?
-# Update state.
-egrep -v "^${ZEVENT_POOL};" "${STATEFILE}" 2>/dev/null > "${STATEFILE}.$$"
-echo "${ZEVENT_POOL};${TIME_NOW}" >> "${STATEFILE}.$$"
-mv -f "${STATEFILE}.$$" "${STATEFILE}"
+mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}"
+mail_status=$?
-if test "${MAIL_STATUS}" -ne 0; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
- exit 1
+if [ "${mail_status}" -ne 0 ]; then
+ zed_log_msg "mail exit=${mail_status}"
+ exit 1
fi
-
+rm -f "${email_pathname}"
exit 0
diff --git a/cmd/zed/zed.d/generic-email.sh b/cmd/zed/zed.d/generic-email.sh
index 357aedee5..ad022e034 100755
--- a/cmd/zed/zed.d/generic-email.sh
+++ b/cmd/zed/zed.d/generic-email.sh
@@ -1,59 +1,59 @@
#!/bin/sh
#
# Send email to ZED_EMAIL in response to a given zevent.
-# This is a generic script than can be symlinked to a file in the zed
-# enabled-scripts directory in order to have email sent when a particular
-# class of zevents occurs. The symlink filename must begin with the zevent
-# (sub)class string (eg, "probe_failure-email.sh" for the "probe_failure"
-# subclass). Refer to the zed(8) manpage for details.
+#
+# This is a generic script than can be symlinked to a file in the
+# enabled-zedlets directory to have an email sent when a particular class of
+# zevents occurs. The symlink filename must begin with the zevent (sub)class
+# string (e.g., "probe_failure-email.sh" for the "probe_failure" subclass).
+# Refer to the zed(8) manpage for details.
+#
# Exit codes:
# 0: email sent
# 1: email failed
-# 2: email suppressed
-# 3: missing executable
-#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
+# 2: email not configured
+# 3: email suppressed
-# Only send email if ZED_EMAIL has been configured.
-test -n "${ZED_EMAIL}" || exit 2
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-# Ensure requisite executables are installed.
-if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" not installed
- exit 3
-fi
+[ -n "${ZED_EMAIL}" ] || exit 2
+
+# Rate-limit the message based in part on the filename.
+#
+rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")"
+rate_limit_interval="${ZED_EMAIL_INTERVAL_SECS}"
+zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3
-# Override the default umask to restrict access to the msgbody tmpfile.
umask 077
+pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}"
+host_str=" on $(hostname)"
+email_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}"
+email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+{
+ echo "ZFS has posted the following event:"
+ echo
+ echo " eid: ${ZEVENT_EID}"
+ echo " class: ${ZEVENT_SUBCLASS}"
+ echo " host: $(hostname)"
+ echo " time: ${ZEVENT_TIME_STRING}"
-SUBJECT="ZFS ${ZEVENT_SUBCLASS} event"
-test -n "${ZEVENT_POOL}" && SUBJECT="${SUBJECT} for ${ZEVENT_POOL}"
-SUBJECT="${SUBJECT} on `hostname`"
+ if [ -n "${ZEVENT_VDEV_PATH}" ]; then
+ echo " vpath: ${ZEVENT_VDEV_PATH}"
+ [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
+ fi
-MSGBODY="${TMPDIR:=/tmp}/`basename \"$0\"`.$$"
-{
- echo "A ZFS ${ZEVENT_SUBCLASS} event has been posted:"
- echo
- echo " eid: ${ZEVENT_EID}"
- echo " host: `hostname`"
- echo " time: ${ZEVENT_TIME_STRING}"
- test -n "${ZEVENT_VDEV_TYPE}" -a -n "${ZEVENT_VDEV_PATH}" && \
- echo " vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}"
- test -n "${ZEVENT_POOL}" -a -x "${ZPOOL}" && \
- "${ZPOOL}" status "${ZEVENT_POOL}"
-} > "${MSGBODY}"
-
-test -f "${MSGBODY}" && "${MAIL}" -s "${SUBJECT}" "${ZED_EMAIL}" < "${MSGBODY}"
-MAIL_STATUS=$?
-rm -f "${MSGBODY}"
-
-if test "${MAIL_STATUS}" -ne 0; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
- exit 1
-fi
+ [ -n "${ZEVENT_POOL}" ] && [ -x "${ZPOOL}" ] \
+ && "${ZPOOL}" status "${ZEVENT_POOL}"
+} > "${email_pathname}"
+
+mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}"
+mail_status=$?
+
+if [ "${mail_status}" -ne 0 ]; then
+ zed_log_msg "mail exit=${mail_status}"
+ exit 1
+fi
+rm -f "${email_pathname}"
exit 0
diff --git a/cmd/zed/zed.d/io-email.sh b/cmd/zed/zed.d/io-email.sh
index 9edbe6670..1854b1593 100755
--- a/cmd/zed/zed.d/io-email.sh
+++ b/cmd/zed/zed.d/io-email.sh
@@ -1,86 +1,57 @@
#!/bin/sh
#
-# Send email to ZED_EMAIL in response to a CHECKSUM or IO zevent.
-# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given
-# class/pool/vdev combination. This protects against spamming the recipient
-# should multiple events occur together in time for the same pool/device.
+# Send email to ZED_EMAIL in response to a CHECKSUM or IO error.
+#
+# Only one email per ZED_EMAIL_INTERVAL_SECS will be sent for a given
+# class/pool/vdev combination. This protects against spamming the recipient
+# should multiple events occur together in time for the same pool/device.
+#
# Exit codes:
# 0: email sent
# 1: email failed
-# 2: email suppressed
-# 3: missing executable
-# 4: unsupported event class
-# 5: internal error
-# State File Format:
-# POOL;VDEV_PATH;TIME_OF_LAST_EMAIL
-#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
+# 2: email not configured
+# 3: email suppressed
+# 9: internal error
-test -n "${ZEVENT_POOL}" || exit 5
-test -n "${ZEVENT_SUBCLASS}" || exit 5
-test -n "${ZEVENT_VDEV_PATH}" || exit 5
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-if test "${ZEVENT_SUBCLASS}" != "checksum" \
- -a "${ZEVENT_SUBCLASS}" != "io"; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\"
- exit 4
-fi
+[ -n "${ZED_EMAIL}" ] || exit 2
-# Only send email if ZED_EMAIL has been configured.
-test -n "${ZED_EMAIL}" || exit 2
+[ -n "${ZEVENT_POOL}" ] || exit 9
+[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
+[ -n "${ZEVENT_VDEV_PATH}" ] || exit 9
-# Ensure requisite executables are installed.
-if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" not installed
- exit 3
+if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \
+ && [ "${ZEVENT_SUBCLASS}" != "io" ]; then
+ zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
+ exit 9
fi
-NAME="zed.${ZEVENT_SUBCLASS}.email"
-LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock"
-STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state"
+zed_check_cmd "mail" || exit 9
-# Obtain lock to ensure mutual exclusion for accessing state.
-exec 8> "${LOCKFILE}"
-flock -x 8
+zed_rate_limit "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${ZEVENT_SUBCLASS};email" \
+ || exit 3
-# Query state for last time email was sent for this pool/vdev.
-TIME_NOW=`date +%s`
-TIME_LAST=`egrep "^${ZEVENT_POOL};${ZEVENT_VDEV_PATH};" "${STATEFILE}" \
- 2>/dev/null | cut -d ";" -f3`
-if test -n "${TIME_LAST}"; then
- TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"`
- if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then
- exit 2
- fi
-fi
-
-"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \
- "${ZED_EMAIL}" <<EOF
+umask 077
+email_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
+email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+cat > "${email_pathname}" <<EOF
A ZFS ${ZEVENT_SUBCLASS} error has been detected:
eid: ${ZEVENT_EID}
- host: `hostname`
+ host: $(hostname)
time: ${ZEVENT_TIME_STRING}
pool: ${ZEVENT_POOL}
vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}
EOF
-MAIL_STATUS=$?
-# Update state.
-egrep -v "^${ZEVENT_POOL};${ZEVENT_VDEV_PATH};" "${STATEFILE}" \
- 2>/dev/null > "${STATEFILE}.$$"
-echo "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${TIME_NOW}" >> "${STATEFILE}.$$"
-mv -f "${STATEFILE}.$$" "${STATEFILE}"
+mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}"
+mail_status=$?
-if test "${MAIL_STATUS}" -ne 0; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
- exit 1
+if [ "${mail_status}" -ne 0 ]; then
+ zed_log_msg "mail exit=${mail_status}"
+ exit 1
fi
-
+rm -f "${email_pathname}"
exit 0
diff --git a/cmd/zed/zed.d/io-spare.sh b/cmd/zed/zed.d/io-spare.sh
index b64b2a9f1..9667dedcb 100755
--- a/cmd/zed/zed.d/io-spare.sh
+++ b/cmd/zed/zed.d/io-spare.sh
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Replace a device with a hot spare in response to IO or checksum errors.
+# Replace a device with a hot spare in response to IO or CHECKSUM errors.
# The following actions will be performed automatically when the number
# of errors exceed the limit set by ZED_SPARE_ON_IO_ERRORS or
# ZED_SPARE_ON_CHECKSUM_ERRORS.
@@ -21,106 +21,171 @@
# the majority of the expected hot spare functionality.
#
# Exit codes:
-# 0: replaced by hot spare
-# 1: no hot spare device available
-# 2: hot sparing disabled
-# 3: already faulted or degraded
-# 4: unsupported event class
-# 5: internal error
+# 0: hot spare replacement successful
+# 1: hot spare device not available
+# 2: hot sparing disabled or threshold not reached
+# 3: device already faulted or degraded
+# 9: internal error
+
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+# Disabled by default. Enable in the zed.rc file.
+: "${ZED_SPARE_ON_CHECKSUM_ERRORS:=0}"
+: "${ZED_SPARE_ON_IO_ERRORS:=0}"
+
+
+# query_vdev_status (pool, vdev)
+#
+# Given a [pool] and [vdev], return the matching vdev path & status on stdout.
+#
+# Warning: This function does not handle the case of [pool] or [vdev]
+# containing whitespace. Beware of ShellCheck SC2046. Caveat emptor.
+#
+# Arguments
+# pool: pool name
+# vdev: virtual device name
+#
+# StdOut
+# arg1: vdev pathname
+# arg2: vdev status
#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
-
-test -n "${ZEVENT_POOL}" || exit 5
-test -n "${ZEVENT_SUBCLASS}" || exit 5
-test -n "${ZEVENT_VDEV_PATH}" || exit 5
-test -n "${ZEVENT_VDEV_GUID}" || exit 5
-
-# Defaults to disabled, enable in the zed.rc file.
-ZED_SPARE_ON_IO_ERRORS=${ZED_SPARE_ON_IO_ERRORS:-0}
-ZED_SPARE_ON_CHECKSUM_ERRORS=${ZED_SPARE_ON_CHECKSUM_ERRORS:-0}
-
-if [ ${ZED_SPARE_ON_IO_ERRORS} -eq 0 -a \
- ${ZED_SPARE_ON_CHECKSUM_ERRORS} -eq 0 ]; then
- exit 2
-fi
-
-# A lock file is used to serialize execution.
-ZED_LOCKDIR=${ZED_LOCKDIR:-/var/lock}
-LOCKFILE="${ZED_LOCKDIR}/zed.spare.lock"
-
-exec 8> "${LOCKFILE}"
-flock -x 8
-
-# Given a <pool> and <device> return the status, (ONLINE, FAULTED, etc...).
-vdev_status() {
- local POOL=$1
- local VDEV=`basename $2`
- local T=' ' # tab character since '\t' isn't portable
-
- ${ZPOOL} status ${POOL} | sed -n -e \
- "s,^[ $T]*\(.*$VDEV\(-part[0-9]\+\)\?\)[ $T]*\([A-Z]\+\).*,\1 \3,p"
- return 0
+query_vdev_status()
+{
+ local pool="$1"
+ local vdev="$2"
+ local t
+
+ vdev="$(basename -- "${vdev}")"
+ ([ -n "${pool}" ] && [ -n "${vdev}" ]) || return
+ t="$(printf '\t')"
+
+ "${ZPOOL}" status "${pool}" 2>/dev/null | sed -n -e \
+ "s,^[ $t]*\(.*${vdev}\(-part[0-9]\+\)\?\)[ $t]*\([A-Z]\+\).*,\1 \3,p" \
+ | tail -1
}
-# Fault devices after N I/O errors.
-if [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.io" ]; then
- ERRORS=`expr ${ZEVENT_VDEV_READ_ERRORS} + ${ZEVENT_VDEV_WRITE_ERRORS}`
-
- if [ ${ZED_SPARE_ON_IO_ERRORS} -gt 0 -a \
- ${ERRORS} -ge ${ZED_SPARE_ON_IO_ERRORS} ]; then
- ACTION="fault"
- fi
-# Degrade devices after N checksum errors.
-elif [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.checksum" ]; then
- ERRORS=${ZEVENT_VDEV_CKSUM_ERRORS}
-
- if [ ${ZED_SPARE_ON_CHECKSUM_ERRORS} -gt 0 -a \
- ${ERRORS} -ge ${ZED_SPARE_ON_CHECKSUM_ERRORS} ]; then
- ACTION="degrade"
- fi
-else
- ACTION=
-fi
-
-if [ -n "${ACTION}" ]; then
-
- # Device is already FAULTED or DEGRADED
- set -- `vdev_status ${ZEVENT_POOL} ${ZEVENT_VDEV_PATH}`
- ZEVENT_VDEV_PATH_FOUND=$1
- STATUS=$2
- if [ "${STATUS}" = "FAULTED" -o "${STATUS}" = "DEGRADED" ]; then
- exit 3
- fi
-
- # Step 1) FAULT or DEGRADE the device
- #
- ${ZINJECT} -d ${ZEVENT_VDEV_GUID} -A ${ACTION} ${ZEVENT_POOL}
-
- # Step 2) Set the SES fault beacon.
- #
- # XXX: Set the 'fault' or 'ident' beacon for the device. This can
- # be done through the sg_ses utility, the only hard part is to map
- # the sd device to its corresponding enclosure and slot. We may
- # be able to leverage the existing vdev_id scripts for this.
- #
- # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3
- # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3
-
- # Step 3) Replace the device with a hot spare.
- #
- # Round robin through the spares selecting those which are available.
- #
- for SPARE in ${ZEVENT_VDEV_SPARE_PATHS}; do
- set -- `vdev_status ${ZEVENT_POOL} ${SPARE}`
- SPARE_VDEV_FOUND=$1
- STATUS=$2
- if [ "${STATUS}" = "AVAIL" ]; then
- ${ZPOOL} replace ${ZEVENT_POOL} \
- ${ZEVENT_VDEV_GUID} ${SPARE_VDEV_FOUND} && exit 0
- fi
- done
-
- exit 1
-fi
-
-exit 4
+
+# main
+#
+# Arguments
+# none
+#
+# Return
+# see above
+#
+main()
+{
+ local num_errors
+ local action
+ local lockfile
+ local vdev_path
+ local vdev_status
+ local spare
+ local zpool_err
+ local zpool_rv
+ local rv
+
+ # Avoid hot-sparing a hot-spare.
+ #
+ # Note: ZEVENT_VDEV_PATH is not defined for ZEVENT_VDEV_TYPE=spare.
+ #
+ [ "${ZEVENT_VDEV_TYPE}" = "spare" ] && exit 2
+
+ [ -n "${ZEVENT_POOL}" ] || exit 9
+ [ -n "${ZEVENT_VDEV_GUID}" ] || exit 9
+ [ -n "${ZEVENT_VDEV_PATH}" ] || exit 9
+
+ zed_check_cmd "${ZPOOL}" "${ZINJECT}" || exit 9
+
+ # Fault the device after a given number of I/O errors.
+ #
+ if [ "${ZEVENT_SUBCLASS}" = "io" ]; then
+ if [ "${ZED_SPARE_ON_IO_ERRORS}" -gt 0 ]; then
+ num_errors=$((ZEVENT_VDEV_READ_ERRORS + ZEVENT_VDEV_WRITE_ERRORS))
+ [ "${num_errors}" -ge "${ZED_SPARE_ON_IO_ERRORS}" ] \
+ && action="fault"
+ fi 2>/dev/null
+
+ # Degrade the device after a given number of checksum errors.
+ #
+ elif [ "${ZEVENT_SUBCLASS}" = "checksum" ]; then
+ if [ "${ZED_SPARE_ON_CHECKSUM_ERRORS}" -gt 0 ]; then
+ num_errors="${ZEVENT_VDEV_CKSUM_ERRORS}"
+ [ "${num_errors}" -ge "${ZED_SPARE_ON_CHECKSUM_ERRORS}" ] \
+ && action="degrade"
+ fi 2>/dev/null
+
+ else
+ zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
+ exit 9
+ fi
+
+ # Error threshold not reached.
+ #
+ if [ -z "${action}" ]; then
+ exit 2
+ fi
+
+ lockfile="zed.spare.lock"
+ zed_lock "${lockfile}"
+
+ # shellcheck disable=SC2046
+ set -- $(query_vdev_status "${ZEVENT_POOL}" "${ZEVENT_VDEV_PATH}")
+ vdev_path="$1"
+ vdev_status="$2"
+
+ # Device is already FAULTED or DEGRADED.
+ #
+ if [ "${vdev_status}" = "FAULTED" ] \
+ || [ "${vdev_status}" = "DEGRADED" ]; then
+ rv=3
+
+ else
+ rv=1
+
+ # 1) FAULT or DEGRADE the device.
+ #
+ "${ZINJECT}" -d "${ZEVENT_VDEV_GUID}" -A "${action}" "${ZEVENT_POOL}"
+
+ # 2) Set the SES fault beacon.
+ #
+ # TODO: Set the 'fault' or 'ident' beacon for the device. This can
+ # be done through the sg_ses utility. The only hard part is to map
+ # the sd device to its corresponding enclosure and slot. We may
+ # be able to leverage the existing vdev_id scripts for this.
+ #
+ # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3
+ # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3
+
+ # 3) Replace the device with a hot spare.
+ #
+ # Round-robin through the spares trying those that are available.
+ #
+ for spare in ${ZEVENT_VDEV_SPARE_PATHS}; do
+
+ # shellcheck disable=SC2046
+ set -- $(query_vdev_status "${ZEVENT_POOL}" "${spare}")
+ vdev_path="$1"
+ vdev_status="$2"
+
+ [ "${vdev_status}" = "AVAIL" ] || continue
+
+ zpool_err="$("${ZPOOL}" replace "${ZEVENT_POOL}" \
+ "${ZEVENT_VDEV_GUID}" "${vdev_path}" 2>&1)"; zpool_rv=$?
+
+ if [ "${zpool_rv}" -ne 0 ]; then
+ [ -n "${zpool_err}" ] && zed_log_err "zpool ${zpool_err}"
+ else
+ rv=0
+ break
+ fi
+ done
+ fi
+
+ zed_unlock "${lockfile}"
+ exit "${rv}"
+}
+
+
+main "$@"
diff --git a/cmd/zed/zed.d/scrub.finish-email.sh b/cmd/zed/zed.d/scrub.finish-email.sh
index d92ccfea1..4a8155caf 100755
--- a/cmd/zed/zed.d/scrub.finish-email.sh
+++ b/cmd/zed/zed.d/scrub.finish-email.sh
@@ -1,73 +1,63 @@
#!/bin/sh
#
# Send email to ZED_EMAIL in response to a RESILVER.FINISH or SCRUB.FINISH.
-# By default, "zpool status" output will only be included in the email for
-# a scrub.finish zevent if the pool is not healthy; to always include its
-# output, set ZED_EMAIL_VERBOSE=1.
+#
+# By default, "zpool status" output will only be included for a scrub.finish
+# zevent if the pool is not healthy; to always include its output, set
+# ZED_EMAIL_VERBOSE=1.
+#
# Exit codes:
# 0: email sent
# 1: email failed
-# 2: email suppressed
-# 3: missing executable
-# 4: unsupported event class
-# 5: internal error
-#
-test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
+# 2: email not configured
+# 3: email suppressed
+# 9: internal error
-test -n "${ZEVENT_POOL}" || exit 5
-test -n "${ZEVENT_SUBCLASS}" || exit 5
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
-if test "${ZEVENT_SUBCLASS}" = "resilver.finish"; then
- ACTION="resilvering"
-elif test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then
- ACTION="scrubbing"
-else
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\"
- exit 4
-fi
+[ -n "${ZED_EMAIL}" ] || exit 2
-# Only send email if ZED_EMAIL has been configured.
-test -n "${ZED_EMAIL}" || exit 2
+[ -n "${ZEVENT_POOL}" ] || exit 9
+[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
-# Ensure requisite executables are installed.
-if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" not installed
- exit 3
-fi
-if ! test -x "${ZPOOL}"; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${ZPOOL}" not installed
- exit 3
+if [ "${ZEVENT_SUBCLASS}" = "resilver.finish" ]; then
+ action="resilver"
+elif [ "${ZEVENT_SUBCLASS}" = "scrub.finish" ]; then
+ action="scrub"
+else
+ zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
+ exit 9
fi
+zed_check_cmd "mail" "${ZPOOL}" || exit 9
+
# For scrub, suppress email if pool is healthy and verbosity is not enabled.
-if test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then
- HEALTHY=`"${ZPOOL}" status -x "${ZEVENT_POOL}" | \
- grep "'${ZEVENT_POOL}' is healthy"`
- test -n "${HEALTHY}" -a "${ZED_EMAIL_VERBOSE:=0}" = 0 && exit 2
+#
+if [ "${ZEVENT_SUBCLASS}" = "scrub.finish" ]; then
+ healthy="$("${ZPOOL}" status -x "${ZEVENT_POOL}" \
+ | grep "'${ZEVENT_POOL}' is healthy")"
+ [ -n "${healthy}" ] && [ "${ZED_EMAIL_VERBOSE}" -eq 0 ] && exit 3
fi
-"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on `hostname`" \
- "${ZED_EMAIL}" <<EOF
-A ZFS pool has finished ${ACTION}:
+umask 077
+email_subject="ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on $(hostname)"
+email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
+cat > "${email_pathname}" <<EOF
+ZFS has finished a ${action}:
eid: ${ZEVENT_EID}
- host: `hostname`
+ host: $(hostname)
time: ${ZEVENT_TIME_STRING}
-`"${ZPOOL}" status "${ZEVENT_POOL}"`
+$("${ZPOOL}" status "${ZEVENT_POOL}")
EOF
-MAIL_STATUS=$?
-if test "${MAIL_STATUS}" -ne 0; then
- logger -t "${ZED_SYSLOG_TAG:=zed}" \
- -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
- `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
- exit 1
-fi
+mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}"
+mail_status=$?
+if [ "${mail_status}" -ne 0 ]; then
+ zed_log_msg "mail exit=${mail_status}"
+ exit 1
+fi
+rm -f "${email_pathname}"
exit 0
diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh
new file mode 100644
index 000000000..b42911b2e
--- /dev/null
+++ b/cmd/zed/zed.d/zed-functions.sh
@@ -0,0 +1,230 @@
+# zed-functions.sh
+#
+# ZED helper functions for use in ZEDLETs
+
+
+# Variable Defaults
+#
+: "${ZED_EMAIL_INTERVAL_SECS:=3600}"
+: "${ZED_EMAIL_VERBOSE:=0}"
+: "${ZED_LOCKDIR:="/var/lock"}"
+: "${ZED_RUNDIR:="/var/run"}"
+: "${ZED_SYSLOG_PRIORITY:="daemon.notice"}"
+: "${ZED_SYSLOG_TAG:="zed"}"
+
+ZED_FLOCK_FD=8
+
+
+# zed_check_cmd (cmd, ...)
+#
+# For each argument given, search PATH for the executable command [cmd].
+# Log a message if [cmd] is not found.
+#
+# Arguments
+# cmd: name of executable command for which to search
+#
+# Return
+# 0 if all commands are found in PATH and are executable
+# n for a count of the command executables that are not found
+#
+zed_check_cmd()
+{
+ local cmd
+ local rv=0
+
+ for cmd; do
+ if ! command -v "${cmd}" >/dev/null 2>&1; then
+ zed_log_err "\"${cmd}\" not installed"
+ rv=$((rv + 1))
+ fi
+ done
+ return "${rv}"
+}
+
+
+# zed_log_msg (msg, ...)
+#
+# Write all argument strings to the system log.
+#
+# Globals
+# ZED_SYSLOG_PRIORITY
+# ZED_SYSLOG_TAG
+#
+# Return
+# nothing
+#
+zed_log_msg()
+{
+ logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "$@"
+}
+
+
+# zed_log_err (msg, ...)
+#
+# Write an error message to the system log. This message will contain the
+# script name, EID, and all argument strings.
+#
+# Globals
+# ZED_SYSLOG_PRIORITY
+# ZED_SYSLOG_TAG
+# ZEVENT_EID
+#
+# Return
+# nothing
+#
+zed_log_err()
+{
+ logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "error:" \
+ "$(basename -- "$0"):" "${ZEVENT_EID:+"eid=${ZEVENT_EID}:"}" "$@"
+}
+
+
+# zed_lock (lockfile, [fd])
+#
+# Obtain an exclusive (write) lock on [lockfile]. If the lock cannot be
+# immediately acquired, wait until it becomes available.
+#
+# Every zed_lock() must be paired with a corresponding zed_unlock().
+#
+# By default, flock-style locks associate the lockfile with file descriptor 8.
+# The bash manpage warns that file descriptors >9 should be used with care as
+# they may conflict with file descriptors used internally by the shell. File
+# descriptor 9 is reserved for zed_rate_limit(). If concurrent locks are held
+# within the same process, they must use different file descriptors (preferably
+# decrementing from 8); otherwise, obtaining a new lock with a given file
+# descriptor will release the previous lock associated with that descriptor.
+#
+# Arguments
+# lockfile: pathname of the lock file; the lock will be stored in
+# ZED_LOCKDIR unless the pathname contains a "/".
+# fd: integer for the file descriptor used by flock (OPTIONAL unless holding
+# concurrent locks)
+#
+# Globals
+# ZED_FLOCK_FD
+# ZED_LOCKDIR
+#
+# Return
+# nothing
+#
+zed_lock()
+{
+ local lockfile="$1"
+ local fd="${2:-${ZED_FLOCK_FD}}"
+ local umask_bak
+ local err
+
+ [ -n "${lockfile}" ] || return
+ if ! expr "${lockfile}" : '.*/' >/dev/null 2>&1; then
+ lockfile="${ZED_LOCKDIR}/${lockfile}"
+ fi
+
+ umask_bak="$(umask)"
+ umask 077
+
+ # Obtain a lock on the file bound to the given file descriptor.
+ #
+ eval "exec ${fd}> '${lockfile}'"
+ err="$(flock --exclusive "${fd}" 2>&1)"
+ if [ $? -ne 0 ]; then
+ zed_log_err "failed to lock \"${lockfile}\": ${err}"
+ fi
+
+ umask "${umask_bak}"
+}
+
+
+# zed_unlock (lockfile, [fd])
+#
+# Release the lock on [lockfile].
+#
+# Arguments
+# lockfile: pathname of the lock file
+# fd: integer for the file descriptor used by flock (must match the file
+# descriptor passed to the zed_lock function call)
+#
+# Globals
+# ZED_FLOCK_FD
+# ZED_LOCKDIR
+#
+# Return
+# nothing
+#
+zed_unlock()
+{
+ local lockfile="$1"
+ local fd="${2:-${ZED_FLOCK_FD}}"
+ local err
+
+ [ -n "${lockfile}" ] || return
+ if ! expr "${lockfile}" : '.*/' >/dev/null 2>&1; then
+ lockfile="${ZED_LOCKDIR}/${lockfile}"
+ fi
+
+ # Release the lock and close the file descriptor.
+ #
+ err="$(flock --unlock "${fd}" 2>&1)"
+ if [ $? -ne 0 ]; then
+ zed_log_err "failed to unlock \"${lockfile}\": ${err}"
+ fi
+ eval "exec ${fd}>&-"
+}
+
+
+# zed_rate_limit (tag, [interval])
+#
+# Check whether an event of a given type [tag] has already occurred within the
+# last [interval] seconds.
+#
+# This function obtains a lock on the statefile using file descriptor 9.
+#
+# Arguments
+# tag: arbitrary string for grouping related events to rate-limit
+# interval: time interval in seconds (OPTIONAL)
+#
+# Globals
+# ZED_EMAIL_INTERVAL_SECS
+# ZED_RUNDIR
+#
+# Return
+# 0 if the event should be processed
+# 1 if the event should be dropped
+#
+# State File Format
+# time;tag
+#
+zed_rate_limit()
+{
+ local tag="$1"
+ local interval="${2:-${ZED_EMAIL_INTERVAL_SECS}}"
+ local lockfile="zed.zedlet.state.lock"
+ local lockfile_fd=9
+ local statefile="${ZED_RUNDIR}/zed.zedlet.state"
+ local time_now
+ local time_prev
+ local umask_bak
+ local rv=0
+
+ [ -n "${tag}" ] || return 0
+
+ zed_lock "${lockfile}" "${lockfile_fd}"
+ time_now="$(date +%s)"
+ time_prev="$(egrep "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \
+ | tail -1 | cut -d\; -f1)"
+
+ if [ -n "${time_prev}" ] \
+ && [ "$((time_now - time_prev))" -lt "${interval}" ]; then
+ rv=1
+ else
+ umask_bak="$(umask)"
+ umask 077
+ egrep -v "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \
+ > "${statefile}.$$"
+ echo "${time_now};${tag}" >> "${statefile}.$$"
+ mv -f "${statefile}.$$" "${statefile}"
+ umask "${umask_bak}"
+ fi
+
+ zed_unlock "${lockfile}" "${lockfile_fd}"
+ return "${rv}"
+}
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index 69989f953..4c53207d7 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -1,34 +1,60 @@
+##
# zed.rc
+##
+##
# Absolute path to the debug output file.
+#
#ZED_DEBUG_LOG="/tmp/zed.debug.log"
+##
# Email address of the zpool administrator.
# Email will only be sent if ZED_EMAIL is defined.
+# Disabled by default; uncomment to enable.
+#
#ZED_EMAIL="root"
+##
+# Minimum number of seconds between emails for a similar event.
+#
+#ZED_EMAIL_INTERVAL_SECS=3600
+
+##
# Email verbosity.
# If set to 0, suppress email if the pool is healthy.
# If set to 1, send email regardless of pool health.
+#
#ZED_EMAIL_VERBOSE=0
-# Minimum number of seconds between emails sent for a similar event.
-#ZED_EMAIL_INTERVAL_SECS="3600"
-
+##
# Default directory for zed lock files.
+#
#ZED_LOCKDIR="/var/lock"
+##
# Default directory for zed state files.
+#
#ZED_RUNDIR="/var/run"
-# The syslog priority (eg, specified as a "facility.level" pair).
+##
+# Replace a device with a hot spare after N checksum errors are detected.
+# Disabled by default; uncomment to enable.
+#
+#ZED_SPARE_ON_CHECKSUM_ERRORS=10
+
+##
+# Replace a device with a hot spare after N I/O errors are detected.
+# Disabled by default; uncomment to enable.
+#
+#ZED_SPARE_ON_IO_ERRORS=1
+
+##
+# The syslog priority (e.g., specified as a "facility.level" pair).
+#
#ZED_SYSLOG_PRIORITY="daemon.notice"
+##
# The syslog tag for marking zed events.
+#
#ZED_SYSLOG_TAG="zed"
-# Replace a device with a hot spare after N I/O errors are detected.
-#ZED_SPARE_ON_IO_ERRORS=1
-
-# Replace a device with a hot spare after N checksum errors are detected.
-#ZED_SPARE_ON_CHECKSUM_ERRORS=10