aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/dsl_scan.c9
-rw-r--r--module/zfs/fm.c844
-rw-r--r--module/zfs/include/sys/fm/fs/zfs.h19
-rw-r--r--module/zfs/include/sys/fm/protocol.h10
-rw-r--r--module/zfs/include/sys/fm/util.h42
-rw-r--r--module/zfs/include/sys/zfs_context.h7
-rw-r--r--module/zfs/include/sys/zfs_ioctl.h3
-rw-r--r--module/zfs/spa.c76
-rw-r--r--module/zfs/spa_config.c2
-rw-r--r--module/zfs/spa_misc.c3
-rw-r--r--module/zfs/vdev.c2
-rw-r--r--module/zfs/zfs_fm.c80
-rw-r--r--module/zfs/zfs_ioctl.c69
13 files changed, 678 insertions, 488 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 525832b45..c37a8224c 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -50,9 +50,7 @@
typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
-static scan_cb_t dsl_scan_defrag_cb;
static scan_cb_t dsl_scan_scrub_cb;
-static scan_cb_t dsl_scan_remove_cb;
static dsl_syncfunc_t dsl_scan_cancel_sync;
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
@@ -194,9 +192,9 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
if (vdev_resilver_needed(spa->spa_root_vdev,
&scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
- spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
+ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START);
} else {
- spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
+ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START);
}
spa->spa_scrub_started = B_TRUE;
@@ -297,7 +295,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
if (complete) {
spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ?
- ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
+ FM_EREPORT_ZFS_RESILVER_FINISH :
+ FM_EREPORT_ZFS_SCRUB_FINISH);
}
spa_errlog_rotate(spa);
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index 4efcff4f4..67d0c1a6e 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -53,51 +53,46 @@
#include <sys/types.h>
#include <sys/time.h>
-#include <sys/sysevent.h>
-#include <sys/sysevent_impl.h>
+#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
-#include <sys/cpuvar.h>
#include <sys/sysmacros.h>
-#include <sys/systm.h>
-#include <sys/ddifm.h>
-#include <sys/ddifm_impl.h>
-#include <sys/spl.h>
-#include <sys/dumphdr.h>
#include <sys/compress.h>
-#include <sys/cpuvar.h>
-#include <sys/console.h>
-#include <sys/panic.h>
-#include <sys/kobj.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
-#include <sys/sysevent/eventdefs.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
+#include <sys/kstat.h>
+#include <sys/zfs_context.h>
+#ifdef _KERNEL
+#include <sys/atomic.h>
+#include <sys/condvar.h>
+#include <sys/cpuvar.h>
+#include <sys/systm.h>
+#include <sys/dumphdr.h>
+#include <sys/cpuvar.h>
+#include <sys/console.h>
+#include <sys/kobj.h>
+#include <sys/time.h>
+#include <sys/zfs_ioctl.h>
-/*
- * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These
- * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
- */
-static const char *fm_url = "http://www.sun.com/msg";
-static const char *fm_msgid = "SUNOS-8000-0G";
-static char *volatile fm_panicstr = NULL;
+int zevent_len_max = 0;
+int zevent_cols = 80;
+int zevent_console = 0;
-errorq_t *ereport_errorq;
-void *ereport_dumpbuf;
-size_t ereport_dumplen;
+static int zevent_len_cur = 0;
+static int zevent_waiters = 0;
+static int zevent_flags = 0;
-static uint_t ereport_chanlen = ERPT_EVCH_MAX;
-static evchan_t *ereport_chan = NULL;
-static ulong_t ereport_qlen = 0;
-static size_t ereport_size = 0;
-static int ereport_cols = 80;
+static kmutex_t zevent_lock;
+static list_t zevent_list;
+static kcondvar_t zevent_cv;
+#endif /* _KERNEL */
extern void fastreboot_disable_highpil(void);
/*
- * Common fault management kstats to record ereport generation
- * failures
+ * Common fault management kstats to record event generation failures
*/
struct erpt_kstat {
@@ -114,57 +109,9 @@ static struct erpt_kstat erpt_kstat_data = {
{ "payload-set-failed", KSTAT_DATA_UINT64 }
};
-/*ARGSUSED*/
-static void
-fm_drain(void *private, void *data, errorq_elem_t *eep)
-{
- nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep);
-
- if (!panicstr)
- (void) fm_ereport_post(nvl, EVCH_TRYHARD);
- else
- fm_nvprint(nvl);
-}
-
-void
-fm_init(void)
-{
- kstat_t *ksp;
+kstat_t *fm_ksp;
- (void) sysevent_evc_bind(FM_ERROR_CHAN,
- &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND);
-
- (void) sysevent_evc_control(ereport_chan,
- EVCH_SET_CHAN_LEN, &ereport_chanlen);
-
- if (ereport_qlen == 0)
- ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
-
- if (ereport_size == 0)
- ereport_size = ERPT_DATA_SZ;
-
- ereport_errorq = errorq_nvcreate("fm_ereport_queue",
- (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size,
- FM_ERR_PIL, ERRORQ_VITAL);
- if (ereport_errorq == NULL)
- panic("failed to create required ereport error queue");
-
- ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP);
- ereport_dumplen = ereport_size;
-
- /* Initialize ereport allocation and generation kstats */
- ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED,
- sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
- KSTAT_FLAG_VIRTUAL);
-
- if (ksp != NULL) {
- ksp->ks_data = &erpt_kstat_data;
- kstat_install(ksp);
- } else {
- cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
-
- }
-}
+#ifdef _KERNEL
/*
* Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
@@ -183,7 +130,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...)
va_end(ap);
if (c + width >= cols) {
- console_printf("\n\r");
+ console_printf("\n");
c = 0;
if (format[0] != ' ' && depth > 0) {
console_printf(" ");
@@ -245,54 +192,54 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
case DATA_TYPE_BYTE:
(void) nvpair_value_byte(nvp, &i8);
- c = fm_printf(d + 1, c, cols, "%x", i8);
+ c = fm_printf(d + 1, c, cols, "0x%x", i8);
break;
case DATA_TYPE_INT8:
(void) nvpair_value_int8(nvp, (void *)&i8);
- c = fm_printf(d + 1, c, cols, "%x", i8);
+ c = fm_printf(d + 1, c, cols, "0x%x", i8);
break;
case DATA_TYPE_UINT8:
(void) nvpair_value_uint8(nvp, &i8);
- c = fm_printf(d + 1, c, cols, "%x", i8);
+ c = fm_printf(d + 1, c, cols, "0x%x", i8);
break;
case DATA_TYPE_INT16:
(void) nvpair_value_int16(nvp, (void *)&i16);
- c = fm_printf(d + 1, c, cols, "%x", i16);
+ c = fm_printf(d + 1, c, cols, "0x%x", i16);
break;
case DATA_TYPE_UINT16:
(void) nvpair_value_uint16(nvp, &i16);
- c = fm_printf(d + 1, c, cols, "%x", i16);
+ c = fm_printf(d + 1, c, cols, "0x%x", i16);
break;
case DATA_TYPE_INT32:
(void) nvpair_value_int32(nvp, (void *)&i32);
- c = fm_printf(d + 1, c, cols, "%x", i32);
+ c = fm_printf(d + 1, c, cols, "0x%x", i32);
break;
case DATA_TYPE_UINT32:
(void) nvpair_value_uint32(nvp, &i32);
- c = fm_printf(d + 1, c, cols, "%x", i32);
+ c = fm_printf(d + 1, c, cols, "0x%x", i32);
break;
case DATA_TYPE_INT64:
(void) nvpair_value_int64(nvp, (void *)&i64);
- c = fm_printf(d + 1, c, cols, "%llx",
+ c = fm_printf(d + 1, c, cols, "0x%llx",
(u_longlong_t)i64);
break;
case DATA_TYPE_UINT64:
(void) nvpair_value_uint64(nvp, &i64);
- c = fm_printf(d + 1, c, cols, "%llx",
+ c = fm_printf(d + 1, c, cols, "0x%llx",
(u_longlong_t)i64);
break;
case DATA_TYPE_HRTIME:
(void) nvpair_value_hrtime(nvp, (void *)&i64);
- c = fm_printf(d + 1, c, cols, "%llx",
+ c = fm_printf(d + 1, c, cols, "0x%llx",
(u_longlong_t)i64);
break;
@@ -322,19 +269,124 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
}
break;
+ case DATA_TYPE_INT8_ARRAY: {
+ int8_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_int8_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_UINT8_ARRAY: {
+ uint8_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_uint8_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_INT16_ARRAY: {
+ int16_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_int16_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_UINT16_ARRAY: {
+ uint16_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_uint16_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_INT32_ARRAY: {
+ int32_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_int32_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_UINT32_ARRAY: {
+ uint32_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_uint32_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_INT64_ARRAY: {
+ int64_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_int64_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_UINT64_ARRAY: {
+ uint64_t *val;
+ uint_t i, nelem;
+
+ c = fm_printf(d + 1, c, cols, "[ ");
+ (void) nvpair_value_uint64_array(nvp, &val, &nelem);
+ for (i = 0; i < nelem; i++)
+ c = fm_printf(d + 1, c, cols, "0x%llx ",
+ (u_longlong_t)val[i]);
+
+ c = fm_printf(d + 1, c, cols, "]");
+ break;
+ }
+
+ case DATA_TYPE_STRING_ARRAY:
case DATA_TYPE_BOOLEAN_ARRAY:
case DATA_TYPE_BYTE_ARRAY:
- case DATA_TYPE_INT8_ARRAY:
- case DATA_TYPE_UINT8_ARRAY:
- case DATA_TYPE_INT16_ARRAY:
- case DATA_TYPE_UINT16_ARRAY:
- case DATA_TYPE_INT32_ARRAY:
- case DATA_TYPE_UINT32_ARRAY:
- case DATA_TYPE_INT64_ARRAY:
- case DATA_TYPE_UINT64_ARRAY:
- case DATA_TYPE_STRING_ARRAY:
c = fm_printf(d + 1, c, cols, "[...]");
break;
+
case DATA_TYPE_UNKNOWN:
c = fm_printf(d + 1, c, cols, "<unknown>");
break;
@@ -350,191 +402,255 @@ fm_nvprint(nvlist_t *nvl)
char *class;
int c = 0;
- console_printf("\r");
+ console_printf("\n");
if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
- c = fm_printf(0, c, ereport_cols, "%s", class);
+ c = fm_printf(0, c, zevent_cols, "%s", class);
- if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0)
+ if (fm_nvprintr(nvl, 0, c, zevent_cols) != 0)
console_printf("\n");
console_printf("\n");
}
-/*
- * Wrapper for panic() that first produces an FMA-style message for admins.
- * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
- * is the one exception to that rule and the only error that gets messaged.
- * This function is intended for use by subsystems that have detected a fatal
- * error and enqueued appropriate ereports and wish to then force a panic.
- */
-/*PRINTFLIKE1*/
+/*
+ * Allocate a zeroed zevent_t and initialize both its queue linkage and
+ * its list of attached zfs_zevent_t readers.  Returns NULL when the
+ * allocation yields no memory.
+ */
+static zevent_t *
+zfs_zevent_alloc(void)
+{
+	zevent_t *zev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
+
+	if (zev != NULL) {
+		list_link_init(&zev->ev_node);
+		list_create(&zev->ev_ze_list, sizeof (zfs_zevent_t),
+		    offsetof(zfs_zevent_t, ze_node));
+	}
+
+	return (zev);
+}
+
+/*
+ * Release a zevent_t.  The cleanup callback stored in the event is handed
+ * the event's nvlist and detector first, then the reader list and the
+ * event itself are torn down.
+ */
+static void
+zfs_zevent_free(zevent_t *ev)
+{
+	/* Run provided cleanup callback; tolerate events stored without one */
+	if (ev->ev_cb != NULL)
+		ev->ev_cb(ev->ev_nvl, ev->ev_detector);
+
+	list_destroy(&ev->ev_ze_list);
+	kmem_free(ev, sizeof (zevent_t));
+}
+
+/*
+ * Unlink 'ev' from the global zevent_list, detach every zfs_zevent_t that
+ * still points at it (bumping each one's dropped counter), and free it.
+ * The caller must hold zevent_lock.
+ */
+static void
+zfs_zevent_drain(zevent_t *ev)
+{
+	zfs_zevent_t *reader;
+
+	ASSERT(MUTEX_HELD(&zevent_lock));
+	list_remove(&zevent_list, ev);
+
+	/* Remove references to this event in all private file data */
+	for (reader = list_head(&ev->ev_ze_list); reader != NULL;
+	    reader = list_head(&ev->ev_ze_list)) {
+		list_remove(&ev->ev_ze_list, reader);
+		reader->ze_zevent = NULL;
+		reader->ze_dropped++;
+	}
+
+	zfs_zevent_free(ev);
+}
+
void
-fm_panic(const char *format, ...)
+zfs_zevent_drain_all(int *count)
{
- va_list ap;
+	zevent_t *head;
- (void) casptr((void *)&fm_panicstr, NULL, (void *)format);
-#if defined(__i386) || defined(__amd64)
- fastreboot_disable_highpil();
-#endif /* __i386 || __amd64 */
- va_start(ap, format);
- vpanic(format, ap);
- va_end(ap);
+	/*
+	 * Empty the whole queue under zevent_lock, then hand back the value
+	 * of the queue length counter and reset that counter to zero.
+	 */
+	mutex_enter(&zevent_lock);
+	for (head = list_head(&zevent_list); head != NULL;
+	    head = list_head(&zevent_list))
+		zfs_zevent_drain(head);
+
+	*count = zevent_len_cur;
+	zevent_len_cur = 0;
+	mutex_exit(&zevent_lock);
}
/*
- * Simply tell the caller if fm_panicstr is set, ie. an fma event has
- * caused the panic. If so, something other than the default panic
- * diagnosis method will diagnose the cause of the panic.
+ * New zevents are inserted at the head. If the maximum queue
+ * length is exceeded a zevent will be drained from the tail.
+ * As part of this any user space processes which currently have
+ * a reference to this zevent_t in their private data will have
+ * this reference set to NULL.
*/
-int
-is_fm_panic()
+static void
+zfs_zevent_insert(zevent_t *ev)
{
- if (fm_panicstr)
- return (1);
+	mutex_enter(&zevent_lock);
+	list_insert_head(&zevent_list, ev);
+	/* Queue already at its limit: evict from the tail instead of growing */
+	if (zevent_len_max <= zevent_len_cur)
+		zfs_zevent_drain(list_tail(&zevent_list));
else
- return (0);
+		zevent_len_cur++;
+
+	mutex_exit(&zevent_lock);
}
/*
- * Print any appropriate FMA banner message before the panic message. This
- * function is called by panicsys() and prints the message for fm_panic().
- * We print the message here so that it comes after the system is quiesced.
- * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
- * The rest of the message is for the console only and not needed in the log,
- * so it is printed using console_printf(). We break it up into multiple
- * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
+ * Post a zevent
*/
void
-fm_banner(void)
+zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
- timespec_t tod;
- hrtime_t now;
+	/*
+	 * Queue 'nvl' as a new zevent and wake any waiters.  'cb' is the
+	 * cleanup callback for 'nvl' and 'detector': it runs when the queued
+	 * event is eventually freed, or right here if the event cannot be
+	 * queued, so the two nvlists are released on every path.
+	 */
+	int64_t tv_array[2];
+	timestruc_t tv;
+	size_t nvl_size = 0;
+	zevent_t *ev;
- if (!fm_panicstr)
- return; /* panic was not initiated by fm_panic(); do nothing */
+	/* Stamp the event with the current time as { seconds, nanoseconds } */
+	gethrestime(&tv);
+	tv_array[0] = tv.tv_sec;
+	tv_array[1] = tv.tv_nsec;
+	if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) {
+		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+		goto fail;
+	}
- if (panicstr) {
- tod = panic_hrestime;
- now = panic_hrtime;
- } else {
- gethrestime(&tod);
- now = gethrtime_waitfree();
+	/* Drop events whose encoded size is zero or exceeds ERPT_DATA_SZ */
+	(void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
+	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
+		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+		goto fail;
}
- cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
- "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
+	if (zevent_console)
+		fm_nvprint(nvl);
- console_printf(
-"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
-"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
- fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now);
+	ev = zfs_zevent_alloc();
+	if (ev == NULL) {
+		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+		goto fail;
+	}
- console_printf(
-"PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
-"SOURCE: %s, REV: %s %s\n",
- platform, utsname.nodename, utsname.sysname,
- utsname.release, utsname.version);
+	ev->ev_nvl = nvl;
+	ev->ev_detector = detector;
+	ev->ev_cb = cb;
+	zfs_zevent_insert(ev);
+	cv_broadcast(&zevent_cv);
+	return;
+
+fail:
+	/*
+	 * The event never reached the queue, so nothing else will ever run
+	 * the cleanup callback for these nvlists; do it now to avoid a leak.
+	 */
+	if (cb != NULL)
+		cb(nvl, detector);
+}
- console_printf(
-"DESC: Errors have been detected that require a reboot to ensure system\n"
-"integrity. See %s/%s for more information.\n",
- fm_url, fm_msgid);
+/*
+ * Look up the ZST_ZEVENT state registered for 'minor' and store it in
+ * '*ze'.  Returns 0 when a state was found and EBADF when the lookup
+ * produced NULL.
+ */
+static int
+zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
+{
+	zfs_zevent_t *state = zfsdev_get_state(minor, ZST_ZEVENT);
+
+	*ze = state;
- console_printf(
-"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
-"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
-"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
+	return (state == NULL ? EBADF : 0);
+}
- console_printf("\n");
+/*
+ * Take a hold on file descriptor 'fd' and resolve it to its zevent state.
+ * On success '*minorp' and '*ze' are filled in and the hold is kept; the
+ * caller drops it with zfs_zevent_fd_rele().  On failure the hold is
+ * dropped here and an errno value is returned (EBADF if 'fd' has no file).
+ */
+int
+zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
+{
+	file_t *filp = getf(fd);
+	int rc;
+
+	if (filp == NULL)
+		return (EBADF);
+
+	*minorp = zfsdev_getminor(filp->f_file);
+	rc = zfs_zevent_minor_to_state(*minorp, ze);
+	if (rc != 0)
+		zfs_zevent_fd_rele(fd);
+
+	return (rc);
+}
+
+/*
+ * Drop the hold on 'fd' that zfs_zevent_fd_hold() obtained through getf().
+ */
+void
+zfs_zevent_fd_rele(int fd)
+{
+	releasef(fd);
}
/*
- * Utility function to write all of the pending ereports to the dump device.
- * This function is called at either normal reboot or panic time, and simply
- * iterates over the in-transit messages in the ereport sysevent channel.
+ * Get the next zevent in the stream and place a copy in 'event'.
*/
-void
-fm_ereport_dump(void)
+int
+zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *dropped)
{
- evchanq_t *chq;
- sysevent_t *sep;
- erpt_dump_t ed;
-
- timespec_t tod;
- hrtime_t now;
- char *buf;
- size_t len;
-
- if (panicstr) {
- tod = panic_hrestime;
- now = panic_hrtime;
+	/*
+	 * Returns 0 with a copy of the next event in '*event' and the
+	 * stream's dropped count in '*dropped', ENOENT when there is no
+	 * further event, or the nvlist_dup() error if the copy fails.  The
+	 * stream position only advances on success.
+	 */
+	zevent_t *ev;
+	int error = 0;	/* must start at 0: the success path never sets it */
+
+	mutex_enter(&zevent_lock);
+	if (ze->ze_zevent == NULL) {
+		/* New stream: start at the tail of the queue */
+		ev = list_tail(&zevent_list);
+		if (ev == NULL) {
+			error = ENOENT;
+			goto out;
+		}
} else {
- if (ereport_errorq != NULL)
- errorq_drain(ereport_errorq);
- gethrestime(&tod);
- now = gethrtime_waitfree();
+		/* Existing stream: continue with the element before ours */
+		ev = list_prev(&zevent_list, ze->ze_zevent);
+		if (ev == NULL) {
+			error = ENOENT;
+			goto out;
+		}
}
- /*
- * In the panic case, sysevent_evc_walk_init() will return NULL.
- */
- if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL &&
- !panicstr)
- return; /* event channel isn't initialized yet */
+	/* Copy first, so a failed copy leaves the stream where it was */
+	error = nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
+	if (error != 0)
+		goto out;
+
+	/* Leave the reader list of the previous element, if there was one */
+	if (ze->ze_zevent != NULL)
+		list_remove(&ze->ze_zevent->ev_ze_list, ze);
+
+	ze->ze_zevent = ev;
+	list_insert_head(&ev->ev_ze_list, ze);
+	*dropped = ze->ze_dropped;
+	ze->ze_dropped = 0;
+out:
+	mutex_exit(&zevent_lock);
- while ((sep = sysevent_evc_walk_step(chq)) != NULL) {
- if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL)
- break;
+	return (error);
+}
+
+/*
+ * Sleep on zevent_cv until it is signalled or the sleep is interrupted.
+ * Returns ESHUTDOWN without sleeping when ZEVENT_SHUTDOWN is already set,
+ * EINTR when issig() reports a signal after waking, and 0 otherwise.
+ * zevent_waiters counts the threads currently inside the sleep.
+ */
+int
+zfs_zevent_wait(zfs_zevent_t *ze)
+{
+	int rc;
+
+	mutex_enter(&zevent_lock);
- ed.ed_magic = ERPT_MAGIC;
- ed.ed_chksum = checksum32(buf, len);
- ed.ed_size = (uint32_t)len;
- ed.ed_pad = 0;
- ed.ed_hrt_nsec = SE_TIME(sep);
- ed.ed_hrt_base = now;
- ed.ed_tod_base.sec = tod.tv_sec;
- ed.ed_tod_base.nsec = tod.tv_nsec;
-
- dumpvp_write(&ed, sizeof (ed));
- dumpvp_write(buf, len);
+	if (zevent_flags & ZEVENT_SHUTDOWN) {
+		mutex_exit(&zevent_lock);
+		return (ESHUTDOWN);
}
- sysevent_evc_walk_fini(chq);
+	zevent_waiters++;
+	cv_wait_interruptible(&zevent_cv, &zevent_lock);
+	rc = issig(JUSTLOOKING) ? EINTR : 0;
+	zevent_waiters--;
+	mutex_exit(&zevent_lock);
+
+	return (rc);
}
-/*
- * Post an error report (ereport) to the sysevent error channel. The error
- * channel must be established with a prior call to sysevent_evc_create()
- * before publication may occur.
- */
void
-fm_ereport_post(nvlist_t *ereport, int evc_flag)
+zfs_zevent_init(zfs_zevent_t **zep)
{
- size_t nvl_size = 0;
- evchan_t *error_chan;
+	/* Allocate a zeroed per-stream state and hand it back through 'zep' */
+	zfs_zevent_t *stream = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
- (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE);
- if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
- atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
- return;
- }
+
+	list_link_init(&stream->ze_node);
+	*zep = stream;
+}
- if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan,
- EVCH_CREAT|EVCH_HOLD_PEND) != 0) {
- atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
- return;
- }
+/*
+ * Tear down a per-stream state: under zevent_lock, detach it from the
+ * reader list of the event it currently references (if any), then free it.
+ */
+void
+zfs_zevent_destroy(zfs_zevent_t *ze)
+{
+	zevent_t *attached;
+
+	mutex_enter(&zevent_lock);
+	attached = ze->ze_zevent;
+	if (attached != NULL)
+		list_remove(&attached->ev_ze_list, ze);
+	mutex_exit(&zevent_lock);
- if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
- SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
- atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
- (void) sysevent_evc_unbind(error_chan);
- return;
- }
- (void) sysevent_evc_unbind(error_chan);
+
+	kmem_free(ze, sizeof (zfs_zevent_t));
}
+#endif /* _KERNEL */
/*
* Wrappers for FM nvlist allocators
@@ -938,6 +1054,105 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
}
}
+/*
+ * Build the hc list of 'fmri' from the name/id pairs already present in
+ * 'bboard' (its FM_FMRI_HC_LIST array) followed by 'npairs' additional
+ * pairs passed as varargs, each a (const char *name, uint32_t id) couple.
+ * If 'snvl' is non-NULL it is added as FM_FMRI_HC_SPECIFIC.
+ *
+ * The combined list is limited to HC_MAXPAIRS entries: a bboard list that
+ * is already longer is rejected, and extra varargs pairs beyond the
+ * remaining room are ignored.  Every failure bumps the fmri_set_failed
+ * kstat; temporary pair nvlists are released on all paths.
+ */
+void
+fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
+    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
+{
+	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
+	nvlist_t *pairs[HC_MAXPAIRS];
+	nvlist_t **hcl;
+	uint_t n, total, i;
+	uint_t created = 0;	/* number of pairs[] slots assigned so far */
+	va_list ap;
+	char *hcname, *hcid;
+
+	if (!fm_fmri_hc_set_common(fmri, version, auth))
+		return;
+
+	/*
+	 * copy the bboard nvpairs to the pairs array
+	 */
+	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
+	    != 0)
+		goto fail;
+
+	/* pairs[] has room for HC_MAXPAIRS entries only */
+	if (n > HC_MAXPAIRS)
+		goto fail;
+
+	for (i = 0; i < n; i++) {
+		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
+		    &hcname) != 0 ||
+		    nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0)
+			goto fail;
+
+		pairs[i] = fm_nvlist_create(nva);
+		created = i + 1;
+		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
+		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0)
+			goto fail;
+	}
+
+	/*
+	 * create the pairs from passed in pairs, clamped to the room that
+	 * is left in pairs[] after the bboard entries
+	 */
+	npairs = MAX(npairs, 0);
+	npairs = MIN(npairs, (int)(HC_MAXPAIRS - n));
+	total = n + (uint_t)npairs;
+
+	va_start(ap, npairs);
+	for (i = n; i < total; i++) {
+		const char *name = va_arg(ap, const char *);
+		uint32_t id = va_arg(ap, uint32_t);
+		char idstr[11];
+
+		(void) snprintf(idstr, sizeof (idstr), "%u", id);
+		pairs[i] = fm_nvlist_create(nva);
+		created = i + 1;
+		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
+		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
+			va_end(ap);
+			goto fail;
+		}
+	}
+	va_end(ap);
+
+	/*
+	 * Create the fmri hc list
+	 */
+	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
+	    total) != 0)
+		goto fail;
+
+	/* The temporaries are no longer needed once the array is added */
+	for (i = 0; i < total; i++)
+		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
+
+	if (snvl != NULL &&
+	    nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
+		atomic_add_64(
+		    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+	}
+	return;
+
+fail:
+	/* A slot may be NULL if fm_nvlist_create() failed for it */
+	for (i = 0; i < created; i++) {
+		if (pairs[i] != NULL)
+			fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
+	}
+	atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+}
+
/*
* Set up and validate the members of a dev fmri according to:
*
@@ -1167,7 +1382,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
ena = (uint64_t)((format & ENA_FORMAT_MASK) |
((cpuid << ENA_FMT1_CPUID_SHFT) &
ENA_FMT1_CPUID_MASK) |
- ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) &
+ ((gethrtime() << ENA_FMT1_TIME_SHFT) &
ENA_FMT1_TIME_MASK));
}
break;
@@ -1185,7 +1400,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
- return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format));
+ return (fm_ena_generate_cpu(timestamp, getcpuid(), format));
}
uint64_t
@@ -1253,134 +1468,67 @@ fm_ena_time_get(uint64_t ena)
return (time);
}
-/*
- * Convert a getpcstack() trace to symbolic name+offset, and add the resulting
- * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK.
- */
+#ifdef _KERNEL
void
-fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth)
+fm_init(void)
{
- int i;
- char *sym;
- ulong_t off;
- char *stkpp[FM_STK_DEPTH];
- char buf[FM_STK_DEPTH * FM_SYM_SZ];
- char *stkp = buf;
-
- for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) {
- if ((sym = kobj_getsymname(stack[i], &off)) != NULL)
- (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off);
- else
- (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]);
- stkpp[i] = stkp;
- }
+	/* Start with an empty queue and no shutdown in progress */
+	zevent_flags = 0;
+	zevent_len_cur = 0;
- fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK,
- DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL);
-}
+	/* A zero limit means "not configured": derive one from the CPU count */
+	if (zevent_len_max == 0)
+		zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
-void
-print_msg_hwerr(ctid_t ct_id, proc_t *p)
-{
- uprintf("Killed process %d (%s) in contract id %d "
- "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id);
+	/* Set up the lock, queue and condition variable used by zevents */
+	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zevent_list, sizeof (zevent_t),
+	    offsetof(zevent_t, ev_node));
+	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
+
+	/* Initialize zevent allocation and generation kstats */
+	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
+	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+
+	if (fm_ksp == NULL) {
+		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
+	} else {
+		fm_ksp->ks_data = &erpt_kstat_data;
+		kstat_install(fm_ksp);
+	}
}
void
-fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
- nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
+fm_fini(void)
{
- nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
- nvlist_t *pairs[HC_MAXPAIRS];
- nvlist_t **hcl;
- uint_t n;
- int i, j;
- va_list ap;
- char *hcname, *hcid;
+	/*
+	 * Tear down the zevent machinery: drain the queue, flag shutdown so
+	 * no new waiter can go to sleep, wake and wait out the existing
+	 * waiters, then destroy the cv, list, lock and kstat.
+	 */
+	int count;
- if (!fm_fmri_hc_set_common(fmri, version, auth))
- return;
+	zfs_zevent_drain_all(&count);
- /*
- * copy the bboard nvpairs to the pairs array
- */
- if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
- != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
+	mutex_enter(&zevent_lock);
+	/*
+	 * Set the flag before waking anyone, and wake under the lock on
+	 * every pass: a thread that entered zfs_zevent_wait() after an
+	 * earlier wakeup would otherwise sleep forever and pin this loop.
+	 */
+	zevent_flags |= ZEVENT_SHUTDOWN;
+	while (zevent_waiters > 0) {
+		cv_broadcast(&zevent_cv);
+		mutex_exit(&zevent_lock);
+		schedule();
+		mutex_enter(&zevent_lock);
}
+	mutex_exit(&zevent_lock);
- for (i = 0; i < n; i++) {
- if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
- &hcname) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
- }
- if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
- }
+	cv_destroy(&zevent_cv);
+	list_destroy(&zevent_list);
+	mutex_destroy(&zevent_lock);
- pairs[i] = fm_nvlist_create(nva);
- if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
- nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
- for (j = 0; j <= i; j++) {
- if (pairs[j] != NULL)
- fm_nvlist_destroy(pairs[j],
- FM_NVA_RETAIN);
- }
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
- }
+	if (fm_ksp != NULL) {
+		kstat_delete(fm_ksp);
+		fm_ksp = NULL;
}
+}
- /*
- * create the pairs from passed in pairs
- */
- npairs = MIN(npairs, HC_MAXPAIRS);
-
- va_start(ap, npairs);
- for (i = n; i < npairs + n; i++) {
- const char *name = va_arg(ap, const char *);
- uint32_t id = va_arg(ap, uint32_t);
- char idstr[11];
- (void) snprintf(idstr, sizeof (idstr), "%u", id);
- pairs[i] = fm_nvlist_create(nva);
- if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
- nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
- for (j = 0; j <= i; j++) {
- if (pairs[j] != NULL)
- fm_nvlist_destroy(pairs[j],
- FM_NVA_RETAIN);
- }
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
- }
- }
- va_end(ap);
+module_param(zevent_len_max, int, 0644);
+MODULE_PARM_DESC(zevent_len_max, "Maximum event queue length");
- /*
- * Create the fmri hc list
- */
- if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
- npairs + n) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
- }
+module_param(zevent_cols, int, 0644);
+MODULE_PARM_DESC(zevent_cols, "Maximum event column width");
- for (i = 0; i < npairs + n; i++) {
- fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
- }
+module_param(zevent_console, int, 0644);
+MODULE_PARM_DESC(zevent_console, "Log events to the console");
- if (snvl != NULL) {
- if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
- return;
- }
- }
-}
+#endif /* _KERNEL */
diff --git a/module/zfs/include/sys/fm/fs/zfs.h b/module/zfs/include/sys/fm/fs/zfs.h
index c752edc99..d5c71d174 100644
--- a/module/zfs/include/sys/fm/fs/zfs.h
+++ b/module/zfs/include/sys/fm/fs/zfs.h
@@ -35,7 +35,9 @@ extern "C" {
#define FM_EREPORT_ZFS_CHECKSUM "checksum"
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
+#define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync"
#define FM_EREPORT_ZFS_POOL "zpool"
+#define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy"
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data"
@@ -43,9 +45,19 @@ extern "C" {
#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum"
#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small"
#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label"
+#define FM_EREPORT_ZFS_DEVICE_REMOVE "vdev.remove"
+#define FM_EREPORT_ZFS_DEVICE_CLEAR "vdev.clear"
+#define FM_EREPORT_ZFS_DEVICE_CHECK "vdev.check"
+#define FM_EREPORT_ZFS_DEVICE_SPARE "vdev.spare"
+#define FM_EREPORT_ZFS_DEVICE_AUTOEXPAND "vdev.autoexpand"
#define FM_EREPORT_ZFS_IO_FAILURE "io_failure"
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
+#define FM_EREPORT_ZFS_RESILVER_START "resilver.start"
+#define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish"
+#define FM_EREPORT_ZFS_SCRUB_START "scrub.start"
+#define FM_EREPORT_ZFS_SCRUB_FINISH "scrub.finish"
+#define FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH "bootfs.vdev.attach"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
@@ -56,6 +68,7 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
@@ -85,9 +98,9 @@ extern "C" {
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
#define FM_EREPORT_FAILMODE_PANIC "panic"
-#define FM_RESOURCE_REMOVED "removed"
-#define FM_RESOURCE_AUTOREPLACE "autoreplace"
-#define FM_RESOURCE_STATECHANGE "statechange"
+#define FM_EREPORT_RESOURCE_REMOVED "removed"
+#define FM_EREPORT_RESOURCE_AUTOREPLACE "autoreplace"
+#define FM_EREPORT_RESOURCE_STATECHANGE "statechange"
#ifdef __cplusplus
}
diff --git a/module/zfs/include/sys/fm/protocol.h b/module/zfs/include/sys/fm/protocol.h
index 5eca760da..1ee221286 100644
--- a/module/zfs/include/sys/fm/protocol.h
+++ b/module/zfs/include/sys/fm/protocol.h
@@ -69,6 +69,7 @@ extern "C" {
/* ereport payload member names */
#define FM_EREPORT_DETECTOR "detector"
#define FM_EREPORT_ENA "ena"
+#define FM_EREPORT_TIME "time"
/* list.* event payload member names */
#define FM_LIST_EVENT_SIZE "list-sz"
@@ -327,16 +328,13 @@ extern "C" {
#define FM_FMRI_SW_CTXT_ZONE "zone"
#define FM_FMRI_SW_CTXT_CTID "ctid"
#define FM_FMRI_SW_CTXT_STACK "stack"
+#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */
+#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */
extern nv_alloc_t *fm_nva_xcreate(char *, size_t);
extern void fm_nva_xdestroy(nv_alloc_t *);
-
extern nvlist_t *fm_nvlist_create(nv_alloc_t *);
extern void fm_nvlist_destroy(nvlist_t *, int);
-
-#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */
-#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */
-
extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t,
const nvlist_t *, ...);
extern void fm_payload_set(nvlist_t *, ...);
@@ -350,8 +348,6 @@ extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
uint8_t *, const char *);
extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
const char *, uint64_t);
-extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
- const char *, const char *);
extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *,
nvlist_t *, int, ...);
diff --git a/module/zfs/include/sys/fm/util.h b/module/zfs/include/sys/fm/util.h
index 37334101b..94947d67c 100644
--- a/module/zfs/include/sys/fm/util.h
+++ b/module/zfs/include/sys/fm/util.h
@@ -31,7 +31,6 @@ extern "C" {
#endif
#include <sys/nvpair.h>
-#include <sys/errorq.h>
/*
* Shared user/kernel definitions for class length, error channel name,
@@ -71,29 +70,42 @@ typedef struct erpt_dump {
} erpt_dump_t;
#ifdef _KERNEL
-#include <sys/systm.h>
-#define FM_STK_DEPTH 20 /* maximum stack depth */
-#define FM_SYM_SZ 64 /* maximum symbol size */
-#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */
+#define ZEVENT_SHUTDOWN 0x1
-#define FM_EREPORT_PAYLOAD_NAME_STACK "stack"
+typedef void zevent_cb_t(nvlist_t *, nvlist_t *);
-extern errorq_t *ereport_errorq;
-extern void *ereport_dumpbuf;
-extern size_t ereport_dumplen;
+/*
+ * Bookkeeping for a single event.  The nvlist, detector and callback
+ * members have the same types as the three arguments of zfs_zevent_post().
+ * NOTE(review): ev_node presumably links the event into a global list and
+ * ev_ze_list presumably holds the zfs_zevent_t consumers attached to this
+ * event -- confirm against fm.c.
+ */
+typedef struct zevent_s {
+ nvlist_t *ev_nvl; /* protected by the zevent_lock */
+ nvlist_t *ev_detector; /* " */
+ list_t ev_ze_list; /* " */
+ list_node_t ev_node; /* " */
+ zevent_cb_t *ev_cb; /* " */
+} zevent_t;
+
+/*
+ * Handle produced by zfs_zevent_init() / zfs_zevent_fd_hold() and consumed
+ * by zfs_zevent_next(), zfs_zevent_wait() and zfs_zevent_destroy().
+ * NOTE(review): ze_zevent presumably tracks the handle's current position
+ * in the event list and ze_dropped the number of events it missed --
+ * confirm against fm.c.
+ */
+typedef struct zfs_zevent {
+ zevent_t *ze_zevent; /* protected by the zevent_lock */
+ list_node_t ze_node; /* " */
+ uint64_t ze_dropped; /* " */
+} zfs_zevent_t;
extern void fm_init(void);
+extern void fm_fini(void);
extern void fm_nvprint(nvlist_t *);
-extern void fm_panic(const char *, ...);
-extern void fm_banner(void);
+extern void zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
+extern void zfs_zevent_drain_all(int *);
+extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
+extern void zfs_zevent_fd_rele(int);
+extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *);
+extern int zfs_zevent_wait(zfs_zevent_t *);
+extern void zfs_zevent_init(zfs_zevent_t **);
+extern void zfs_zevent_destroy(zfs_zevent_t *);
-extern void fm_ereport_dump(void);
-extern void fm_ereport_post(nvlist_t *, int);
+#else
-extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int);
+static inline void fm_init(void) { }
+static inline void fm_fini(void) { }
-extern int is_fm_panic();
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/module/zfs/include/sys/zfs_context.h b/module/zfs/include/sys/zfs_context.h
index 558e9e188..af9275b19 100644
--- a/module/zfs/include/sys/zfs_context.h
+++ b/module/zfs/include/sys/zfs_context.h
@@ -58,14 +58,9 @@ extern "C" {
#include <sys/zone.h>
#include <sys/uio.h>
#include <sys/zfs_debug.h>
-#include <sys/sysevent.h>
-#include <sys/sysevent/eventdefs.h>
-#include <sys/sysevent/dev.h>
-#include <sys/fm/util.h>
+#include <sys/fm/fs/zfs.h>
#include <sys/sunddi.h>
-#define CPU_SEQID (CPU->cpu_seqid)
-
#ifdef __cplusplus
}
#endif
diff --git a/module/zfs/include/sys/zfs_ioctl.h b/module/zfs/include/sys/zfs_ioctl.h
index 84bf794fe..ad41561ad 100644
--- a/module/zfs/include/sys/zfs_ioctl.h
+++ b/module/zfs/include/sys/zfs_ioctl.h
@@ -236,6 +236,9 @@ typedef struct zinject_record {
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
+#define ZEVENT_NONBLOCK 0x1
+#define ZEVENT_SIZE 1024
+
typedef struct zfs_share {
uint64_t z_exportdata;
uint64_t z_sharedata;
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 606138a3e..e037f4133 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1293,8 +1293,9 @@ spa_check_removed(vdev_t *vd)
spa_check_removed(vd->vdev_child[c]);
if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
- zfs_post_autoreplace(vd->vdev_spa, vd);
- spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
+ zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE,
+ vd->vdev_spa, vd, NULL, 0, 0);
+ spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK);
}
}
@@ -3639,7 +3640,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
}
}
- spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);
+ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY);
if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
spa_unload(spa);
@@ -3970,7 +3971,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
if (newvd->vdev_isspare) {
spa_spare_activate(newvd);
- spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
+ spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE);
}
oldvdpath = spa_strdup(oldvd->vdev_path);
@@ -4002,7 +4003,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
spa_strfree(newvdpath);
if (spa->spa_bootfs)
- spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
+ spa_event_notify(spa, newvd, FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH);
return (0);
}
@@ -4203,7 +4204,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
vd->vdev_detached = B_TRUE;
vdev_dirty(tvd, VDD_DTL, vd, txg);
- spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
+ spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE);
/* hang on to the spa before we release the lock */
spa_open_ref(spa, FTAG);
@@ -5034,9 +5035,6 @@ spa_async_probe(spa_t *spa, vdev_t *vd)
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
- sysevent_id_t eid;
- nvlist_t *attr;
- char *physpath;
int c;
if (!spa->spa_autoexpand)
@@ -5050,17 +5048,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
return;
- physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
- (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
-
- VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
-
- (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
- ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
-
- nvlist_free(attr);
- kmem_free(physpath, MAXPATHLEN);
+ spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND);
}
static void
@@ -5858,8 +5846,7 @@ spa_has_active_shared_spare(spa_t *spa)
}
/*
- * Post a sysevent corresponding to the given event. The 'name' must be one of
- * the event definitions in sys/sysevent/eventdefs.h. The payload will be
+ * Post an FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h. The payload will be
* filled in from the spa and (optionally) the vdev. This doesn't do anything
* in the userland libzpool, as we don't want consumers to misinterpret ztest
* or zdb as real changes.
@@ -5868,49 +5855,6 @@ void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
- sysevent_t *ev;
- sysevent_attr_list_t *attr = NULL;
- sysevent_value_t value;
- sysevent_id_t eid;
-
- ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
- SE_SLEEP);
-
- value.value_type = SE_DATA_TYPE_STRING;
- value.value.sv_string = spa_name(spa);
- if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
- goto done;
-
- value.value_type = SE_DATA_TYPE_UINT64;
- value.value.sv_uint64 = spa_guid(spa);
- if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
- goto done;
-
- if (vd) {
- value.value_type = SE_DATA_TYPE_UINT64;
- value.value.sv_uint64 = vd->vdev_guid;
- if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
- SE_SLEEP) != 0)
- goto done;
-
- if (vd->vdev_path) {
- value.value_type = SE_DATA_TYPE_STRING;
- value.value.sv_string = vd->vdev_path;
- if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
- &value, SE_SLEEP) != 0)
- goto done;
- }
- }
-
- if (sysevent_attach_attributes(ev, attr) != 0)
- goto done;
- attr = NULL;
-
- (void) log_sysevent(ev, SE_SLEEP, &eid);
-
-done:
- if (attr)
- sysevent_free_attr(attr);
- sysevent_free(ev);
+ zfs_ereport_post(name, spa, vd, NULL, 0, 0);
#endif
}
diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c
index 69d57f66d..1cf3950d4 100644
--- a/module/zfs/spa_config.c
+++ b/module/zfs/spa_config.c
@@ -258,7 +258,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
spa_config_generation++;
if (postsysevent)
- spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
+ spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
}
/*
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 32ef51db1..4027d0f4f 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -40,6 +40,7 @@
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
+#include <sys/fm/util.h>
#include <sys/dsl_scan.h>
#include <sys/fs/zfs.h>
#include <sys/metaslab_impl.h>
@@ -1540,6 +1541,7 @@ spa_init(int mode)
spa_mode_global = mode;
+ fm_init();
refcount_init();
unique_init();
zio_init();
@@ -1565,6 +1567,7 @@ spa_fini(void)
zio_fini();
unique_fini();
refcount_fini();
+ fm_fini();
avl_destroy(&spa_namespace_avl);
avl_destroy(&spa_spare_avl);
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 17b45b0e8..4613e951a 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2388,7 +2388,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
spa_async_request(spa, SPA_ASYNC_RESILVER);
- spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR);
+ spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR);
}
/*
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index 0b4812666..c93057e8e 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -99,6 +99,16 @@
*/
#ifdef _KERNEL
+/*
+ * Cleanup callback handed to zfs_zevent_post(): destroys the event nvlist
+ * and the detector nvlist with FM_NVA_FREE.  Either argument may be NULL,
+ * in which case it is skipped.
+ */
static void
+zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
+{
+ if (nvl)
+ fm_nvlist_destroy(nvl, FM_NVA_FREE);
+
+ if (detector)
+ fm_nvlist_destroy(detector, FM_NVA_FREE);
+}
+
+static void
zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
uint64_t stateoroffset, uint64_t size)
@@ -410,7 +420,7 @@ update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
* to the new smallest gap, to prepare for our next invocation.
*/
static void
-shrink_ranges(zfs_ecksum_info_t *eip)
+zei_shrink_ranges(zfs_ecksum_info_t *eip)
{
uint32_t mingap = UINT32_MAX;
uint32_t new_allowed_gap = eip->zei_mingap + 1;
@@ -429,12 +439,13 @@ shrink_ranges(zfs_ecksum_info_t *eip)
uint32_t end = r[idx].zr_end;
while (idx < max - 1) {
- idx++;
+ uint32_t nstart, nend, gap;
- uint32_t nstart = r[idx].zr_start;
- uint32_t nend = r[idx].zr_end;
+ idx++;
+ nstart = r[idx].zr_start;
+ nend = r[idx].zr_end;
- uint32_t gap = nstart - end;
+ gap = nstart - end;
if (gap < new_allowed_gap) {
end = nend;
continue;
@@ -454,13 +465,13 @@ shrink_ranges(zfs_ecksum_info_t *eip)
}
static void
-add_range(zfs_ecksum_info_t *eip, int start, int end)
+zei_add_range(zfs_ecksum_info_t *eip, int start, int end)
{
struct zei_ranges *r = eip->zei_ranges;
size_t count = eip->zei_range_count;
if (count >= MAX_RANGES) {
- shrink_ranges(eip);
+ zei_shrink_ranges(eip);
count = eip->zei_range_count;
}
if (count == 0) {
@@ -482,7 +493,7 @@ add_range(zfs_ecksum_info_t *eip, int start, int end)
}
static size_t
-range_total_size(zfs_ecksum_info_t *eip)
+zei_range_total_size(zfs_ecksum_info_t *eip)
{
struct zei_ranges *r = eip->zei_ranges;
size_t count = eip->zei_range_count;
@@ -559,7 +570,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
if (start == -1)
continue;
- add_range(eip, start, idx);
+ zei_add_range(eip, start, idx);
start = -1;
} else {
if (start != -1)
@@ -569,10 +580,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
}
}
if (start != -1)
- add_range(eip, start, idx);
+ zei_add_range(eip, start, idx);
/* See if it will fit in our inline buffers */
- inline_size = range_total_size(eip);
+ inline_size = zei_range_total_size(eip);
if (inline_size > ZFM_MAX_INLINE)
no_inline = 1;
@@ -675,10 +686,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
if (ereport == NULL)
return;
- fm_ereport_post(ereport, EVCH_SLEEP);
-
- fm_nvlist_destroy(ereport, FM_NVA_FREE);
- fm_nvlist_destroy(detector, FM_NVA_FREE);
+ /* Cleanup is handled by the callback function */
+ zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
#endif
}
@@ -730,12 +739,10 @@ zfs_ereport_finish_checksum(zio_cksum_report_t *report,
good_data, bad_data, report->zcr_length, drop_if_identical);
if (info != NULL)
- fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
+ zfs_zevent_post(report->zcr_ereport,
+ report->zcr_detector, zfs_zevent_post_cb);
- fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE);
- fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE);
report->zcr_ereport = report->zcr_detector = NULL;
-
if (info != NULL)
kmem_free(info, sizeof (*info));
#endif
@@ -764,7 +771,7 @@ void
zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
{
#ifdef _KERNEL
- fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
+ /*
+ * NOTE(review): unlike the other zfs_zevent_post() callers in this
+ * file, no cleanup callback is passed and the report keeps its
+ * zcr_ereport/zcr_detector pointers.  Confirm how zfs_zevent_post()
+ * treats a NULL callback and who owns the nvlists afterwards.
+ */
+ zfs_zevent_post(report->zcr_ereport, report->zcr_detector, NULL);
#endif
}
@@ -787,14 +794,10 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
info = annotate_ecksum(ereport, zbc, good_data, bad_data, length,
B_FALSE);
- if (info != NULL)
- fm_ereport_post(ereport, EVCH_SLEEP);
-
- fm_nvlist_destroy(ereport, FM_NVA_FREE);
- fm_nvlist_destroy(detector, FM_NVA_FREE);
-
- if (info != NULL)
+ if (info != NULL) {
+ zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
kmem_free(info, sizeof (*info));
+ }
#endif
}
@@ -817,13 +820,14 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0);
VERIFY(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0);
- if (vd)
+ if (vd) {
VERIFY(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0);
+ VERIFY(nvlist_add_uint64(resource,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0);
+ }
- fm_ereport_post(resource, EVCH_SLEEP);
-
- fm_nvlist_destroy(resource, FM_NVA_FREE);
+ zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
#endif
}
@@ -836,7 +840,7 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
void
zfs_post_remove(spa_t *spa, vdev_t *vd)
{
- zfs_post_common(spa, vd, FM_RESOURCE_REMOVED);
+ zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED);
}
/*
@@ -847,7 +851,7 @@ zfs_post_remove(spa_t *spa, vdev_t *vd)
void
zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
{
- zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE);
+ zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE);
}
/*
@@ -859,5 +863,13 @@ zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
void
zfs_post_state_change(spa_t *spa, vdev_t *vd)
{
- zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE);
+ zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_ereport_post);
+EXPORT_SYMBOL(zfs_ereport_post_checksum);
+EXPORT_SYMBOL(zfs_post_remove);
+EXPORT_SYMBOL(zfs_post_autoreplace);
+EXPORT_SYMBOL(zfs_post_state_change);
+#endif /* _KERNEL && HAVE_SPL */
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 3e149ab33..bcafcfbf6 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -1798,7 +1798,7 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc)
* local property values.
*/
static int
-zfs_ioc_objset_recvd_props(struct file *filp, zfs_cmd_t *zc)
+zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
{
objset_t *os = NULL;
int error;
@@ -4627,6 +4627,67 @@ zfs_ioc_get_holds(zfs_cmd_t *zc)
}
/*
+ * Return the next event for the handle held through zc_cleanup_fd,
+ * waiting via zfs_zevent_wait() unless ZEVENT_NONBLOCK is set.
+ *
+ * inputs:
+ * zc_guid flags (ZEVENT_NONBLOCK)
+ * zc_cleanup_fd cleanup-on-exit file descriptor
+ *
+ * outputs:
+ * zc_nvlist_dst next nvlist event
+ * zc_cookie dropped events since last get
+ *
+ * Returns 0 on success, otherwise the error from zfs_zevent_fd_hold(),
+ * zfs_zevent_next(), put_nvlist() or zfs_zevent_wait().
+ */
+static int
+zfs_ioc_events_next(zfs_cmd_t *zc)
+{
+ zfs_zevent_t *ze;
+ nvlist_t *event = NULL;
+ minor_t minor;
+ uint64_t dropped = 0;
+ int error;
+
+ error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+ if (error != 0)
+ return (error);
+
+ do {
+ error = zfs_zevent_next(ze, &event, &dropped);
+ if (event != NULL) {
+ zc->zc_cookie = dropped;
+ error = put_nvlist(zc, event);
+ nvlist_free(event);
+ /* Do not carry a freed nvlist into another pass. */
+ event = NULL;
+ }
+
+ if (zc->zc_guid & ZEVENT_NONBLOCK)
+ break;
+
+ /*
+ * Anything other than ENOENT (success included) ends the
+ * loop; ENOENT falls through to wait and retry.
+ */
+ if (error != ENOENT)
+ break;
+
+ error = zfs_zevent_wait(ze);
+ if (error)
+ break;
+ } while (1);
+
+ zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+
+ return (error);
+}
+
+/*
+ * Drain every pending event via zfs_zevent_drain_all().
+ *
+ * outputs:
+ * zc_cookie cleared events count
+ *
+ * Always returns 0.
+ */
+static int
+zfs_ioc_events_clear(zfs_cmd_t *zc)
+{
+ /* Start from zero so zc_cookie is defined even if nothing is written. */
+ int count = 0;
+
+ zfs_zevent_drain_all(&count);
+ zc->zc_cookie = count;
+
+ return (0);
+}
+
+/*
* pool create, destroy, and export don't log the history as part of
* zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
* do the logging of those commands.
@@ -4747,7 +4808,11 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
- POOL_CHECK_SUSPENDED }
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
};
int