diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dsl_scan.c | 9 | ||||
-rw-r--r-- | module/zfs/fm.c | 844 | ||||
-rw-r--r-- | module/zfs/include/sys/fm/fs/zfs.h | 19 | ||||
-rw-r--r-- | module/zfs/include/sys/fm/protocol.h | 10 | ||||
-rw-r--r-- | module/zfs/include/sys/fm/util.h | 42 | ||||
-rw-r--r-- | module/zfs/include/sys/zfs_context.h | 7 | ||||
-rw-r--r-- | module/zfs/include/sys/zfs_ioctl.h | 3 | ||||
-rw-r--r-- | module/zfs/spa.c | 76 | ||||
-rw-r--r-- | module/zfs/spa_config.c | 2 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 3 | ||||
-rw-r--r-- | module/zfs/vdev.c | 2 | ||||
-rw-r--r-- | module/zfs/zfs_fm.c | 80 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 69 |
13 files changed, 678 insertions, 488 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 525832b45..c37a8224c 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -50,9 +50,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); -static scan_cb_t dsl_scan_defrag_cb; static scan_cb_t dsl_scan_scrub_cb; -static scan_cb_t dsl_scan_remove_cb; static dsl_syncfunc_t dsl_scan_cancel_sync; static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); @@ -194,9 +192,9 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (vdev_resilver_needed(spa->spa_root_vdev, &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { - spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START); } else { - spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START); } spa->spa_scrub_started = B_TRUE; @@ -297,7 +295,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE); if (complete) { spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ? - ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); + FM_EREPORT_ZFS_RESILVER_FINISH : + FM_EREPORT_ZFS_SCRUB_FINISH); } spa_errlog_rotate(spa); diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 4efcff4f4..67d0c1a6e 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -53,51 +53,46 @@ #include <sys/types.h> #include <sys/time.h> -#include <sys/sysevent.h> -#include <sys/sysevent_impl.h> +#include <sys/list.h> #include <sys/nvpair.h> #include <sys/cmn_err.h> -#include <sys/cpuvar.h> #include <sys/sysmacros.h> -#include <sys/systm.h> -#include <sys/ddifm.h> -#include <sys/ddifm_impl.h> -#include <sys/spl.h> -#include <sys/dumphdr.h> #include <sys/compress.h> -#include <sys/cpuvar.h> -#include <sys/console.h> -#include <sys/panic.h> -#include <sys/kobj.h> #include <sys/sunddi.h> #include <sys/systeminfo.h> -#include <sys/sysevent/eventdefs.h> #include <sys/fm/util.h> #include <sys/fm/protocol.h> +#include <sys/kstat.h> +#include <sys/zfs_context.h> +#ifdef _KERNEL +#include <sys/atomic.h> +#include <sys/condvar.h> +#include <sys/cpuvar.h> +#include <sys/systm.h> +#include <sys/dumphdr.h> +#include <sys/cpuvar.h> +#include <sys/console.h> +#include <sys/kobj.h> +#include <sys/time.h> +#include <sys/zfs_ioctl.h> -/* - * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These - * values must be kept in sync with the FMA source code in usr/src/cmd/fm. - */ -static const char *fm_url = "http://www.sun.com/msg"; -static const char *fm_msgid = "SUNOS-8000-0G"; -static char *volatile fm_panicstr = NULL; +int zevent_len_max = 0; +int zevent_cols = 80; +int zevent_console = 0; -errorq_t *ereport_errorq; -void *ereport_dumpbuf; -size_t ereport_dumplen; +static int zevent_len_cur = 0; +static int zevent_waiters = 0; +static int zevent_flags = 0; -static uint_t ereport_chanlen = ERPT_EVCH_MAX; -static evchan_t *ereport_chan = NULL; -static ulong_t ereport_qlen = 0; -static size_t ereport_size = 0; -static int ereport_cols = 80; +static kmutex_t zevent_lock; +static list_t zevent_list; +static kcondvar_t zevent_cv; +#endif /* _KERNEL */ extern void fastreboot_disable_highpil(void); /* - * Common fault management kstats to record ereport generation - * failures + * Common fault management kstats to record event generation failures */ struct erpt_kstat { @@ -114,57 +109,9 @@ static struct erpt_kstat erpt_kstat_data = { { "payload-set-failed", KSTAT_DATA_UINT64 } }; -/*ARGSUSED*/ -static void -fm_drain(void *private, void *data, errorq_elem_t *eep) -{ - nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep); - - if (!panicstr) - (void) fm_ereport_post(nvl, EVCH_TRYHARD); - else - fm_nvprint(nvl); -} - -void -fm_init(void) -{ - kstat_t *ksp; +kstat_t *fm_ksp; - (void) sysevent_evc_bind(FM_ERROR_CHAN, - &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND); - - (void) sysevent_evc_control(ereport_chan, - EVCH_SET_CHAN_LEN, &ereport_chanlen); - - if (ereport_qlen == 0) - ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4); - - if (ereport_size == 0) - ereport_size = ERPT_DATA_SZ; - - ereport_errorq = errorq_nvcreate("fm_ereport_queue", - (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size, - FM_ERR_PIL, ERRORQ_VITAL); - if (ereport_errorq == NULL) - panic("failed to create required ereport error queue"); - - ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP); - ereport_dumplen = ereport_size; - - /* Initialize ereport allocation and generation kstats */ - ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED, - sizeof (struct erpt_kstat) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - if (ksp != NULL) { - ksp->ks_data = &erpt_kstat_data; - kstat_install(ksp); - } else { - cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); - - } -} +#ifdef _KERNEL /* * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of @@ -183,7 +130,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...) va_end(ap); if (c + width >= cols) { - console_printf("\n\r"); + console_printf("\n"); c = 0; if (format[0] != ' ' && depth > 0) { console_printf(" "); @@ -245,54 +192,54 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) case DATA_TYPE_BYTE: (void) nvpair_value_byte(nvp, &i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_INT8: (void) nvpair_value_int8(nvp, (void *)&i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_UINT8: (void) nvpair_value_uint8(nvp, &i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_INT16: (void) nvpair_value_int16(nvp, (void *)&i16); - c = fm_printf(d + 1, c, cols, "%x", i16); + c = fm_printf(d + 1, c, cols, "0x%x", i16); break; case DATA_TYPE_UINT16: (void) nvpair_value_uint16(nvp, &i16); - c = fm_printf(d + 1, c, cols, "%x", i16); + c = fm_printf(d + 1, c, cols, "0x%x", i16); break; case DATA_TYPE_INT32: (void) nvpair_value_int32(nvp, (void *)&i32); - c = fm_printf(d + 1, c, cols, "%x", i32); + c = fm_printf(d + 1, c, cols, "0x%x", i32); break; case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &i32); - c = fm_printf(d + 1, c, cols, "%x", i32); + c = fm_printf(d + 1, c, cols, "0x%x", i32); break; case DATA_TYPE_INT64: (void) nvpair_value_int64(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; case DATA_TYPE_HRTIME: (void) nvpair_value_hrtime(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; @@ -322,19 +269,124 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) } break; + case DATA_TYPE_INT8_ARRAY: { + int8_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT16_ARRAY: { + int16_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT32_ARRAY: { + int32_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT64_ARRAY: { + int64_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_STRING_ARRAY: case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - case DATA_TYPE_STRING_ARRAY: c = fm_printf(d + 1, c, cols, "[...]"); break; + case DATA_TYPE_UNKNOWN: c = fm_printf(d + 1, c, cols, "<unknown>"); break; @@ -350,191 +402,255 @@ fm_nvprint(nvlist_t *nvl) char *class; int c = 0; - console_printf("\r"); + console_printf("\n"); if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) - c = fm_printf(0, c, ereport_cols, "%s", class); + c = fm_printf(0, c, zevent_cols, "%s", class); - if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0) + if (fm_nvprintr(nvl, 0, c, zevent_cols) != 0) console_printf("\n"); console_printf("\n"); } -/* - * Wrapper for panic() that first produces an FMA-style message for admins. - * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this - * is the one exception to that rule and the only error that gets messaged. - * This function is intended for use by subsystems that have detected a fatal - * error and enqueued appropriate ereports and wish to then force a panic. - */ -/*PRINTFLIKE1*/ +static zevent_t * +zfs_zevent_alloc(void) +{ + zevent_t *ev; + + ev = kmem_zalloc(sizeof(zevent_t), KM_SLEEP); + if (ev == NULL) + return NULL; + + list_create(&ev->ev_ze_list, sizeof(zfs_zevent_t), + offsetof(zfs_zevent_t, ze_node)); + list_link_init(&ev->ev_node); + + return ev; +} + +static void +zfs_zevent_free(zevent_t *ev) +{ + /* Run provided cleanup callback */ + ev->ev_cb(ev->ev_nvl, ev->ev_detector); + + list_destroy(&ev->ev_ze_list); + kmem_free(ev, sizeof(zevent_t)); +} + +static void +zfs_zevent_drain(zevent_t *ev) +{ + zfs_zevent_t *ze; + + ASSERT(MUTEX_HELD(&zevent_lock)); + list_remove(&zevent_list, ev); + + /* Remove references to this event in all private file data */ + while ((ze = list_head(&ev->ev_ze_list)) != NULL) { + list_remove(&ev->ev_ze_list, ze); + ze->ze_zevent = NULL; + ze->ze_dropped++; + } + + zfs_zevent_free(ev); +} + void -fm_panic(const char *format, ...) +zfs_zevent_drain_all(int *count) { - va_list ap; + zevent_t *ev; - (void) casptr((void *)&fm_panicstr, NULL, (void *)format); -#if defined(__i386) || defined(__amd64) - fastreboot_disable_highpil(); -#endif /* __i386 || __amd64 */ - va_start(ap, format); - vpanic(format, ap); - va_end(ap); + mutex_enter(&zevent_lock); + while ((ev = list_head(&zevent_list)) != NULL) + zfs_zevent_drain(ev); + + *count = zevent_len_cur; + zevent_len_cur = 0; + mutex_exit(&zevent_lock); } /* - * Simply tell the caller if fm_panicstr is set, ie. an fma event has - * caused the panic. If so, something other than the default panic - * diagnosis method will diagnose the cause of the panic. + * New zevents are inserted at the head. If the maximum queue + * length is exceeded a zevent will be drained from the tail. + * As part of this any user space processes which currently have + * a reference to this zevent_t in their private data will have + * this reference set to NULL. */ -int -is_fm_panic() +static void +zfs_zevent_insert(zevent_t *ev) { - if (fm_panicstr) - return (1); + mutex_enter(&zevent_lock); + list_insert_head(&zevent_list, ev); + if (zevent_len_cur >= zevent_len_max) + zfs_zevent_drain(list_tail(&zevent_list)); else - return (0); + zevent_len_cur++; + + mutex_exit(&zevent_lock); } /* - * Print any appropriate FMA banner message before the panic message. This - * function is called by panicsys() and prints the message for fm_panic(). - * We print the message here so that it comes after the system is quiesced. - * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix). - * The rest of the message is for the console only and not needed in the log, - * so it is printed using console_printf(). We break it up into multiple - * chunks so as to avoid overflowing any small legacy prom_printf() buffers. + * Post a zevent */ void -fm_banner(void) +zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) { - timespec_t tod; - hrtime_t now; + int64_t tv_array[2]; + timestruc_t tv; + size_t nvl_size = 0; + zevent_t *ev; - if (!fm_panicstr) - return; /* panic was not initiated by fm_panic(); do nothing */ + gethrestime(&tv); + tv_array[0] = tv.tv_sec; + tv_array[1] = tv.tv_nsec; + if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } - if (panicstr) { - tod = panic_hrestime; - now = panic_hrtime; - } else { - gethrestime(&tod); - now = gethrtime_waitfree(); + (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); + if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; } - cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, " - "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid); + if (zevent_console) + fm_nvprint(nvl); - console_printf( -"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n" -"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n", - fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now); + ev = zfs_zevent_alloc(); + if (ev == NULL) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; + } - console_printf( -"PLATFORM: %s, CSN: -, HOSTNAME: %s\n" -"SOURCE: %s, REV: %s %s\n", - platform, utsname.nodename, utsname.sysname, - utsname.release, utsname.version); + ev->ev_nvl = nvl; + ev->ev_detector = detector; + ev->ev_cb = cb; + zfs_zevent_insert(ev); + cv_broadcast(&zevent_cv); +} - console_printf( -"DESC: Errors have been detected that require a reboot to ensure system\n" -"integrity. See %s/%s for more information.\n", - fm_url, fm_msgid); +static int +zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze) +{ + *ze = zfsdev_get_state(minor, ZST_ZEVENT); + if (*ze == NULL) + return (EBADF); - console_printf( -"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n" -"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n" -"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n"); + return (0); +} - console_printf("\n"); +int +zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze) +{ + file_t *fp; + int error; + + fp = getf(fd); + if (fp == NULL) + return (EBADF); + + *minorp = zfsdev_getminor(fp->f_file); + error = zfs_zevent_minor_to_state(*minorp, ze); + + if (error) + zfs_zevent_fd_rele(fd); + + return (error); +} + +void +zfs_zevent_fd_rele(int fd) +{ + releasef(fd); } /* - * Utility function to write all of the pending ereports to the dump device. - * This function is called at either normal reboot or panic time, and simply - * iterates over the in-transit messages in the ereport sysevent channel. + * Get the next zevent in the stream and place a copy in 'event'. */ -void -fm_ereport_dump(void) +int +zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *dropped) { - evchanq_t *chq; - sysevent_t *sep; - erpt_dump_t ed; - - timespec_t tod; - hrtime_t now; - char *buf; - size_t len; - - if (panicstr) { - tod = panic_hrestime; - now = panic_hrtime; + zevent_t *ev; + int error; + + mutex_enter(&zevent_lock); + if (ze->ze_zevent == NULL) { + /* New stream start at the beginning/tail */ + ev = list_tail(&zevent_list); + if (ev == NULL) { + error = ENOENT; + goto out; + } } else { - if (ereport_errorq != NULL) - errorq_drain(ereport_errorq); - gethrestime(&tod); - now = gethrtime_waitfree(); + /* Existing stream continue with the next element and remove + * ourselves from the wait queue for the previous element */ + ev = list_prev(&zevent_list, ze->ze_zevent); + if (ev == NULL) { + error = ENOENT; + goto out; + } + + list_remove(&ze->ze_zevent->ev_ze_list, ze); } - /* - * In the panic case, sysevent_evc_walk_init() will return NULL. - */ - if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL && - !panicstr) - return; /* event channel isn't initialized yet */ + ze->ze_zevent = ev; + list_insert_head(&ev->ev_ze_list, ze); + nvlist_dup(ev->ev_nvl, event, KM_SLEEP); + *dropped = ze->ze_dropped; + ze->ze_dropped = 0; +out: + mutex_exit(&zevent_lock); - while ((sep = sysevent_evc_walk_step(chq)) != NULL) { - if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL) - break; + return error; +} + +int +zfs_zevent_wait(zfs_zevent_t *ze) +{ + int error = 0; + + mutex_enter(&zevent_lock); - ed.ed_magic = ERPT_MAGIC; - ed.ed_chksum = checksum32(buf, len); - ed.ed_size = (uint32_t)len; - ed.ed_pad = 0; - ed.ed_hrt_nsec = SE_TIME(sep); - ed.ed_hrt_base = now; - ed.ed_tod_base.sec = tod.tv_sec; - ed.ed_tod_base.nsec = tod.tv_nsec; - - dumpvp_write(&ed, sizeof (ed)); - dumpvp_write(buf, len); + if (zevent_flags & ZEVENT_SHUTDOWN) { + error = ESHUTDOWN; + goto out; } - sysevent_evc_walk_fini(chq); + zevent_waiters++; + cv_wait_interruptible(&zevent_cv, &zevent_lock); + if (issig(JUSTLOOKING)) + error = EINTR; + + zevent_waiters--; +out: + mutex_exit(&zevent_lock); + + return error; } -/* - * Post an error report (ereport) to the sysevent error channel. The error - * channel must be established with a prior call to sysevent_evc_create() - * before publication may occur. - */ void -fm_ereport_post(nvlist_t *ereport, int evc_flag) +zfs_zevent_init(zfs_zevent_t **zep) { - size_t nvl_size = 0; - evchan_t *error_chan; + zfs_zevent_t *ze; - (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE); - if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - return; - } + ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP); + list_link_init(&ze->ze_node); +} - if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan, - EVCH_CREAT|EVCH_HOLD_PEND) != 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - return; - } +void +zfs_zevent_destroy(zfs_zevent_t *ze) +{ + mutex_enter(&zevent_lock); + if (ze->ze_zevent) + list_remove(&ze->ze_zevent->ev_ze_list, ze); + mutex_exit(&zevent_lock); - if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR, - SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - (void) sysevent_evc_unbind(error_chan); - return; - } - (void) sysevent_evc_unbind(error_chan); + kmem_free(ze, sizeof (zfs_zevent_t)); } +#endif /* _KERNEL */ /* * Wrapppers for FM nvlist allocators @@ -938,6 +1054,105 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, } } +void +fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, + nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); + nvlist_t *pairs[HC_MAXPAIRS]; + nvlist_t **hcl; + uint_t n; + int i, j; + va_list ap; + char *hcname, *hcid; + + if (!fm_fmri_hc_set_common(fmri, version, auth)) + return; + + /* + * copy the bboard nvpairs to the pairs array + */ + if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) + != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + for (i = 0; i < n; i++) { + if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, + &hcname) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { + for (j = 0; j <= i; j++) { + if (pairs[j] != NULL) + fm_nvlist_destroy(pairs[j], + FM_NVA_RETAIN); + } + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } + + /* + * create the pairs from passed in pairs + */ + npairs = MIN(npairs, HC_MAXPAIRS); + + va_start(ap, npairs); + for (i = n; i < npairs + n; i++) { + const char *name = va_arg(ap, const char *); + uint32_t id = va_arg(ap, uint32_t); + char idstr[11]; + (void) snprintf(idstr, sizeof (idstr), "%u", id); + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { + for (j = 0; j <= i; j++) { + if (pairs[j] != NULL) + fm_nvlist_destroy(pairs[j], + FM_NVA_RETAIN); + } + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } + va_end(ap); + + /* + * Create the fmri hc list + */ + if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, + npairs + n) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + for (i = 0; i < npairs + n; i++) { + fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); + } + + if (snvl != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } +} + /* * Set-up and validate the members of an dev fmri according to: * @@ -1167,7 +1382,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) ena = (uint64_t)((format & ENA_FORMAT_MASK) | ((cpuid << ENA_FMT1_CPUID_SHFT) & ENA_FMT1_CPUID_MASK) | - ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) & + ((gethrtime() << ENA_FMT1_TIME_SHFT) & ENA_FMT1_TIME_MASK)); } break; @@ -1185,7 +1400,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) uint64_t fm_ena_generate(uint64_t timestamp, uchar_t format) { - return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format)); + return (fm_ena_generate_cpu(timestamp, getcpuid(), format)); } uint64_t @@ -1253,134 +1468,67 @@ fm_ena_time_get(uint64_t ena) return (time); } -/* - * Convert a getpcstack() trace to symbolic name+offset, and add the resulting - * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK. - */ +#ifdef _KERNEL void -fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth) +fm_init(void) { - int i; - char *sym; - ulong_t off; - char *stkpp[FM_STK_DEPTH]; - char buf[FM_STK_DEPTH * FM_SYM_SZ]; - char *stkp = buf; - - for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) { - if ((sym = kobj_getsymname(stack[i], &off)) != NULL) - (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off); - else - (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]); - stkpp[i] = stkp; - } + zevent_len_cur = 0; + zevent_flags = 0; - fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK, - DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL); -} + if (zevent_len_max == 0) + zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4); -void -print_msg_hwerr(ctid_t ct_id, proc_t *p) -{ - uprintf("Killed process %d (%s) in contract id %d " - "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id); + /* Initialize zevent allocation and generation kstats */ + fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, + sizeof (struct erpt_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (fm_ksp != NULL) { + fm_ksp->ks_data = &erpt_kstat_data; + kstat_install(fm_ksp); + } else { + cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); + } + + mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node)); + cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL); } void -fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, - nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) +fm_fini(void) { - nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); - nvlist_t *pairs[HC_MAXPAIRS]; - nvlist_t **hcl; - uint_t n; - int i, j; - va_list ap; - char *hcname, *hcid; + int count; - if (!fm_fmri_hc_set_common(fmri, version, auth)) - return; + zfs_zevent_drain_all(&count); + cv_broadcast(&zevent_cv); - /* - * copy the bboard nvpairs to the pairs array - */ - if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) - != 0) { - atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; + mutex_enter(&zevent_lock); + zevent_flags |= ZEVENT_SHUTDOWN; + while (zevent_waiters > 0) { + mutex_exit(&zevent_lock); + schedule(); + mutex_enter(&zevent_lock); } + mutex_exit(&zevent_lock); - for (i = 0; i < n; i++) { - if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, - &hcname) != 0) { - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } - if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } + cv_destroy(&zevent_cv); + list_destroy(&zevent_list); + mutex_destroy(&zevent_lock); - pairs[i] = fm_nvlist_create(nva); - if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || - nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { - for (j = 0; j <= i; j++) { - if (pairs[j] != NULL) - fm_nvlist_destroy(pairs[j], - FM_NVA_RETAIN); - } - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } + if (fm_ksp != NULL) { + kstat_delete(fm_ksp); + fm_ksp = NULL; } +} - /* - * create the pairs from passed in pairs - */ - npairs = MIN(npairs, HC_MAXPAIRS); - - va_start(ap, npairs); - for (i = n; i < npairs + n; i++) { - const char *name = va_arg(ap, const char *); - uint32_t id = va_arg(ap, uint32_t); - char idstr[11]; - (void) snprintf(idstr, sizeof (idstr), "%u", id); - pairs[i] = fm_nvlist_create(nva); - if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || - nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { - for (j = 0; j <= i; j++) { - if (pairs[j] != NULL) - fm_nvlist_destroy(pairs[j], - FM_NVA_RETAIN); - } - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } - } - va_end(ap); +module_param(zevent_len_max, int, 0644); +MODULE_PARM_DESC(zevent_len_max, "Maximum event queue length"); - /* - * Create the fmri hc list - */ - if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, - npairs + n) != 0) { - atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } +module_param(zevent_cols, int, 0644); +MODULE_PARM_DESC(zevent_cols, "Maximum event column width"); - for (i = 0; i < npairs + n; i++) { - fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); - } +module_param(zevent_console, int, 0644); +MODULE_PARM_DESC(zevent_console, "Log events to the console"); - if (snvl != NULL) { - if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } - } -} +#endif /* _KERNEL */ diff --git a/module/zfs/include/sys/fm/fs/zfs.h b/module/zfs/include/sys/fm/fs/zfs.h index c752edc99..d5c71d174 100644 --- a/module/zfs/include/sys/fm/fs/zfs.h +++ b/module/zfs/include/sys/fm/fs/zfs.h @@ -35,7 +35,9 @@ extern "C" { #define FM_EREPORT_ZFS_CHECKSUM "checksum" #define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_DATA "data" +#define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync" #define FM_EREPORT_ZFS_POOL "zpool" +#define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy" #define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" #define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" #define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data" @@ -43,9 +45,19 @@ extern "C" { #define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum" #define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small" #define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label" +#define FM_EREPORT_ZFS_DEVICE_REMOVE "vdev.remove" +#define FM_EREPORT_ZFS_DEVICE_CLEAR "vdev.clear" +#define FM_EREPORT_ZFS_DEVICE_CHECK "vdev.check" +#define FM_EREPORT_ZFS_DEVICE_SPARE "vdev.spare" +#define FM_EREPORT_ZFS_DEVICE_AUTOEXPAND "vdev.autoexpand" #define FM_EREPORT_ZFS_IO_FAILURE "io_failure" #define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" +#define FM_EREPORT_ZFS_RESILVER_START "resilver.start" +#define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish" +#define FM_EREPORT_ZFS_SCRUB_START "scrub.start" +#define FM_EREPORT_ZFS_SCRUB_FINISH "scrub.finish" +#define FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH "bootfs.vdev.attach" #define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" #define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" @@ -56,6 +68,7 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path" @@ -85,9 +98,9 @@ extern "C" { #define FM_EREPORT_FAILMODE_CONTINUE "continue" #define FM_EREPORT_FAILMODE_PANIC "panic" -#define FM_RESOURCE_REMOVED "removed" -#define FM_RESOURCE_AUTOREPLACE "autoreplace" -#define FM_RESOURCE_STATECHANGE "statechange" +#define FM_EREPORT_RESOURCE_REMOVED "removed" +#define FM_EREPORT_RESOURCE_AUTOREPLACE "autoreplace" +#define FM_EREPORT_RESOURCE_STATECHANGE "statechange" #ifdef __cplusplus } diff --git a/module/zfs/include/sys/fm/protocol.h b/module/zfs/include/sys/fm/protocol.h index 5eca760da..1ee221286 100644 --- a/module/zfs/include/sys/fm/protocol.h +++ b/module/zfs/include/sys/fm/protocol.h @@ -69,6 +69,7 @@ extern "C" { /* ereport payload member names */ #define FM_EREPORT_DETECTOR "detector" #define FM_EREPORT_ENA "ena" +#define FM_EREPORT_TIME "time" /* list.* event payload member names */ #define FM_LIST_EVENT_SIZE "list-sz" @@ -327,16 +328,13 @@ extern "C" { #define FM_FMRI_SW_CTXT_ZONE "zone" #define FM_FMRI_SW_CTXT_CTID "ctid" #define FM_FMRI_SW_CTXT_STACK "stack" +#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */ +#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */ extern nv_alloc_t *fm_nva_xcreate(char *, size_t); extern void fm_nva_xdestroy(nv_alloc_t *); - extern nvlist_t *fm_nvlist_create(nv_alloc_t *); extern void fm_nvlist_destroy(nvlist_t *, int); - -#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */ -#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */ - extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t, const nvlist_t *, ...); extern void fm_payload_set(nvlist_t *, ...); @@ -350,8 +348,6 @@ extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t, uint8_t *, const char *); extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *, const char *, uint64_t); -extern void fm_authority_set(nvlist_t *, int, const char *, const char *, - const char *, const char *); extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t); extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *, nvlist_t *, int, ...); diff --git a/module/zfs/include/sys/fm/util.h b/module/zfs/include/sys/fm/util.h index 37334101b..94947d67c 100644 --- a/module/zfs/include/sys/fm/util.h +++ b/module/zfs/include/sys/fm/util.h @@ -31,7 +31,6 @@ extern "C" { #endif #include <sys/nvpair.h> -#include <sys/errorq.h> /* * Shared user/kernel definitions for class length, error channel name, @@ -71,29 +70,42 @@ typedef struct erpt_dump { } erpt_dump_t; #ifdef _KERNEL -#include <sys/systm.h> -#define FM_STK_DEPTH 20 /* maximum stack depth */ -#define FM_SYM_SZ 64 /* maximum symbol size */ -#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */ +#define ZEVENT_SHUTDOWN 0x1 -#define FM_EREPORT_PAYLOAD_NAME_STACK "stack" +typedef void zevent_cb_t(nvlist_t *, nvlist_t *); -extern errorq_t *ereport_errorq; -extern void *ereport_dumpbuf; -extern size_t ereport_dumplen; +typedef struct zevent_s { + nvlist_t *ev_nvl; /* protected by the zevent_lock */ + nvlist_t *ev_detector; /* " */ + list_t ev_ze_list; /* " */ + list_node_t ev_node; /* " */ + zevent_cb_t *ev_cb; /* " */ +} zevent_t; + +typedef struct zfs_zevent { + zevent_t *ze_zevent; /* protected by the zevent_lock */ + list_node_t ze_node; /* " */ + uint64_t ze_dropped; /* " */ +} zfs_zevent_t; extern void fm_init(void); +extern void fm_fini(void); extern void fm_nvprint(nvlist_t *); -extern void fm_panic(const char *, ...); -extern void fm_banner(void); +extern void zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *); +extern void zfs_zevent_drain_all(int *); +extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **); +extern void zfs_zevent_fd_rele(int); +extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *); +extern int zfs_zevent_wait(zfs_zevent_t *); +extern void zfs_zevent_init(zfs_zevent_t **); +extern void zfs_zevent_destroy(zfs_zevent_t *); -extern void fm_ereport_dump(void); -extern void fm_ereport_post(nvlist_t *, int); +#else -extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int); +static inline void fm_init(void) { } +static inline void fm_fini(void) { } -extern int is_fm_panic(); #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/module/zfs/include/sys/zfs_context.h b/module/zfs/include/sys/zfs_context.h index 558e9e188..af9275b19 100644 --- a/module/zfs/include/sys/zfs_context.h +++ b/module/zfs/include/sys/zfs_context.h @@ -58,14 +58,9 @@ extern "C" { #include <sys/zone.h> #include <sys/uio.h> #include <sys/zfs_debug.h> -#include <sys/sysevent.h> -#include <sys/sysevent/eventdefs.h> -#include <sys/sysevent/dev.h> -#include <sys/fm/util.h> +#include <sys/fm/fs/zfs.h> #include <sys/sunddi.h> -#define CPU_SEQID (CPU->cpu_seqid) - #ifdef __cplusplus } #endif diff --git a/module/zfs/include/sys/zfs_ioctl.h b/module/zfs/include/sys/zfs_ioctl.h index 84bf794fe..ad41561ad 100644 --- a/module/zfs/include/sys/zfs_ioctl.h +++ b/module/zfs/include/sys/zfs_ioctl.h @@ -236,6 +236,9 @@ typedef struct zinject_record { #define ZINJECT_FLUSH_ARC 0x2 #define ZINJECT_UNLOAD_SPA 0x4 +#define ZEVENT_NONBLOCK 0x1 +#define ZEVENT_SIZE 1024 + typedef struct zfs_share { uint64_t z_exportdata; uint64_t z_sharedata; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 606138a3e..e037f4133 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1293,8 +1293,9 @@ spa_check_removed(vdev_t *vd) spa_check_removed(vd->vdev_child[c]); if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { - zfs_post_autoreplace(vd->vdev_spa, vd); - spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); + zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE, + vd->vdev_spa, vd, NULL, 0, 0); + spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK); } } @@ -3639,7 +3640,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, } } - spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); @@ -3970,7 +3971,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) if (newvd->vdev_isspare) { spa_spare_activate(newvd); - spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); + spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE); } oldvdpath = spa_strdup(oldvd->vdev_path); @@ -4002,7 +4003,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) spa_strfree(newvdpath); if (spa->spa_bootfs) - spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); + spa_event_notify(spa, newvd, FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH); return (0); } @@ -4203,7 +4204,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) vd->vdev_detached = B_TRUE; vdev_dirty(tvd, VDD_DTL, vd, txg); - spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); + spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE); /* hang on to the spa before we release the lock */ spa_open_ref(spa, FTAG); @@ -5034,9 +5035,6 @@ spa_async_probe(spa_t *spa, vdev_t *vd) static void spa_async_autoexpand(spa_t *spa, vdev_t *vd) { - sysevent_id_t eid; - nvlist_t *attr; - char *physpath; int c; if (!spa->spa_autoexpand) @@ -5050,17 +5048,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd) if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) return; - physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); - - VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); - - (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, - ESC_DEV_DLE, attr, &eid, DDI_SLEEP); - - nvlist_free(attr); - kmem_free(physpath, MAXPATHLEN); + spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND); } static void @@ -5858,8 +5846,7 @@ spa_has_active_shared_spare(spa_t *spa) } /* - * Post a sysevent corresponding to the given event. The 'name' must be one of - * the event definitions in sys/sysevent/eventdefs.h. The payload will be + * Post a FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h. The payload will be * filled in from the spa and (optionally) the vdev. This doesn't do anything * in the userland libzpool, as we don't want consumers to misinterpret ztest * or zdb as real changes. @@ -5868,49 +5855,6 @@ void spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) { #ifdef _KERNEL - sysevent_t *ev; - sysevent_attr_list_t *attr = NULL; - sysevent_value_t value; - sysevent_id_t eid; - - ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", - SE_SLEEP); - - value.value_type = SE_DATA_TYPE_STRING; - value.value.sv_string = spa_name(spa); - if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) - goto done; - - value.value_type = SE_DATA_TYPE_UINT64; - value.value.sv_uint64 = spa_guid(spa); - if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) - goto done; - - if (vd) { - value.value_type = SE_DATA_TYPE_UINT64; - value.value.sv_uint64 = vd->vdev_guid; - if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, - SE_SLEEP) != 0) - goto done; - - if (vd->vdev_path) { - value.value_type = SE_DATA_TYPE_STRING; - value.value.sv_string = vd->vdev_path; - if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, - &value, SE_SLEEP) != 0) - goto done; - } - } - - if (sysevent_attach_attributes(ev, attr) != 0) - goto done; - attr = NULL; - - (void) log_sysevent(ev, SE_SLEEP, &eid); - -done: - if (attr) - sysevent_free_attr(attr); - sysevent_free(ev); + zfs_ereport_post(name, spa, vd, NULL, 0, 0); #endif } diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 69d57f66d..1cf3950d4 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -258,7 +258,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) spa_config_generation++; if (postsysevent) - spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC); + spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC); } /* diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 32ef51db1..4027d0f4f 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -40,6 +40,7 @@ #include <sys/dsl_pool.h> #include <sys/dsl_dir.h> #include <sys/dsl_prop.h> +#include <sys/fm/util.h> #include <sys/dsl_scan.h> #include <sys/fs/zfs.h> #include <sys/metaslab_impl.h> @@ -1540,6 +1541,7 @@ spa_init(int mode) spa_mode_global = mode; + fm_init(); refcount_init(); unique_init(); zio_init(); @@ -1565,6 +1567,7 @@ spa_fini(void) zio_fini(); unique_fini(); refcount_fini(); + fm_fini(); avl_destroy(&spa_namespace_avl); avl_destroy(&spa_spare_avl); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 17b45b0e8..4613e951a 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2388,7 +2388,7 @@ vdev_clear(spa_t *spa, vdev_t *vd) if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) spa_async_request(spa, SPA_ASYNC_RESILVER); - spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR); + spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR); } /* diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 0b4812666..c93057e8e 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -99,6 +99,16 @@ */ #ifdef _KERNEL static void +zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector) +{ + if (nvl) + fm_nvlist_destroy(nvl, FM_NVA_FREE); + + if (detector) + fm_nvlist_destroy(detector, FM_NVA_FREE); +} + +static void zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, uint64_t stateoroffset, uint64_t size) @@ -410,7 +420,7 @@ update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count) * to the new smallest gap, to prepare for our next invocation. */ static void -shrink_ranges(zfs_ecksum_info_t *eip) +zei_shrink_ranges(zfs_ecksum_info_t *eip) { uint32_t mingap = UINT32_MAX; uint32_t new_allowed_gap = eip->zei_mingap + 1; @@ -429,12 +439,13 @@ shrink_ranges(zfs_ecksum_info_t *eip) uint32_t end = r[idx].zr_end; while (idx < max - 1) { - idx++; + uint32_t nstart, nend, gap; - uint32_t nstart = r[idx].zr_start; - uint32_t nend = r[idx].zr_end; + idx++; + nstart = r[idx].zr_start; + nend = r[idx].zr_end; - uint32_t gap = nstart - end; + gap = nstart - end; if (gap < new_allowed_gap) { end = nend; continue; @@ -454,13 +465,13 @@ shrink_ranges(zfs_ecksum_info_t *eip) } static void -add_range(zfs_ecksum_info_t *eip, int start, int end) +zei_add_range(zfs_ecksum_info_t *eip, int start, int end) { struct zei_ranges *r = eip->zei_ranges; size_t count = eip->zei_range_count; if (count >= MAX_RANGES) { - shrink_ranges(eip); + zei_shrink_ranges(eip); count = eip->zei_range_count; } if (count == 0) { @@ -482,7 +493,7 @@ add_range(zfs_ecksum_info_t *eip, int start, int end) } static size_t -range_total_size(zfs_ecksum_info_t *eip) +zei_range_total_size(zfs_ecksum_info_t *eip) { struct zei_ranges *r = eip->zei_ranges; size_t count = eip->zei_range_count; @@ -559,7 +570,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, if (start == -1) continue; - add_range(eip, start, idx); + zei_add_range(eip, start, idx); start = -1; } else { if (start != -1) @@ -569,10 +580,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, } } if (start != -1) - add_range(eip, start, idx); + zei_add_range(eip, start, idx); /* See if it will fit in our inline buffers */ - inline_size = range_total_size(eip); + inline_size = zei_range_total_size(eip); if (inline_size > ZFM_MAX_INLINE) no_inline = 1; @@ -675,10 +686,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, if (ereport == NULL) return; - fm_ereport_post(ereport, EVCH_SLEEP); - - fm_nvlist_destroy(ereport, FM_NVA_FREE); - fm_nvlist_destroy(detector, FM_NVA_FREE); + /* Cleanup is handled by the callback function */ + zfs_zevent_post(ereport, detector, zfs_zevent_post_cb); #endif } @@ -730,12 +739,10 @@ zfs_ereport_finish_checksum(zio_cksum_report_t *report, good_data, bad_data, report->zcr_length, drop_if_identical); if (info != NULL) - fm_ereport_post(report->zcr_ereport, EVCH_SLEEP); + zfs_zevent_post(report->zcr_ereport, + report->zcr_detector, zfs_zevent_post_cb); - fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE); - fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE); report->zcr_ereport = report->zcr_detector = NULL; - if (info != NULL) kmem_free(info, sizeof (*info)); #endif @@ -764,7 +771,7 @@ void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) { #ifdef _KERNEL - fm_ereport_post(report->zcr_ereport, EVCH_SLEEP); + zfs_zevent_post(report->zcr_ereport, report->zcr_detector, NULL); #endif } @@ -787,14 +794,10 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, info = annotate_ecksum(ereport, zbc, good_data, bad_data, length, B_FALSE); - if (info != NULL) - fm_ereport_post(ereport, EVCH_SLEEP); - - fm_nvlist_destroy(ereport, FM_NVA_FREE); - fm_nvlist_destroy(detector, FM_NVA_FREE); - - if (info != NULL) + if (info != NULL) { + zfs_zevent_post(ereport, detector, zfs_zevent_post_cb); kmem_free(info, sizeof (*info)); + } #endif } @@ -817,13 +820,14 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); VERIFY(nvlist_add_uint64(resource, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); - if (vd) + if (vd) { VERIFY(nvlist_add_uint64(resource, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); + VERIFY(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0); + } - fm_ereport_post(resource, EVCH_SLEEP); - - fm_nvlist_destroy(resource, FM_NVA_FREE); + zfs_zevent_post(resource, NULL, zfs_zevent_post_cb); #endif } @@ -836,7 +840,7 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) void zfs_post_remove(spa_t *spa, vdev_t *vd) { - zfs_post_common(spa, vd, FM_RESOURCE_REMOVED); + zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED); } /* @@ -847,7 +851,7 @@ zfs_post_remove(spa_t *spa, vdev_t *vd) void zfs_post_autoreplace(spa_t *spa, vdev_t *vd) { - zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE); + zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE); } /* @@ -859,5 +863,13 @@ zfs_post_autoreplace(spa_t *spa, vdev_t *vd) void zfs_post_state_change(spa_t *spa, vdev_t *vd) { - zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE); + zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_ereport_post); +EXPORT_SYMBOL(zfs_ereport_post_checksum); +EXPORT_SYMBOL(zfs_post_remove); +EXPORT_SYMBOL(zfs_post_autoreplace); +EXPORT_SYMBOL(zfs_post_state_change); +#endif /* _KERNEL */ diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 3e149ab33..bcafcfbf6 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1798,7 +1798,7 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc) * local property values. */ static int -zfs_ioc_objset_recvd_props(struct file *filp, zfs_cmd_t *zc) +zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { objset_t *os = NULL; int error; @@ -4627,6 +4627,67 @@ zfs_ioc_get_holds(zfs_cmd_t *zc) } /* + * inputs: + * zc_guid flags (ZEVENT_NONBLOCK) + * + * outputs: + * zc_nvlist_dst next nvlist event + * zc_cookie dropped events since last get + * zc_cleanup_fd cleanup-on-exit file descriptor + */ +static int +zfs_ioc_events_next(zfs_cmd_t *zc) +{ + zfs_zevent_t *ze; + nvlist_t *event = NULL; + minor_t minor; + uint64_t dropped = 0; + int error; + + error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); + if (error != 0) + return (error); + + do { + error = zfs_zevent_next(ze, &event, &dropped); + if (event != NULL) { + zc->zc_cookie = dropped; + error = put_nvlist(zc, event); + nvlist_free(event); + } + + if (zc->zc_guid & ZEVENT_NONBLOCK) + break; + + if ((error == 0) || (error != ENOENT)) + break; + + error = zfs_zevent_wait(ze); + if (error) + break; + } while (1); + + zfs_zevent_fd_rele(zc->zc_cleanup_fd); + + return (error); +} + +/* + * outputs: + * zc_cookie cleared events count + */ +static int +zfs_ioc_events_clear(zfs_cmd_t *zc) +{ + int count; + + zfs_zevent_drain_all(&count); + zc->zc_cookie = count; + + return 0; +} + +/* * pool create, destroy, and export don't log the history as part of * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export * do the logging of those commands. @@ -4747,7 +4808,11 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED } + POOL_CHECK_SUSPENDED }, + { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, }; int |