aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config/kernel-declare-event-class.m459
-rw-r--r--config/kernel.m432
-rw-r--r--include/sys/Makefile.am2
-rw-r--r--include/sys/sdt.h70
-rw-r--r--include/sys/trace.h1038
-rw-r--r--include/sys/zfs_context.h12
-rw-r--r--include/sys/zfs_debug.h42
-rw-r--r--lib/libspl/include/sys/Makefile.am1
-rw-r--r--lib/libzpool/Makefile.am1
-rw-r--r--module/zfs/Makefile.in1
-rw-r--r--module/zfs/spa_misc.c67
-rw-r--r--module/zfs/trace.c (renamed from lib/libspl/include/sys/sdt.h)31
-rw-r--r--module/zfs/zfs_debug.c116
-rw-r--r--module/zfs/zfs_ioctl.c49
-rwxr-xr-xscripts/zfs.sh1
15 files changed, 1361 insertions, 161 deletions
diff --git a/config/kernel-declare-event-class.m4 b/config/kernel-declare-event-class.m4
new file mode 100644
index 000000000..7867d7517
--- /dev/null
+++ b/config/kernel-declare-event-class.m4
@@ -0,0 +1,59 @@
+dnl #
+dnl # Ensure the DECLARE_EVENT_CLASS macro is available to non-GPL modules.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-I\$(src)"
+
+ AC_MSG_CHECKING([whether DECLARE_EVENT_CLASS() is available])
+ ZFS_LINUX_TRY_COMPILE_HEADER([
+ #include <linux/module.h>
+ MODULE_LICENSE(ZFS_META_LICENSE);
+
+ #define CREATE_TRACE_POINTS
+ #include "conftest.h"
+ ],[
+ trace_zfs_autoconf_event_one(1UL);
+ trace_zfs_autoconf_event_two(2UL);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_DECLARE_EVENT_CLASS, 1,
+ [DECLARE_EVENT_CLASS() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ],[
+ #if !defined(_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
+ #define _CONFTEST_H
+
+ #undef TRACE_SYSTEM
+ #define TRACE_SYSTEM zfs
+ #include <linux/tracepoint.h>
+
+ DECLARE_EVENT_CLASS(zfs_autoconf_event_class,
+ TP_PROTO(unsigned long i),
+ TP_ARGS(i),
+ TP_STRUCT__entry(
+ __field(unsigned long, i)
+ ),
+ TP_fast_assign(
+ __entry->i = i;
+ ),
+ TP_printk("i = %lu", __entry->i)
+ );
+
+ #define DEFINE_AUTOCONF_EVENT(name) \
+ DEFINE_EVENT(zfs_autoconf_event_class, name, \
+ TP_PROTO(unsigned long i), \
+ TP_ARGS(i))
+ DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_one);
+ DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_two);
+
+ #endif /* _CONFTEST_H */
+
+ #undef TRACE_INCLUDE_PATH
+ #define TRACE_INCLUDE_PATH .
+ #define TRACE_INCLUDE_FILE conftest
+ #include <trace/define_trace.h>
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 9145dbd89..d8784c9db 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -6,6 +6,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_SPL
ZFS_AC_TEST_MODULE
ZFS_AC_KERNEL_CONFIG
+ ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
ZFS_AC_KERNEL_TYPE_FMODE_T
@@ -506,9 +507,18 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
])
dnl #
-dnl # ZFS_LINUX_CONFTEST
+dnl # ZFS_LINUX_CONFTEST_H
dnl #
-AC_DEFUN([ZFS_LINUX_CONFTEST], [
+AC_DEFUN([ZFS_LINUX_CONFTEST_H], [
+cat - <<_ACEOF >conftest.h
+$1
+_ACEOF
+])
+
+dnl #
+dnl # ZFS_LINUX_CONFTEST_C
+dnl #
+AC_DEFUN([ZFS_LINUX_CONFTEST_C], [
cat confdefs.h - <<_ACEOF >conftest.c
$1
_ACEOF
@@ -534,13 +544,14 @@ dnl #
dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE
dnl #
AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [
- m4_ifvaln([$1], [ZFS_LINUX_CONFTEST([$1])])
+ m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1])])
+ m4_ifvaln([$6], [ZFS_LINUX_CONFTEST_H([$6])], [ZFS_LINUX_CONFTEST_H([])])
rm -Rf build && mkdir -p build && touch build/conftest.mod.c
echo "obj-m := conftest.o" >build/Makefile
modpost_flag=''
test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
AS_IF(
- [AC_TRY_COMMAND(cp conftest.c build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])],
+ [AC_TRY_COMMAND(cp conftest.c conftest.h build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])],
[$4],
[_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])]
)
@@ -627,3 +638,16 @@ AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [
fi
fi
])
+
+dnl #
+dnl # ZFS_LINUX_TRY_COMPILE_HEADER
+dnl # like ZFS_LINUX_TRY_COMPILE, except the contents of conftest.h are
+dnl # provided via the fifth parameter
+dnl #
+AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER],
+ [ZFS_LINUX_COMPILE_IFELSE(
+ [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])],
+ [modules],
+ [test -s build/conftest.o],
+ [$3], [$4], [AC_LANG_SOURCE([$5])])
+])
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index 2c0cadd40..7ddace00d 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -40,11 +40,13 @@ COMMON_H = \
$(top_srcdir)/include/sys/rrwlock.h \
$(top_srcdir)/include/sys/sa.h \
$(top_srcdir)/include/sys/sa_impl.h \
+ $(top_srcdir)/include/sys/sdt.h \
$(top_srcdir)/include/sys/spa_boot.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
+ $(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
$(top_srcdir)/include/sys/u8_textprep_data.h \
diff --git a/include/sys/sdt.h b/include/sys/sdt.h
new file mode 100644
index 000000000..56efa1b39
--- /dev/null
+++ b/include/sys/sdt.h
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H
+
+#ifndef _KERNEL
+
+#define ZFS_PROBE(a) ((void) 0)
+#define ZFS_PROBE1(a, c) ((void) 0)
+#define ZFS_PROBE2(a, c, e) ((void) 0)
+#define ZFS_PROBE3(a, c, e, g) ((void) 0)
+#define ZFS_PROBE4(a, c, e, g, i) ((void) 0)
+#define ZFS_SET_ERROR(err) ((void) 0)
+
+#else
+
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#include <sys/trace.h>
+
+/*
+ * The set-error SDT probe is extra static, in that we declare its fake
+ * function literally, rather than with the DTRACE_PROBE1() macro. This is
+ * necessary so that SET_ERROR() can evaluate to a value, which wouldn't
+ * be possible if it required multiple statements (to declare the function
+ * and then call it).
+ *
+ * SET_ERROR() uses the comma operator so that it can be used without much
+ * additional code. For example, "return (EINVAL);" becomes
+ * "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated
+ * twice, so it should not have side effects (e.g. something like:
+ * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
+ */
+#define SET_ERROR(err) \
+ (trace_zfs_set__error(__FILE__, __func__, __LINE__, err), err)
+
+#else
+
+#undef SET_ERROR
+#define SET_ERROR(err) (err)
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_SDT_H */
diff --git a/include/sys/trace.h b/include/sys/trace.h
new file mode 100644
index 000000000..c73b15ac3
--- /dev/null
+++ b/include/sys/trace.h
@@ -0,0 +1,1038 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Prakash Surya. All rights reserved.
+ */
+
+#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM zfs
+
+#if !defined(_TRACE_ZFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ZFS_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+#include <sys/list.h>
+
+/*
+ * Redefine the DTRACE_PROBE* macros to use Linux tracepoints
+ */
+#undef DTRACE_PROBE1
+#define DTRACE_PROBE1(name, t1, arg1) \
+ trace_zfs_##name((arg1))
+
+#undef DTRACE_PROBE2
+#define DTRACE_PROBE2(name, t1, arg1, t2, arg2) \
+ trace_zfs_##name((arg1), (arg2))
+
+#undef DTRACE_PROBE3
+#define DTRACE_PROBE3(name, t1, arg1, t2, arg2, t3, arg3) \
+ trace_zfs_##name((arg1), (arg2), (arg3))
+
+#undef DTRACE_PROBE4
+#define DTRACE_PROBE4(name, t1, arg1, t2, arg2, t3, arg3, t4, arg4) \
+ trace_zfs_##name((arg1), (arg2), (arg3), (arg4))
+
+typedef struct arc_buf_hdr arc_buf_hdr_t;
+typedef struct zio zio_t;
+typedef struct vdev vdev_t;
+typedef struct l2arc_write_callback l2arc_write_callback_t;
+typedef struct blkptr blkptr_t;
+typedef struct zbookmark_phys zbookmark_phys_t;
+typedef struct l2arc_dev l2arc_dev_t;
+typedef struct dmu_buf_impl dmu_buf_impl_t;
+typedef struct dmu_tx dmu_tx_t;
+typedef struct dnode dnode_t;
+typedef struct dsl_pool dsl_pool_t;
+typedef struct znode znode_t;
+typedef struct zfs_ace_hdr zfs_ace_hdr_t;
+typedef struct zilog zilog_t;
+typedef struct zrlock zrlock_t;
+
+/*
+ * Generic support for one argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ * arc_buf_hdr_t *, ...);
+ */
+DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
+ TP_PROTO(arc_buf_hdr_t *ab),
+ TP_ARGS(ab),
+ TP_STRUCT__entry(
+ __array(uint64_t, hdr_dva_word, 2)
+ __field(uint64_t, hdr_birth)
+ __field(uint64_t, hdr_cksum0)
+ __field(uint32_t, hdr_flags)
+ __field(uint32_t, hdr_datacnt)
+ __field(arc_buf_contents_t, hdr_type)
+ __field(uint64_t, hdr_size)
+ __field(uint64_t, hdr_spa)
+ __field(arc_state_type_t, hdr_state_type)
+ __field(clock_t, hdr_access)
+ __field(uint32_t, hdr_mru_hits)
+ __field(uint32_t, hdr_mru_ghost_hits)
+ __field(uint32_t, hdr_mfu_hits)
+ __field(uint32_t, hdr_mfu_ghost_hits)
+ __field(uint32_t, hdr_l2_hits)
+ __field(int64_t, hdr_refcount)
+ ),
+ TP_fast_assign(
+ __entry->hdr_dva_word[0] = ab->b_dva.dva_word[0];
+ __entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
+ __entry->hdr_birth = ab->b_birth;
+ __entry->hdr_cksum0 = ab->b_cksum0;
+ __entry->hdr_flags = ab->b_flags;
+ __entry->hdr_datacnt = ab->b_datacnt;
+ __entry->hdr_type = ab->b_type;
+ __entry->hdr_size = ab->b_size;
+ __entry->hdr_spa = ab->b_spa;
+ __entry->hdr_state_type = ab->b_state->arcs_state;
+ __entry->hdr_access = ab->b_arc_access;
+ __entry->hdr_mru_hits = ab->b_mru_hits;
+ __entry->hdr_mru_ghost_hits = ab->b_mru_ghost_hits;
+ __entry->hdr_mfu_hits = ab->b_mfu_hits;
+ __entry->hdr_mfu_ghost_hits = ab->b_mfu_ghost_hits;
+ __entry->hdr_l2_hits = ab->b_l2_hits;
+ __entry->hdr_refcount = ab->b_refcnt.rc_count;
+ ),
+ TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
+ "flags 0x%x datacnt %u type %u size %llu spa %llu "
+ "state_type %u access %lu mru_hits %u mru_ghost_hits %u "
+ "mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
+ __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
+ __entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
+ __entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
+ __entry->hdr_spa, __entry->hdr_state_type,
+ __entry->hdr_access, __entry->hdr_mru_hits,
+ __entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
+ __entry->hdr_mfu_ghost_hits, __entry->hdr_l2_hits,
+ __entry->hdr_refcount)
+);
+
+#define DEFINE_ARC_BUF_HDR_EVENT(name) \
+DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
+ TP_PROTO(arc_buf_hdr_t *ab), \
+ TP_ARGS(ab))
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ * vdev_t *, ...,
+ * zio_t *, ...);
+ */
+/*
+ * Field declarations shared by the event classes that record a zio_t:
+ * one field per captured zio member, followed by the fields taken from
+ * the zio's embedded io_prop. Meant to be expanded inside
+ * TP_STRUCT__entry() and used together with ZIO_TP_FAST_ASSIGN,
+ * ZIO_TP_PRINTK_FMT and ZIO_TP_PRINTK_ARGS.
+ */
+#define ZIO_TP_STRUCT_ENTRY \
+ __field(zio_type_t, zio_type) \
+ __field(int, zio_cmd) \
+ __field(zio_priority_t, zio_priority) \
+ __field(uint64_t, zio_size) \
+ __field(uint64_t, zio_orig_size) \
+ __field(uint64_t, zio_offset) \
+ __field(hrtime_t, zio_timestamp) \
+ __field(hrtime_t, zio_delta) \
+ __field(uint64_t, zio_delay) \
+ __field(enum zio_flag, zio_flags) \
+ __field(enum zio_stage, zio_stage) \
+ __field(enum zio_stage, zio_pipeline) \
+ __field(enum zio_flag, zio_orig_flags) \
+ __field(enum zio_stage, zio_orig_stage) \
+ __field(enum zio_stage, zio_orig_pipeline) \
+ __field(uint8_t, zio_reexecute) \
+ __field(uint64_t, zio_txg) \
+ __field(int, zio_error) \
+ __field(uint64_t, zio_ena) \
+ \
+ __field(enum zio_checksum, zp_checksum) \
+ __field(enum zio_compress, zp_compress) \
+ __field(dmu_object_type_t, zp_type) \
+ __field(uint8_t, zp_level) \
+ __field(uint8_t, zp_copies) \
+ __field(boolean_t, zp_dedup) \
+ __field(boolean_t, zp_dedup_verify) \
+ __field(boolean_t, zp_nopwrite)
+
+/*
+ * Copies the members of a zio_t into the fields declared by
+ * ZIO_TP_STRUCT_ENTRY. Expects a pointer named "zio" to be in scope
+ * (the tracepoint argument) and is meant to be expanded inside
+ * TP_fast_assign(). The expansion ends with a semicolon, so statements
+ * may follow it without an extra separator.
+ */
+#define ZIO_TP_FAST_ASSIGN \
+ __entry->zio_type = zio->io_type; \
+ __entry->zio_cmd = zio->io_cmd; \
+ __entry->zio_priority = zio->io_priority; \
+ __entry->zio_size = zio->io_size; \
+ __entry->zio_orig_size = zio->io_orig_size; \
+ __entry->zio_offset = zio->io_offset; \
+ __entry->zio_timestamp = zio->io_timestamp; \
+ __entry->zio_delta = zio->io_delta; \
+ __entry->zio_delay = zio->io_delay; \
+ __entry->zio_flags = zio->io_flags; \
+ __entry->zio_stage = zio->io_stage; \
+ __entry->zio_pipeline = zio->io_pipeline; \
+ __entry->zio_orig_flags = zio->io_orig_flags; \
+ __entry->zio_orig_stage = zio->io_orig_stage; \
+ __entry->zio_orig_pipeline = zio->io_orig_pipeline; \
+ __entry->zio_reexecute = zio->io_reexecute; \
+ __entry->zio_txg = zio->io_txg; \
+ __entry->zio_error = zio->io_error; \
+ __entry->zio_ena = zio->io_ena; \
+ \
+ __entry->zp_checksum = zio->io_prop.zp_checksum; \
+ __entry->zp_compress = zio->io_prop.zp_compress; \
+ __entry->zp_type = zio->io_prop.zp_type; \
+ __entry->zp_level = zio->io_prop.zp_level; \
+ __entry->zp_copies = zio->io_prop.zp_copies; \
+ __entry->zp_dedup = zio->io_prop.zp_dedup; \
+ __entry->zp_nopwrite = zio->io_prop.zp_nopwrite; \
+ __entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;
+
+/*
+ * Format-string fragment for the fields declared by ZIO_TP_STRUCT_ENTRY.
+ * The conversions must stay in the same order as the values listed in
+ * ZIO_TP_PRINTK_ARGS; change both together.
+ */
+#define ZIO_TP_PRINTK_FMT \
+ "zio { type %u cmd %i prio %u size %llu orig_size %llu " \
+ "offset %llu timestamp %llu delta %llu delay %llu " \
+ "flags 0x%x stage 0x%x pipeline 0x%x orig_flags 0x%x " \
+ "orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
+ "txg %llu error %d ena %llu prop { checksum %u compress %u " \
+ "type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"
+
+/*
+ * Argument list matching ZIO_TP_PRINTK_FMT: one __entry field per
+ * conversion, in the same order. The expansion has no trailing comma,
+ * so a user that passes further arguments after it must supply the
+ * comma itself.
+ */
+#define ZIO_TP_PRINTK_ARGS \
+ __entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
+ __entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
+ __entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
+ __entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
+ __entry->zio_orig_flags, __entry->zio_orig_stage, \
+ __entry->zio_orig_pipeline, __entry->zio_reexecute, \
+ __entry->zio_txg, __entry->zio_error, __entry->zio_ena, \
+ __entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \
+ __entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \
+ __entry->zp_dedup_verify, __entry->zp_nopwrite
+
+
+DECLARE_EVENT_CLASS(zfs_l2arc_rw_class,
+ TP_PROTO(vdev_t *vd, zio_t *zio),
+ TP_ARGS(vd, zio),
+ TP_STRUCT__entry(
+ __field(uint64_t, vdev_id)
+ __field(uint64_t, vdev_guid)
+ __field(uint64_t, vdev_state)
+ ZIO_TP_STRUCT_ENTRY
+ ),
+ TP_fast_assign(
+ __entry->vdev_id = vd->vdev_id;
+ __entry->vdev_guid = vd->vdev_guid;
+ __entry->vdev_state = vd->vdev_state;
+ ZIO_TP_FAST_ASSIGN
+ ),
+ TP_printk("vdev { id %llu guid %llu state %llu } "
+ ZIO_TP_PRINTK_FMT, __entry->vdev_id, __entry->vdev_guid,
+ __entry->vdev_state, ZIO_TP_PRINTK_ARGS)
+);
+
+#define DEFINE_L2ARC_RW_EVENT(name) \
+DEFINE_EVENT(zfs_l2arc_rw_class, name, \
+ TP_PROTO(vdev_t *vd, zio_t *zio), \
+ TP_ARGS(vd, zio))
+DEFINE_L2ARC_RW_EVENT(zfs_l2arc__read);
+DEFINE_L2ARC_RW_EVENT(zfs_l2arc__write);
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ * zio_t *, ...,
+ * l2arc_write_callback_t *, ...);
+ *
+ * Only the zio_t is recorded (via the ZIO_TP_* helpers). The
+ * l2arc_write_callback_t argument is part of the prototype, but none of
+ * its members are captured or printed.
+ */
+DECLARE_EVENT_CLASS(zfs_l2arc_iodone_class,
+ TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb),
+ TP_ARGS(zio, cb),
+ TP_STRUCT__entry(ZIO_TP_STRUCT_ENTRY),
+ TP_fast_assign(ZIO_TP_FAST_ASSIGN),
+ TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
+);
+
+#define DEFINE_L2ARC_IODONE_EVENT(name) \
+DEFINE_EVENT(zfs_l2arc_iodone_class, name, \
+ TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb), \
+ TP_ARGS(zio, cb))
+DEFINE_L2ARC_IODONE_EVENT(zfs_l2arc__iodone);
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ * arc_buf_hdr_t *, ...,
+ * const blkptr_t *,
+ * uint64_t,
+ * const zbookmark_phys_t *);
+ */
+DECLARE_EVENT_CLASS(zfs_arc_miss_class,
+ TP_PROTO(arc_buf_hdr_t *hdr,
+ const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb),
+ TP_ARGS(hdr, bp, size, zb),
+ TP_STRUCT__entry(
+ __array(uint64_t, hdr_dva_word, 2)
+ __field(uint64_t, hdr_birth)
+ __field(uint64_t, hdr_cksum0)
+ __field(uint32_t, hdr_flags)
+ __field(uint32_t, hdr_datacnt)
+ __field(arc_buf_contents_t, hdr_type)
+ __field(uint64_t, hdr_size)
+ __field(uint64_t, hdr_spa)
+ __field(arc_state_type_t, hdr_state_type)
+ __field(clock_t, hdr_access)
+ __field(uint32_t, hdr_mru_hits)
+ __field(uint32_t, hdr_mru_ghost_hits)
+ __field(uint32_t, hdr_mfu_hits)
+ __field(uint32_t, hdr_mfu_ghost_hits)
+ __field(uint32_t, hdr_l2_hits)
+ __field(int64_t, hdr_refcount)
+
+ __array(uint64_t, bp_dva0, 2)
+ __array(uint64_t, bp_dva1, 2)
+ __array(uint64_t, bp_dva2, 2)
+ __array(uint64_t, bp_cksum, 4)
+
+ __field(uint64_t, bp_lsize)
+
+ __field(uint64_t, zb_objset)
+ __field(uint64_t, zb_object)
+ __field(int64_t, zb_level)
+ __field(uint64_t, zb_blkid)
+ ),
+ TP_fast_assign(
+ __entry->hdr_dva_word[0] = hdr->b_dva.dva_word[0];
+ __entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
+ __entry->hdr_birth = hdr->b_birth;
+ __entry->hdr_cksum0 = hdr->b_cksum0;
+ __entry->hdr_flags = hdr->b_flags;
+ __entry->hdr_datacnt = hdr->b_datacnt;
+ __entry->hdr_type = hdr->b_type;
+ __entry->hdr_size = hdr->b_size;
+ __entry->hdr_spa = hdr->b_spa;
+ __entry->hdr_state_type = hdr->b_state->arcs_state;
+ __entry->hdr_access = hdr->b_arc_access;
+ __entry->hdr_mru_hits = hdr->b_mru_hits;
+ __entry->hdr_mru_ghost_hits = hdr->b_mru_ghost_hits;
+ __entry->hdr_mfu_hits = hdr->b_mfu_hits;
+ __entry->hdr_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
+ __entry->hdr_l2_hits = hdr->b_l2_hits;
+ __entry->hdr_refcount = hdr->b_refcnt.rc_count;
+
+ __entry->bp_dva0[0] = bp->blk_dva[0].dva_word[0];
+ __entry->bp_dva0[1] = bp->blk_dva[0].dva_word[1];
+ __entry->bp_dva1[0] = bp->blk_dva[1].dva_word[0];
+ __entry->bp_dva1[1] = bp->blk_dva[1].dva_word[1];
+ __entry->bp_dva2[0] = bp->blk_dva[2].dva_word[0];
+ __entry->bp_dva2[1] = bp->blk_dva[2].dva_word[1];
+ __entry->bp_cksum[0] = bp->blk_cksum.zc_word[0];
+ __entry->bp_cksum[1] = bp->blk_cksum.zc_word[1];
+ __entry->bp_cksum[2] = bp->blk_cksum.zc_word[2];
+ __entry->bp_cksum[3] = bp->blk_cksum.zc_word[3];
+
+ __entry->bp_lsize = size;
+
+ __entry->zb_objset = zb->zb_objset;
+ __entry->zb_object = zb->zb_object;
+ __entry->zb_level = zb->zb_level;
+ __entry->zb_blkid = zb->zb_blkid;
+ ),
+ TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
+ "flags 0x%x datacnt %u type %u size %llu spa %llu state_type %u "
+ "access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
+ "mfu_ghost_hits %u l2_hits %u refcount %lli } "
+ "bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
+ "0x%llx:0x%llx cksum 0x%llx:0x%llx:0x%llx:0x%llx "
+ "lsize %llu } zb { objset %llu object %llu level %lli "
+ "blkid %llu }",
+ __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
+ __entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
+ __entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
+ __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
+ __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
+ __entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
+ __entry->hdr_l2_hits, __entry->hdr_refcount,
+ __entry->bp_dva0[0], __entry->bp_dva0[1],
+ __entry->bp_dva1[0], __entry->bp_dva1[1],
+ __entry->bp_dva2[0], __entry->bp_dva2[1],
+ __entry->bp_cksum[0], __entry->bp_cksum[1],
+ __entry->bp_cksum[2], __entry->bp_cksum[3],
+ __entry->bp_lsize, __entry->zb_objset, __entry->zb_object,
+ __entry->zb_level, __entry->zb_blkid)
+);
+
+#define DEFINE_ARC_MISS_EVENT(name) \
+DEFINE_EVENT(zfs_arc_miss_class, name, \
+ TP_PROTO(arc_buf_hdr_t *hdr, \
+ const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb), \
+ TP_ARGS(hdr, bp, size, zb))
+DEFINE_ARC_MISS_EVENT(zfs_arc__miss);
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ * l2arc_dev_t *, ...,
+ * list_t *, ...,
+ * uint64_t, ...,
+ * boolean_t, ...);
+ *
+ * The event records the l2arc_dev_t (including the id, guid and state of
+ * the vdev it points to through l2ad_vdev), plus the taddr and all
+ * arguments. The list_t argument is part of the prototype, but nothing
+ * from it is captured or printed.
+ */
+DECLARE_EVENT_CLASS(zfs_l2arc_evict_class,
+ TP_PROTO(l2arc_dev_t *dev,
+ list_t *buflist, uint64_t taddr, boolean_t all),
+ TP_ARGS(dev, buflist, taddr, all),
+ TP_STRUCT__entry(
+ __field(uint64_t, vdev_id)
+ __field(uint64_t, vdev_guid)
+ __field(uint64_t, vdev_state)
+
+ __field(uint64_t, l2ad_hand)
+ __field(uint64_t, l2ad_start)
+ __field(uint64_t, l2ad_end)
+ __field(uint64_t, l2ad_evict)
+ __field(boolean_t, l2ad_first)
+ __field(boolean_t, l2ad_writing)
+
+ __field(uint64_t, taddr)
+ __field(boolean_t, all)
+ ),
+ TP_fast_assign(
+ __entry->vdev_id = dev->l2ad_vdev->vdev_id;
+ __entry->vdev_guid = dev->l2ad_vdev->vdev_guid;
+ __entry->vdev_state = dev->l2ad_vdev->vdev_state;
+
+ __entry->l2ad_hand = dev->l2ad_hand;
+ __entry->l2ad_start = dev->l2ad_start;
+ __entry->l2ad_end = dev->l2ad_end;
+ __entry->l2ad_evict = dev->l2ad_evict;
+ __entry->l2ad_first = dev->l2ad_first;
+ __entry->l2ad_writing = dev->l2ad_writing;
+
+ __entry->taddr = taddr;
+ __entry->all = all;
+ ),
+ TP_printk("l2ad { vdev { id %llu guid %llu state %llu } "
+ "hand %llu start %llu end %llu evict %llu "
+ "first %d writing %d } taddr %llu all %d",
+ __entry->vdev_id, __entry->vdev_guid, __entry->vdev_state,
+ __entry->l2ad_hand, __entry->l2ad_start,
+ __entry->l2ad_end, __entry->l2ad_evict,
+ __entry->l2ad_first, __entry->l2ad_writing,
+ __entry->taddr, __entry->all)
+);
+
+#define DEFINE_L2ARC_EVICT_EVENT(name) \
+DEFINE_EVENT(zfs_l2arc_evict_class, name, \
+ TP_PROTO(l2arc_dev_t *dev, \
+ list_t *buflist, uint64_t taddr, boolean_t all), \
+ TP_ARGS(dev, buflist, taddr, all))
+DEFINE_L2ARC_EVICT_EVENT(zfs_l2arc__evict);
+
+/*
+ * Optional dmu_tx_t space-accounting fields, recorded only when
+ * DEBUG_DMU_TX is defined. They are provided through helper macros
+ * because a preprocessing directive placed inside the argument list of a
+ * function-like macro (such as DECLARE_EVENT_CLASS) has undefined
+ * behavior in ISO C. Every helper expands to nothing when DEBUG_DMU_TX
+ * is not defined.
+ */
+#ifdef DEBUG_DMU_TX
+#define DMU_TX_DEBUG_TP_STRUCT_ENTRY \
+ __field(uint64_t, tx_space_towrite) \
+ __field(uint64_t, tx_space_tofree) \
+ __field(uint64_t, tx_space_tooverwrite) \
+ __field(uint64_t, tx_space_tounref) \
+ __field(int64_t, tx_space_written) \
+ __field(int64_t, tx_space_freed)
+
+#define DMU_TX_DEBUG_TP_FAST_ASSIGN \
+ __entry->tx_space_towrite = tx->tx_space_towrite; \
+ __entry->tx_space_tofree = tx->tx_space_tofree; \
+ __entry->tx_space_tooverwrite = tx->tx_space_tooverwrite; \
+ __entry->tx_space_tounref = tx->tx_space_tounref; \
+ __entry->tx_space_written = tx->tx_space_written.rc_count; \
+ __entry->tx_space_freed = tx->tx_space_freed.rc_count;
+
+#define DMU_TX_DEBUG_TP_PRINTK_FMT \
+ "space_towrite %llu space_tofree %llu space_tooverwrite %llu " \
+ "space_tounref %llu space_written %lli space_freed %lli "
+
+/* Ends with a comma: these values are always followed by more arguments. */
+#define DMU_TX_DEBUG_TP_PRINTK_ARGS \
+ __entry->tx_space_towrite, __entry->tx_space_tofree, \
+ __entry->tx_space_tooverwrite, __entry->tx_space_tounref, \
+ __entry->tx_space_written, __entry->tx_space_freed,
+#else
+#define DMU_TX_DEBUG_TP_STRUCT_ENTRY
+#define DMU_TX_DEBUG_TP_FAST_ASSIGN
+#define DMU_TX_DEBUG_TP_PRINTK_FMT
+#define DMU_TX_DEBUG_TP_PRINTK_ARGS
+#endif
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ * dmu_tx_t *, ...,
+ * uint64_t, ...,
+ * uint64_t, ...);
+ *
+ * Records the dmu_tx_t members listed below together with the dirty and
+ * min_tx_time arguments.
+ */
+DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
+ TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time),
+ TP_ARGS(tx, dirty, min_tx_time),
+ TP_STRUCT__entry(
+ __field(uint64_t, tx_txg)
+ __field(uint64_t, tx_lastsnap_txg)
+ __field(uint64_t, tx_lasttried_txg)
+ __field(boolean_t, tx_anyobj)
+ __field(boolean_t, tx_waited)
+ __field(hrtime_t, tx_start)
+ __field(boolean_t, tx_wait_dirty)
+ __field(int, tx_err)
+ DMU_TX_DEBUG_TP_STRUCT_ENTRY
+ __field(uint64_t, min_tx_time)
+ __field(uint64_t, dirty)
+ ),
+ TP_fast_assign(
+ __entry->tx_txg = tx->tx_txg;
+ __entry->tx_lastsnap_txg = tx->tx_lastsnap_txg;
+ __entry->tx_lasttried_txg = tx->tx_lasttried_txg;
+ __entry->tx_anyobj = tx->tx_anyobj;
+ __entry->tx_waited = tx->tx_waited;
+ __entry->tx_start = tx->tx_start;
+ __entry->tx_wait_dirty = tx->tx_wait_dirty;
+ __entry->tx_err = tx->tx_err;
+ DMU_TX_DEBUG_TP_FAST_ASSIGN
+ __entry->dirty = dirty;
+ __entry->min_tx_time = min_tx_time;
+ ),
+ TP_printk("tx { txg %llu lastsnap_txg %llu tx_lasttried_txg %llu "
+ "anyobj %d waited %d start %llu wait_dirty %d err %i "
+ DMU_TX_DEBUG_TP_PRINTK_FMT
+ "} dirty %llu min_tx_time %llu",
+ __entry->tx_txg, __entry->tx_lastsnap_txg,
+ __entry->tx_lasttried_txg, __entry->tx_anyobj, __entry->tx_waited,
+ __entry->tx_start, __entry->tx_wait_dirty, __entry->tx_err,
+ DMU_TX_DEBUG_TP_PRINTK_ARGS
+ __entry->dirty, __entry->min_tx_time)
+);
+
+#define DEFINE_DELAY_MINTIME_EVENT(name) \
+DEFINE_EVENT(zfs_delay_mintime_class, name, \
+ TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time), \
+ TP_ARGS(tx, dirty, min_tx_time))
+DEFINE_DELAY_MINTIME_EVENT(zfs_delay__mintime);
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ * dnode_t *, ...,
+ * int64_t, ...,
+ * uint32_t, ...);
+ */
+DECLARE_EVENT_CLASS(zfs_dnode_move_class,
+ TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs),
+ TP_ARGS(dn, refcount, dbufs),
+ TP_STRUCT__entry(
+ __field(uint64_t, dn_object)
+ __field(dmu_object_type_t, dn_type)
+ __field(uint16_t, dn_bonuslen)
+ __field(uint8_t, dn_bonustype)
+ __field(uint8_t, dn_nblkptr)
+ __field(uint8_t, dn_checksum)
+ __field(uint8_t, dn_compress)
+ __field(uint8_t, dn_nlevels)
+ __field(uint8_t, dn_indblkshift)
+ __field(uint8_t, dn_datablkshift)
+ __field(uint8_t, dn_moved)
+ __field(uint16_t, dn_datablkszsec)
+ __field(uint32_t, dn_datablksz)
+ __field(uint64_t, dn_maxblkid)
+ __field(int64_t, dn_tx_holds)
+ __field(int64_t, dn_holds)
+ __field(boolean_t, dn_have_spill)
+
+ __field(int64_t, refcount)
+ __field(uint32_t, dbufs)
+ ),
+ TP_fast_assign(
+ __entry->dn_object = dn->dn_object;
+ __entry->dn_type = dn->dn_type;
+ __entry->dn_bonuslen = dn->dn_bonuslen;
+ __entry->dn_bonustype = dn->dn_bonustype;
+ __entry->dn_nblkptr = dn->dn_nblkptr;
+ __entry->dn_checksum = dn->dn_checksum;
+ __entry->dn_compress = dn->dn_compress;
+ __entry->dn_nlevels = dn->dn_nlevels;
+ __entry->dn_indblkshift = dn->dn_indblkshift;
+ __entry->dn_datablkshift = dn->dn_datablkshift;
+ __entry->dn_moved = dn->dn_moved;
+ __entry->dn_datablkszsec = dn->dn_datablkszsec;
+ __entry->dn_datablksz = dn->dn_datablksz;
+ __entry->dn_maxblkid = dn->dn_maxblkid;
+ __entry->dn_tx_holds = dn->dn_tx_holds.rc_count;
+ __entry->dn_holds = dn->dn_holds.rc_count;
+ __entry->dn_have_spill = dn->dn_have_spill;
+
+ __entry->refcount = refcount;
+ __entry->dbufs = dbufs;
+ ),
+ TP_printk("dn { object %llu type %d bonuslen %u bonustype %u "
+ "nblkptr %u checksum %u compress %u nlevels %u indblkshift %u "
+ "datablkshift %u moved %u datablkszsec %u datablksz %u "
+ "maxblkid %llu tx_holds %lli holds %lli have_spill %d } "
+ "refcount %lli dbufs %u",
+ __entry->dn_object, __entry->dn_type, __entry->dn_bonuslen,
+ __entry->dn_bonustype, __entry->dn_nblkptr, __entry->dn_checksum,
+ __entry->dn_compress, __entry->dn_nlevels, __entry->dn_indblkshift,
+ __entry->dn_datablkshift, __entry->dn_moved,
+ __entry->dn_datablkszsec, __entry->dn_datablksz,
+ __entry->dn_maxblkid, __entry->dn_tx_holds, __entry->dn_holds,
+ __entry->dn_have_spill, __entry->refcount, __entry->dbufs)
+);
+
+#define DEFINE_DNODE_MOVE_EVENT(name) \
+DEFINE_EVENT(zfs_dnode_move_class, name, \
+ TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs), \
+ TP_ARGS(dn, refcount, dbufs))
+DEFINE_DNODE_MOVE_EVENT(zfs_dnode__move);
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ * dsl_pool_t *, ...,
+ * uint64_t, ...);
+ *
+ * Only the uint64_t txg value is recorded and printed. The dsl_pool_t
+ * argument is part of the prototype, but none of its members are
+ * captured.
+ */
+DECLARE_EVENT_CLASS(zfs_txg_class,
+ TP_PROTO(dsl_pool_t *dp, uint64_t txg),
+ TP_ARGS(dp, txg),
+ TP_STRUCT__entry(
+ __field(uint64_t, txg)
+ ),
+ TP_fast_assign(
+ __entry->txg = txg;
+ ),
+ TP_printk("txg %llu", __entry->txg)
+);
+
+#define DEFINE_TXG_EVENT(name) \
+DEFINE_EVENT(zfs_txg_class, name, \
+ TP_PROTO(dsl_pool_t *dp, uint64_t txg), \
+ TP_ARGS(dp, txg))
+DEFINE_TXG_EVENT(zfs_dsl_pool_sync__done);
+DEFINE_TXG_EVENT(zfs_txg__quiescing);
+DEFINE_TXG_EVENT(zfs_txg__opened);
+DEFINE_TXG_EVENT(zfs_txg__syncing);
+DEFINE_TXG_EVENT(zfs_txg__synced);
+DEFINE_TXG_EVENT(zfs_txg__quiesced);
+
+/*
+ * Generic support for three argument tracepoints of the form:
+ *
+ * DTRACE_PROBE3(...,
+ * znode_t *, ...,
+ * zfs_ace_hdr_t *, ...,
+ * uint32_t, ...);
+ */
+DECLARE_EVENT_CLASS(zfs_ace_class,
+ TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched),
+ TP_ARGS(zn, ace, mask_matched),
+ TP_STRUCT__entry(
+ __field(uint64_t, z_id)
+ __field(uint8_t, z_unlinked)
+ __field(uint8_t, z_atime_dirty)
+ __field(uint8_t, z_zn_prefetch)
+ __field(uint8_t, z_moved)
+ __field(uint_t, z_blksz)
+ __field(uint_t, z_seq)
+ __field(uint64_t, z_mapcnt)
+ __field(uint64_t, z_gen)
+ __field(uint64_t, z_size)
+ __array(uint64_t, z_atime, 2)
+ __field(uint64_t, z_links)
+ __field(uint64_t, z_pflags)
+ __field(uint64_t, z_uid)
+ __field(uint64_t, z_gid)
+ __field(uint32_t, z_sync_cnt)
+ __field(mode_t, z_mode)
+ __field(boolean_t, z_is_sa)
+ __field(boolean_t, z_is_zvol)
+ __field(boolean_t, z_is_mapped)
+ __field(boolean_t, z_is_ctldir)
+ __field(boolean_t, z_is_stale)
+
+ __field(unsigned long, i_ino)
+ __field(unsigned int, i_nlink)
+ __field(u64, i_version)
+ __field(loff_t, i_size)
+ __field(unsigned int, i_blkbits)
+ __field(unsigned short, i_bytes)
+ __field(umode_t, i_mode)
+ __field(__u32, i_generation)
+
+ __field(uint16_t, z_type)
+ __field(uint16_t, z_flags)
+ __field(uint32_t, z_access_mask)
+
+ __field(uint32_t, mask_matched)
+ ),
+ TP_fast_assign(
+ __entry->z_id = zn->z_id;
+ __entry->z_unlinked = zn->z_unlinked;
+ __entry->z_atime_dirty = zn->z_atime_dirty;
+ __entry->z_zn_prefetch = zn->z_zn_prefetch;
+ __entry->z_moved = zn->z_moved;
+ __entry->z_blksz = zn->z_blksz;
+ __entry->z_seq = zn->z_seq;
+ __entry->z_mapcnt = zn->z_mapcnt;
+ __entry->z_gen = zn->z_gen;
+ __entry->z_size = zn->z_size;
+ __entry->z_atime[0] = zn->z_atime[0];
+ __entry->z_atime[1] = zn->z_atime[1];
+ __entry->z_links = zn->z_links;
+ __entry->z_pflags = zn->z_pflags;
+ __entry->z_uid = zn->z_uid;
+ __entry->z_gid = zn->z_gid;
+ __entry->z_sync_cnt = zn->z_sync_cnt;
+ __entry->z_mode = zn->z_mode;
+ __entry->z_is_sa = zn->z_is_sa;
+ __entry->z_is_zvol = zn->z_is_zvol;
+ __entry->z_is_mapped = zn->z_is_mapped;
+ __entry->z_is_ctldir = zn->z_is_ctldir;
+ __entry->z_is_stale = zn->z_is_stale;
+
+ __entry->i_ino = zn->z_inode.i_ino;
+ __entry->i_nlink = zn->z_inode.i_nlink;
+ __entry->i_version = zn->z_inode.i_version;
+ __entry->i_size = zn->z_inode.i_size;
+ __entry->i_blkbits = zn->z_inode.i_blkbits;
+ __entry->i_bytes = zn->z_inode.i_bytes;
+ __entry->i_mode = zn->z_inode.i_mode;
+ __entry->i_generation = zn->z_inode.i_generation;
+
+ __entry->z_type = ace->z_type;
+ __entry->z_flags = ace->z_flags;
+ __entry->z_access_mask = ace->z_access_mask;
+
+ __entry->mask_matched = mask_matched;
+ ),
+ TP_printk("zn { id %llu unlinked %u atime_dirty %u "
+ "zn_prefetch %u moved %u blksz %u seq %u "
+ "mapcnt %llu gen %llu size %llu atime 0x%llx:0x%llx "
+ "links %llu pflags %llu uid %llu gid %llu "
+ "sync_cnt %u mode 0x%x is_sa %d is_zvol %d "
+ "is_mapped %d is_ctldir %d is_stale %d inode { "
+ "ino %lu nlink %u version %llu size %lli blkbits %u "
+ "bytes %u mode 0x%x generation %x } } ace { type %u "
+ "flags %u access_mask %u } mask_matched %u",
+ __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
+ __entry->z_zn_prefetch, __entry->z_moved, __entry->z_blksz,
+ __entry->z_seq, __entry->z_mapcnt, __entry->z_gen,
+ __entry->z_size, __entry->z_atime[0], __entry->z_atime[1],
+ __entry->z_links, __entry->z_pflags, __entry->z_uid,
+ __entry->z_gid, __entry->z_sync_cnt, __entry->z_mode,
+ __entry->z_is_sa, __entry->z_is_zvol, __entry->z_is_mapped,
+ __entry->z_is_ctldir, __entry->z_is_stale, __entry->i_ino,
+ __entry->i_nlink, __entry->i_version, __entry->i_size,
+ __entry->i_blkbits, __entry->i_bytes, __entry->i_mode,
+ __entry->i_generation, __entry->z_type, __entry->z_flags,
+ __entry->z_access_mask, __entry->mask_matched)
+);
+
+/*
+ * DEFINE_ACE_EVENT(name) declares a tracepoint called "name" as a member of
+ * zfs_ace_class, taking (znode_t *, zfs_ace_hdr_t *, uint32_t).  The two
+ * invocations below create the ace "denies" and "allows" events.
+ */
+#define DEFINE_ACE_EVENT(name) \
+DEFINE_EVENT(zfs_ace_class, name, \
+ TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched), \
+ TP_ARGS(zn, ace, mask_matched))
+DEFINE_ACE_EVENT(zfs_zfs__ace__denies);
+DEFINE_ACE_EVENT(zfs_zfs__ace__allows);
+
+/*
+ * Generic support for one argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ *     zilog_t *, ...);
+ *
+ * Each event snapshots the scalar zilog_t members listed in
+ * TP_STRUCT__entry into the trace record and prints them as "zl { ... }".
+ * The order of the __field() entries defines the record layout, so new
+ * members should be appended rather than inserted.
+ */
+DECLARE_EVENT_CLASS(zfs_zil_class,
+	TP_PROTO(zilog_t *zilog),
+	TP_ARGS(zilog),
+	TP_STRUCT__entry(
+		__field(uint64_t, zl_lr_seq)
+		__field(uint64_t, zl_commit_lr_seq)
+		__field(uint64_t, zl_destroy_txg)
+		__field(uint64_t, zl_replaying_seq)
+		__field(uint32_t, zl_suspend)
+		__field(uint8_t, zl_suspending)
+		__field(uint8_t, zl_keep_first)
+		__field(uint8_t, zl_replay)
+		__field(uint8_t, zl_stop_sync)
+		__field(uint8_t, zl_writer)
+		__field(uint8_t, zl_logbias)
+		__field(uint8_t, zl_sync)
+		__field(int, zl_parse_error)
+		__field(uint64_t, zl_parse_blk_seq)
+		__field(uint64_t, zl_parse_lr_seq)
+		__field(uint64_t, zl_parse_blk_count)
+		__field(uint64_t, zl_parse_lr_count)
+		__field(uint64_t, zl_next_batch)
+		__field(uint64_t, zl_com_batch)
+		__field(uint64_t, zl_itx_list_sz)
+		__field(uint64_t, zl_cur_used)
+		__field(clock_t, zl_replay_time)
+		__field(uint64_t, zl_replay_blks)
+	),
+	TP_fast_assign(
+		__entry->zl_lr_seq = zilog->zl_lr_seq;
+		__entry->zl_commit_lr_seq = zilog->zl_commit_lr_seq;
+		__entry->zl_destroy_txg = zilog->zl_destroy_txg;
+		__entry->zl_replaying_seq = zilog->zl_replaying_seq;
+		__entry->zl_suspend = zilog->zl_suspend;
+		__entry->zl_suspending = zilog->zl_suspending;
+		__entry->zl_keep_first = zilog->zl_keep_first;
+		__entry->zl_replay = zilog->zl_replay;
+		__entry->zl_stop_sync = zilog->zl_stop_sync;
+		__entry->zl_writer = zilog->zl_writer;
+		__entry->zl_logbias = zilog->zl_logbias;
+		__entry->zl_sync = zilog->zl_sync;
+		__entry->zl_parse_error = zilog->zl_parse_error;
+		__entry->zl_parse_blk_seq = zilog->zl_parse_blk_seq;
+		__entry->zl_parse_lr_seq = zilog->zl_parse_lr_seq;
+		__entry->zl_parse_blk_count = zilog->zl_parse_blk_count;
+		__entry->zl_parse_lr_count = zilog->zl_parse_lr_count;
+		__entry->zl_next_batch = zilog->zl_next_batch;
+		__entry->zl_com_batch = zilog->zl_com_batch;
+		__entry->zl_itx_list_sz = zilog->zl_itx_list_sz;
+		__entry->zl_cur_used = zilog->zl_cur_used;
+		__entry->zl_replay_time = zilog->zl_replay_time;
+		__entry->zl_replay_blks = zilog->zl_replay_blks;
+	),
+	/* zl_parse_error is recorded as an int, so it is printed with %d. */
+	TP_printk("zl { lr_seq %llu commit_lr_seq %llu destroy_txg %llu "
+	    "replaying_seq %llu suspend %u suspending %u keep_first %u "
+	    "replay %u stop_sync %u writer %u logbias %u sync %u "
+	    "parse_error %d parse_blk_seq %llu parse_lr_seq %llu "
+	    "parse_blk_count %llu parse_lr_count %llu next_batch %llu "
+	    "com_batch %llu itx_list_sz %llu cur_used %llu replay_time %lu "
+	    "replay_blks %llu }",
+	    __entry->zl_lr_seq, __entry->zl_commit_lr_seq,
+	    __entry->zl_destroy_txg, __entry->zl_replaying_seq,
+	    __entry->zl_suspend, __entry->zl_suspending, __entry->zl_keep_first,
+	    __entry->zl_replay, __entry->zl_stop_sync, __entry->zl_writer,
+	    __entry->zl_logbias, __entry->zl_sync, __entry->zl_parse_error,
+	    __entry->zl_parse_blk_seq, __entry->zl_parse_lr_seq,
+	    __entry->zl_parse_blk_count, __entry->zl_parse_lr_count,
+	    __entry->zl_next_batch, __entry->zl_com_batch,
+	    __entry->zl_itx_list_sz, __entry->zl_cur_used,
+	    __entry->zl_replay_time, __entry->zl_replay_blks)
+);
+
+/*
+ * DEFINE_ZIL_EVENT(name) declares a tracepoint called "name" as a member of
+ * zfs_zil_class, taking a single zilog_t pointer.  Instantiated below for
+ * the zil cw1 and cw2 events.
+ */
+#define DEFINE_ZIL_EVENT(name) \
+DEFINE_EVENT(zfs_zil_class, name, \
+ TP_PROTO(zilog_t *zilog), \
+ TP_ARGS(zilog))
+DEFINE_ZIL_EVENT(zfs_zil__cw1);
+DEFINE_ZIL_EVENT(zfs_zil__cw2);
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ * dmu_buf_impl_t *, ...,
+ * zio_t *, ...);
+ */
+/*
+ * Building blocks shared by tracepoint classes that take a dmu_buf_impl_t
+ * named "db": the record fields, the code that fills them in, and the
+ * matching printk format and argument list.  The four macros must be kept
+ * in step with each other.
+ *
+ * The pool name is copied into the record with __string()/__assign_str()
+ * instead of being stored as a bare pointer.  The "%s" conversion runs when
+ * the record is formatted, not when the event fires, so a saved pointer
+ * could be dereferenced after the name it referred to is gone.
+ */
+#define DBUF_TP_STRUCT_ENTRY \
+	__string(os_spa, spa_name(DB_DNODE(db)->dn_objset->os_spa)) \
+	__field(uint64_t, ds_object) \
+	__field(uint64_t, db_object) \
+	__field(uint64_t, db_level) \
+	__field(uint64_t, db_blkid) \
+	__field(uint64_t, db_offset) \
+	__field(uint64_t, db_size) \
+	__field(uint64_t, db_state) \
+	__field(int64_t, db_holds)
+
+/* ds_object is reported as 0 when the objset has no dsl dataset. */
+#define DBUF_TP_FAST_ASSIGN \
+	__assign_str(os_spa, \
+	    spa_name(DB_DNODE(db)->dn_objset->os_spa)); \
+	\
+	__entry->ds_object = db->db_objset->os_dsl_dataset ? \
+	    db->db_objset->os_dsl_dataset->ds_object : 0; \
+	\
+	__entry->db_object = db->db.db_object; \
+	__entry->db_level = db->db_level; \
+	__entry->db_blkid = db->db_blkid; \
+	__entry->db_offset = db->db.db_offset; \
+	__entry->db_size = db->db.db_size; \
+	__entry->db_state = db->db_state; \
+	__entry->db_holds = refcount_count(&db->db_holds);
+
+#define DBUF_TP_PRINTK_FMT \
+	"dbuf { spa \"%s\" objset %llu object %llu level %llu " \
+	"blkid %llu offset %llu size %llu state %llu holds %lld }"
+
+#define DBUF_TP_PRINTK_ARGS \
+	__get_str(os_spa), __entry->ds_object, \
+	__entry->db_object, __entry->db_level, \
+	__entry->db_blkid, __entry->db_offset, \
+	__entry->db_size, __entry->db_state, __entry->db_holds
+
+/*
+ * Event class for (dmu_buf_impl_t *, zio_t *) probes.  The record layout,
+ * the assignments and the output format all come from the DBUF_TP_* macros,
+ * which only look at "db"; the zio argument is accepted but not recorded.
+ */
+DECLARE_EVENT_CLASS(zfs_dbuf_class,
+ TP_PROTO(dmu_buf_impl_t *db, zio_t *zio),
+ TP_ARGS(db, zio),
+ TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
+ TP_fast_assign(DBUF_TP_FAST_ASSIGN),
+ TP_printk(DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS)
+);
+
+/*
+ * DEFINE_DBUF_EVENT(name) declares a tracepoint called "name" as a member of
+ * zfs_dbuf_class, taking (dmu_buf_impl_t *, zio_t *).  Instantiated below
+ * for the blocked read event.
+ */
+#define DEFINE_DBUF_EVENT(name) \
+DEFINE_EVENT(zfs_dbuf_class, name, \
+ TP_PROTO(dmu_buf_impl_t *db, zio_t *zio), \
+ TP_ARGS(db, zio))
+DEFINE_DBUF_EVENT(zfs_blocked__read);
+
+/*
+ * Generic support for two argument tracepoints of the form:
+ *
+ * DTRACE_PROBE2(...,
+ *     zrlock_t *, ...,
+ *     uint32_t, ...);
+ *
+ * The reference count and the uint32_t argument are always recorded.  When
+ * ZFS_DEBUG is defined the record additionally carries the owner's pid and
+ * the zr_caller pointer, and the output format grows to match.
+ */
+DECLARE_EVENT_CLASS(zfs_zrlock_class,
+	TP_PROTO(zrlock_t *zrl, uint32_t n),
+	TP_ARGS(zrl, n),
+	TP_STRUCT__entry(
+		__field(int32_t, zr_refcount)
+#ifdef ZFS_DEBUG
+		__field(pid_t, zr_owner_pid)
+		__field(const char *, zr_caller)
+#endif
+		__field(uint32_t, n)
+	),
+	TP_fast_assign(
+		__entry->zr_refcount = zrl->zr_refcount;
+#ifdef ZFS_DEBUG
+		/*
+		 * Report pid 0 rather than dereferencing a NULL owner.
+		 * NOTE(review): whether zr_owner can be NULL at the existing
+		 * call sites is not visible from this header - confirm.
+		 */
+		__entry->zr_owner_pid = zrl->zr_owner ? zrl->zr_owner->pid : 0;
+		__entry->zr_caller = zrl->zr_caller;
+#endif
+		__entry->n = n;
+	),
+#ifdef ZFS_DEBUG
+	TP_printk("zrl { refcount %d owner_pid %d caller %s } n %u",
+	    __entry->zr_refcount, __entry->zr_owner_pid, __entry->zr_caller,
+	    __entry->n)
+#else
+	TP_printk("zrl { refcount %d } n %u",
+	    __entry->zr_refcount, __entry->n)
+#endif
+);
+
+/*
+ * DEFINE_ZRLOCK_EVENT(name) declares a tracepoint called "name" as a member
+ * of zfs_zrlock_class, taking (zrlock_t *, uint32_t).  Instantiated below
+ * for the zrlock reentry event.
+ */
+#define DEFINE_ZRLOCK_EVENT(name) \
+DEFINE_EVENT(zfs_zrlock_class, name, \
+ TP_PROTO(zrlock_t *zrl, uint32_t n), \
+ TP_ARGS(zrl, n))
+DEFINE_ZRLOCK_EVENT(zfs_zrlock__reentry);
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ * const char *, ...,
+ * const char *, ...,
+ * int, ...,
+ * uintptr_t, ...);
+ *
+ * Records where an error was set (file, function, line) together with the
+ * error value, and prints it as "file:line:function(): error 0x...".
+ * The file and function strings are stored as pointers, not copied.
+ */
+DECLARE_EVENT_CLASS(zfs_set_error_class,
+ TP_PROTO(const char *file, const char *function, int line,
+ uintptr_t error),
+ TP_ARGS(file, function, line, error),
+ TP_STRUCT__entry(
+ __field(const char *, file)
+ __field(const char *, function)
+ __field(int, line)
+ __field(uintptr_t, error)
+ ),
+ TP_fast_assign(
+ /* Keep only the text after the last '/' when the path has one. */
+ __entry->file = strchr(file, '/') ? strrchr(file, '/') + 1 : file;
+ __entry->function = function;
+ __entry->line = line;
+ __entry->error = error;
+ ),
+ TP_printk("%s:%d:%s(): error 0x%lx", __entry->file, __entry->line,
+ __entry->function, __entry->error)
+);
+
+/*
+ * DEFINE_SET_ERROR_EVENT(name) declares a tracepoint called "name" as a
+ * member of zfs_set_error_class, taking (file, function, line, error).
+ * Instantiated below for the set error event.
+ */
+#define DEFINE_SET_ERROR_EVENT(name) \
+DEFINE_EVENT(zfs_set_error_class, name, \
+ TP_PROTO(const char *file, const char *function, int line, \
+ uintptr_t error), \
+ TP_ARGS(file, function, line, error))
+DEFINE_SET_ERROR_EVENT(zfs_set__error);
+
+/*
+ * Generic support for four argument tracepoints of the form:
+ *
+ * DTRACE_PROBE4(...,
+ * const char *, ...,
+ * const char *, ...,
+ * int, ...,
+ * const char *, ...);
+ *
+ * Records a debug message with its call site and prints it as
+ * "file:line:function(): msg".  The message text is copied into the record
+ * via __string()/__assign_str(); the file and function strings are stored
+ * as pointers, and the file name is recorded exactly as passed in.
+ */
+DECLARE_EVENT_CLASS(zfs_dprintf_class,
+ TP_PROTO(const char *file, const char *function, int line,
+ const char *msg),
+ TP_ARGS(file, function, line, msg),
+ TP_STRUCT__entry(
+ __field(const char *, file)
+ __field(const char *, function)
+ __field(int, line)
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ __entry->file = file;
+ __entry->function = function;
+ __entry->line = line;
+ __assign_str(msg, msg);
+ ),
+ TP_printk("%s:%d:%s(): %s", __entry->file, __entry->line,
+ __entry->function, __get_str(msg))
+);
+
+/*
+ * DEFINE_DPRINTF_EVENT(name) declares a tracepoint called "name" as a
+ * member of zfs_dprintf_class, taking (file, function, line, msg).
+ * Instantiated below for the dprintf event.
+ */
+#define DEFINE_DPRINTF_EVENT(name) \
+DEFINE_EVENT(zfs_dprintf_class, name, \
+ TP_PROTO(const char *file, const char *function, int line, \
+ const char *msg), \
+ TP_ARGS(file, function, line, msg))
+DEFINE_DPRINTF_EVENT(zfs_zfs__dprintf);
+
+/*
+ * Generic support for one argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ * const char *, ...);
+ *
+ * The message text is copied into the record via __string()/__assign_str()
+ * and printed verbatim with "%s".
+ */
+DECLARE_EVENT_CLASS(zfs_dbgmsg_class,
+ TP_PROTO(const char *msg),
+ TP_ARGS(msg),
+ TP_STRUCT__entry(
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ __assign_str(msg, msg);
+ ),
+ TP_printk("%s", __get_str(msg))
+);
+
+/*
+ * DEFINE_DBGMSG_EVENT(name) declares a tracepoint called "name" as a member
+ * of zfs_dbgmsg_class, taking a single message string.  Instantiated below
+ * for the dbgmsg event.
+ */
+#define DEFINE_DBGMSG_EVENT(name) \
+DEFINE_EVENT(zfs_dbgmsg_class, name, \
+ TP_PROTO(const char *msg), \
+ TP_ARGS(msg))
+DEFINE_DBGMSG_EVENT(zfs_zfs__dbgmsg);
+
+#endif /* _TRACE_ZFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH sys
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
+
+#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index ee8221d53..d4c6fb810 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -66,6 +66,7 @@
#include <sys/sunddi.h>
#include <sys/ctype.h>
#include <sys/disp.h>
+#include <sys/trace.h>
#include <linux/dcache_compat.h>
#include <linux/utsname_compat.h>
@@ -140,15 +141,12 @@
#define CE_PANIC 3 /* panic */
#define CE_IGNORE 4 /* print nothing */
-extern int aok;
-
/*
* ZFS debugging
*/
extern void dprintf_setup(int *argc, char **argv);
-extern void __dprintf(const char *file, const char *func,
- int line, const char *fmt, ...);
+
extern void cmn_err(int, const char *, ...);
extern void vcmn_err(int, const char *, va_list);
extern void panic(const char *, ...);
@@ -156,7 +154,8 @@ extern void vpanic(const char *, va_list);
#define fm_panic panic
-#ifdef __sun
+extern int aok;
+
/*
* DTrace SDT probes have different signatures in userland than they do in
* kernel. If they're being used in kernel code, re-define them out of
@@ -202,9 +201,6 @@ extern void vpanic(const char *, va_list);
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
*/
#define SET_ERROR(err) (ZFS_SET_ERROR(err), err)
-#else
-#define SET_ERROR(err) (err)
-#endif
/*
* Threads. TS_STACK_MIN is dictated by the minimum allowed pthread stack
diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h
index 35ffa0187..1a7062408 100644
--- a/include/sys/zfs_debug.h
+++ b/include/sys/zfs_debug.h
@@ -38,14 +38,6 @@ extern "C" {
#define FALSE 0
#endif
-/*
- * ZFS debugging - Always enabled for user space builds.
- */
-
-#if !defined(ZFS_DEBUG) && !defined(_KERNEL)
-#define ZFS_DEBUG
-#endif
-
extern int zfs_flags;
extern int zfs_recover;
extern int zfs_free_leak_on_eio;
@@ -59,29 +51,15 @@ extern int zfs_free_leak_on_eio;
#define ZFS_DEBUG_ZIO_FREE (1<<6)
#define ZFS_DEBUG_HISTOGRAM_VERIFY (1<<7)
-/*
- * Always log zfs debug messages to the spl debug subsystem as SS_USER1.
- * When the SPL is configured with debugging enabled these messages will
- * appear in the internal spl debug log, otherwise they are a no-op.
- */
-#if defined(_KERNEL)
-
-#include <spl-debug.h>
-#define dprintf(...) \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) \
- __SDEBUG(NULL, SS_USER1, SD_DPRINTF, __VA_ARGS__)
-
-/*
- * When zfs is running is user space the debugging is always enabled.
- * The messages will be printed using the __dprintf() function and
- * filtered based on the zfs_flags variable.
- */
-#else
-#define dprintf(...) \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) \
+#if defined(HAVE_DECLARE_EVENT_CLASS) || !defined(_KERNEL)
+extern void __dprintf(const char *file, const char *func,
+ int line, const char *fmt, ...);
+#define dprintf(...) \
+ if (zfs_flags & ZFS_DEBUG_DPRINTF) \
__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__)
-
-#endif /* _KERNEL */
+#else
+#define dprintf(...) ((void)0)
+#endif /* HAVE_DECLARE_EVENT_CLASS || !_KERNEL */
extern void zfs_panic_recover(const char *fmt, ...);
@@ -93,12 +71,8 @@ typedef struct zfs_dbgmsg {
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);
-#if defined(_KERNEL) && defined(__linux__)
-#define zfs_dbgmsg(...) dprintf(__VA_ARGS__)
-#else
extern void zfs_dbgmsg(const char *fmt, ...);
extern void zfs_dbgmsg_print(const char *tag);
-#endif
#ifndef _KERNEL
extern int dprintf_find_string(const char *string);
diff --git a/lib/libspl/include/sys/Makefile.am b/lib/libspl/include/sys/Makefile.am
index d86cc6a56..8545f54ea 100644
--- a/lib/libspl/include/sys/Makefile.am
+++ b/lib/libspl/include/sys/Makefile.am
@@ -33,7 +33,6 @@ libspl_HEADERS = \
$(top_srcdir)/lib/libspl/include/sys/param.h \
$(top_srcdir)/lib/libspl/include/sys/priv.h \
$(top_srcdir)/lib/libspl/include/sys/processor.h \
- $(top_srcdir)/lib/libspl/include/sys/sdt.h \
$(top_srcdir)/lib/libspl/include/sys/stack.h \
$(top_srcdir)/lib/libspl/include/sys/stat.h \
$(top_srcdir)/lib/libspl/include/sys/stropts.h \
diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am
index f4838da75..85bc0510a 100644
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@@ -70,6 +70,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/space_map.c \
$(top_srcdir)/module/zfs/space_reftree.c \
$(top_srcdir)/module/zfs/txg.c \
+ $(top_srcdir)/module/zfs/trace.c \
$(top_srcdir)/module/zfs/uberblock.c \
$(top_srcdir)/module/zfs/unique.c \
$(top_srcdir)/module/zfs/vdev.c \
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index 48e7e97e9..954841f33 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -52,6 +52,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/spa_stats.o
$(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o
$(MODULE)-objs += @top_srcdir@/module/zfs/space_reftree.o
$(MODULE)-objs += @top_srcdir@/module/zfs/txg.o
+$(MODULE)-objs += @top_srcdir@/module/zfs/trace.o
$(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o
$(MODULE)-objs += @top_srcdir@/module/zfs/unique.o
$(MODULE)-objs += @top_srcdir@/module/zfs/vdev.o
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index bb740e569..2d16d7e06 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -238,6 +238,53 @@ static avl_tree_t spa_l2cache_avl;
kmem_cache_t *spa_buffer_pool;
int spa_mode_global;
+#ifndef ZFS_DEBUG
+/* Non-debug builds start with every debugging flag cleared. */
+int zfs_flags = 0;
+#else
+/* Debug builds turn on every flag except dprintf and spa. */
+int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
+#endif
+
+/*
+ * zfs_recover can be set to nonzero to attempt to recover from
+ * otherwise-fatal errors, typically caused by on-disk corruption. When
+ * set, calls to zfs_panic_recover() will turn into warning messages.
+ * This should only be used as a last resort, as it typically results
+ * in leaked space, or worse.
+ */
+int zfs_recover = B_FALSE;
+
+/*
+ * If destroy encounters an EIO while reading metadata (e.g. indirect
+ * blocks), space referenced by the missing metadata can not be freed.
+ * Normally this causes the background destroy to become "stalled", as
+ * it is unable to make forward progress. While in this stalled state,
+ * all remaining space to free from the error-encountering filesystem is
+ * "temporarily leaked". Set this flag to cause it to ignore the EIO,
+ * permanently leak the space from indirect blocks that can not be read,
+ * and continue to free everything else that it can.
+ *
+ * The default, "stalling" behavior is useful if the storage partially
+ * fails (i.e. some but not all i/os fail), and then later recovers. In
+ * this case, we will be able to continue pool operations while it is
+ * partially failed, and when it recovers, we can continue to free the
+ * space, with no leaks. However, note that this case is actually
+ * fairly rare.
+ *
+ * Typically pools either (a) fail completely (but perhaps temporarily,
+ * e.g. a top-level vdev going offline), or (b) have localized,
+ * permanent errors (e.g. disk returns the wrong data due to bit flip or
+ * firmware bug). In case (a), this setting does not matter because the
+ * pool will be suspended and the sync thread will not be able to make
+ * forward progress regardless. In case (b), because the error is
+ * permanent, the best we can do is leak the minimum amount of space,
+ * which is what setting this flag will do. Therefore, it is reasonable
+ * for this flag to normally be set, but we chose the more conservative
+ * approach of not setting it, so that there is no possibility of
+ * leaking space in the "partial temporary" failure case.
+ */
+int zfs_free_leak_on_eio = B_FALSE;
+
/*
* Expiration time in milliseconds. This value has two meanings. First it is
* used to determine when the spa_deadman() logic should fire. By default the
@@ -1319,6 +1366,16 @@ spa_freeze(spa_t *spa)
txg_wait_synced(spa_get_dsl(spa), freeze_txg);
}
+/*
+ * Report a printf-style message through vcmn_err().  The message is issued
+ * as a CE_WARN when zfs_recover is nonzero and as a CE_PANIC otherwise.
+ */
+void
+zfs_panic_recover(const char *fmt, ...)
+{
+	va_list args;
+	int level = CE_PANIC;
+
+	if (zfs_recover)
+		level = CE_WARN;
+
+	va_start(args, fmt);
+	vcmn_err(level, fmt, args);
+	va_end(args);
+}
+
/*
* This is a stripped-down version of strtoull, suitable only for converting
* lowercase hexadecimal numbers that don't overflow.
@@ -1923,6 +1980,16 @@ EXPORT_SYMBOL(spa_mode);
EXPORT_SYMBOL(spa_namespace_lock);
+/*
+ * Expose the debugging and recovery tunables as int module parameters
+ * registered with mode 0644, each with a one-line description.
+ */
+module_param(zfs_flags, int, 0644);
+MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
+
+module_param(zfs_recover, int, 0644);
+MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
+
+module_param(zfs_free_leak_on_eio, int, 0644);
+MODULE_PARM_DESC(zfs_free_leak_on_eio,
+ "Set to ignore IO errors during free and permanently leak the space");
+
module_param(zfs_deadman_synctime_ms, ulong, 0644);
MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");
diff --git a/lib/libspl/include/sys/sdt.h b/module/zfs/trace.c
index f68f79040..54ca2b2b4 100644
--- a/lib/libspl/include/sys/sdt.h
+++ b/module/zfs/trace.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,17 +19,21 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Each Linux tracepoints subsystem must define CREATE_TRACE_POINTS in one
+ * (and only one) C file, so this dummy file exists for that purpose.
*/
-#ifndef _LIBSPL_SYS_SDT_H
-#define _LIBSPL_SYS_SDT_H
+#include <sys/arc_impl.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dmu_tx.h>
+#include <sys/dnode.h>
+#include <sys/zfs_znode.h>
+#include <sys/zil_impl.h>
+#include <sys/zrlock.h>
-#define DTRACE_PROBE(a) ((void) 0)
-#define DTRACE_PROBE1(a, b, c) ((void) 0)
-#define DTRACE_PROBE2(a, b, c, d, e) ((void) 0)
-#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void) 0)
-#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void) 0)
-
-#endif
+#define CREATE_TRACE_POINTS
+#include <sys/trace.h>
diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c
index e7f266b5f..e1675c818 100644
--- a/module/zfs/zfs_debug.c
+++ b/module/zfs/zfs_debug.c
@@ -25,99 +25,22 @@
#include <sys/zfs_context.h>
-#if !defined(_KERNEL) || !defined(__linux__)
list_t zfs_dbgmsgs;
int zfs_dbgmsg_size;
kmutex_t zfs_dbgmsgs_lock;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
-#endif
-/*
- * Enable various debugging features.
- */
-int zfs_flags = 0;
-
-/*
- * zfs_recover can be set to nonzero to attempt to recover from
- * otherwise-fatal errors, typically caused by on-disk corruption. When
- * set, calls to zfs_panic_recover() will turn into warning messages.
- * This should only be used as a last resort, as it typically results
- * in leaked space, or worse.
- */
-int zfs_recover = B_FALSE;
-
-/*
- * If destroy encounters an EIO while reading metadata (e.g. indirect
- * blocks), space referenced by the missing metadata can not be freed.
- * Normally this causes the background destroy to become "stalled", as
- * it is unable to make forward progress. While in this stalled state,
- * all remaining space to free from the error-encountering filesystem is
- * "temporarily leaked". Set this flag to cause it to ignore the EIO,
- * permanently leak the space from indirect blocks that can not be read,
- * and continue to free everything else that it can.
- *
- * The default, "stalling" behavior is useful if the storage partially
- * fails (i.e. some but not all i/os fail), and then later recovers. In
- * this case, we will be able to continue pool operations while it is
- * partially failed, and when it recovers, we can continue to free the
- * space, with no leaks. However, note that this case is actually
- * fairly rare.
- *
- * Typically pools either (a) fail completely (but perhaps temporarily,
- * e.g. a top-level vdev going offline), or (b) have localized,
- * permanent errors (e.g. disk returns the wrong data due to bit flip or
- * firmware bug). In case (a), this setting does not matter because the
- * pool will be suspended and the sync thread will not be able to make
- * forward progress regardless. In case (b), because the error is
- * permanent, the best we can do is leak the minimum amount of space,
- * which is what setting this flag will do. Therefore, it is reasonable
- * for this flag to normally be set, but we chose the more conservative
- * approach of not setting it, so that there is no possibility of
- * leaking space in the "partial temporary" failure case.
- */
-int zfs_free_leak_on_eio = B_FALSE;
-
-
-void
-zfs_panic_recover(const char *fmt, ...)
-{
- va_list adx;
-
- va_start(adx, fmt);
- vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
- va_end(adx);
-}
-
-/*
- * Debug logging is enabled by default for production kernel builds.
- * The overhead for this is negligible and the logs can be valuable when
- * debugging. For non-production user space builds all debugging except
- * logging is enabled since performance is no longer a concern.
- */
void
zfs_dbgmsg_init(void)
{
-#if !defined(_KERNEL) || !defined(__linux__)
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
offsetof(zfs_dbgmsg_t, zdm_node));
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
-#endif
-
- if (zfs_flags == 0) {
-#if defined(_KERNEL)
- zfs_flags = ZFS_DEBUG_DPRINTF;
- spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF);
- spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1);
-#else
- zfs_flags = ~ZFS_DEBUG_DPRINTF;
-#endif /* _KERNEL */
- }
}
void
zfs_dbgmsg_fini(void)
{
-#if !defined(_KERNEL) || !defined(__linux__)
zfs_dbgmsg_t *zdm;
while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
@@ -127,25 +50,24 @@ zfs_dbgmsg_fini(void)
}
mutex_destroy(&zfs_dbgmsgs_lock);
ASSERT0(zfs_dbgmsg_size);
-#endif
}
-#if !defined(_KERNEL) || !defined(__linux__)
/*
- * Print these messages by running:
- * echo ::zfs_dbgmsg | mdb -k
+ * To get this data enable the zfs__dbgmsg tracepoint as shown:
*
- * Monitor these messages by running:
- * dtrace -qn 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
+ * # Enable zfs__dbgmsg tracepoint, clear the tracepoint ring buffer
+ * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
+ * $ echo 0 > /sys/kernel/debug/tracing/trace
*
- * When used with libzpool, monitor with:
- * dtrace -qn 'zfs$pid::zfs_dbgmsg:probe1{printf("%s\n", copyinstr(arg1))}'
+ * # Dump the ring buffer.
+ * $ cat /sys/kernel/debug/tracing/trace
*/
void
zfs_dbgmsg(const char *fmt, ...)
{
int size;
va_list adx;
+ char *nl;
zfs_dbgmsg_t *zdm;
va_start(adx, fmt);
@@ -156,13 +78,20 @@ zfs_dbgmsg(const char *fmt, ...)
* There is one byte of string in sizeof (zfs_dbgmsg_t), used
* for the terminating null.
*/
- zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP);
+ zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_PUSHPAGE);
zdm->zdm_timestamp = gethrestime_sec();
va_start(adx, fmt);
(void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
va_end(adx);
+ /*
+ * Get rid of trailing newline.
+ */
+ nl = strrchr(zdm->zdm_msg, '\n');
+ if (nl != NULL)
+ *nl = '\0';
+
DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);
mutex_enter(&zfs_dbgmsgs_lock);
@@ -180,6 +109,7 @@ zfs_dbgmsg(const char *fmt, ...)
void
zfs_dbgmsg_print(const char *tag)
{
+#if !defined(_KERNEL)
zfs_dbgmsg_t *zdm;
(void) printf("ZFS_DBGMSG(%s):\n", tag);
@@ -188,17 +118,5 @@ zfs_dbgmsg_print(const char *tag)
zdm = list_next(&zfs_dbgmsgs, zdm))
(void) printf("%s\n", zdm->zdm_msg);
mutex_exit(&zfs_dbgmsgs_lock);
+#endif /* !_KERNEL */
}
-#endif
-
-#if defined(_KERNEL)
-module_param(zfs_flags, int, 0644);
-MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
-
-module_param(zfs_recover, int, 0644);
-MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
-
-module_param(zfs_free_leak_on_eio, int, 0644);
-MODULE_PARM_DESC(zfs_free_leak_on_eio,
- "Set to ignore IO errors during free and permanently leak the space");
-#endif /* _KERNEL */
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 37a893c47..9396d6caa 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -247,6 +247,55 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+/*
+ * Format a debug message and hand it to the zfs__dprintf probe.
+ *
+ * file, func and line identify the call site; any directory prefix is
+ * stripped from file before it is reported.  fmt and the variadic
+ * arguments are formatted printf-style into a temporary 4096-byte buffer
+ * (vsnprintf() truncates anything longer), and a single newline at the very
+ * end of the formatted message is dropped.  The buffer is freed before
+ * returning.
+ */
+void
+__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
+{
+	const char *newfile;
+	size_t size = 4096;
+	char *buf = kmem_alloc(size, KM_PUSHPAGE);
+	char *nl;
+	va_list adx;
+
+	/*
+	 * Get rid of annoying prefix to filename: keep what follows the
+	 * last '/', or the whole name when there is no '/'.
+	 */
+	newfile = strrchr(file, '/');
+	newfile = (newfile != NULL) ? newfile + 1 : file;
+
+	va_start(adx, fmt);
+	(void) vsnprintf(buf, size, fmt, adx);
+	va_end(adx);
+
+	/*
+	 * Get rid of a trailing newline only.  The last '\n' in the buffer
+	 * is the trailing one exactly when the terminator follows it; an
+	 * embedded newline must not cut off the text that comes after it.
+	 */
+	nl = strrchr(buf, '\n');
+	if (nl != NULL && nl[1] == '\0')
+		*nl = '\0';
+
+	/*
+	 * To get this data enable the zfs__dprintf trace point as shown:
+	 *
+	 * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
+	 * $ echo 1 > /sys/module/zfs/parameters/zfs_flags
+	 * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
+	 * $ echo 0 > /sys/kernel/debug/tracing/trace
+	 *
+	 * # Dump the ring buffer.
+	 * $ cat /sys/kernel/debug/tracing/trace
+	 */
+	DTRACE_PROBE4(zfs__dprintf,
+	    char *, newfile, char *, func, int, line, char *, buf);
+
+	kmem_free(buf, size);
+}
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+
static void
history_str_free(char *buf)
{
diff --git a/scripts/zfs.sh b/scripts/zfs.sh
index b97a0577f..55584ddd1 100755
--- a/scripts/zfs.sh
+++ b/scripts/zfs.sh
@@ -33,7 +33,6 @@ MODULE-OPTIONS:
$0 zfs="zfs_prefetch_disable=1"
$0 zfs="zfs_prefetch_disable=1 zfs_mdcomp_disable=1"
-$0 spl="spl_debug_mask=0"
EOF
}