summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configure.ac1
-rw-r--r--contrib/Makefile.am4
-rw-r--r--contrib/bpftrace/Makefile.am3
-rw-r--r--contrib/bpftrace/taskqlatency.bt54
-rwxr-xr-xcontrib/bpftrace/zfs-trace.sh10
-rw-r--r--include/os/linux/spl/sys/Makefile.am1
-rw-r--r--include/os/linux/spl/sys/trace_spl.h3
-rw-r--r--include/os/linux/spl/sys/trace_taskq.h89
-rw-r--r--module/os/linux/spl/spl-taskq.c19
-rw-r--r--module/os/linux/spl/spl-trace.c3
10 files changed, 181 insertions, 6 deletions
diff --git a/configure.ac b/configure.ac
index 5bcf72bbe..723407558 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,7 @@ AC_CONFIG_FILES([
cmd/zvol_wait/Makefile
contrib/Makefile
contrib/bash_completion.d/Makefile
+ contrib/bpftrace/Makefile
contrib/dracut/02zfsexpandknowledge/Makefile
contrib/dracut/90zfs/Makefile
contrib/dracut/Makefile
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
index 9a82f82ee..9f34fd835 100644
--- a/contrib/Makefile.am
+++ b/contrib/Makefile.am
@@ -1,2 +1,2 @@
-SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp
-DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp
+SUBDIRS = bash_completion.d bpftrace dracut initramfs pyzfs zcp
+DIST_SUBDIRS = bash_completion.d bpftrace dracut initramfs pyzfs zcp
diff --git a/contrib/bpftrace/Makefile.am b/contrib/bpftrace/Makefile.am
new file mode 100644
index 000000000..c4359e6c8
--- /dev/null
+++ b/contrib/bpftrace/Makefile.am
@@ -0,0 +1,3 @@
+EXTRA_DIST = \
+ $(top_srcdir)/contrib/bpftrace/taskqlatency.bt \
+ $(top_srcdir)/contrib/bpftrace/zfs-trace.sh
diff --git a/contrib/bpftrace/taskqlatency.bt b/contrib/bpftrace/taskqlatency.bt
new file mode 100644
index 000000000..598f9882b
--- /dev/null
+++ b/contrib/bpftrace/taskqlatency.bt
@@ -0,0 +1,54 @@
+#include <sys/taskq.h>
+
+kprobe:trace_zfs_taskq_ent__birth
+{
+ $tqent = (struct taskq_ent *)arg0;
+
+ $tqent_id = $tqent->tqent_id;
+ $tq_name = str($tqent->tqent_taskq->tq_name);
+
+ @birth[$tq_name, $tqent_id] = nsecs;
+}
+
+kprobe:trace_zfs_taskq_ent__start
+{
+ $tqent = (struct taskq_ent *)arg0;
+
+ @tqent_id[tid] = $tqent->tqent_id;
+ @tq_name[tid] = str($tqent->tqent_taskq->tq_name);
+
+ @start[@tq_name[tid], @tqent_id[tid]] = nsecs;
+}
+
+kprobe:trace_zfs_taskq_ent__start
+/ @birth[@tq_name[tid], @tqent_id[tid]] /
+{
+ @queue_lat_us[@tq_name[tid]] =
+ hist((nsecs - @birth[@tq_name[tid], @tqent_id[tid]])/1000);
+ delete(@birth[@tq_name[tid], @tqent_id[tid]]);
+}
+
+kprobe:trace_zfs_taskq_ent__finish
+/ @start[@tq_name[tid], @tqent_id[tid]] /
+{
+ $tqent = (struct taskq_ent *)arg0;
+
+ @exec_lat_us[@tq_name[tid], ksym($tqent->tqent_func)] =
+ hist((nsecs - @start[@tq_name[tid], @tqent_id[tid]])/1000);
+ delete(@start[@tq_name[tid], @tqent_id[tid]]);
+}
+
+kprobe:trace_zfs_taskq_ent__finish
+{
+ delete(@tq_name[tid]);
+ delete(@tqent_id[tid]);
+}
+
+END
+{
+ clear(@birth);
+ clear(@start);
+
+ clear(@tq_name);
+ clear(@tqent_id);
+}
diff --git a/contrib/bpftrace/zfs-trace.sh b/contrib/bpftrace/zfs-trace.sh
new file mode 100755
index 000000000..13230b78c
--- /dev/null
+++ b/contrib/bpftrace/zfs-trace.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+ZVER=$(cat /sys/module/zfs/version | cut -f 1 -d '-')
+KVER=$(uname -r)
+
+bpftrace \
+ --include "/usr/src/zfs-$ZVER/$KVER/zfs_config.h" \
+ -I "/usr/src/zfs-$ZVER/include" \
+ -I "/usr/src/zfs-$ZVER/include/spl" \
+ "$@"
diff --git a/include/os/linux/spl/sys/Makefile.am b/include/os/linux/spl/sys/Makefile.am
index 251542118..e3df4edae 100644
--- a/include/os/linux/spl/sys/Makefile.am
+++ b/include/os/linux/spl/sys/Makefile.am
@@ -46,6 +46,7 @@ KERNEL_H = \
$(top_srcdir)/include/os/linux/spl/sys/timer.h \
$(top_srcdir)/include/os/linux/spl/sys/trace.h \
$(top_srcdir)/include/os/linux/spl/sys/trace_spl.h \
+ $(top_srcdir)/include/os/linux/spl/sys/trace_taskq.h \
$(top_srcdir)/include/os/linux/spl/sys/tsd.h \
$(top_srcdir)/include/os/linux/spl/sys/types32.h \
$(top_srcdir)/include/os/linux/spl/sys/types.h \
diff --git a/include/os/linux/spl/sys/trace_spl.h b/include/os/linux/spl/sys/trace_spl.h
index 6b50d546b..bffd91d91 100644
--- a/include/os/linux/spl/sys/trace_spl.h
+++ b/include/os/linux/spl/sys/trace_spl.h
@@ -22,6 +22,9 @@
#ifndef _OS_LINUX_SPL_TRACE_H
#define _OS_LINUX_SPL_TRACE_H
+#include <sys/taskq.h>
+
#include <sys/trace.h>
+#include <sys/trace_taskq.h>
#endif
diff --git a/include/os/linux/spl/sys/trace_taskq.h b/include/os/linux/spl/sys/trace_taskq.h
new file mode 100644
index 000000000..dbbb3c4c7
--- /dev/null
+++ b/include/os/linux/spl/sys/trace_taskq.h
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL)
+#if defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR zfs_taskq
+
+#if !defined(_TRACE_TASKQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TASKQ_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+
+/*
+ * Generic support for single argument tracepoints of the form:
+ *
+ * DTRACE_PROBE1(...,
+ * taskq_ent_t *, ...);
+ */
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_taskq_ent_class,
+ TP_PROTO(taskq_ent_t *taskq_ent),
+ TP_ARGS(taskq_ent),
+ TP_STRUCT__entry(
+ __field(taskq_ent_t *, taskq_ent)
+ ),
+ TP_fast_assign(
+ __entry->taskq_ent = taskq_ent;
+ ),
+ TP_printk("taskq_ent %p", __entry->taskq_ent)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define DEFINE_TASKQ_EVENT(name) \
+DEFINE_EVENT(zfs_taskq_ent_class, name, \
+ TP_PROTO(taskq_ent_t *taskq_ent), \
+ TP_ARGS(taskq_ent))
+/* END CSTYLED */
+DEFINE_TASKQ_EVENT(zfs_taskq_ent__birth);
+DEFINE_TASKQ_EVENT(zfs_taskq_ent__start);
+DEFINE_TASKQ_EVENT(zfs_taskq_ent__finish);
+
+#endif /* _TRACE_TASKQ_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH sys
+#define TRACE_INCLUDE_FILE trace_taskq
+#include <trace/define_trace.h>
+
+#else
+
+/*
+ * When tracepoints are not available, a DEFINE_DTRACE_PROBE* macro is
+ * needed for each DTRACE_PROBE. These will be used to generate stub
+ * tracing functions and prototypes for those functions. See
+ * include/os/linux/spl/sys/trace.h.
+ */
+
+DEFINE_DTRACE_PROBE1(taskq_ent__birth);
+DEFINE_DTRACE_PROBE1(taskq_ent__start);
+DEFINE_DTRACE_PROBE1(taskq_ent__finish);
+
+#endif /* HAVE_DECLARE_EVENT_CLASS */
+#endif /* _KERNEL */
diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c
index 8910c109e..a01c4b475 100644
--- a/module/os/linux/spl/spl-taskq.c
+++ b/module/os/linux/spl/spl-taskq.c
@@ -28,6 +28,7 @@
#include <sys/taskq.h>
#include <sys/kmem.h>
#include <sys/tsd.h>
+#include <sys/trace_spl.h>
int spl_taskq_thread_bind = 0;
module_param(spl_taskq_thread_bind, int, 0644);
@@ -223,6 +224,8 @@ task_expire_impl(taskq_ent_t *t)
}
t->tqent_birth = jiffies;
+ DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
+
/*
* The priority list must be maintained in strict task id order
* from lowest to highest for lowest_id to be easily calculable.
@@ -593,7 +596,9 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
t->tqent_taskq = tq;
t->tqent_timer.function = NULL;
t->tqent_timer.expires = 0;
+
t->tqent_birth = jiffies;
+ DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
@@ -706,7 +711,9 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
t->tqent_func = func;
t->tqent_arg = arg;
t->tqent_taskq = tq;
+
t->tqent_birth = jiffies;
+ DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
spin_unlock(&t->tqent_lock);
@@ -906,15 +913,15 @@ taskq_thread(void *args)
* tqent_flags here.
*
* Also use an on stack taskq_ent_t for tqt_task
- * assignment in this case. We only populate the two
- * fields used by the only user in taskq proc file.
+ * assignment in this case; we want to make sure
+ * to duplicate all fields, so the values are
+ * correct when it's accessed via DTRACE_PROBE*.
*/
tqt->tqt_id = t->tqent_id;
tqt->tqt_flags = t->tqent_flags;
if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
- dup_task.tqent_func = t->tqent_func;
- dup_task.tqent_arg = t->tqent_arg;
+ dup_task = *t;
t = &dup_task;
}
tqt->tqt_task = t;
@@ -923,9 +930,13 @@ taskq_thread(void *args)
tq->tq_nactive++;
spin_unlock_irqrestore(&tq->tq_lock, flags);
+ DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
+
/* Perform the requested task */
t->tqent_func(t->tqent_arg);
+ DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
+
spin_lock_irqsave_nested(&tq->tq_lock, flags,
tq->tq_lock_class);
tq->tq_nactive--;
diff --git a/module/os/linux/spl/spl-trace.c b/module/os/linux/spl/spl-trace.c
index 6415f9a91..7912a3812 100644
--- a/module/os/linux/spl/spl-trace.c
+++ b/module/os/linux/spl/spl-trace.c
@@ -24,7 +24,10 @@
* source file, so this dummy file exists for that purpose.
*/
+#include <sys/taskq.h>
+
#ifdef _KERNEL
#define CREATE_TRACE_POINTS
#include <sys/trace.h>
+#include <sys/trace_taskq.h>
#endif