diff options
author | Prakash Surya <[email protected]> | 2019-10-24 14:13:41 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-11-01 13:14:54 -0700 |
commit | ae38e00968a1920eb3c1051df888b24301e2f82b (patch) | |
tree | 8c365cff0efe7622bbb649b50e3f00539a693fcb | |
parent | e5d1c27e30f1aecfde2a7a16830b2d34c2f5e975 (diff) |
Add tracepoints for taskq entry lifetime events
This adds some new DTRACE_PROBE* endpoints so that we can observe taskq
latencies on a system. Additionally, a new "taskqlatency.bt" script is
added to do this observation via "bpftrace". Lastly, a "zfs-trace.sh"
script is added to wrap "bpftrace" with the proper options required to
run and use "taskqlatency.bt".
For example, with these changes in place, a user can run the following:
$ cd ./contrib/bpftrace
$ sudo ./zfs-trace.sh taskqlatency.bt
Attaching 6 probes...
^C
Here's some example output, showing latency information for time spent
executing the taskq entry's function:
@exec_lat_us[dp_sync_taskq, userquota_updates_task]:
[2, 4) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8) 0 | |
[8, 16) 1 |@@@@@@@@@@ |
[16, 32) 2 |@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[z_wr_int_h, zio_execute]:
[8, 16) 16 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@ |
@exec_lat_us[z_wr_iss_h, zio_execute]:
[16, 32) 4 |@@@@@@@@@@@@@@@@ |
[32, 64) 13 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 1 |@@@@ |
@exec_lat_us[z_ioctl_int, zio_execute]:
[2, 4) 1 |@@@@ |
[4, 8) 11 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
@exec_lat_us[dp_sync_taskq, sync_dnodes_task]:
[2, 4) 1 |@@@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 8 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 2 |@@@@@@@@@@@@@ |
[32, 64) 4 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 1 |@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@@@
Here's some example output, showing latency information for time spent
waiting on the taskq, prior to starting execution of entry's function:
@queue_lat_us[dp_sync_taskq]:
[2, 4) 1 |@@@@ |
[4, 8) 7 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 2 |@@@@@@@@ |
[16, 32) 3 |@@@@@@@@@@@@@ |
[32, 64) 12 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128) 6 |@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 0 | |
[256, 512) 1 |@@@@ |
@queue_lat_us[z_wr_iss]:
[4, 8) 4 |@@@@ |
[8, 16) 13 |@@@@@@@@@@@@@@@ |
[16, 32) 6 |@@@@@@@ |
[32, 64) 2 |@@ |
[64, 128) 12 |@@@@@@@@@@@@@@ |
[128, 256) 15 |@@@@@@@@@@@@@@@@@@ |
[256, 512) 33 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 27 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1K, 2K) 7 |@@@@@@@@ |
[2K, 4K) 14 |@@@@@@@@@@@@@@@@ |
[4K, 8K) 14 |@@@@@@@@@@@@@@@@ |
[8K, 16K) 23 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16K, 32K) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
@queue_lat_us[z_wr_int]:
[2, 4) 10 |@@@@@ |
[4, 8) 71 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[8, 16) 88 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[16, 32) 50 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[32, 64) 65 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 43 |@@@@@@@@@@@@@@@@@@@@@@@@@ |
[128, 256) 19 |@@@@@@@@@@@ |
[256, 512) 3 |@ |
[512, 1K) 1 | |
Reviewed by: Brad Lewis <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Prakash Surya <[email protected]>
Closes #9525
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | contrib/Makefile.am | 4 | ||||
-rw-r--r-- | contrib/bpftrace/Makefile.am | 3 | ||||
-rw-r--r-- | contrib/bpftrace/taskqlatency.bt | 54 | ||||
-rwxr-xr-x | contrib/bpftrace/zfs-trace.sh | 10 | ||||
-rw-r--r-- | include/os/linux/spl/sys/Makefile.am | 1 | ||||
-rw-r--r-- | include/os/linux/spl/sys/trace_spl.h | 3 | ||||
-rw-r--r-- | include/os/linux/spl/sys/trace_taskq.h | 89 | ||||
-rw-r--r-- | module/os/linux/spl/spl-taskq.c | 19 | ||||
-rw-r--r-- | module/os/linux/spl/spl-trace.c | 3 |
10 files changed, 181 insertions, 6 deletions
diff --git a/configure.ac b/configure.ac index 5bcf72bbe..723407558 100644 --- a/configure.ac +++ b/configure.ac @@ -86,6 +86,7 @@ AC_CONFIG_FILES([ cmd/zvol_wait/Makefile contrib/Makefile contrib/bash_completion.d/Makefile + contrib/bpftrace/Makefile contrib/dracut/02zfsexpandknowledge/Makefile contrib/dracut/90zfs/Makefile contrib/dracut/Makefile diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 9a82f82ee..9f34fd835 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp -DIST_SUBDIRS = bash_completion.d dracut initramfs pyzfs zcp +SUBDIRS = bash_completion.d bpftrace dracut initramfs pyzfs zcp +DIST_SUBDIRS = bash_completion.d bpftrace dracut initramfs pyzfs zcp diff --git a/contrib/bpftrace/Makefile.am b/contrib/bpftrace/Makefile.am new file mode 100644 index 000000000..c4359e6c8 --- /dev/null +++ b/contrib/bpftrace/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST = \ + $(top_srcdir)/contrib/bpftrace/taskqlatency.bt \ + $(top_srcdir)/contrib/bpftrace/zfs-trace.sh diff --git a/contrib/bpftrace/taskqlatency.bt b/contrib/bpftrace/taskqlatency.bt new file mode 100644 index 000000000..598f9882b --- /dev/null +++ b/contrib/bpftrace/taskqlatency.bt @@ -0,0 +1,54 @@ +#include <sys/taskq.h> + +kprobe:trace_zfs_taskq_ent__birth +{ + $tqent = (struct taskq_ent *)arg0; + + $tqent_id = $tqent->tqent_id; + $tq_name = str($tqent->tqent_taskq->tq_name); + + @birth[$tq_name, $tqent_id] = nsecs; +} + +kprobe:trace_zfs_taskq_ent__start +{ + $tqent = (struct taskq_ent *)arg0; + + @tqent_id[tid] = $tqent->tqent_id; + @tq_name[tid] = str($tqent->tqent_taskq->tq_name); + + @start[@tq_name[tid], @tqent_id[tid]] = nsecs; +} + +kprobe:trace_zfs_taskq_ent__start +/ @birth[@tq_name[tid], @tqent_id[tid]] / +{ + @queue_lat_us[@tq_name[tid]] = + hist((nsecs - @birth[@tq_name[tid], @tqent_id[tid]])/1000); + delete(@birth[@tq_name[tid], @tqent_id[tid]]); +} + +kprobe:trace_zfs_taskq_ent__finish +/ @start[@tq_name[tid], @tqent_id[tid]] / +{ + $tqent = (struct taskq_ent *)arg0; + + @exec_lat_us[@tq_name[tid], ksym($tqent->tqent_func)] = + hist((nsecs - @start[@tq_name[tid], @tqent_id[tid]])/1000); + delete(@start[@tq_name[tid], @tqent_id[tid]]); +} + +kprobe:trace_zfs_taskq_ent__finish +{ + delete(@tq_name[tid]); + delete(@tqent_id[tid]); +} + +END +{ + clear(@birth); + clear(@start); + + clear(@tq_name); + clear(@tqent_id); +} diff --git a/contrib/bpftrace/zfs-trace.sh b/contrib/bpftrace/zfs-trace.sh new file mode 100755 index 000000000..13230b78c --- /dev/null +++ b/contrib/bpftrace/zfs-trace.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +ZVER=$(cat /sys/module/zfs/version | cut -f 1 -d '-') +KVER=$(uname -r) + +bpftrace \ + --include "/usr/src/zfs-$ZVER/$KVER/zfs_config.h" \ + -I "/usr/src/zfs-$ZVER/include" \ + -I "/usr/src/zfs-$ZVER/include/spl" \ + "$@" diff --git a/include/os/linux/spl/sys/Makefile.am b/include/os/linux/spl/sys/Makefile.am index 251542118..e3df4edae 100644 --- a/include/os/linux/spl/sys/Makefile.am +++ b/include/os/linux/spl/sys/Makefile.am @@ -46,6 +46,7 @@ KERNEL_H = \ $(top_srcdir)/include/os/linux/spl/sys/timer.h \ $(top_srcdir)/include/os/linux/spl/sys/trace.h \ $(top_srcdir)/include/os/linux/spl/sys/trace_spl.h \ + $(top_srcdir)/include/os/linux/spl/sys/trace_taskq.h \ $(top_srcdir)/include/os/linux/spl/sys/tsd.h \ $(top_srcdir)/include/os/linux/spl/sys/types32.h \ $(top_srcdir)/include/os/linux/spl/sys/types.h \ diff --git a/include/os/linux/spl/sys/trace_spl.h b/include/os/linux/spl/sys/trace_spl.h index 6b50d546b..bffd91d91 100644 --- a/include/os/linux/spl/sys/trace_spl.h +++ b/include/os/linux/spl/sys/trace_spl.h @@ -22,6 +22,9 @@ #ifndef _OS_LINUX_SPL_TRACE_H #define _OS_LINUX_SPL_TRACE_H +#include <sys/taskq.h> + #include <sys/trace.h> +#include <sys/trace_taskq.h> #endif diff --git a/include/os/linux/spl/sys/trace_taskq.h b/include/os/linux/spl/sys/trace_taskq.h new file mode 100644 index 000000000..dbbb3c4c7 --- /dev/null +++ b/include/os/linux/spl/sys/trace_taskq.h @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#if defined(_KERNEL) +#if defined(HAVE_DECLARE_EVENT_CLASS) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM zfs + +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR zfs_taskq + +#if !defined(_TRACE_TASKQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TASKQ_H + +#include <linux/tracepoint.h> +#include <sys/types.h> + +/* + * Generic support for single argument tracepoints of the form: + * + * DTRACE_PROBE1(..., + * taskq_ent_t *, ...); + */ +/* BEGIN CSTYLED */ +DECLARE_EVENT_CLASS(zfs_taskq_ent_class, + TP_PROTO(taskq_ent_t *taskq_ent), + TP_ARGS(taskq_ent), + TP_STRUCT__entry( + __field(taskq_ent_t *, taskq_ent) + ), + TP_fast_assign( + __entry->taskq_ent = taskq_ent; + ), + TP_printk("taskq_ent %p", __entry->taskq_ent) +); +/* END CSTYLED */ + +/* BEGIN CSTYLED */ +#define DEFINE_TASKQ_EVENT(name) \ +DEFINE_EVENT(zfs_taskq_ent_class, name, \ + TP_PROTO(taskq_ent_t *taskq_ent), \ + TP_ARGS(taskq_ent)) +/* END CSTYLED */ +DEFINE_TASKQ_EVENT(zfs_taskq_ent__birth); +DEFINE_TASKQ_EVENT(zfs_taskq_ent__start); +DEFINE_TASKQ_EVENT(zfs_taskq_ent__finish); + +#endif /* _TRACE_TASKQ_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH sys +#define TRACE_INCLUDE_FILE trace_taskq +#include <trace/define_trace.h> + +#else + +/* + * When tracepoints are not available, a DEFINE_DTRACE_PROBE* macro is + * needed for each DTRACE_PROBE. These will be used to generate stub + * tracing functions and prototypes for those functions. See + * include/os/linux/spl/sys/trace.h. + */ + +DEFINE_DTRACE_PROBE1(taskq_ent__birth); +DEFINE_DTRACE_PROBE1(taskq_ent__start); +DEFINE_DTRACE_PROBE1(taskq_ent__finish); + +#endif /* HAVE_DECLARE_EVENT_CLASS */ +#endif /* _KERNEL */ diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 8910c109e..a01c4b475 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -28,6 +28,7 @@ #include <sys/taskq.h> #include <sys/kmem.h> #include <sys/tsd.h> +#include <sys/trace_spl.h> int spl_taskq_thread_bind = 0; module_param(spl_taskq_thread_bind, int, 0644); @@ -223,6 +224,8 @@ task_expire_impl(taskq_ent_t *t) } t->tqent_birth = jiffies; + DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t); + /* * The priority list must be maintained in strict task id order * from lowest to highest for lowest_id to be easily calculable. @@ -593,7 +596,9 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) t->tqent_taskq = tq; t->tqent_timer.function = NULL; t->tqent_timer.expires = 0; + t->tqent_birth = jiffies; + DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t); ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); @@ -706,7 +711,9 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; + t->tqent_birth = jiffies; + DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t); spin_unlock(&t->tqent_lock); @@ -906,15 +913,15 @@ taskq_thread(void *args) * tqent_flags here. * * Also use an on stack taskq_ent_t for tqt_task - * assignment in this case. We only populate the two - * fields used by the only user in taskq proc file. + * assignment in this case; we want to make sure + * to duplicate all fields, so the values are + * correct when it's accessed via DTRACE_PROBE*. */ tqt->tqt_id = t->tqent_id; tqt->tqt_flags = t->tqent_flags; if (t->tqent_flags & TQENT_FLAG_PREALLOC) { - dup_task.tqent_func = t->tqent_func; - dup_task.tqent_arg = t->tqent_arg; + dup_task = *t; t = &dup_task; } tqt->tqt_task = t; @@ -923,9 +930,13 @@ taskq_thread(void *args) tq->tq_nactive++; spin_unlock_irqrestore(&tq->tq_lock, flags); + DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t); + /* Perform the requested task */ t->tqent_func(t->tqent_arg); + DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); tq->tq_nactive--; diff --git a/module/os/linux/spl/spl-trace.c b/module/os/linux/spl/spl-trace.c index 6415f9a91..7912a3812 100644 --- a/module/os/linux/spl/spl-trace.c +++ b/module/os/linux/spl/spl-trace.c @@ -24,7 +24,10 @@ * source file, so this dummy file exists for that purpose. */ +#include <sys/taskq.h> + #ifdef _KERNEL #define CREATE_TRACE_POINTS #include <sys/trace.h> +#include <sys/trace_taskq.h> #endif |