diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/Makefile.in | 1 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 67 | ||||
-rw-r--r-- | module/zfs/trace.c | 39 | ||||
-rw-r--r-- | module/zfs/zfs_debug.c | 116 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 49 |
5 files changed, 173 insertions, 99 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 48e7e97e9..954841f33 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -52,6 +52,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/spa_stats.o $(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o $(MODULE)-objs += @top_srcdir@/module/zfs/space_reftree.o $(MODULE)-objs += @top_srcdir@/module/zfs/txg.o +$(MODULE)-objs += @top_srcdir@/module/zfs/trace.o $(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o $(MODULE)-objs += @top_srcdir@/module/zfs/unique.o $(MODULE)-objs += @top_srcdir@/module/zfs/vdev.o diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index bb740e569..2d16d7e06 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -238,6 +238,53 @@ static avl_tree_t spa_l2cache_avl; kmem_cache_t *spa_buffer_pool; int spa_mode_global; +#ifdef ZFS_DEBUG +/* Everything except dprintf and spa is on by default in debug builds */ +int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA); +#else +int zfs_flags = 0; +#endif + +/* + * zfs_recover can be set to nonzero to attempt to recover from + * otherwise-fatal errors, typically caused by on-disk corruption. When + * set, calls to zfs_panic_recover() will turn into warning messages. + * This should only be used as a last resort, as it typically results + * in leaked space, or worse. + */ +int zfs_recover = B_FALSE; + +/* + * If destroy encounters an EIO while reading metadata (e.g. indirect + * blocks), space referenced by the missing metadata can not be freed. + * Normally this causes the background destroy to become "stalled", as + * it is unable to make forward progress. While in this stalled state, + * all remaining space to free from the error-encountering filesystem is + * "temporarily leaked". Set this flag to cause it to ignore the EIO, + * permanently leak the space from indirect blocks that can not be read, + * and continue to free everything else that it can. + * + * The default, "stalling" behavior is useful if the storage partially + * fails (i.e. some but not all i/os fail), and then later recovers. In + * this case, we will be able to continue pool operations while it is + * partially failed, and when it recovers, we can continue to free the + * space, with no leaks. However, note that this case is actually + * fairly rare. + * + * Typically pools either (a) fail completely (but perhaps temporarily, + * e.g. a top-level vdev going offline), or (b) have localized, + * permanent errors (e.g. disk returns the wrong data due to bit flip or + * firmware bug). In case (a), this setting does not matter because the + * pool will be suspended and the sync thread will not be able to make + * forward progress regardless. In case (b), because the error is + * permanent, the best we can do is leak the minimum amount of space, + * which is what setting this flag will do. Therefore, it is reasonable + * for this flag to normally be set, but we chose the more conservative + * approach of not setting it, so that there is no possibility of + * leaking space in the "partial temporary" failure case. + */ +int zfs_free_leak_on_eio = B_FALSE; + /* * Expiration time in milliseconds. This value has two meanings. First it is * used to determine when the spa_deadman() logic should fire. By default the @@ -1319,6 +1366,16 @@ spa_freeze(spa_t *spa) txg_wait_synced(spa_get_dsl(spa), freeze_txg); } +void +zfs_panic_recover(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); + va_end(adx); +} + /* * This is a stripped-down version of strtoull, suitable only for converting * lowercase hexadecimal numbers that don't overflow. @@ -1923,6 +1980,16 @@ EXPORT_SYMBOL(spa_mode); EXPORT_SYMBOL(spa_namespace_lock); +module_param(zfs_flags, int, 0644); +MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags"); + +module_param(zfs_recover, int, 0644); +MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors"); + +module_param(zfs_free_leak_on_eio, int, 0644); +MODULE_PARM_DESC(zfs_free_leak_on_eio, + "Set to ignore IO errors during free and permanently leak the space"); + module_param(zfs_deadman_synctime_ms, ulong, 0644); MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds"); diff --git a/module/zfs/trace.c b/module/zfs/trace.c new file mode 100644 index 000000000..54ca2b2b4 --- /dev/null +++ b/module/zfs/trace.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Each Linux tracepoints subsystem must define CREATE_TRACE_POINTS in one + * (and only one) C file, so this dummy file exists for that purpose. + */ + +#include <sys/arc_impl.h> +#include <sys/vdev_impl.h> +#include <sys/zio.h> +#include <sys/dbuf.h> +#include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> +#include <sys/dmu_tx.h> +#include <sys/dnode.h> +#include <sys/zfs_znode.h> +#include <sys/zil_impl.h> +#include <sys/zrlock.h> + +#define CREATE_TRACE_POINTS +#include <sys/trace.h> diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c index e7f266b5f..e1675c818 100644 --- a/module/zfs/zfs_debug.c +++ b/module/zfs/zfs_debug.c @@ -25,99 +25,22 @@ #include <sys/zfs_context.h> -#if !defined(_KERNEL) || !defined(__linux__) list_t zfs_dbgmsgs; int zfs_dbgmsg_size; kmutex_t zfs_dbgmsgs_lock; int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */ -#endif -/* - * Enable various debugging features. - */ -int zfs_flags = 0; - -/* - * zfs_recover can be set to nonzero to attempt to recover from - * otherwise-fatal errors, typically caused by on-disk corruption. When - * set, calls to zfs_panic_recover() will turn into warning messages. - * This should only be used as a last resort, as it typically results - * in leaked space, or worse. - */ -int zfs_recover = B_FALSE; - -/* - * If destroy encounters an EIO while reading metadata (e.g. indirect - * blocks), space referenced by the missing metadata can not be freed. - * Normally this causes the background destroy to become "stalled", as - * it is unable to make forward progress. While in this stalled state, - * all remaining space to free from the error-encountering filesystem is - * "temporarily leaked". Set this flag to cause it to ignore the EIO, - * permanently leak the space from indirect blocks that can not be read, - * and continue to free everything else that it can. - * - * The default, "stalling" behavior is useful if the storage partially - * fails (i.e. some but not all i/os fail), and then later recovers. In - * this case, we will be able to continue pool operations while it is - * partially failed, and when it recovers, we can continue to free the - * space, with no leaks. However, note that this case is actually - * fairly rare. - * - * Typically pools either (a) fail completely (but perhaps temporarily, - * e.g. a top-level vdev going offline), or (b) have localized, - * permanent errors (e.g. disk returns the wrong data due to bit flip or - * firmware bug). In case (a), this setting does not matter because the - * pool will be suspended and the sync thread will not be able to make - * forward progress regardless. In case (b), because the error is - * permanent, the best we can do is leak the minimum amount of space, - * which is what setting this flag will do. Therefore, it is reasonable - * for this flag to normally be set, but we chose the more conservative - * approach of not setting it, so that there is no possibility of - * leaking space in the "partial temporary" failure case. - */ -int zfs_free_leak_on_eio = B_FALSE; - - -void -zfs_panic_recover(const char *fmt, ...) -{ - va_list adx; - - va_start(adx, fmt); - vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); - va_end(adx); -} - -/* - * Debug logging is enabled by default for production kernel builds. - * The overhead for this is negligible and the logs can be valuable when - * debugging. For non-production user space builds all debugging except - * logging is enabled since performance is no longer a concern. - */ void zfs_dbgmsg_init(void) { -#if !defined(_KERNEL) || !defined(__linux__) list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t), offsetof(zfs_dbgmsg_t, zdm_node)); mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL); -#endif - - if (zfs_flags == 0) { -#if defined(_KERNEL) - zfs_flags = ZFS_DEBUG_DPRINTF; - spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF); - spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1); -#else - zfs_flags = ~ZFS_DEBUG_DPRINTF; -#endif /* _KERNEL */ - } } void zfs_dbgmsg_fini(void) { -#if !defined(_KERNEL) || !defined(__linux__) zfs_dbgmsg_t *zdm; while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) { @@ -127,25 +50,24 @@ zfs_dbgmsg_fini(void) } mutex_destroy(&zfs_dbgmsgs_lock); ASSERT0(zfs_dbgmsg_size); -#endif } -#if !defined(_KERNEL) || !defined(__linux__) /* - * Print these messages by running: - * echo ::zfs_dbgmsg | mdb -k + * To get this data enable the zfs__dbgmsg tracepoint as shown: * - * Monitor these messages by running: - * dtrace -qn 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}' + * # Enable zfs__dbgmsg tracepoint, clear the tracepoint ring buffer + * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable + * $ echo 0 > /sys/kernel/debug/tracing/trace * - * When used with libzpool, monitor with: - * dtrace -qn 'zfs$pid::zfs_dbgmsg:probe1{printf("%s\n", copyinstr(arg1))}' + * # Dump the ring buffer. + * $ cat /sys/kernel/debug/tracing/trace */ void zfs_dbgmsg(const char *fmt, ...) { int size; va_list adx; + char *nl; zfs_dbgmsg_t *zdm; va_start(adx, fmt); @@ -156,13 +78,20 @@ zfs_dbgmsg(const char *fmt, ...) * There is one byte of string in sizeof (zfs_dbgmsg_t), used * for the terminating null. */ - zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP); + zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_PUSHPAGE); zdm->zdm_timestamp = gethrestime_sec(); va_start(adx, fmt); (void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx); va_end(adx); + /* + * Get rid of trailing newline. + */ + nl = strrchr(zdm->zdm_msg, '\n'); + if (nl != NULL) + *nl = '\0'; + DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg); mutex_enter(&zfs_dbgmsgs_lock); @@ -180,6 +109,7 @@ zfs_dbgmsg(const char *fmt, ...) void zfs_dbgmsg_print(const char *tag) { +#if !defined(_KERNEL) zfs_dbgmsg_t *zdm; (void) printf("ZFS_DBGMSG(%s):\n", tag); @@ -188,17 +118,5 @@ zfs_dbgmsg_print(const char *tag) zdm = list_next(&zfs_dbgmsgs, zdm)) (void) printf("%s\n", zdm->zdm_msg); mutex_exit(&zfs_dbgmsgs_lock); +#endif /* !_KERNEL */ } -#endif - -#if defined(_KERNEL) -module_param(zfs_flags, int, 0644); -MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags"); - -module_param(zfs_recover, int, 0644); -MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors"); - -module_param(zfs_free_leak_on_eio, int, 0644); -MODULE_PARM_DESC(zfs_free_leak_on_eio, - "Set to ignore IO errors during free and permanently leak the space"); -#endif /* _KERNEL */ diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 37a893c47..9396d6caa 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -247,6 +247,55 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); +#if defined(HAVE_DECLARE_EVENT_CLASS) +void +__dprintf(const char *file, const char *func, int line, const char *fmt, ...) +{ + const char *newfile; + size_t size = 4096; + char *buf = kmem_alloc(size, KM_PUSHPAGE); + char *nl; + va_list adx; + + /* + * Get rid of annoying prefix to filename. + */ + newfile = strrchr(file, '/'); + if (newfile != NULL) { + newfile = newfile + 1; /* Get rid of leading / */ + } else { + newfile = file; + } + + va_start(adx, fmt); + (void) vsnprintf(buf, size, fmt, adx); + va_end(adx); + + /* + * Get rid of trailing newline. + */ + nl = strrchr(buf, '\n'); + if (nl != NULL) + *nl = '\0'; + + /* + * To get this data enable the zfs__dprintf trace point as shown: + * + * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer + * $ echo 1 > /sys/module/zfs/parameters/zfs_flags + * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable + * $ echo 0 > /sys/kernel/debug/tracing/trace + * + * # Dump the ring buffer. + * $ cat /sys/kernel/debug/tracing/trace + */ + DTRACE_PROBE4(zfs__dprintf, + char *, newfile, char *, func, int, line, char *, buf); + + kmem_free(buf, size); +} +#endif /* HAVE_DECLARE_EVENT_CLASS */ + static void history_str_free(char *buf) { |