1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#if !defined(_KERNEL) || !defined(__linux__)
/*
 * State of the in-memory debug message log.  It is only built when this
 * is not a Linux kernel build.
 *
 * zfs_dbgmsg() appends entries at the tail of zfs_dbgmsgs and frees them
 * from the head once zfs_dbgmsg_size exceeds zfs_dbgmsg_maxsize.
 * zfs_dbgmsgs_lock is held by zfs_dbgmsg() and zfs_dbgmsg_print() while
 * they touch the list.
 */
kmutex_t zfs_dbgmsgs_lock;
list_t zfs_dbgmsgs;
int zfs_dbgmsg_size;				/* bytes currently queued */
int zfs_dbgmsg_maxsize = 4 * 1024 * 1024;	/* 4MB */
#endif
/*
 * Enable various debugging features.
 *
 * While this is still zero, zfs_dbgmsg_init() replaces it with a
 * build-specific default.  In kernel builds it is also declared as a
 * module parameter at the end of this file.
 */
int zfs_flags = 0;
/*
 * zfs_recover can be set to nonzero to attempt to recover from
 * otherwise-fatal errors, typically caused by on-disk corruption. When
 * set, calls to zfs_panic_recover() will turn into warning messages
 * (CE_WARN instead of CE_PANIC).
 * This should only be used as a last resort, as it typically results
 * in leaked space, or worse.
 *
 * In kernel builds it is also declared as a module parameter at the end
 * of this file.
 */
int zfs_recover = B_FALSE;
/*
 * If destroy encounters an EIO while reading metadata (e.g. indirect
 * blocks), space referenced by the missing metadata can not be freed.
 * Normally this causes the background destroy to become "stalled", as
 * it is unable to make forward progress. While in this stalled state,
 * all remaining space to free from the error-encountering filesystem is
 * "temporarily leaked". Set this flag to cause it to ignore the EIO,
 * permanently leak the space from indirect blocks that can not be read,
 * and continue to free everything else that it can.
 *
 * The default, "stalling" behavior is useful if the storage partially
 * fails (i.e. some but not all i/os fail), and then later recovers. In
 * this case, we will be able to continue pool operations while it is
 * partially failed, and when it recovers, we can continue to free the
 * space, with no leaks. However, note that this case is actually
 * fairly rare.
 *
 * Typically pools either (a) fail completely (but perhaps temporarily,
 * e.g. a top-level vdev going offline), or (b) have localized,
 * permanent errors (e.g. disk returns the wrong data due to bit flip or
 * firmware bug). In case (a), this setting does not matter because the
 * pool will be suspended and the sync thread will not be able to make
 * forward progress regardless. In case (b), because the error is
 * permanent, the best we can do is leak the minimum amount of space,
 * which is what setting this flag will do. Therefore, it is reasonable
 * for this flag to normally be set, but we chose the more conservative
 * approach of not setting it, so that there is no possibility of
 * leaking space in the "partial temporary" failure case.
 *
 * In kernel builds this flag is also declared as a module parameter at
 * the end of this file.
 */
int zfs_free_leak_on_eio = B_FALSE;
/*
 * Report a condition that is normally fatal.
 *
 * The printf-style message (fmt plus its arguments) is handed to
 * vcmn_err().  The severity is CE_PANIC unless zfs_recover is nonzero,
 * in which case the message is issued as CE_WARN instead.
 */
void
zfs_panic_recover(const char *fmt, ...)
{
	va_list args;
	int level;

	if (zfs_recover)
		level = CE_WARN;
	else
		level = CE_PANIC;

	va_start(args, fmt);
	vcmn_err(level, fmt, args);
	va_end(args);
}
/*
 * Set up the debug message log and pick the default debugging flags.
 *
 * Debug logging is enabled by default for production kernel builds.
 * The overhead for this is negligible and the logs can be valuable when
 * debugging. For non-production user space builds all debugging except
 * logging is enabled since performance is no longer a concern.
 *
 * The defaults are only applied while zfs_flags is still zero; a value
 * that has already been set is left alone.
 */
void
zfs_dbgmsg_init(void)
{
#if !defined(_KERNEL) || !defined(__linux__)
	/* The message list and its lock exist only in these builds. */
	list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
	    offsetof(zfs_dbgmsg_t, zdm_node));
	mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
#endif

	if (zfs_flags != 0)
		return;

#if defined(_KERNEL)
	/* Kernel: turn on dprintf logging only, and route it through SPL. */
	zfs_flags = ZFS_DEBUG_DPRINTF;
	spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF);
	spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1);
#else
	/* User space: every debugging flag except dprintf logging. */
	zfs_flags = ~ZFS_DEBUG_DPRINTF;
#endif /* _KERNEL */
}
/*
 * Tear down the debug message log: free every queued message, destroy
 * the lock, and assert that the size accounting has returned to zero.
 * This is a no-op in Linux kernel builds, where the log is not compiled.
 */
void
zfs_dbgmsg_fini(void)
{
#if !defined(_KERNEL) || !defined(__linux__)
	for (;;) {
		zfs_dbgmsg_t *msg = list_remove_head(&zfs_dbgmsgs);
		int bytes;

		if (msg == NULL)
			break;

		/* Same size that zfs_dbgmsg() allocated for this entry. */
		bytes = sizeof (zfs_dbgmsg_t) + strlen(msg->zdm_msg);
		kmem_free(msg, bytes);
		zfs_dbgmsg_size -= bytes;
	}

	mutex_destroy(&zfs_dbgmsgs_lock);
	ASSERT0(zfs_dbgmsg_size);
#endif
}
#if !defined(_KERNEL) || !defined(__linux__)
/*
 * Append a printf-style formatted message to the in-memory debug log.
 *
 * The message is formatted into a newly allocated zfs_dbgmsg_t, its
 * zdm_timestamp is taken from gethrestime_sec(), it is reported through
 * the zfs-dbgmsg probe, and it is queued at the tail of zfs_dbgmsgs.
 * While the accounted size exceeds zfs_dbgmsg_maxsize, entries are freed
 * from the head of the list (oldest first).  A message that cannot be
 * formatted is silently dropped.
 *
 * Print these messages by running:
 * echo ::zfs_dbgmsg | mdb -k
 *
 * Monitor these messages by running:
 * dtrace -qn 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
 *
 * When used with libzpool, monitor with:
 * dtrace -qn 'zfs$pid::zfs_dbgmsg:probe1{printf("%s\n", copyinstr(arg1))}'
 */
void
zfs_dbgmsg(const char *fmt, ...)
{
	int size;
	va_list adx;
	zfs_dbgmsg_t *zdm;

	/* First pass: measure the formatted length (without the null). */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx);
	va_end(adx);

	/*
	 * vsnprintf() reports a formatting failure with a negative value.
	 * Using that as a length would under-allocate the entry and leave
	 * zdm_msg unwritten, so drop the message instead.
	 */
	if (size < 0)
		return;

	/*
	 * There is one byte of string in sizeof (zfs_dbgmsg_t), used
	 * for the terminating null.
	 */
	zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP);
	zdm->zdm_timestamp = gethrestime_sec();

	/* Second pass: format into the entry's own buffer. */
	va_start(adx, fmt);
	(void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
	va_end(adx);

	DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);

	mutex_enter(&zfs_dbgmsgs_lock);
	list_insert_tail(&zfs_dbgmsgs, zdm);
	zfs_dbgmsg_size += sizeof (zfs_dbgmsg_t) + size;

	/*
	 * Trim the log from the head.  Also stop once the list is empty, so
	 * that a negative zfs_dbgmsg_maxsize cannot lead to dereferencing
	 * the NULL that list_remove_head() returns for an empty list.
	 */
	while (zfs_dbgmsg_size > zfs_dbgmsg_maxsize &&
	    (zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
		size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg);
		kmem_free(zdm, size);
		zfs_dbgmsg_size -= size;
	}
	mutex_exit(&zfs_dbgmsgs_lock);
}
/*
 * Print the debug log with printf(): a "ZFS_DBGMSG(tag):" header line,
 * then every queued message on its own line, walking the list from head
 * to tail.  zfs_dbgmsgs_lock is held for the duration of the walk.
 */
void
zfs_dbgmsg_print(const char *tag)
{
	zfs_dbgmsg_t *cur;

	(void) printf("ZFS_DBGMSG(%s):\n", tag);

	mutex_enter(&zfs_dbgmsgs_lock);
	cur = list_head(&zfs_dbgmsgs);
	while (cur) {
		(void) printf("%s\n", cur->zdm_msg);
		cur = list_next(&zfs_dbgmsgs, cur);
	}
	mutex_exit(&zfs_dbgmsgs_lock);
}
#endif
#if defined(_KERNEL)
/*
 * Kernel builds only: declare the three tunables defined earlier in this
 * file as int module parameters (permission argument 0644), each with a
 * one-line description.
 */
module_param(zfs_flags, int, 0644);
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
module_param(zfs_recover, int, 0644);
MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
module_param(zfs_free_leak_on_eio, int, 0644);
MODULE_PARM_DESC(zfs_free_leak_on_eio,
"Set to ignore IO errors during free and permanently leak the space");
#endif /* _KERNEL */
|