aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
author John Gallagher <[email protected]> 2018-09-26 11:08:12 -0700
committer Brian Behlendorf <[email protected]> 2018-09-26 11:08:12 -0700
commit d12614521a307c709778e5f7f91ae6085f63f9e0 (patch)
tree 130e6dde286d0da760612a7f4d9595a660777011 /module
parent 3ed2fbcc1ce36fdc516aa11848692a4e4c4a2bc0 (diff)
Fixes for procfs files backed by linked lists
There are some issues with the way the seq_file interface is implemented for kstats backed by linked lists (zfs_dbgmsgs and certain per-pool debugging info):

* We don't account for the fact that seq_file sometimes visits a node multiple times, which results in missing messages when read through procfs.
* We don't keep separate state for each reader of a file, so concurrent readers will receive incorrect results.
* We don't account for the fact that entries may have been removed from the list between read syscalls, so reading from these files in procfs can cause the system to crash.

This change fixes these issues and adds procfs_list, a wrapper around a linked list which abstracts away the details of implementing the seq_file interface for a list and exposing the contents of the list through procfs.

Reviewed by: Don Brady <[email protected]>
Reviewed-by: Serapheim Dimitropoulos <[email protected]>
Reviewed by: Brad Lewis <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: John Gallagher <[email protected]>
External-issue: LX-1211
Closes #7819
Diffstat (limited to 'module')
-rw-r--r-- module/spl/Makefile.in 1
-rw-r--r-- module/spl/spl-kstat.c 100
-rw-r--r-- module/spl/spl-procfs-list.c 256
-rw-r--r-- module/zfs/spa_stats.c 600
-rw-r--r-- module/zfs/vdev_queue.c 40
-rw-r--r-- module/zfs/zfs_debug.c 132
6 files changed, 607 insertions, 522 deletions
diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in
index 97a431f22..3bcbf63cb 100644
--- a/module/spl/Makefile.in
+++ b/module/spl/Makefile.in
@@ -18,6 +18,7 @@ $(MODULE)-objs += spl-kobj.o
$(MODULE)-objs += spl-kstat.o
$(MODULE)-objs += spl-mutex.o
$(MODULE)-objs += spl-proc.o
+$(MODULE)-objs += spl-procfs-list.o
$(MODULE)-objs += spl-rwlock.o
$(MODULE)-objs += spl-taskq.o
$(MODULE)-objs += spl-thread.o
diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c
index c3fc2e4b2..8683693c8 100644
--- a/module/spl/spl-kstat.c
+++ b/module/spl/spl-kstat.c
@@ -530,6 +530,18 @@ __kstat_set_raw_ops(kstat_t *ksp,
}
EXPORT_SYMBOL(__kstat_set_raw_ops);
+void
+kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
+ const char *name)
+{
+ kpep->kpe_owner = NULL;
+ kpep->kpe_proc = NULL;
+ INIT_LIST_HEAD(&kpep->kpe_list);
+ strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
+ strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
+}
+EXPORT_SYMBOL(kstat_proc_entry_init);
+
kstat_t *
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
@@ -556,13 +568,10 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
ksp->ks_magic = KS_MAGIC;
mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
ksp->ks_lock = &ksp->ks_private_lock;
- INIT_LIST_HEAD(&ksp->ks_list);
ksp->ks_crtime = gethrtime();
ksp->ks_snaptime = ksp->ks_crtime;
- strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN);
ksp->ks_instance = ks_instance;
- strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN);
strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
ksp->ks_type = ks_type;
ksp->ks_flags = ks_flags;
@@ -573,6 +582,7 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
ksp->ks_raw_ops.addr = NULL;
ksp->ks_raw_buf = NULL;
ksp->ks_raw_bufsize = 0;
+ kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
@@ -614,14 +624,14 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
EXPORT_SYMBOL(__kstat_create);
static int
-kstat_detect_collision(kstat_t *ksp)
+kstat_detect_collision(kstat_proc_entry_t *kpep)
{
kstat_module_t *module;
- kstat_t *tmp;
+ kstat_proc_entry_t *tmp;
char *parent;
char *cp;
- parent = kmem_asprintf("%s", ksp->ks_module);
+ parent = kmem_asprintf("%s", kpep->kpe_module);
if ((cp = strrchr(parent, '/')) == NULL) {
strfree(parent);
@@ -630,8 +640,8 @@ kstat_detect_collision(kstat_t *ksp)
cp[0] = '\0';
if ((module = kstat_find_module(parent)) != NULL) {
- list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) {
- if (strncmp(tmp->ks_name, cp+1, KSTAT_STRLEN) == 0) {
+ list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
+ if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) {
strfree(parent);
return (EEXIST);
}
@@ -642,24 +652,30 @@ kstat_detect_collision(kstat_t *ksp)
return (0);
}
+/*
+ * Add a file to the proc filesystem under the kstat namespace (i.e.
+ * /proc/spl/kstat/). The file need not necessarily be implemented as a
+ * kstat.
+ */
void
-__kstat_install(kstat_t *ksp)
+kstat_proc_entry_install(kstat_proc_entry_t *kpep,
+ const struct file_operations *file_ops, void *data)
{
kstat_module_t *module;
- kstat_t *tmp;
+ kstat_proc_entry_t *tmp;
- ASSERT(ksp);
+ ASSERT(kpep);
mutex_enter(&kstat_module_lock);
- module = kstat_find_module(ksp->ks_module);
+ module = kstat_find_module(kpep->kpe_module);
if (module == NULL) {
- if (kstat_detect_collision(ksp) != 0) {
+ if (kstat_detect_collision(kpep) != 0) {
cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
- " collision", ksp->ks_module, ksp->ks_name);
+ " collision", kpep->kpe_module, kpep->kpe_name);
goto out;
}
- module = kstat_create_module(ksp->ks_module);
+ module = kstat_create_module(kpep->kpe_module);
if (module == NULL)
goto out;
}
@@ -668,44 +684,60 @@ __kstat_install(kstat_t *ksp)
* Only one entry by this name per-module, on failure the module
* shouldn't be deleted because we know it has at least one entry.
*/
- list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) {
- if (strncmp(tmp->ks_name, ksp->ks_name, KSTAT_STRLEN) == 0)
+ list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
+ if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
goto out;
}
- list_add_tail(&ksp->ks_list, &module->ksm_kstat_list);
+ list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
- mutex_enter(ksp->ks_lock);
- ksp->ks_owner = module;
- ksp->ks_proc = proc_create_data(ksp->ks_name, 0644,
- module->ksm_proc, &proc_kstat_operations, (void *)ksp);
- if (ksp->ks_proc == NULL) {
- list_del_init(&ksp->ks_list);
+ kpep->kpe_owner = module;
+ kpep->kpe_proc = proc_create_data(kpep->kpe_name, 0644,
+ module->ksm_proc, file_ops, data);
+ if (kpep->kpe_proc == NULL) {
+ list_del_init(&kpep->kpe_list);
if (list_empty(&module->ksm_kstat_list))
kstat_delete_module(module);
}
- mutex_exit(ksp->ks_lock);
out:
mutex_exit(&kstat_module_lock);
+
+}
+EXPORT_SYMBOL(kstat_proc_entry_install);
+
+void
+__kstat_install(kstat_t *ksp)
+{
+ ASSERT(ksp);
+ kstat_proc_entry_install(&ksp->ks_proc, &proc_kstat_operations, ksp);
}
EXPORT_SYMBOL(__kstat_install);
void
-__kstat_delete(kstat_t *ksp)
+kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
{
- kstat_module_t *module = ksp->ks_owner;
+ kstat_module_t *module = kpep->kpe_owner;
+ if (kpep->kpe_proc)
+ remove_proc_entry(kpep->kpe_name, module->ksm_proc);
mutex_enter(&kstat_module_lock);
- list_del_init(&ksp->ks_list);
+ list_del_init(&kpep->kpe_list);
+
+ /*
+ * Remove top level module directory if it wasn't empty before, but now
+ * is.
+ */
+ if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
+ kstat_delete_module(module);
mutex_exit(&kstat_module_lock);
- if (ksp->ks_proc) {
- remove_proc_entry(ksp->ks_name, module->ksm_proc);
+}
+EXPORT_SYMBOL(kstat_proc_entry_delete);
- /* Remove top level module directory if it's empty */
- if (list_empty(&module->ksm_kstat_list))
- kstat_delete_module(module);
- }
+void
+__kstat_delete(kstat_t *ksp)
+{
+ kstat_proc_entry_delete(&ksp->ks_proc);
if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
kmem_free(ksp->ks_data, ksp->ks_data_size);
diff --git a/module/spl/spl-procfs-list.c b/module/spl/spl-procfs-list.c
new file mode 100644
index 000000000..4902e0a56
--- /dev/null
+++ b/module/spl/spl-procfs-list.c
@@ -0,0 +1,256 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#include <sys/list.h>
+#include <sys/mutex.h>
+#include <sys/procfs_list.h>
+#include <linux/proc_fs.h>
+
+/*
+ * A procfs_list is a wrapper around a linked list which implements the seq_file
+ * interface, allowing the contents of the list to be exposed through procfs.
+ * The kernel already has some utilities to help implement the seq_file
+ * interface for linked lists (seq_list_*), but they aren't appropriate for use
+ * with lists that have many entries, because seq_list_start walks the list at
+ * the start of each read syscall to find where it left off, so reading a file
+ * ends up being quadratic in the number of entries in the list.
+ *
+ * This implementation avoids this penalty by maintaining a separate cursor into
+ * the list per instance of the file that is open. It also maintains some extra
+ * information in each node of the list to prevent reads of entries that have
+ * been dropped from the list.
+ *
+ * Callers should only add elements to the list using procfs_list_add, which
+ * adds an element to the tail of the list. Other operations can be performed
+ * directly on the wrapped list using the normal list manipulation functions,
+ * but elements should only be removed from the head of the list.
+ */
+
+#define NODE_ID(procfs_list, obj) \
+ (((procfs_list_node_t *)(((char *)obj) + \
+ (procfs_list)->pl_node_offset))->pln_id)
+
+typedef struct procfs_list_cursor {
+ procfs_list_t *procfs_list; /* List into which this cursor points */
+ void *cached_node; /* Most recently accessed node */
+ loff_t cached_pos; /* Position of cached_node */
+} procfs_list_cursor_t;
+
+static int
+procfs_list_seq_show(struct seq_file *f, void *p)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+
+ ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+ if (p == SEQ_START_TOKEN) {
+ if (procfs_list->pl_show_header != NULL)
+ return (procfs_list->pl_show_header(f));
+ else
+ return (0);
+ }
+ return (procfs_list->pl_show(f, p));
+}
+
+static void *
+procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
+{
+ void *next_node;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+
+ if (cursor->cached_node == SEQ_START_TOKEN)
+ next_node = list_head(&procfs_list->pl_list);
+ else
+ next_node = list_next(&procfs_list->pl_list,
+ cursor->cached_node);
+
+ if (next_node != NULL) {
+ cursor->cached_node = next_node;
+ cursor->cached_pos = NODE_ID(procfs_list, cursor->cached_node);
+ *pos = cursor->cached_pos;
+ }
+ return (next_node);
+}
+
+static void *
+procfs_list_seq_start(struct seq_file *f, loff_t *pos)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+
+ mutex_enter(&procfs_list->pl_lock);
+
+ if (*pos == 0) {
+ cursor->cached_node = SEQ_START_TOKEN;
+ cursor->cached_pos = 0;
+ return (SEQ_START_TOKEN);
+ }
+
+ /*
+ * Check if our cached pointer has become stale, which happens if the
+ * message where we left off has been dropped from the list since
+ * the last read syscall completed.
+ */
+ void *oldest_node = list_head(&procfs_list->pl_list);
+ if (cursor->cached_node != SEQ_START_TOKEN && (oldest_node == NULL ||
+ NODE_ID(procfs_list, oldest_node) > cursor->cached_pos))
+ return (ERR_PTR(-EIO));
+
+ /*
+ * If it isn't starting from the beginning of the file, the seq_file
+ * code will either pick up at the same position it visited last or the
+ * following one.
+ */
+ if (*pos == cursor->cached_pos) {
+ return (cursor->cached_node);
+ } else {
+ ASSERT3U(*pos, ==, cursor->cached_pos + 1);
+ return (procfs_list_next_node(cursor, pos));
+ }
+}
+
+static void *
+procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
+ return (procfs_list_next_node(cursor, pos));
+}
+
+static void
+procfs_list_seq_stop(struct seq_file *f, void *p)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+ mutex_exit(&procfs_list->pl_lock);
+}
+
+static struct seq_operations procfs_list_seq_ops = {
+ .show = procfs_list_seq_show,
+ .start = procfs_list_seq_start,
+ .next = procfs_list_seq_next,
+ .stop = procfs_list_seq_stop,
+};
+
+static int
+procfs_list_open(struct inode *inode, struct file *filp)
+{
+ int rc = seq_open_private(filp, &procfs_list_seq_ops,
+ sizeof (procfs_list_cursor_t));
+ if (rc != 0)
+ return (rc);
+
+ struct seq_file *f = filp->private_data;
+ procfs_list_cursor_t *cursor = f->private;
+ cursor->procfs_list = PDE_DATA(inode);
+ cursor->cached_node = NULL;
+ cursor->cached_pos = 0;
+
+ return (0);
+}
+
+static ssize_t
+procfs_list_write(struct file *filp, const char __user *buf, size_t len,
+ loff_t *ppos)
+{
+ struct seq_file *f = filp->private_data;
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+ int rc;
+
+ if (procfs_list->pl_clear != NULL &&
+ (rc = procfs_list->pl_clear(procfs_list)) != 0)
+ return (-rc);
+ return (len);
+}
+
+static struct file_operations procfs_list_operations = {
+ .owner = THIS_MODULE,
+ .open = procfs_list_open,
+ .write = procfs_list_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*
+ * Initialize a procfs_list and create a file for it in the proc filesystem
+ * under the kstat namespace.
+ */
+void
+procfs_list_install(const char *module,
+ const char *name,
+ procfs_list_t *procfs_list,
+ int (*show)(struct seq_file *f, void *p),
+ int (*show_header)(struct seq_file *f),
+ int (*clear)(procfs_list_t *procfs_list),
+ size_t procfs_list_node_off)
+{
+ mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&procfs_list->pl_list,
+ procfs_list_node_off + sizeof (procfs_list_node_t),
+ procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
+ procfs_list->pl_next_id = 1; /* Save id 0 for SEQ_START_TOKEN */
+ procfs_list->pl_show = show;
+ procfs_list->pl_show_header = show_header;
+ procfs_list->pl_clear = clear;
+ procfs_list->pl_node_offset = procfs_list_node_off;
+
+ kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name);
+ kstat_proc_entry_install(&procfs_list->pl_kstat_entry,
+ &procfs_list_operations, procfs_list);
+}
+EXPORT_SYMBOL(procfs_list_install);
+
+/* Remove the proc filesystem file corresponding to the given list */
+void
+procfs_list_uninstall(procfs_list_t *procfs_list)
+{
+ kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
+}
+EXPORT_SYMBOL(procfs_list_uninstall);
+
+void
+procfs_list_destroy(procfs_list_t *procfs_list)
+{
+ ASSERT(list_is_empty(&procfs_list->pl_list));
+ list_destroy(&procfs_list->pl_list);
+ mutex_destroy(&procfs_list->pl_lock);
+}
+EXPORT_SYMBOL(procfs_list_destroy);
+
+/*
+ * Add a new node to the tail of the list. While the standard list manipulation
+ * functions can be used for all other operations, adding elements to the list
+ * should only be done using this helper so that the id of the new node is set
+ * correctly.
+ */
+void
+procfs_list_add(procfs_list_t *procfs_list, void *p)
+{
+ ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+ NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
+ list_insert_tail(&procfs_list->pl_list, p);
+}
+EXPORT_SYMBOL(procfs_list_add);
diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c
index fa1cf9e98..c02ef86b5 100644
--- a/module/zfs/spa_stats.c
+++ b/module/zfs/spa_stats.c
@@ -55,7 +55,6 @@ int zfs_multihost_history = 0;
* Read statistics - Information exported regarding each arc_read call
*/
typedef struct spa_read_history {
- uint64_t uid; /* unique identifier */
hrtime_t start; /* time read completed */
uint64_t objset; /* read from this objset */
uint64_t object; /* read of this object number */
@@ -65,13 +64,13 @@ typedef struct spa_read_history {
uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
pid_t pid; /* PID of task doing read */
char comm[16]; /* process name of task doing read */
- list_node_t srh_link;
+ procfs_list_node_t srh_node;
} spa_read_history_t;
static int
-spa_read_history_headers(char *buf, size_t size)
+spa_read_history_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
+ seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
"%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
"level", "blkid", "aflags", "origin", "pid", "process");
@@ -79,13 +78,13 @@ spa_read_history_headers(char *buf, size_t size)
}
static int
-spa_read_history_data(char *buf, size_t size, void *data)
+spa_read_history_show(struct seq_file *f, void *data)
{
spa_read_history_t *srh = (spa_read_history_t *)data;
- (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
+ seq_printf(f, "%-8llu %-16llu 0x%-6llx "
"%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
- (u_longlong_t)srh->uid, srh->start,
+ (u_longlong_t)srh->srh_node.pln_id, srh->start,
(longlong_t)srh->objset, (longlong_t)srh->object,
(longlong_t)srh->level, (longlong_t)srh->blkid,
srh->aflags, srh->origin, srh->pid, srh->comm);
@@ -93,120 +92,73 @@ spa_read_history_data(char *buf, size_t size, void *data)
return (0);
}
-/*
- * Calculate the address for the next spa_stats_history_t entry. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
-static void *
-spa_read_history_addr(kstat_t *ksp, loff_t n)
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
-
- if (n == 0)
- ssh->private = list_tail(&ssh->list);
- else if (ssh->private)
- ssh->private = list_prev(&ssh->list, ssh->private);
+ spa_read_history_t *srh;
+ while (shl->size > size) {
+ srh = list_remove_head(&shl->procfs_list.pl_list);
+ ASSERT3P(srh, !=, NULL);
+ kmem_free(srh, sizeof (spa_read_history_t));
+ shl->size--;
+ }
- return (ssh->private);
+ if (size == 0)
+ ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
-/*
- * When the kstat is written discard all spa_read_history_t entries. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
static int
-spa_read_history_update(kstat_t *ksp, int rw)
+spa_read_history_clear(procfs_list_t *procfs_list)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
-
- if (rw == KSTAT_WRITE) {
- spa_read_history_t *srh;
-
- while ((srh = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(srh, sizeof (spa_read_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- }
-
- ksp->ks_ndata = ssh->size;
- ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
-
+ spa_history_list_t *shl = procfs_list->pl_private;
+ mutex_enter(&procfs_list->pl_lock);
+ spa_read_history_truncate(shl, 0);
+ mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_read_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
- char *name;
- kstat_t *ksp;
+ spa_history_list_t *shl = &spa->spa_stats.read_history;
+ char *module;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&ssh->list, sizeof (spa_read_history_t),
- offsetof(spa_read_history_t, srh_link));
+ shl->size = 0;
- ssh->count = 0;
- ssh->size = 0;
- ssh->private = NULL;
+ module = kmem_asprintf("zfs/%s", spa_name(spa));
- name = kmem_asprintf("zfs/%s", spa_name(spa));
+ shl->procfs_list.pl_private = shl;
+ procfs_list_install(module,
+ "reads",
+ &shl->procfs_list,
+ spa_read_history_show,
+ spa_read_history_show_header,
+ spa_read_history_clear,
+ offsetof(spa_read_history_t, srh_node));
- ksp = kstat_create(name, 0, "reads", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
-
- if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = NULL;
- ksp->ks_private = spa;
- ksp->ks_update = spa_read_history_update;
- kstat_set_raw_ops(ksp, spa_read_history_headers,
- spa_read_history_data, spa_read_history_addr);
- kstat_install(ksp);
- }
- strfree(name);
+ strfree(module);
}
static void
spa_read_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
- spa_read_history_t *srh;
- kstat_t *ksp;
-
- ksp = ssh->kstat;
- if (ksp)
- kstat_delete(ksp);
-
- mutex_enter(&ssh->lock);
- while ((srh = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(srh, sizeof (spa_read_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- list_destroy(&ssh->list);
- mutex_exit(&ssh->lock);
-
- mutex_destroy(&ssh->lock);
+ spa_history_list_t *shl = &spa->spa_stats.read_history;
+ procfs_list_uninstall(&shl->procfs_list);
+ spa_read_history_truncate(shl, 0);
+ procfs_list_destroy(&shl->procfs_list);
}
void
spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
{
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
- spa_read_history_t *srh, *rm;
+ spa_history_list_t *shl = &spa->spa_stats.read_history;
+ spa_read_history_t *srh;
ASSERT3P(spa, !=, NULL);
ASSERT3P(zb, !=, NULL);
- if (zfs_read_history == 0 && ssh->size == 0)
+ if (zfs_read_history == 0 && shl->size == 0)
return;
if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
@@ -222,19 +174,14 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
srh->aflags = aflags;
srh->pid = getpid();
- mutex_enter(&ssh->lock);
+ mutex_enter(&shl->procfs_list.pl_lock);
- srh->uid = ssh->count++;
- list_insert_head(&ssh->list, srh);
- ssh->size++;
+ procfs_list_add(&shl->procfs_list, srh);
+ shl->size++;
- while (ssh->size > zfs_read_history) {
- ssh->size--;
- rm = list_remove_tail(&ssh->list);
- kmem_free(rm, sizeof (spa_read_history_t));
- }
+ spa_read_history_truncate(shl, zfs_read_history);
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
}
/*
@@ -256,22 +203,21 @@ typedef struct spa_txg_history {
uint64_t writes; /* number of write operations */
uint64_t ndirty; /* number of dirty bytes */
hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
- list_node_t sth_link;
+ procfs_list_node_t sth_node;
} spa_txg_history_t;
static int
-spa_txg_history_headers(char *buf, size_t size)
+spa_txg_history_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-8s %-16s %-5s %-12s %-12s %-12s "
+ seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
"%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
"ndirty", "nread", "nwritten", "reads", "writes",
"otime", "qtime", "wtime", "stime");
-
return (0);
}
static int
-spa_txg_history_data(char *buf, size_t size, void *data)
+spa_txg_history_show(struct seq_file *f, void *data)
{
spa_txg_history_t *sth = (spa_txg_history_t *)data;
uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
@@ -303,7 +249,7 @@ spa_txg_history_data(char *buf, size_t size, void *data)
sync = sth->times[TXG_STATE_SYNCED] -
sth->times[TXG_STATE_WAIT_FOR_SYNC];
- (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
+ seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
"%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
(longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
(u_longlong_t)sth->ndirty,
@@ -315,110 +261,62 @@ spa_txg_history_data(char *buf, size_t size, void *data)
return (0);
}
-/*
- * Calculate the address for the next spa_stats_history_t entry. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
-static void *
-spa_txg_history_addr(kstat_t *ksp, loff_t n)
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
+ spa_txg_history_t *sth;
+ while (shl->size > size) {
+ sth = list_remove_head(&shl->procfs_list.pl_list);
+ ASSERT3P(sth, !=, NULL);
+ kmem_free(sth, sizeof (spa_txg_history_t));
+ shl->size--;
+ }
- if (n == 0)
- ssh->private = list_tail(&ssh->list);
- else if (ssh->private)
- ssh->private = list_prev(&ssh->list, ssh->private);
+ if (size == 0)
+ ASSERT(list_is_empty(&shl->procfs_list.pl_list));
- return (ssh->private);
}
-/*
- * When the kstat is written discard all spa_txg_history_t entries. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
static int
-spa_txg_history_update(kstat_t *ksp, int rw)
+spa_txg_history_clear(procfs_list_t *procfs_list)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
-
- if (rw == KSTAT_WRITE) {
- spa_txg_history_t *sth;
-
- while ((sth = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(sth, sizeof (spa_txg_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- }
-
- ksp->ks_ndata = ssh->size;
- ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
-
+ spa_history_list_t *shl = procfs_list->pl_private;
+ mutex_enter(&procfs_list->pl_lock);
+ spa_txg_history_truncate(shl, 0);
+ mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_txg_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
- char *name;
- kstat_t *ksp;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
+ char *module;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&ssh->list, sizeof (spa_txg_history_t),
- offsetof(spa_txg_history_t, sth_link));
+ shl->size = 0;
- ssh->count = 0;
- ssh->size = 0;
- ssh->private = NULL;
+ module = kmem_asprintf("zfs/%s", spa_name(spa));
- name = kmem_asprintf("zfs/%s", spa_name(spa));
+ shl->procfs_list.pl_private = shl;
+ procfs_list_install(module,
+ "txgs",
+ &shl->procfs_list,
+ spa_txg_history_show,
+ spa_txg_history_show_header,
+ spa_txg_history_clear,
+ offsetof(spa_txg_history_t, sth_node));
- ksp = kstat_create(name, 0, "txgs", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
-
- if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = NULL;
- ksp->ks_private = spa;
- ksp->ks_update = spa_txg_history_update;
- kstat_set_raw_ops(ksp, spa_txg_history_headers,
- spa_txg_history_data, spa_txg_history_addr);
- kstat_install(ksp);
- }
- strfree(name);
+ strfree(module);
}
static void
spa_txg_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
- spa_txg_history_t *sth;
- kstat_t *ksp;
-
- ksp = ssh->kstat;
- if (ksp)
- kstat_delete(ksp);
-
- mutex_enter(&ssh->lock);
- while ((sth = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(sth, sizeof (spa_txg_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- list_destroy(&ssh->list);
- mutex_exit(&ssh->lock);
-
- mutex_destroy(&ssh->lock);
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
+ procfs_list_uninstall(&shl->procfs_list);
+ spa_txg_history_truncate(shl, 0);
+ procfs_list_destroy(&shl->procfs_list);
}
/*
@@ -427,10 +325,10 @@ spa_txg_history_destroy(spa_t *spa)
void
spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
- spa_txg_history_t *sth, *rm;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
+ spa_txg_history_t *sth;
- if (zfs_txg_history == 0 && ssh->size == 0)
+ if (zfs_txg_history == 0 && shl->size == 0)
return;
sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
@@ -438,18 +336,11 @@ spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
sth->state = TXG_STATE_OPEN;
sth->times[TXG_STATE_BIRTH] = birth_time;
- mutex_enter(&ssh->lock);
-
- list_insert_head(&ssh->list, sth);
- ssh->size++;
-
- while (ssh->size > zfs_txg_history) {
- ssh->size--;
- rm = list_remove_tail(&ssh->list);
- kmem_free(rm, sizeof (spa_txg_history_t));
- }
-
- mutex_exit(&ssh->lock);
+ mutex_enter(&shl->procfs_list.pl_lock);
+ procfs_list_add(&shl->procfs_list, sth);
+ shl->size++;
+ spa_txg_history_truncate(shl, zfs_txg_history);
+ mutex_exit(&shl->procfs_list.pl_lock);
}
/*
@@ -459,16 +350,16 @@ int
spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
hrtime_t completed_time)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
int error = ENOENT;
if (zfs_txg_history == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (sth = list_head(&ssh->list); sth != NULL;
- sth = list_next(&ssh->list, sth)) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
+ sth = list_prev(&shl->procfs_list.pl_list, sth)) {
if (sth->txg == txg) {
sth->times[completed_state] = completed_time;
sth->state++;
@@ -476,7 +367,7 @@ spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -488,16 +379,16 @@ static int
spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
int error = ENOENT;
if (zfs_txg_history == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (sth = list_head(&ssh->list); sth != NULL;
- sth = list_next(&ssh->list, sth)) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
+ sth = list_prev(&shl->procfs_list.pl_list, sth)) {
if (sth->txg == txg) {
sth->nread = nread;
sth->nwritten = nwritten;
@@ -508,7 +399,7 @@ spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -580,16 +471,16 @@ static int
spa_tx_assign_update(kstat_t *ksp, int rw)
{
spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
int i;
if (rw == KSTAT_WRITE) {
- for (i = 0; i < ssh->count; i++)
- ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
+ for (i = 0; i < shk->count; i++)
+ ((kstat_named_t *)shk->private)[i].value.ui64 = 0;
}
- for (i = ssh->count; i > 0; i--)
- if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
+ for (i = shk->count; i > 0; i--)
+ if (((kstat_named_t *)shk->private)[i-1].value.ui64 != 0)
break;
ksp->ks_ndata = i;
@@ -601,22 +492,22 @@ spa_tx_assign_update(kstat_t *ksp, int rw)
static void
spa_tx_assign_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
char *name;
kstat_named_t *ks;
kstat_t *ksp;
int i;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
- ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
- ssh->size = ssh->count * sizeof (kstat_named_t);
- ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
+ shk->count = 42; /* power of two buckets for 1ns to 2,199s */
+ shk->size = shk->count * sizeof (kstat_named_t);
+ shk->private = kmem_alloc(shk->size, KM_SLEEP);
name = kmem_asprintf("zfs/%s", spa_name(spa));
- for (i = 0; i < ssh->count; i++) {
- ks = &((kstat_named_t *)ssh->private)[i];
+ for (i = 0; i < shk->count; i++) {
+ ks = &((kstat_named_t *)shk->private)[i];
ks->data_type = KSTAT_DATA_UINT64;
ks->value.ui64 = 0;
(void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
@@ -625,13 +516,13 @@ spa_tx_assign_init(spa_t *spa)
ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
+ shk->kstat = ksp;
if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = ssh->private;
- ksp->ks_ndata = ssh->count;
- ksp->ks_data_size = ssh->size;
+ ksp->ks_lock = &shk->lock;
+ ksp->ks_data = shk->private;
+ ksp->ks_ndata = shk->count;
+ ksp->ks_data_size = shk->size;
ksp->ks_private = spa;
ksp->ks_update = spa_tx_assign_update;
kstat_install(ksp);
@@ -642,27 +533,27 @@ spa_tx_assign_init(spa_t *spa)
static void
spa_tx_assign_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
kstat_t *ksp;
- ksp = ssh->kstat;
+ ksp = shk->kstat;
if (ksp)
kstat_delete(ksp);
- kmem_free(ssh->private, ssh->size);
- mutex_destroy(&ssh->lock);
+ kmem_free(shk->private, shk->size);
+ mutex_destroy(&shk->lock);
}
+/*
+ * Record one dmu_tx_assign() delay of 'nsecs' nanoseconds in the pool's
+ * tx_assign histogram: pick the first power-of-two bucket whose bound
+ * (1 << idx) is not below nsecs and atomically increment its counter.
+ * Delays larger than the last bucket's bound are counted in the last bucket.
+ */
void
spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
{
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
uint64_t idx = 0;
- while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
+ /*
+ * Clamp to the number of buckets (count), not the allocation size in
+ * bytes (size); otherwise idx can run past the end of the array.
+ */
+ while (((1ULL << idx) < nsecs) && (idx < shk->count - 1))
idx++;
- atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
+ atomic_inc_64(&((kstat_named_t *)shk->private)[idx].value.ui64);
}
/*
@@ -682,19 +573,19 @@ spa_io_history_update(kstat_t *ksp, int rw)
static void
spa_io_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
char *name;
kstat_t *ksp;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
name = kmem_asprintf("zfs/%s", spa_name(spa));
ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
- ssh->kstat = ksp;
+ shk->kstat = ksp;
if (ksp) {
- ksp->ks_lock = &ssh->lock;
+ ksp->ks_lock = &shk->lock;
ksp->ks_private = spa;
ksp->ks_update = spa_io_history_update;
kstat_install(ksp);
@@ -705,12 +596,12 @@ spa_io_history_init(spa_t *spa)
static void
spa_io_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
- if (ssh->kstat)
- kstat_delete(ssh->kstat);
+ if (shk->kstat)
+ kstat_delete(shk->kstat);
- mutex_destroy(&ssh->lock);
+ mutex_destroy(&shk->lock);
}
/*
@@ -733,7 +624,7 @@ spa_io_history_destroy(spa_t *spa)
*/
typedef struct spa_mmp_history {
- uint64_t mmp_kstat_id; /* unique # for updates */
+ uint64_t mmp_node_id; /* unique # for updates */
uint64_t txg; /* txg of last sync */
uint64_t timestamp; /* UTC time MMP write issued */
uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
@@ -743,20 +634,20 @@ typedef struct spa_mmp_history {
int io_error; /* error status of MMP write */
hrtime_t error_start; /* hrtime of start of error period */
hrtime_t duration; /* time from submission to completion */
- list_node_t smh_link;
+ procfs_list_node_t smh_node;
} spa_mmp_history_t;
static int
-spa_mmp_history_headers(char *buf, size_t size)
+spa_mmp_history_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
+ seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
"%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
"mmp_delay", "vdev_guid", "vdev_label", "vdev_path");
return (0);
}
static int
-spa_mmp_history_data(char *buf, size_t size, void *data)
+spa_mmp_history_show(struct seq_file *f, void *data)
{
spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
@@ -764,8 +655,8 @@ spa_mmp_history_data(char *buf, size_t size, void *data)
char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
"%-10lld %s\n";
- (void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt),
- (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
+ seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
+ (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
(u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
(longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
(u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
@@ -774,137 +665,86 @@ spa_mmp_history_data(char *buf, size_t size, void *data)
return (0);
}
-/*
- * Calculate the address for the next spa_stats_history_t entry. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
-static void *
-spa_mmp_history_addr(kstat_t *ksp, loff_t n)
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
+ spa_mmp_history_t *smh;
+ while (shl->size > size) {
+ smh = list_remove_head(&shl->procfs_list.pl_list);
+ if (smh->vdev_path)
+ strfree(smh->vdev_path);
+ kmem_free(smh, sizeof (spa_mmp_history_t));
+ shl->size--;
+ }
- if (n == 0)
- ssh->private = list_tail(&ssh->list);
- else if (ssh->private)
- ssh->private = list_prev(&ssh->list, ssh->private);
+ if (size == 0)
+ ASSERT(list_is_empty(&shl->procfs_list.pl_list));
- return (ssh->private);
}
-/*
- * When the kstat is written discard all spa_mmp_history_t entries. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
static int
-spa_mmp_history_update(kstat_t *ksp, int rw)
+spa_mmp_history_clear(procfs_list_t *procfs_list)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
-
- if (rw == KSTAT_WRITE) {
- spa_mmp_history_t *smh;
-
- while ((smh = list_remove_head(&ssh->list))) {
- ssh->size--;
- if (smh->vdev_path)
- strfree(smh->vdev_path);
- kmem_free(smh, sizeof (spa_mmp_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- }
-
- ksp->ks_ndata = ssh->size;
- ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
-
+ spa_history_list_t *shl = procfs_list->pl_private;
+ mutex_enter(&procfs_list->pl_lock);
+ spa_mmp_history_truncate(shl, 0);
+ mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_mmp_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
- char *name;
- kstat_t *ksp;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
+ char *module;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&ssh->list, sizeof (spa_mmp_history_t),
- offsetof(spa_mmp_history_t, smh_link));
+ shl->size = 0;
- ssh->count = 0;
- ssh->size = 0;
- ssh->private = NULL;
-
- name = kmem_asprintf("zfs/%s", spa_name(spa));
+ module = kmem_asprintf("zfs/%s", spa_name(spa));
- ksp = kstat_create(name, 0, "multihost", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
+ shl->procfs_list.pl_private = shl;
+ procfs_list_install(module,
+ "multihost",
+ &shl->procfs_list,
+ spa_mmp_history_show,
+ spa_mmp_history_show_header,
+ spa_mmp_history_clear,
+ offsetof(spa_mmp_history_t, smh_node));
- if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = NULL;
- ksp->ks_private = spa;
- ksp->ks_update = spa_mmp_history_update;
- kstat_set_raw_ops(ksp, spa_mmp_history_headers,
- spa_mmp_history_data, spa_mmp_history_addr);
- kstat_install(ksp);
- }
- strfree(name);
+ strfree(module);
}
static void
spa_mmp_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh;
- kstat_t *ksp;
-
- ksp = ssh->kstat;
- if (ksp)
- kstat_delete(ksp);
-
- mutex_enter(&ssh->lock);
- while ((smh = list_remove_head(&ssh->list))) {
- ssh->size--;
- if (smh->vdev_path)
- strfree(smh->vdev_path);
- kmem_free(smh, sizeof (spa_mmp_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- list_destroy(&ssh->list);
- mutex_exit(&ssh->lock);
-
- mutex_destroy(&ssh->lock);
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
+ procfs_list_uninstall(&shl->procfs_list);
+ spa_mmp_history_truncate(shl, 0);
+ procfs_list_destroy(&shl->procfs_list);
}
/*
* Set duration in existing "skip" record to how long we have waited for a leaf
* vdev to become available.
*
- * Important that we start search at the head of the list where new
+ * Important that we start search at the tail of the list where new
* records are inserted, so this is normally an O(1) operation.
*/
int
-spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
+spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
int error = ENOENT;
- if (zfs_multihost_history == 0 && ssh->size == 0)
+ if (zfs_multihost_history == 0 && shl->size == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (smh = list_head(&ssh->list); smh != NULL;
- smh = list_next(&ssh->list, smh)) {
- if (smh->mmp_kstat_id == mmp_kstat_id) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
+ smh = list_prev(&shl->procfs_list.pl_list, smh)) {
+ if (smh->mmp_node_id == mmp_node_id) {
ASSERT3U(smh->io_error, !=, 0);
smh->duration = gethrtime() - smh->error_start;
smh->vdev_guid++;
@@ -912,7 +752,7 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -922,20 +762,20 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
* See comment re: search order above spa_mmp_history_set_skip().
*/
int
-spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
+spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
hrtime_t duration)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
int error = ENOENT;
- if (zfs_multihost_history == 0 && ssh->size == 0)
+ if (zfs_multihost_history == 0 && shl->size == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (smh = list_head(&ssh->list); smh != NULL;
- smh = list_next(&ssh->list, smh)) {
- if (smh->mmp_kstat_id == mmp_kstat_id) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
+ smh = list_prev(&shl->procfs_list.pl_list, smh)) {
+ if (smh->mmp_node_id == mmp_node_id) {
ASSERT(smh->io_error == 0);
smh->io_error = io_error;
smh->duration = duration;
@@ -943,7 +783,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -953,16 +793,16 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
* error == 0 : a write was issued.
* error != 0 : a write was not issued because no leaves were found.
*/
-void *
+void
spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
- uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
+ uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
int error)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh, *rm;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
+ spa_mmp_history_t *smh;
- if (zfs_multihost_history == 0 && ssh->size == 0)
- return (NULL);
+ if (zfs_multihost_history == 0 && shl->size == 0)
+ return;
smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
smh->txg = txg;
@@ -974,7 +814,7 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
smh->vdev_path = strdup(vd->vdev_path);
}
smh->vdev_label = label;
- smh->mmp_kstat_id = mmp_kstat_id;
+ smh->mmp_node_id = mmp_node_id;
if (error) {
smh->io_error = error;
@@ -982,21 +822,11 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
smh->vdev_guid = 1;
}
- mutex_enter(&ssh->lock);
-
- list_insert_head(&ssh->list, smh);
- ssh->size++;
-
- while (ssh->size > zfs_multihost_history) {
- ssh->size--;
- rm = list_remove_tail(&ssh->list);
- if (rm->vdev_path)
- strfree(rm->vdev_path);
- kmem_free(rm, sizeof (spa_mmp_history_t));
- }
-
- mutex_exit(&ssh->lock);
- return ((void *)smh);
+ mutex_enter(&shl->procfs_list.pl_lock);
+ procfs_list_add(&shl->procfs_list, smh);
+ shl->size++;
+ spa_mmp_history_truncate(shl, zfs_multihost_history);
+ mutex_exit(&shl->procfs_list.pl_lock);
}
static void *
@@ -1023,19 +853,19 @@ spa_state_data(char *buf, size_t size, void *data)
static void
spa_state_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.state;
+ spa_history_kstat_t *shk = &spa->spa_stats.state;
char *name;
kstat_t *ksp;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
name = kmem_asprintf("zfs/%s", spa_name(spa));
ksp = kstat_create(name, 0, "state", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
+ shk->kstat = ksp;
if (ksp) {
- ksp->ks_lock = &ssh->lock;
+ ksp->ks_lock = &shk->lock;
ksp->ks_data = NULL;
ksp->ks_private = spa;
ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
@@ -1049,12 +879,12 @@ spa_state_init(spa_t *spa)
static void
spa_health_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.state;
- kstat_t *ksp = ssh->kstat;
+ spa_history_kstat_t *shk = &spa->spa_stats.state;
+ kstat_t *ksp = shk->kstat;
if (ksp)
kstat_delete(ksp);
- mutex_destroy(&ssh->lock);
+ mutex_destroy(&shk->lock);
}
void
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 30a883f85..89cdf7d81 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -429,16 +429,16 @@ static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_waitq_enter(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
+ if (shk->kstat != NULL) {
+ mutex_enter(&shk->lock);
+ kstat_waitq_enter(shk->kstat->ks_data);
+ mutex_exit(&shk->lock);
}
}
@@ -446,16 +446,16 @@ static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_waitq_exit(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
+ if (shk->kstat != NULL) {
+ mutex_enter(&shk->lock);
+ kstat_waitq_exit(shk->kstat->ks_data);
+ mutex_exit(&shk->lock);
}
}
@@ -463,17 +463,17 @@ static void
vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
vq->vq_class[zio->io_priority].vqc_active++;
avl_add(&vq->vq_active_tree, zio);
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_runq_enter(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
+ if (shk->kstat != NULL) {
+ mutex_enter(&shk->lock);
+ kstat_runq_enter(shk->kstat->ks_data);
+ mutex_exit(&shk->lock);
}
}
@@ -481,17 +481,17 @@ static void
vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
vq->vq_class[zio->io_priority].vqc_active--;
avl_remove(&vq->vq_active_tree, zio);
- if (ssh->kstat != NULL) {
- kstat_io_t *ksio = ssh->kstat->ks_data;
+ if (shk->kstat != NULL) {
+ kstat_io_t *ksio = shk->kstat->ks_data;
- mutex_enter(&ssh->lock);
+ mutex_enter(&shk->lock);
kstat_runq_exit(ksio);
if (zio->io_type == ZIO_TYPE_READ) {
ksio->reads++;
@@ -500,7 +500,7 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
ksio->writes++;
ksio->nwritten += zio->io_size;
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shk->lock);
}
}
diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c
index ca79893c9..b5f93fd9b 100644
--- a/module/zfs/zfs_debug.c
+++ b/module/zfs/zfs_debug.c
@@ -24,13 +24,17 @@
*/
#include <sys/zfs_context.h>
-#include <sys/kstat.h>
-list_t zfs_dbgmsgs;
+typedef struct zfs_dbgmsg {
+ procfs_list_node_t zdm_node;
+ time_t zdm_timestamp;
+ int zdm_size;
+ char zdm_msg[1]; /* variable length allocation */
+} zfs_dbgmsg_t;
+
+procfs_list_t zfs_dbgmsgs;
int zfs_dbgmsg_size = 0;
-kmutex_t zfs_dbgmsgs_lock;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
-kstat_t *zfs_dbgmsg_kstat;
/*
* Internal ZFS debug messages are enabled by default.
@@ -47,123 +51,71 @@ kstat_t *zfs_dbgmsg_kstat;
int zfs_dbgmsg_enable = 1;
static int
-zfs_dbgmsg_headers(char *buf, size_t size)
+zfs_dbgmsg_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-12s %-8s\n", "timestamp", "message");
-
+ seq_printf(f, "%-12s %-8s\n", "timestamp", "message");
return (0);
}
static int
-zfs_dbgmsg_data(char *buf, size_t size, void *data)
+zfs_dbgmsg_show(struct seq_file *f, void *p)
{
- zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data;
-
- (void) snprintf(buf, size, "%-12llu %-s\n",
+ zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p;
+ seq_printf(f, "%-12llu %-s\n",
(u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
-
return (0);
}
-static void *
-zfs_dbgmsg_addr(kstat_t *ksp, loff_t n)
-{
- zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)ksp->ks_private;
-
- ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
-
- if (n == 0)
- ksp->ks_private = list_head(&zfs_dbgmsgs);
- else if (zdm)
- ksp->ks_private = list_next(&zfs_dbgmsgs, zdm);
-
- return (ksp->ks_private);
-}
-
static void
zfs_dbgmsg_purge(int max_size)
{
- zfs_dbgmsg_t *zdm;
- int size;
-
- ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
-
while (zfs_dbgmsg_size > max_size) {
- zdm = list_remove_head(&zfs_dbgmsgs);
+ zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list);
if (zdm == NULL)
return;
- size = zdm->zdm_size;
+ int size = zdm->zdm_size;
kmem_free(zdm, size);
zfs_dbgmsg_size -= size;
}
}
static int
-zfs_dbgmsg_update(kstat_t *ksp, int rw)
+zfs_dbgmsg_clear(procfs_list_t *procfs_list)
{
- if (rw == KSTAT_WRITE)
- zfs_dbgmsg_purge(0);
-
+ mutex_enter(&zfs_dbgmsgs.pl_lock);
+ zfs_dbgmsg_purge(0);
+ mutex_exit(&zfs_dbgmsgs.pl_lock);
return (0);
}
void
zfs_dbgmsg_init(void)
{
- list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
+ procfs_list_install("zfs",
+ "dbgmsg",
+ &zfs_dbgmsgs,
+ zfs_dbgmsg_show,
+ zfs_dbgmsg_show_header,
+ zfs_dbgmsg_clear,
offsetof(zfs_dbgmsg_t, zdm_node));
- mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
-
- zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- if (zfs_dbgmsg_kstat) {
- zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock;
- zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX;
- zfs_dbgmsg_kstat->ks_private = NULL;
- zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update;
- kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers,
- zfs_dbgmsg_data, zfs_dbgmsg_addr);
- kstat_install(zfs_dbgmsg_kstat);
- }
}
void
zfs_dbgmsg_fini(void)
{
- if (zfs_dbgmsg_kstat)
- kstat_delete(zfs_dbgmsg_kstat);
+ procfs_list_uninstall(&zfs_dbgmsgs);
+ zfs_dbgmsg_purge(0);
+
/*
* TODO - decide how to make this permanent
*/
#ifdef _KERNEL
- mutex_enter(&zfs_dbgmsgs_lock);
- zfs_dbgmsg_purge(0);
- mutex_exit(&zfs_dbgmsgs_lock);
- mutex_destroy(&zfs_dbgmsgs_lock);
+ procfs_list_destroy(&zfs_dbgmsgs);
#endif
}
void
-__zfs_dbgmsg(char *buf)
-{
- zfs_dbgmsg_t *zdm;
- int size;
-
- size = sizeof (zfs_dbgmsg_t) + strlen(buf);
- zdm = kmem_zalloc(size, KM_SLEEP);
- zdm->zdm_size = size;
- zdm->zdm_timestamp = gethrestime_sec();
- strcpy(zdm->zdm_msg, buf);
-
- mutex_enter(&zfs_dbgmsgs_lock);
- list_insert_tail(&zfs_dbgmsgs, zdm);
- zfs_dbgmsg_size += size;
- zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
- mutex_exit(&zfs_dbgmsgs_lock);
-}
-
-void
__set_error(const char *file, const char *func, int line, int err)
{
/*
@@ -176,6 +128,22 @@ __set_error(const char *file, const char *func, int line, int err)
}
#ifdef _KERNEL
+static void
+__zfs_dbgmsg(char *buf)
+{
+ int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
+ zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
+ zdm->zdm_size = size;
+ zdm->zdm_timestamp = gethrestime_sec();
+ strcpy(zdm->zdm_msg, buf);
+
+ mutex_enter(&zfs_dbgmsgs.pl_lock);
+ procfs_list_add(&zfs_dbgmsgs, zdm);
+ zfs_dbgmsg_size += size;
+ zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
+ mutex_exit(&zfs_dbgmsgs.pl_lock);
+}
+
void
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
{
@@ -244,14 +212,12 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
void
zfs_dbgmsg_print(const char *tag)
{
- zfs_dbgmsg_t *zdm;
-
(void) printf("ZFS_DBGMSG(%s):\n", tag);
- mutex_enter(&zfs_dbgmsgs_lock);
- for (zdm = list_head(&zfs_dbgmsgs); zdm;
- zdm = list_next(&zfs_dbgmsgs, zdm))
+ mutex_enter(&zfs_dbgmsgs.pl_lock);
+ for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL;
+ zdm = list_next(&zfs_dbgmsgs.pl_list, zdm))
(void) printf("%s\n", zdm->zdm_msg);
- mutex_exit(&zfs_dbgmsgs_lock);
+ mutex_exit(&zfs_dbgmsgs.pl_lock);
}
#endif /* _KERNEL */