aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configure.ac2
-rw-r--r--include/spl/sys/Makefile.am1
-rw-r--r--include/spl/sys/kstat.h20
-rw-r--r--include/spl/sys/procfs_list.h71
-rw-r--r--include/sys/spa.h23
-rw-r--r--include/sys/zfs_context.h32
-rw-r--r--include/sys/zfs_debug.h7
-rw-r--r--lib/libzpool/kernel.c51
-rw-r--r--module/spl/Makefile.in1
-rw-r--r--module/spl/spl-kstat.c100
-rw-r--r--module/spl/spl-procfs-list.c256
-rw-r--r--module/zfs/spa_stats.c600
-rw-r--r--module/zfs/vdev_queue.c40
-rw-r--r--module/zfs/zfs_debug.c132
-rw-r--r--tests/runfiles/linux.run9
-rw-r--r--tests/zfs-tests/tests/functional/Makefile.am2
-rw-r--r--tests/zfs-tests/tests/functional/kstat/Makefile.am5
-rw-r--r--tests/zfs-tests/tests/functional/procfs/Makefile.am8
-rwxr-xr-xtests/zfs-tests/tests/functional/procfs/cleanup.ksh (renamed from tests/zfs-tests/tests/functional/kstat/cleanup.ksh)3
-rwxr-xr-xtests/zfs-tests/tests/functional/procfs/pool_state.ksh (renamed from tests/zfs-tests/tests/functional/kstat/state.ksh)0
-rwxr-xr-xtests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh95
-rwxr-xr-xtests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh82
-rwxr-xr-xtests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh98
-rwxr-xr-xtests/zfs-tests/tests/functional/procfs/setup.ksh (renamed from tests/zfs-tests/tests/functional/kstat/setup.ksh)6
24 files changed, 1086 insertions, 558 deletions
diff --git a/configure.ac b/configure.ac
index 18d91b359..301258e7f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -283,7 +283,6 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/inheritance/Makefile
tests/zfs-tests/tests/functional/inuse/Makefile
tests/zfs-tests/tests/functional/io/Makefile
- tests/zfs-tests/tests/functional/kstat/Makefile
tests/zfs-tests/tests/functional/large_files/Makefile
tests/zfs-tests/tests/functional/largest_pool/Makefile
tests/zfs-tests/tests/functional/link_count/Makefile
@@ -301,6 +300,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/pool_checkpoint/Makefile
tests/zfs-tests/tests/functional/poolversion/Makefile
tests/zfs-tests/tests/functional/privilege/Makefile
+ tests/zfs-tests/tests/functional/procfs/Makefile
tests/zfs-tests/tests/functional/projectquota/Makefile
tests/zfs-tests/tests/functional/pyzfs/Makefile
tests/zfs-tests/tests/functional/quota/Makefile
diff --git a/include/spl/sys/Makefile.am b/include/spl/sys/Makefile.am
index d58ed0e20..e596ff373 100644
--- a/include/spl/sys/Makefile.am
+++ b/include/spl/sys/Makefile.am
@@ -28,6 +28,7 @@ KERNEL_H = \
$(top_srcdir)/include/spl/sys/param.h \
$(top_srcdir)/include/spl/sys/processor.h \
$(top_srcdir)/include/spl/sys/proc.h \
+ $(top_srcdir)/include/spl/sys/procfs_list.h \
$(top_srcdir)/include/spl/sys/random.h \
$(top_srcdir)/include/spl/sys/rwlock.h \
$(top_srcdir)/include/spl/sys/shrinker.h \
diff --git a/include/spl/sys/kstat.h b/include/spl/sys/kstat.h
index f197ce455..53274d8f5 100644
--- a/include/spl/sys/kstat.h
+++ b/include/spl/sys/kstat.h
@@ -98,30 +98,34 @@ typedef struct kstat_raw_ops {
void *(*addr)(kstat_t *ksp, loff_t index);
} kstat_raw_ops_t;
+/*
+ * Registration data for one file under the kstat procfs namespace. It is
+ * embedded in kstat_t (ks_proc) and can equally be embedded in objects that
+ * are not kstats, which is what lets them share the install/delete code.
+ */
+typedef struct kstat_proc_entry {
+ char kpe_name[KSTAT_STRLEN+1]; /* kstat name */
+ char kpe_module[KSTAT_STRLEN+1]; /* provider module name */
+ kstat_module_t *kpe_owner; /* kstat module linkage */
+ struct list_head kpe_list; /* kstat linkage */
+ struct proc_dir_entry *kpe_proc; /* procfs entry */
+} kstat_proc_entry_t;
+
struct kstat_s {
int ks_magic; /* magic value */
kid_t ks_kid; /* unique kstat ID */
hrtime_t ks_crtime; /* creation time */
hrtime_t ks_snaptime; /* last access time */
- char ks_module[KSTAT_STRLEN+1]; /* provider module name */
int ks_instance; /* provider module instance */
- char ks_name[KSTAT_STRLEN+1]; /* kstat name */
char ks_class[KSTAT_STRLEN+1]; /* kstat class */
uchar_t ks_type; /* kstat data type */
uchar_t ks_flags; /* kstat flags */
void *ks_data; /* kstat type-specific data */
uint_t ks_ndata; /* # of data records */
size_t ks_data_size; /* size of kstat data section */
- struct proc_dir_entry *ks_proc; /* proc linkage */
kstat_update_t *ks_update; /* dynamic updates */
void *ks_private; /* private data */
kmutex_t ks_private_lock; /* kstat private data lock */
kmutex_t *ks_lock; /* kstat data lock */
- struct list_head ks_list; /* kstat linkage */
- kstat_module_t *ks_owner; /* kstat module linkage */
kstat_raw_ops_t ks_raw_ops; /* ops table for raw type */
char *ks_raw_buf; /* buf used for raw ops */
size_t ks_raw_bufsize; /* size of raw ops buffer */
+ kstat_proc_entry_t ks_proc; /* data for procfs entry */
};
typedef struct kstat_named_s {
@@ -189,6 +193,12 @@ extern kstat_t *__kstat_create(const char *ks_module, int ks_instance,
const char *ks_name, const char *ks_class, uchar_t ks_type,
uint_t ks_ndata, uchar_t ks_flags);
+extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep,
+ const char *module, const char *name);
+extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep);
+extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep,
+ const struct file_operations *file_ops, void *data);
+
extern void __kstat_install(kstat_t *ksp);
extern void __kstat_delete(kstat_t *ksp);
extern void kstat_waitq_enter(kstat_io_t *);
diff --git a/include/spl/sys/procfs_list.h b/include/spl/sys/procfs_list.h
new file mode 100644
index 000000000..cbcb4bcff
--- /dev/null
+++ b/include/spl/sys/procfs_list.h
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#ifndef _SPL_PROCFS_LIST_H
+#define _SPL_PROCFS_LIST_H
+
+#include <sys/kstat.h>
+#include <sys/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/*
+ * A linked list whose elements are exposed through a file in the kstat procfs
+ * namespace. Initialized by procfs_list_install().
+ */
+typedef struct procfs_list procfs_list_t;
+struct procfs_list {
+ /* Accessed only by user of a procfs_list */
+ void *pl_private;
+
+ /*
+ * Accessed both by user of a procfs_list and by procfs_list
+ * implementation
+ */
+ kmutex_t pl_lock;
+ list_t pl_list;
+
+ /* Accessed only by procfs_list implementation */
+ uint64_t pl_next_id; /* id handed to the next node added */
+ int (*pl_show)(struct seq_file *f, void *p); /* print one element */
+ int (*pl_show_header)(struct seq_file *f); /* optional, may be NULL */
+ int (*pl_clear)(procfs_list_t *procfs_list); /* optional, run on write */
+ size_t pl_node_offset; /* offset of procfs_list_node_t in an element */
+ kstat_proc_entry_t pl_kstat_entry; /* procfs registration data */
+};
+
+/*
+ * Must be embedded in every element placed on a procfs_list, at the offset
+ * passed to procfs_list_install().
+ */
+typedef struct procfs_list_node {
+ list_node_t pln_link; /* linkage on pl_list */
+ uint64_t pln_id; /* id assigned by procfs_list_add() */
+} procfs_list_node_t;
+
+/*
+ * Initialize 'procfs_list' and create a file called 'name' for it under
+ * 'module' in the kstat procfs namespace. 'show' prints one element,
+ * 'show_header' (may be NULL) prints the header, 'clear' (may be NULL) is
+ * called when the file is written, and 'procfs_list_node_off' is the offset
+ * of the procfs_list_node_t inside each element.
+ */
+void procfs_list_install(const char *module,
+ const char *name,
+ procfs_list_t *procfs_list,
+ int (*show)(struct seq_file *f, void *p),
+ int (*show_header)(struct seq_file *f),
+ int (*clear)(procfs_list_t *procfs_list),
+ size_t procfs_list_node_off);
+/* Remove the procfs file; the list and its elements are left untouched. */
+void procfs_list_uninstall(procfs_list_t *procfs_list);
+/* Destroy the list and its lock; the list must already be empty. */
+void procfs_list_destroy(procfs_list_t *procfs_list);
+
+/* Append element 'p' to the tail and assign its id; pl_lock must be held. */
+void procfs_list_add(procfs_list_t *procfs_list, void *p);
+
+#endif /* _SPL_PROCFS_LIST_H */
diff --git a/include/sys/spa.h b/include/sys/spa.h
index b86c65557..443d835a1 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -863,22 +863,27 @@ extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)
/* Historical pool statistics */
-typedef struct spa_stats_history {
+typedef struct spa_history_kstat {
kmutex_t lock;
uint64_t count;
uint64_t size;
kstat_t *kstat;
void *private;
list_t list;
-} spa_stats_history_t;
+} spa_history_kstat_t;
+
+/*
+ * A history that is exported through a procfs_list rather than a kstat.
+ */
+typedef struct spa_history_list {
+ uint64_t size; /* number of entries currently on procfs_list */
+ procfs_list_t procfs_list;
+} spa_history_list_t;
typedef struct spa_stats {
- spa_stats_history_t read_history;
- spa_stats_history_t txg_history;
- spa_stats_history_t tx_assign_histogram;
- spa_stats_history_t io_history;
- spa_stats_history_t mmp_history;
- spa_stats_history_t state; /* pool state */
+ spa_history_list_t read_history;
+ spa_history_list_t txg_history;
+ spa_history_kstat_t tx_assign_histogram;
+ spa_history_kstat_t io_history;
+ spa_history_list_t mmp_history;
+ spa_history_kstat_t state; /* pool state */
} spa_stats_t;
typedef enum txg_state {
@@ -911,7 +916,7 @@ extern void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs);
extern int spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id);
extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
hrtime_t duration);
-extern void *spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
+extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
int error);
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 6f502897e..11c048c23 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -62,6 +62,7 @@
#include <sys/ctype.h>
#include <sys/disp.h>
#include <sys/trace.h>
+#include <sys/procfs_list.h>
#include <linux/dcache_compat.h>
#include <linux/utsname_compat.h>
@@ -352,6 +353,37 @@ extern void kstat_set_raw_ops(kstat_t *ksp,
void *(*addr)(kstat_t *ksp, loff_t index));
/*
+ * procfs list manipulation
+ */
+
+/*
+ * Minimal stand-ins for the seq_file and procfs_list interfaces; the matching
+ * definitions are in lib/libzpool/kernel.c.
+ */
+struct seq_file { };
+void seq_printf(struct seq_file *m, const char *fmt, ...);
+
+/* Reduced procfs_list: no callbacks and no procfs registration data. */
+typedef struct procfs_list {
+ void *pl_private;
+ kmutex_t pl_lock;
+ list_t pl_list;
+ uint64_t pl_next_id;
+ size_t pl_node_offset;
+} procfs_list_t;
+
+/* Embedded in every element placed on a procfs_list. */
+typedef struct procfs_list_node {
+ list_node_t pln_link;
+ uint64_t pln_id;
+} procfs_list_node_t;
+
+void procfs_list_install(const char *module,
+ const char *name,
+ procfs_list_t *procfs_list,
+ int (*show)(struct seq_file *f, void *p),
+ int (*show_header)(struct seq_file *f),
+ int (*clear)(procfs_list_t *procfs_list),
+ size_t procfs_list_node_off);
+void procfs_list_uninstall(procfs_list_t *procfs_list);
+void procfs_list_destroy(procfs_list_t *procfs_list);
+void procfs_list_add(procfs_list_t *procfs_list, void *p);
+
+/*
* Kernel memory
*/
#define KM_SLEEP UMEM_NOFAIL
diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h
index aa9bfe21f..f3a936ae7 100644
--- a/include/sys/zfs_debug.h
+++ b/include/sys/zfs_debug.h
@@ -76,13 +76,6 @@ extern void __dprintf(const char *file, const char *func,
extern void zfs_panic_recover(const char *fmt, ...);
-typedef struct zfs_dbgmsg {
- list_node_t zdm_node;
- time_t zdm_timestamp;
- int zdm_size;
- char zdm_msg[1]; /* variable length allocation */
-} zfs_dbgmsg_t;
-
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index 341548ac3..5baf52514 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -426,6 +426,57 @@ cv_broadcast(kcondvar_t *cv)
/*
* =========================================================================
+ * procfs list
+ * =========================================================================
+ */
+
+/* Stub: accepts the same arguments as seq_printf() and discards them. */
+void
+seq_printf(struct seq_file *m, const char *fmt, ...)
+{}
+
+/*
+ * Initialize the lock, the list and the id counter of 'procfs_list'. No file
+ * is created here: 'module', 'name' and the show/show_header/clear callbacks
+ * are accepted for interface compatibility and otherwise ignored.
+ */
+void
+procfs_list_install(const char *module,
+ const char *name,
+ procfs_list_t *procfs_list,
+ int (*show)(struct seq_file *f, void *p),
+ int (*show_header)(struct seq_file *f),
+ int (*clear)(procfs_list_t *procfs_list),
+ size_t procfs_list_node_off)
+{
+ mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
+ /* Element size/offset are derived from where the node is embedded. */
+ list_create(&procfs_list->pl_list,
+ procfs_list_node_off + sizeof (procfs_list_node_t),
+ procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
+ procfs_list->pl_next_id = 1;
+ procfs_list->pl_node_offset = procfs_list_node_off;
+}
+
+/* Nothing to undo: this file's procfs_list_install() never creates a file. */
+void
+procfs_list_uninstall(procfs_list_t *procfs_list)
+{}
+
+/*
+ * Release the list and its lock. The caller must have removed every element
+ * beforehand.
+ */
+void
+procfs_list_destroy(procfs_list_t *procfs_list)
+{
+ ASSERT(list_is_empty(&procfs_list->pl_list));
+ list_destroy(&procfs_list->pl_list);
+ mutex_destroy(&procfs_list->pl_lock);
+}
+
+/*
+ * Access the pln_id of the procfs_list_node_t embedded pl_node_offset bytes
+ * into 'obj'. Both arguments are parenthesized so that an expression passed
+ * as 'obj' is cast as a whole.
+ */
+#define NODE_ID(procfs_list, obj) \
+ (((procfs_list_node_t *)(((char *)(obj)) + \
+ (procfs_list)->pl_node_offset))->pln_id)
+
+/*
+ * Append element 'p' to the tail of the list and stamp it with the next id.
+ * The caller must hold pl_lock.
+ */
+void
+procfs_list_add(procfs_list_t *procfs_list, void *p)
+{
+ ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+ NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
+ list_insert_tail(&procfs_list->pl_list, p);
+}
+
+/*
+ * =========================================================================
* vnode operations
* =========================================================================
*/
diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in
index 97a431f22..3bcbf63cb 100644
--- a/module/spl/Makefile.in
+++ b/module/spl/Makefile.in
@@ -18,6 +18,7 @@ $(MODULE)-objs += spl-kobj.o
$(MODULE)-objs += spl-kstat.o
$(MODULE)-objs += spl-mutex.o
$(MODULE)-objs += spl-proc.o
+$(MODULE)-objs += spl-procfs-list.o
$(MODULE)-objs += spl-rwlock.o
$(MODULE)-objs += spl-taskq.o
$(MODULE)-objs += spl-thread.o
diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c
index c3fc2e4b2..8683693c8 100644
--- a/module/spl/spl-kstat.c
+++ b/module/spl/spl-kstat.c
@@ -530,6 +530,18 @@ __kstat_set_raw_ops(kstat_t *ksp,
}
EXPORT_SYMBOL(__kstat_set_raw_ops);
+/*
+ * Reset a kstat_proc_entry_t and record the module and entry names it will be
+ * installed under. Names longer than KSTAT_STRLEN are truncated.
+ */
+void
+kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module,
+ const char *name)
+{
+ kpep->kpe_owner = NULL;
+ kpep->kpe_proc = NULL;
+ INIT_LIST_HEAD(&kpep->kpe_list);
+
+ /*
+ * strncpy() leaves the destination unterminated when the source is
+ * KSTAT_STRLEN bytes or longer, and the entry may be embedded in
+ * storage that was not zeroed, so terminate explicitly.
+ */
+ strncpy(kpep->kpe_module, module, KSTAT_STRLEN);
+ kpep->kpe_module[KSTAT_STRLEN] = '\0';
+ strncpy(kpep->kpe_name, name, KSTAT_STRLEN);
+ kpep->kpe_name[KSTAT_STRLEN] = '\0';
+}
+EXPORT_SYMBOL(kstat_proc_entry_init);
+
kstat_t *
__kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
const char *ks_class, uchar_t ks_type, uint_t ks_ndata,
@@ -556,13 +568,10 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
ksp->ks_magic = KS_MAGIC;
mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL);
ksp->ks_lock = &ksp->ks_private_lock;
- INIT_LIST_HEAD(&ksp->ks_list);
ksp->ks_crtime = gethrtime();
ksp->ks_snaptime = ksp->ks_crtime;
- strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN);
ksp->ks_instance = ks_instance;
- strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN);
strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN);
ksp->ks_type = ks_type;
ksp->ks_flags = ks_flags;
@@ -573,6 +582,7 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
ksp->ks_raw_ops.addr = NULL;
ksp->ks_raw_buf = NULL;
ksp->ks_raw_bufsize = 0;
+ kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name);
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
@@ -614,14 +624,14 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
EXPORT_SYMBOL(__kstat_create);
static int
-kstat_detect_collision(kstat_t *ksp)
+kstat_detect_collision(kstat_proc_entry_t *kpep)
{
kstat_module_t *module;
- kstat_t *tmp;
+ kstat_proc_entry_t *tmp;
char *parent;
char *cp;
- parent = kmem_asprintf("%s", ksp->ks_module);
+ parent = kmem_asprintf("%s", kpep->kpe_module);
if ((cp = strrchr(parent, '/')) == NULL) {
strfree(parent);
@@ -630,8 +640,8 @@ kstat_detect_collision(kstat_t *ksp)
cp[0] = '\0';
if ((module = kstat_find_module(parent)) != NULL) {
- list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) {
- if (strncmp(tmp->ks_name, cp+1, KSTAT_STRLEN) == 0) {
+ list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
+ if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) {
strfree(parent);
return (EEXIST);
}
@@ -642,24 +652,30 @@ kstat_detect_collision(kstat_t *ksp)
return (0);
}
+/*
+ * Add a file to the proc filesystem under the kstat namespace (i.e.
+ * /proc/spl/kstat/). The file need not necessarily be implemented as a
+ * kstat.
+ */
void
-__kstat_install(kstat_t *ksp)
+kstat_proc_entry_install(kstat_proc_entry_t *kpep,
+ const struct file_operations *file_ops, void *data)
{
kstat_module_t *module;
- kstat_t *tmp;
+ kstat_proc_entry_t *tmp;
- ASSERT(ksp);
+ ASSERT(kpep);
mutex_enter(&kstat_module_lock);
- module = kstat_find_module(ksp->ks_module);
+ module = kstat_find_module(kpep->kpe_module);
if (module == NULL) {
- if (kstat_detect_collision(ksp) != 0) {
+ if (kstat_detect_collision(kpep) != 0) {
cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
- " collision", ksp->ks_module, ksp->ks_name);
+ " collision", kpep->kpe_module, kpep->kpe_name);
goto out;
}
- module = kstat_create_module(ksp->ks_module);
+ module = kstat_create_module(kpep->kpe_module);
if (module == NULL)
goto out;
}
@@ -668,44 +684,60 @@ __kstat_install(kstat_t *ksp)
* Only one entry by this name per-module, on failure the module
* shouldn't be deleted because we know it has at least one entry.
*/
- list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list) {
- if (strncmp(tmp->ks_name, ksp->ks_name, KSTAT_STRLEN) == 0)
+ list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) {
+ if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0)
goto out;
}
- list_add_tail(&ksp->ks_list, &module->ksm_kstat_list);
+ list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list);
- mutex_enter(ksp->ks_lock);
- ksp->ks_owner = module;
- ksp->ks_proc = proc_create_data(ksp->ks_name, 0644,
- module->ksm_proc, &proc_kstat_operations, (void *)ksp);
- if (ksp->ks_proc == NULL) {
- list_del_init(&ksp->ks_list);
+ kpep->kpe_owner = module;
+ kpep->kpe_proc = proc_create_data(kpep->kpe_name, 0644,
+ module->ksm_proc, file_ops, data);
+ if (kpep->kpe_proc == NULL) {
+ list_del_init(&kpep->kpe_list);
if (list_empty(&module->ksm_kstat_list))
kstat_delete_module(module);
}
- mutex_exit(ksp->ks_lock);
out:
mutex_exit(&kstat_module_lock);
+
+}
+EXPORT_SYMBOL(kstat_proc_entry_install);
+
+/*
+ * Publish a kstat: install its embedded proc entry with the kstat file
+ * operations, passing the kstat itself as the entry's data.
+ */
+void
+__kstat_install(kstat_t *ksp)
+{
+ ASSERT(ksp);
+ kstat_proc_entry_install(&ksp->ks_proc, &proc_kstat_operations, ksp);
}
EXPORT_SYMBOL(__kstat_install);
void
-__kstat_delete(kstat_t *ksp)
+kstat_proc_entry_delete(kstat_proc_entry_t *kpep)
{
- kstat_module_t *module = ksp->ks_owner;
+ kstat_module_t *module = kpep->kpe_owner;
+ if (kpep->kpe_proc)
+ remove_proc_entry(kpep->kpe_name, module->ksm_proc);
mutex_enter(&kstat_module_lock);
- list_del_init(&ksp->ks_list);
+ list_del_init(&kpep->kpe_list);
+
+ /*
+ * Remove top level module directory if it wasn't empty before, but now
+ * is.
+ */
+ if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list))
+ kstat_delete_module(module);
mutex_exit(&kstat_module_lock);
- if (ksp->ks_proc) {
- remove_proc_entry(ksp->ks_name, module->ksm_proc);
+}
+EXPORT_SYMBOL(kstat_proc_entry_delete);
- /* Remove top level module directory if it's empty */
- if (list_empty(&module->ksm_kstat_list))
- kstat_delete_module(module);
- }
+void
+__kstat_delete(kstat_t *ksp)
+{
+ kstat_proc_entry_delete(&ksp->ks_proc);
if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
kmem_free(ksp->ks_data, ksp->ks_data_size);
diff --git a/module/spl/spl-procfs-list.c b/module/spl/spl-procfs-list.c
new file mode 100644
index 000000000..4902e0a56
--- /dev/null
+++ b/module/spl/spl-procfs-list.c
@@ -0,0 +1,256 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+#include <sys/list.h>
+#include <sys/mutex.h>
+#include <sys/procfs_list.h>
+#include <linux/proc_fs.h>
+
+/*
+ * A procfs_list is a wrapper around a linked list which implements the seq_file
+ * interface, allowing the contents of the list to be exposed through procfs.
+ * The kernel already has some utilities to help implement the seq_file
+ * interface for linked lists (seq_list_*), but they aren't appropriate for use
+ * with lists that have many entries, because seq_list_start walks the list at
+ * the start of each read syscall to find where it left off, so reading a file
+ * ends up being quadratic in the number of entries in the list.
+ *
+ * This implementation avoids this penalty by maintaining a separate cursor into
+ * the list per instance of the file that is open. It also maintains some extra
+ * information in each node of the list to prevent reads of entries that have
+ * been dropped from the list.
+ *
+ * Callers should only add elements to the list using procfs_list_add, which
+ * adds an element to the tail of the list. Other operations can be performed
+ * directly on the wrapped list using the normal list manipulation functions,
+ * but elements should only be removed from the head of the list.
+ */
+
+/*
+ * Access the pln_id of the procfs_list_node_t embedded pl_node_offset bytes
+ * into 'obj'. Both arguments are parenthesized so that an expression passed
+ * as 'obj' is cast as a whole.
+ */
+#define NODE_ID(procfs_list, obj) \
+ (((procfs_list_node_t *)(((char *)(obj)) + \
+ (procfs_list)->pl_node_offset))->pln_id)
+
+/*
+ * Read position of one open instance of the file. Allocated as the seq_file
+ * private data in procfs_list_open().
+ */
+typedef struct procfs_list_cursor {
+ procfs_list_t *procfs_list; /* List into which this cursor points */
+ void *cached_node; /* Most recently accessed node */
+ loff_t cached_pos; /* Position of cached_node */
+} procfs_list_cursor_t;
+
+/*
+ * seq_file .show callback. SEQ_START_TOKEN stands for the header: it is
+ * printed with pl_show_header when one was supplied and is otherwise empty.
+ * Any other 'p' is a list element and is printed with pl_show. Expects
+ * pl_lock to be held.
+ */
+static int
+procfs_list_seq_show(struct seq_file *f, void *p)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+
+ ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+ if (p == SEQ_START_TOKEN) {
+ if (procfs_list->pl_show_header != NULL)
+ return (procfs_list->pl_show_header(f));
+ else
+ return (0);
+ }
+ return (procfs_list->pl_show(f, p));
+}
+
+/*
+ * Step the cursor to the element following cached_node, or to the head of the
+ * list when the cursor is still on SEQ_START_TOKEN. On success the cursor and
+ * *pos are updated to the new element and its id. At the end of the list NULL
+ * is returned and neither the cursor nor *pos is modified.
+ */
+static void *
+procfs_list_next_node(procfs_list_cursor_t *cursor, loff_t *pos)
+{
+ void *next_node;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+
+ if (cursor->cached_node == SEQ_START_TOKEN)
+ next_node = list_head(&procfs_list->pl_list);
+ else
+ next_node = list_next(&procfs_list->pl_list,
+ cursor->cached_node);
+
+ if (next_node != NULL) {
+ cursor->cached_node = next_node;
+ /* File positions are node ids, not element counts. */
+ cursor->cached_pos = NODE_ID(procfs_list, cursor->cached_node);
+ *pos = cursor->cached_pos;
+ }
+ return (next_node);
+}
+
+/*
+ * seq_file .start callback. Takes pl_lock and returns the item to show for
+ * *pos: SEQ_START_TOKEN for position 0, otherwise the cached node or the one
+ * after it. Returns ERR_PTR(-EIO) if the cached node has since been dropped
+ * from the list.
+ *
+ * NOTE(review): every return path leaves pl_lock held; this relies on
+ * seq_file invoking .stop (procfs_list_seq_stop) afterwards - confirm.
+ */
+static void *
+procfs_list_seq_start(struct seq_file *f, loff_t *pos)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+
+ mutex_enter(&procfs_list->pl_lock);
+
+ if (*pos == 0) {
+ cursor->cached_node = SEQ_START_TOKEN;
+ cursor->cached_pos = 0;
+ return (SEQ_START_TOKEN);
+ }
+
+ /*
+ * Check if our cached pointer has become stale, which happens if
+ * the message where we left off has been dropped from the list since
+ * the last read syscall completed.
+ */
+ void *oldest_node = list_head(&procfs_list->pl_list);
+ if (cursor->cached_node != SEQ_START_TOKEN && (oldest_node == NULL ||
+ NODE_ID(procfs_list, oldest_node) > cursor->cached_pos))
+ return (ERR_PTR(-EIO));
+
+ /*
+ * If it isn't starting from the beginning of the file, the seq_file
+ * code will either pick up at the same position it visited last or the
+ * following one.
+ */
+ if (*pos == cursor->cached_pos) {
+ return (cursor->cached_node);
+ } else {
+ ASSERT3U(*pos, ==, cursor->cached_pos + 1);
+ return (procfs_list_next_node(cursor, pos));
+ }
+}
+
+/*
+ * seq_file .next callback: advance this file's cursor by one element. 'p' is
+ * unused because the cursor already records the current node. Expects pl_lock
+ * to be held.
+ */
+static void *
+procfs_list_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ ASSERT(MUTEX_HELD(&cursor->procfs_list->pl_lock));
+ return (procfs_list_next_node(cursor, pos));
+}
+
+/*
+ * seq_file .stop callback: drop the pl_lock that procfs_list_seq_start()
+ * acquired. 'p' is unused.
+ */
+static void
+procfs_list_seq_stop(struct seq_file *f, void *p)
+{
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+ mutex_exit(&procfs_list->pl_lock);
+}
+
+/*
+ * seq_file iterator callbacks shared by every procfs_list file. The table is
+ * never modified after initialization, so it is const.
+ */
+static const struct seq_operations procfs_list_seq_ops = {
+ .show = procfs_list_seq_show,
+ .start = procfs_list_seq_start,
+ .next = procfs_list_seq_next,
+ .stop = procfs_list_seq_stop,
+};
+
+/*
+ * .open handler: set up a seq_file whose private data is a fresh
+ * procfs_list_cursor_t pointing at the list registered for this proc entry.
+ * Returns 0 on success or the error from seq_open_private().
+ */
+static int
+procfs_list_open(struct inode *inode, struct file *filp)
+{
+ int rc = seq_open_private(filp, &procfs_list_seq_ops,
+ sizeof (procfs_list_cursor_t));
+ if (rc != 0)
+ return (rc);
+
+ struct seq_file *f = filp->private_data;
+ procfs_list_cursor_t *cursor = f->private;
+ /* The proc entry's data is the procfs_list given at install time. */
+ cursor->procfs_list = PDE_DATA(inode);
+ cursor->cached_node = NULL;
+ cursor->cached_pos = 0;
+
+ return (0);
+}
+
+/*
+ * .write handler. The written data is never examined: any write invokes the
+ * optional pl_clear callback. Returns 'len' when there is no callback or it
+ * succeeds, otherwise the callback's error code negated.
+ */
+static ssize_t
+procfs_list_write(struct file *filp, const char __user *buf, size_t len,
+ loff_t *ppos)
+{
+ struct seq_file *f = filp->private_data;
+ procfs_list_cursor_t *cursor = f->private;
+ procfs_list_t *procfs_list = cursor->procfs_list;
+ int rc;
+
+ if (procfs_list->pl_clear != NULL &&
+ (rc = procfs_list->pl_clear(procfs_list)) != 0)
+ return (-rc);
+ return (len);
+}
+
+/*
+ * File operations for every procfs_list file: reads go through seq_file,
+ * writes trigger the list's clear callback. The table is only ever passed as
+ * a const pointer to kstat_proc_entry_install(), so it is const.
+ */
+static const struct file_operations procfs_list_operations = {
+ .owner = THIS_MODULE,
+ .open = procfs_list_open,
+ .write = procfs_list_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*
+ * Initialize a procfs_list and create a file for it in the proc filesystem
+ * under the kstat namespace.
+ *
+ * 'show' prints one element, 'show_header' and 'clear' are optional (NULL is
+ * checked for before they are called), and 'procfs_list_node_off' is the
+ * offset of the procfs_list_node_t within each element. pl_private is not
+ * touched here; it belongs to the caller.
+ */
+void
+procfs_list_install(const char *module,
+ const char *name,
+ procfs_list_t *procfs_list,
+ int (*show)(struct seq_file *f, void *p),
+ int (*show_header)(struct seq_file *f),
+ int (*clear)(procfs_list_t *procfs_list),
+ size_t procfs_list_node_off)
+{
+ mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&procfs_list->pl_list,
+ procfs_list_node_off + sizeof (procfs_list_node_t),
+ procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
+ procfs_list->pl_next_id = 1; /* Save id 0 for SEQ_START_TOKEN */
+ procfs_list->pl_show = show;
+ procfs_list->pl_show_header = show_header;
+ procfs_list->pl_clear = clear;
+ procfs_list->pl_node_offset = procfs_list_node_off;
+
+ /* The list itself is the data handed back to procfs_list_open(). */
+ kstat_proc_entry_init(&procfs_list->pl_kstat_entry, module, name);
+ kstat_proc_entry_install(&procfs_list->pl_kstat_entry,
+ &procfs_list_operations, procfs_list);
+}
+EXPORT_SYMBOL(procfs_list_install);
+
+/*
+ * Remove the proc filesystem file corresponding to the given list. The list,
+ * its elements and its lock are left intact; see procfs_list_destroy().
+ */
+void
+procfs_list_uninstall(procfs_list_t *procfs_list)
+{
+ kstat_proc_entry_delete(&procfs_list->pl_kstat_entry);
+}
+EXPORT_SYMBOL(procfs_list_uninstall);
+
+/*
+ * Release the list and its lock. The caller must have removed every element
+ * beforehand.
+ */
+void
+procfs_list_destroy(procfs_list_t *procfs_list)
+{
+ ASSERT(list_is_empty(&procfs_list->pl_list));
+ list_destroy(&procfs_list->pl_list);
+ mutex_destroy(&procfs_list->pl_lock);
+}
+EXPORT_SYMBOL(procfs_list_destroy);
+
+/*
+ * Add a new node to the tail of the list. While the standard list manipulation
+ * functions can be used for all other operations, adding elements to the list
+ * should only be done using this helper so that the id of the new node is set
+ * correctly. The caller must hold pl_lock.
+ */
+void
+procfs_list_add(procfs_list_t *procfs_list, void *p)
+{
+ ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
+ NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
+ list_insert_tail(&procfs_list->pl_list, p);
+}
+EXPORT_SYMBOL(procfs_list_add);
diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c
index fa1cf9e98..c02ef86b5 100644
--- a/module/zfs/spa_stats.c
+++ b/module/zfs/spa_stats.c
@@ -55,7 +55,6 @@ int zfs_multihost_history = 0;
* Read statistics - Information exported regarding each arc_read call
*/
typedef struct spa_read_history {
- uint64_t uid; /* unique identifier */
hrtime_t start; /* time read completed */
uint64_t objset; /* read from this objset */
uint64_t object; /* read of this object number */
@@ -65,13 +64,13 @@ typedef struct spa_read_history {
uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
pid_t pid; /* PID of task doing read */
char comm[16]; /* process name of task doing read */
- list_node_t srh_link;
+ procfs_list_node_t srh_node;
} spa_read_history_t;
static int
-spa_read_history_headers(char *buf, size_t size)
+spa_read_history_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
+ seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
"%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
"level", "blkid", "aflags", "origin", "pid", "process");
@@ -79,13 +78,13 @@ spa_read_history_headers(char *buf, size_t size)
}
static int
-spa_read_history_data(char *buf, size_t size, void *data)
+spa_read_history_show(struct seq_file *f, void *data)
{
spa_read_history_t *srh = (spa_read_history_t *)data;
- (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
+ seq_printf(f, "%-8llu %-16llu 0x%-6llx "
"%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
- (u_longlong_t)srh->uid, srh->start,
+ (u_longlong_t)srh->srh_node.pln_id, srh->start,
(longlong_t)srh->objset, (longlong_t)srh->object,
(longlong_t)srh->level, (longlong_t)srh->blkid,
srh->aflags, srh->origin, srh->pid, srh->comm);
@@ -93,120 +92,73 @@ spa_read_history_data(char *buf, size_t size, void *data)
return (0);
}
-/*
- * Calculate the address for the next spa_stats_history_t entry. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
-static void *
-spa_read_history_addr(kstat_t *ksp, loff_t n)
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
-
- if (n == 0)
- ssh->private = list_tail(&ssh->list);
- else if (ssh->private)
- ssh->private = list_prev(&ssh->list, ssh->private);
+ spa_read_history_t *srh;
+ while (shl->size > size) {
+ srh = list_remove_head(&shl->procfs_list.pl_list);
+ ASSERT3P(srh, !=, NULL);
+ kmem_free(srh, sizeof (spa_read_history_t));
+ shl->size--;
+ }
- return (ssh->private);
+ if (size == 0)
+ ASSERT(list_is_empty(&shl->procfs_list.pl_list));
}
-/*
- * When the kstat is written discard all spa_read_history_t entries. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
static int
-spa_read_history_update(kstat_t *ksp, int rw)
+spa_read_history_clear(procfs_list_t *procfs_list)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
-
- if (rw == KSTAT_WRITE) {
- spa_read_history_t *srh;
-
- while ((srh = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(srh, sizeof (spa_read_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- }
-
- ksp->ks_ndata = ssh->size;
- ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
-
+ spa_history_list_t *shl = procfs_list->pl_private;
+ mutex_enter(&procfs_list->pl_lock);
+ spa_read_history_truncate(shl, 0);
+ mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_read_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
- char *name;
- kstat_t *ksp;
+ spa_history_list_t *shl = &spa->spa_stats.read_history;
+ char *module;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&ssh->list, sizeof (spa_read_history_t),
- offsetof(spa_read_history_t, srh_link));
+ shl->size = 0;
- ssh->count = 0;
- ssh->size = 0;
- ssh->private = NULL;
+ module = kmem_asprintf("zfs/%s", spa_name(spa));
- name = kmem_asprintf("zfs/%s", spa_name(spa));
+ shl->procfs_list.pl_private = shl;
+ procfs_list_install(module,
+ "reads",
+ &shl->procfs_list,
+ spa_read_history_show,
+ spa_read_history_show_header,
+ spa_read_history_clear,
+ offsetof(spa_read_history_t, srh_node));
- ksp = kstat_create(name, 0, "reads", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
-
- if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = NULL;
- ksp->ks_private = spa;
- ksp->ks_update = spa_read_history_update;
- kstat_set_raw_ops(ksp, spa_read_history_headers,
- spa_read_history_data, spa_read_history_addr);
- kstat_install(ksp);
- }
- strfree(name);
+ strfree(module);
}
static void
spa_read_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
- spa_read_history_t *srh;
- kstat_t *ksp;
-
- ksp = ssh->kstat;
- if (ksp)
- kstat_delete(ksp);
-
- mutex_enter(&ssh->lock);
- while ((srh = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(srh, sizeof (spa_read_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- list_destroy(&ssh->list);
- mutex_exit(&ssh->lock);
-
- mutex_destroy(&ssh->lock);
+ spa_history_list_t *shl = &spa->spa_stats.read_history;
+ procfs_list_uninstall(&shl->procfs_list);
+ spa_read_history_truncate(shl, 0);
+ procfs_list_destroy(&shl->procfs_list);
}
void
spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
{
- spa_stats_history_t *ssh = &spa->spa_stats.read_history;
- spa_read_history_t *srh, *rm;
+ spa_history_list_t *shl = &spa->spa_stats.read_history;
+ spa_read_history_t *srh;
ASSERT3P(spa, !=, NULL);
ASSERT3P(zb, !=, NULL);
- if (zfs_read_history == 0 && ssh->size == 0)
+ if (zfs_read_history == 0 && shl->size == 0)
return;
if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
@@ -222,19 +174,14 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
srh->aflags = aflags;
srh->pid = getpid();
- mutex_enter(&ssh->lock);
+ mutex_enter(&shl->procfs_list.pl_lock);
- srh->uid = ssh->count++;
- list_insert_head(&ssh->list, srh);
- ssh->size++;
+ procfs_list_add(&shl->procfs_list, srh);
+ shl->size++;
- while (ssh->size > zfs_read_history) {
- ssh->size--;
- rm = list_remove_tail(&ssh->list);
- kmem_free(rm, sizeof (spa_read_history_t));
- }
+ spa_read_history_truncate(shl, zfs_read_history);
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
}
/*
@@ -256,22 +203,21 @@ typedef struct spa_txg_history {
uint64_t writes; /* number of write operations */
uint64_t ndirty; /* number of dirty bytes */
hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
- list_node_t sth_link;
+ procfs_list_node_t sth_node;
} spa_txg_history_t;
static int
-spa_txg_history_headers(char *buf, size_t size)
+spa_txg_history_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-8s %-16s %-5s %-12s %-12s %-12s "
+ seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
"%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
"ndirty", "nread", "nwritten", "reads", "writes",
"otime", "qtime", "wtime", "stime");
-
return (0);
}
static int
-spa_txg_history_data(char *buf, size_t size, void *data)
+spa_txg_history_show(struct seq_file *f, void *data)
{
spa_txg_history_t *sth = (spa_txg_history_t *)data;
uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
@@ -303,7 +249,7 @@ spa_txg_history_data(char *buf, size_t size, void *data)
sync = sth->times[TXG_STATE_SYNCED] -
sth->times[TXG_STATE_WAIT_FOR_SYNC];
- (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
+ seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
"%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
(longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
(u_longlong_t)sth->ndirty,
@@ -315,110 +261,62 @@ spa_txg_history_data(char *buf, size_t size, void *data)
return (0);
}
-/*
- * Calculate the address for the next spa_stats_history_t entry. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
-static void *
-spa_txg_history_addr(kstat_t *ksp, loff_t n)
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
+ spa_txg_history_t *sth;
+ while (shl->size > size) {
+ sth = list_remove_head(&shl->procfs_list.pl_list);
+ ASSERT3P(sth, !=, NULL);
+ kmem_free(sth, sizeof (spa_txg_history_t));
+ shl->size--;
+ }
- if (n == 0)
- ssh->private = list_tail(&ssh->list);
- else if (ssh->private)
- ssh->private = list_prev(&ssh->list, ssh->private);
+ if (size == 0)
+ ASSERT(list_is_empty(&shl->procfs_list.pl_list));
- return (ssh->private);
}
-/*
- * When the kstat is written discard all spa_txg_history_t entries. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
static int
-spa_txg_history_update(kstat_t *ksp, int rw)
+spa_txg_history_clear(procfs_list_t *procfs_list)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
-
- if (rw == KSTAT_WRITE) {
- spa_txg_history_t *sth;
-
- while ((sth = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(sth, sizeof (spa_txg_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- }
-
- ksp->ks_ndata = ssh->size;
- ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
-
+ spa_history_list_t *shl = procfs_list->pl_private;
+ mutex_enter(&procfs_list->pl_lock);
+ spa_txg_history_truncate(shl, 0);
+ mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_txg_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
- char *name;
- kstat_t *ksp;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
+ char *module;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&ssh->list, sizeof (spa_txg_history_t),
- offsetof(spa_txg_history_t, sth_link));
+ shl->size = 0;
- ssh->count = 0;
- ssh->size = 0;
- ssh->private = NULL;
+ module = kmem_asprintf("zfs/%s", spa_name(spa));
- name = kmem_asprintf("zfs/%s", spa_name(spa));
+ shl->procfs_list.pl_private = shl;
+ procfs_list_install(module,
+ "txgs",
+ &shl->procfs_list,
+ spa_txg_history_show,
+ spa_txg_history_show_header,
+ spa_txg_history_clear,
+ offsetof(spa_txg_history_t, sth_node));
- ksp = kstat_create(name, 0, "txgs", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
-
- if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = NULL;
- ksp->ks_private = spa;
- ksp->ks_update = spa_txg_history_update;
- kstat_set_raw_ops(ksp, spa_txg_history_headers,
- spa_txg_history_data, spa_txg_history_addr);
- kstat_install(ksp);
- }
- strfree(name);
+ strfree(module);
}
static void
spa_txg_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
- spa_txg_history_t *sth;
- kstat_t *ksp;
-
- ksp = ssh->kstat;
- if (ksp)
- kstat_delete(ksp);
-
- mutex_enter(&ssh->lock);
- while ((sth = list_remove_head(&ssh->list))) {
- ssh->size--;
- kmem_free(sth, sizeof (spa_txg_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- list_destroy(&ssh->list);
- mutex_exit(&ssh->lock);
-
- mutex_destroy(&ssh->lock);
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
+ procfs_list_uninstall(&shl->procfs_list);
+ spa_txg_history_truncate(shl, 0);
+ procfs_list_destroy(&shl->procfs_list);
}
/*
@@ -427,10 +325,10 @@ spa_txg_history_destroy(spa_t *spa)
void
spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
- spa_txg_history_t *sth, *rm;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
+ spa_txg_history_t *sth;
- if (zfs_txg_history == 0 && ssh->size == 0)
+ if (zfs_txg_history == 0 && shl->size == 0)
return;
sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
@@ -438,18 +336,11 @@ spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
sth->state = TXG_STATE_OPEN;
sth->times[TXG_STATE_BIRTH] = birth_time;
- mutex_enter(&ssh->lock);
-
- list_insert_head(&ssh->list, sth);
- ssh->size++;
-
- while (ssh->size > zfs_txg_history) {
- ssh->size--;
- rm = list_remove_tail(&ssh->list);
- kmem_free(rm, sizeof (spa_txg_history_t));
- }
-
- mutex_exit(&ssh->lock);
+ mutex_enter(&shl->procfs_list.pl_lock);
+ procfs_list_add(&shl->procfs_list, sth);
+ shl->size++;
+ spa_txg_history_truncate(shl, zfs_txg_history);
+ mutex_exit(&shl->procfs_list.pl_lock);
}
/*
@@ -459,16 +350,16 @@ int
spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
hrtime_t completed_time)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
int error = ENOENT;
if (zfs_txg_history == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (sth = list_head(&ssh->list); sth != NULL;
- sth = list_next(&ssh->list, sth)) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
+ sth = list_prev(&shl->procfs_list.pl_list, sth)) {
if (sth->txg == txg) {
sth->times[completed_state] = completed_time;
sth->state++;
@@ -476,7 +367,7 @@ spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -488,16 +379,16 @@ static int
spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
{
- spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
+ spa_history_list_t *shl = &spa->spa_stats.txg_history;
spa_txg_history_t *sth;
int error = ENOENT;
if (zfs_txg_history == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (sth = list_head(&ssh->list); sth != NULL;
- sth = list_next(&ssh->list, sth)) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
+ sth = list_prev(&shl->procfs_list.pl_list, sth)) {
if (sth->txg == txg) {
sth->nread = nread;
sth->nwritten = nwritten;
@@ -508,7 +399,7 @@ spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -580,16 +471,16 @@ static int
spa_tx_assign_update(kstat_t *ksp, int rw)
{
spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
int i;
if (rw == KSTAT_WRITE) {
- for (i = 0; i < ssh->count; i++)
- ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
+ for (i = 0; i < shk->count; i++)
+ ((kstat_named_t *)shk->private)[i].value.ui64 = 0;
}
- for (i = ssh->count; i > 0; i--)
- if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
+ for (i = shk->count; i > 0; i--)
+ if (((kstat_named_t *)shk->private)[i-1].value.ui64 != 0)
break;
ksp->ks_ndata = i;
@@ -601,22 +492,22 @@ spa_tx_assign_update(kstat_t *ksp, int rw)
static void
spa_tx_assign_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
char *name;
kstat_named_t *ks;
kstat_t *ksp;
int i;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
- ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
- ssh->size = ssh->count * sizeof (kstat_named_t);
- ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
+ shk->count = 42; /* power of two buckets for 1ns to 2,199s */
+ shk->size = shk->count * sizeof (kstat_named_t);
+ shk->private = kmem_alloc(shk->size, KM_SLEEP);
name = kmem_asprintf("zfs/%s", spa_name(spa));
- for (i = 0; i < ssh->count; i++) {
- ks = &((kstat_named_t *)ssh->private)[i];
+ for (i = 0; i < shk->count; i++) {
+ ks = &((kstat_named_t *)shk->private)[i];
ks->data_type = KSTAT_DATA_UINT64;
ks->value.ui64 = 0;
(void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
@@ -625,13 +516,13 @@ spa_tx_assign_init(spa_t *spa)
ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
+ shk->kstat = ksp;
if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = ssh->private;
- ksp->ks_ndata = ssh->count;
- ksp->ks_data_size = ssh->size;
+ ksp->ks_lock = &shk->lock;
+ ksp->ks_data = shk->private;
+ ksp->ks_ndata = shk->count;
+ ksp->ks_data_size = shk->size;
ksp->ks_private = spa;
ksp->ks_update = spa_tx_assign_update;
kstat_install(ksp);
@@ -642,27 +533,27 @@ spa_tx_assign_init(spa_t *spa)
static void
spa_tx_assign_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
kstat_t *ksp;
- ksp = ssh->kstat;
+ ksp = shk->kstat;
if (ksp)
kstat_delete(ksp);
- kmem_free(ssh->private, ssh->size);
- mutex_destroy(&ssh->lock);
+ kmem_free(shk->private, shk->size);
+ mutex_destroy(&shk->lock);
}
void
spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
{
- spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
+ spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
uint64_t idx = 0;
- while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
+ while (((1ULL << idx) < nsecs) && (idx < shk->size - 1))
idx++;
- atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
+ atomic_inc_64(&((kstat_named_t *)shk->private)[idx].value.ui64);
}
/*
@@ -682,19 +573,19 @@ spa_io_history_update(kstat_t *ksp, int rw)
static void
spa_io_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
char *name;
kstat_t *ksp;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
name = kmem_asprintf("zfs/%s", spa_name(spa));
ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
- ssh->kstat = ksp;
+ shk->kstat = ksp;
if (ksp) {
- ksp->ks_lock = &ssh->lock;
+ ksp->ks_lock = &shk->lock;
ksp->ks_private = spa;
ksp->ks_update = spa_io_history_update;
kstat_install(ksp);
@@ -705,12 +596,12 @@ spa_io_history_init(spa_t *spa)
static void
spa_io_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
- if (ssh->kstat)
- kstat_delete(ssh->kstat);
+ if (shk->kstat)
+ kstat_delete(shk->kstat);
- mutex_destroy(&ssh->lock);
+ mutex_destroy(&shk->lock);
}
/*
@@ -733,7 +624,7 @@ spa_io_history_destroy(spa_t *spa)
*/
typedef struct spa_mmp_history {
- uint64_t mmp_kstat_id; /* unique # for updates */
+ uint64_t mmp_node_id; /* unique # for updates */
uint64_t txg; /* txg of last sync */
uint64_t timestamp; /* UTC time MMP write issued */
uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
@@ -743,20 +634,20 @@ typedef struct spa_mmp_history {
int io_error; /* error status of MMP write */
hrtime_t error_start; /* hrtime of start of error period */
hrtime_t duration; /* time from submission to completion */
- list_node_t smh_link;
+ procfs_list_node_t smh_node;
} spa_mmp_history_t;
static int
-spa_mmp_history_headers(char *buf, size_t size)
+spa_mmp_history_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
+ seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
"%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
"mmp_delay", "vdev_guid", "vdev_label", "vdev_path");
return (0);
}
static int
-spa_mmp_history_data(char *buf, size_t size, void *data)
+spa_mmp_history_show(struct seq_file *f, void *data)
{
spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
@@ -764,8 +655,8 @@ spa_mmp_history_data(char *buf, size_t size, void *data)
char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
"%-10lld %s\n";
- (void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt),
- (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
+ seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
+ (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
(u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
(longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
(u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
@@ -774,137 +665,86 @@ spa_mmp_history_data(char *buf, size_t size, void *data)
return (0);
}
-/*
- * Calculate the address for the next spa_stats_history_t entry. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
-static void *
-spa_mmp_history_addr(kstat_t *ksp, loff_t n)
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
+ spa_mmp_history_t *smh;
+ while (shl->size > size) {
+ smh = list_remove_head(&shl->procfs_list.pl_list);
+ if (smh->vdev_path)
+ strfree(smh->vdev_path);
+ kmem_free(smh, sizeof (spa_mmp_history_t));
+ shl->size--;
+ }
- if (n == 0)
- ssh->private = list_tail(&ssh->list);
- else if (ssh->private)
- ssh->private = list_prev(&ssh->list, ssh->private);
+ if (size == 0)
+ ASSERT(list_is_empty(&shl->procfs_list.pl_list));
- return (ssh->private);
}
-/*
- * When the kstat is written discard all spa_mmp_history_t entries. The
- * ssh->lock will be held until ksp->ks_ndata entries are processed.
- */
static int
-spa_mmp_history_update(kstat_t *ksp, int rw)
+spa_mmp_history_clear(procfs_list_t *procfs_list)
{
- spa_t *spa = ksp->ks_private;
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
-
- ASSERT(MUTEX_HELD(&ssh->lock));
-
- if (rw == KSTAT_WRITE) {
- spa_mmp_history_t *smh;
-
- while ((smh = list_remove_head(&ssh->list))) {
- ssh->size--;
- if (smh->vdev_path)
- strfree(smh->vdev_path);
- kmem_free(smh, sizeof (spa_mmp_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- }
-
- ksp->ks_ndata = ssh->size;
- ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
-
+ spa_history_list_t *shl = procfs_list->pl_private;
+ mutex_enter(&procfs_list->pl_lock);
+ spa_mmp_history_truncate(shl, 0);
+ mutex_exit(&procfs_list->pl_lock);
return (0);
}
static void
spa_mmp_history_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
- char *name;
- kstat_t *ksp;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
+ char *module;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&ssh->list, sizeof (spa_mmp_history_t),
- offsetof(spa_mmp_history_t, smh_link));
+ shl->size = 0;
- ssh->count = 0;
- ssh->size = 0;
- ssh->private = NULL;
-
- name = kmem_asprintf("zfs/%s", spa_name(spa));
+ module = kmem_asprintf("zfs/%s", spa_name(spa));
- ksp = kstat_create(name, 0, "multihost", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
+ shl->procfs_list.pl_private = shl;
+ procfs_list_install(module,
+ "multihost",
+ &shl->procfs_list,
+ spa_mmp_history_show,
+ spa_mmp_history_show_header,
+ spa_mmp_history_clear,
+ offsetof(spa_mmp_history_t, smh_node));
- if (ksp) {
- ksp->ks_lock = &ssh->lock;
- ksp->ks_data = NULL;
- ksp->ks_private = spa;
- ksp->ks_update = spa_mmp_history_update;
- kstat_set_raw_ops(ksp, spa_mmp_history_headers,
- spa_mmp_history_data, spa_mmp_history_addr);
- kstat_install(ksp);
- }
- strfree(name);
+ strfree(module);
}
static void
spa_mmp_history_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh;
- kstat_t *ksp;
-
- ksp = ssh->kstat;
- if (ksp)
- kstat_delete(ksp);
-
- mutex_enter(&ssh->lock);
- while ((smh = list_remove_head(&ssh->list))) {
- ssh->size--;
- if (smh->vdev_path)
- strfree(smh->vdev_path);
- kmem_free(smh, sizeof (spa_mmp_history_t));
- }
-
- ASSERT3U(ssh->size, ==, 0);
- list_destroy(&ssh->list);
- mutex_exit(&ssh->lock);
-
- mutex_destroy(&ssh->lock);
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
+ procfs_list_uninstall(&shl->procfs_list);
+ spa_mmp_history_truncate(shl, 0);
+ procfs_list_destroy(&shl->procfs_list);
}
/*
* Set duration in existing "skip" record to how long we have waited for a leaf
* vdev to become available.
*
- * Important that we start search at the head of the list where new
+ * Important that we start search at the tail of the list where new
* records are inserted, so this is normally an O(1) operation.
*/
int
-spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
+spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
int error = ENOENT;
- if (zfs_multihost_history == 0 && ssh->size == 0)
+ if (zfs_multihost_history == 0 && shl->size == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (smh = list_head(&ssh->list); smh != NULL;
- smh = list_next(&ssh->list, smh)) {
- if (smh->mmp_kstat_id == mmp_kstat_id) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
+ smh = list_prev(&shl->procfs_list.pl_list, smh)) {
+ if (smh->mmp_node_id == mmp_node_id) {
ASSERT3U(smh->io_error, !=, 0);
smh->duration = gethrtime() - smh->error_start;
smh->vdev_guid++;
@@ -912,7 +752,7 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -922,20 +762,20 @@ spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
* See comment re: search order above spa_mmp_history_set_skip().
*/
int
-spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
+spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
hrtime_t duration)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
spa_mmp_history_t *smh;
int error = ENOENT;
- if (zfs_multihost_history == 0 && ssh->size == 0)
+ if (zfs_multihost_history == 0 && shl->size == 0)
return (0);
- mutex_enter(&ssh->lock);
- for (smh = list_head(&ssh->list); smh != NULL;
- smh = list_next(&ssh->list, smh)) {
- if (smh->mmp_kstat_id == mmp_kstat_id) {
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
+ smh = list_prev(&shl->procfs_list.pl_list, smh)) {
+ if (smh->mmp_node_id == mmp_node_id) {
ASSERT(smh->io_error == 0);
smh->io_error = io_error;
smh->duration = duration;
@@ -943,7 +783,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
break;
}
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shl->procfs_list.pl_lock);
return (error);
}
@@ -953,16 +793,16 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
* error == 0 : a write was issued.
* error != 0 : a write was not issued because no leaves were found.
*/
-void *
+void
spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
- uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
+ uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
int error)
{
- spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh, *rm;
+ spa_history_list_t *shl = &spa->spa_stats.mmp_history;
+ spa_mmp_history_t *smh;
- if (zfs_multihost_history == 0 && ssh->size == 0)
- return (NULL);
+ if (zfs_multihost_history == 0 && shl->size == 0)
+ return;
smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
smh->txg = txg;
@@ -974,7 +814,7 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
smh->vdev_path = strdup(vd->vdev_path);
}
smh->vdev_label = label;
- smh->mmp_kstat_id = mmp_kstat_id;
+ smh->mmp_node_id = mmp_node_id;
if (error) {
smh->io_error = error;
@@ -982,21 +822,11 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
smh->vdev_guid = 1;
}
- mutex_enter(&ssh->lock);
-
- list_insert_head(&ssh->list, smh);
- ssh->size++;
-
- while (ssh->size > zfs_multihost_history) {
- ssh->size--;
- rm = list_remove_tail(&ssh->list);
- if (rm->vdev_path)
- strfree(rm->vdev_path);
- kmem_free(rm, sizeof (spa_mmp_history_t));
- }
-
- mutex_exit(&ssh->lock);
- return ((void *)smh);
+ mutex_enter(&shl->procfs_list.pl_lock);
+ procfs_list_add(&shl->procfs_list, smh);
+ shl->size++;
+ spa_mmp_history_truncate(shl, zfs_multihost_history);
+ mutex_exit(&shl->procfs_list.pl_lock);
}
static void *
@@ -1023,19 +853,19 @@ spa_state_data(char *buf, size_t size, void *data)
static void
spa_state_init(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.state;
+ spa_history_kstat_t *shk = &spa->spa_stats.state;
char *name;
kstat_t *ksp;
- mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
name = kmem_asprintf("zfs/%s", spa_name(spa));
ksp = kstat_create(name, 0, "state", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- ssh->kstat = ksp;
+ shk->kstat = ksp;
if (ksp) {
- ksp->ks_lock = &ssh->lock;
+ ksp->ks_lock = &shk->lock;
ksp->ks_data = NULL;
ksp->ks_private = spa;
ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
@@ -1049,12 +879,12 @@ spa_state_init(spa_t *spa)
static void
spa_health_destroy(spa_t *spa)
{
- spa_stats_history_t *ssh = &spa->spa_stats.state;
- kstat_t *ksp = ssh->kstat;
+ spa_history_kstat_t *shk = &spa->spa_stats.state;
+ kstat_t *ksp = shk->kstat;
if (ksp)
kstat_delete(ksp);
- mutex_destroy(&ssh->lock);
+ mutex_destroy(&shk->lock);
}
void
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 30a883f85..89cdf7d81 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -429,16 +429,16 @@ static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_waitq_enter(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
+ if (shk->kstat != NULL) {
+ mutex_enter(&shk->lock);
+ kstat_waitq_enter(shk->kstat->ks_data);
+ mutex_exit(&shk->lock);
}
}
@@ -446,16 +446,16 @@ static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_waitq_exit(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
+ if (shk->kstat != NULL) {
+ mutex_enter(&shk->lock);
+ kstat_waitq_exit(shk->kstat->ks_data);
+ mutex_exit(&shk->lock);
}
}
@@ -463,17 +463,17 @@ static void
vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
vq->vq_class[zio->io_priority].vqc_active++;
avl_add(&vq->vq_active_tree, zio);
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_runq_enter(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
+ if (shk->kstat != NULL) {
+ mutex_enter(&shk->lock);
+ kstat_runq_enter(shk->kstat->ks_data);
+ mutex_exit(&shk->lock);
}
}
@@ -481,17 +481,17 @@ static void
vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
+ spa_history_kstat_t *shk = &spa->spa_stats.io_history;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
vq->vq_class[zio->io_priority].vqc_active--;
avl_remove(&vq->vq_active_tree, zio);
- if (ssh->kstat != NULL) {
- kstat_io_t *ksio = ssh->kstat->ks_data;
+ if (shk->kstat != NULL) {
+ kstat_io_t *ksio = shk->kstat->ks_data;
- mutex_enter(&ssh->lock);
+ mutex_enter(&shk->lock);
kstat_runq_exit(ksio);
if (zio->io_type == ZIO_TYPE_READ) {
ksio->reads++;
@@ -500,7 +500,7 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
ksio->writes++;
ksio->nwritten += zio->io_size;
}
- mutex_exit(&ssh->lock);
+ mutex_exit(&shk->lock);
}
}
diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c
index ca79893c9..b5f93fd9b 100644
--- a/module/zfs/zfs_debug.c
+++ b/module/zfs/zfs_debug.c
@@ -24,13 +24,17 @@
*/
#include <sys/zfs_context.h>
-#include <sys/kstat.h>
-list_t zfs_dbgmsgs;
+typedef struct zfs_dbgmsg {
+ procfs_list_node_t zdm_node;
+ time_t zdm_timestamp;
+ int zdm_size;
+ char zdm_msg[1]; /* variable length allocation */
+} zfs_dbgmsg_t;
+
+procfs_list_t zfs_dbgmsgs;
int zfs_dbgmsg_size = 0;
-kmutex_t zfs_dbgmsgs_lock;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
-kstat_t *zfs_dbgmsg_kstat;
/*
* Internal ZFS debug messages are enabled by default.
@@ -47,123 +51,71 @@ kstat_t *zfs_dbgmsg_kstat;
int zfs_dbgmsg_enable = 1;
static int
-zfs_dbgmsg_headers(char *buf, size_t size)
+zfs_dbgmsg_show_header(struct seq_file *f)
{
- (void) snprintf(buf, size, "%-12s %-8s\n", "timestamp", "message");
-
+ seq_printf(f, "%-12s %-8s\n", "timestamp", "message");
return (0);
}
static int
-zfs_dbgmsg_data(char *buf, size_t size, void *data)
+zfs_dbgmsg_show(struct seq_file *f, void *p)
{
- zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data;
-
- (void) snprintf(buf, size, "%-12llu %-s\n",
+ zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p;
+ seq_printf(f, "%-12llu %-s\n",
(u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
-
return (0);
}
-static void *
-zfs_dbgmsg_addr(kstat_t *ksp, loff_t n)
-{
- zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)ksp->ks_private;
-
- ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
-
- if (n == 0)
- ksp->ks_private = list_head(&zfs_dbgmsgs);
- else if (zdm)
- ksp->ks_private = list_next(&zfs_dbgmsgs, zdm);
-
- return (ksp->ks_private);
-}
-
static void
zfs_dbgmsg_purge(int max_size)
{
- zfs_dbgmsg_t *zdm;
- int size;
-
- ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
-
while (zfs_dbgmsg_size > max_size) {
- zdm = list_remove_head(&zfs_dbgmsgs);
+ zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list);
if (zdm == NULL)
return;
- size = zdm->zdm_size;
+ int size = zdm->zdm_size;
kmem_free(zdm, size);
zfs_dbgmsg_size -= size;
}
}
static int
-zfs_dbgmsg_update(kstat_t *ksp, int rw)
+zfs_dbgmsg_clear(procfs_list_t *procfs_list)
{
- if (rw == KSTAT_WRITE)
- zfs_dbgmsg_purge(0);
-
+ mutex_enter(&zfs_dbgmsgs.pl_lock);
+ zfs_dbgmsg_purge(0);
+ mutex_exit(&zfs_dbgmsgs.pl_lock);
return (0);
}
void
zfs_dbgmsg_init(void)
{
- list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
+ procfs_list_install("zfs",
+ "dbgmsg",
+ &zfs_dbgmsgs,
+ zfs_dbgmsg_show,
+ zfs_dbgmsg_show_header,
+ zfs_dbgmsg_clear,
offsetof(zfs_dbgmsg_t, zdm_node));
- mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
-
- zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc",
- KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
- if (zfs_dbgmsg_kstat) {
- zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock;
- zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX;
- zfs_dbgmsg_kstat->ks_private = NULL;
- zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update;
- kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers,
- zfs_dbgmsg_data, zfs_dbgmsg_addr);
- kstat_install(zfs_dbgmsg_kstat);
- }
}
void
zfs_dbgmsg_fini(void)
{
- if (zfs_dbgmsg_kstat)
- kstat_delete(zfs_dbgmsg_kstat);
+ procfs_list_uninstall(&zfs_dbgmsgs);
+ zfs_dbgmsg_purge(0);
+
/*
* TODO - decide how to make this permanent
*/
#ifdef _KERNEL
- mutex_enter(&zfs_dbgmsgs_lock);
- zfs_dbgmsg_purge(0);
- mutex_exit(&zfs_dbgmsgs_lock);
- mutex_destroy(&zfs_dbgmsgs_lock);
+ procfs_list_destroy(&zfs_dbgmsgs);
#endif
}
void
-__zfs_dbgmsg(char *buf)
-{
- zfs_dbgmsg_t *zdm;
- int size;
-
- size = sizeof (zfs_dbgmsg_t) + strlen(buf);
- zdm = kmem_zalloc(size, KM_SLEEP);
- zdm->zdm_size = size;
- zdm->zdm_timestamp = gethrestime_sec();
- strcpy(zdm->zdm_msg, buf);
-
- mutex_enter(&zfs_dbgmsgs_lock);
- list_insert_tail(&zfs_dbgmsgs, zdm);
- zfs_dbgmsg_size += size;
- zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
- mutex_exit(&zfs_dbgmsgs_lock);
-}
-
-void
__set_error(const char *file, const char *func, int line, int err)
{
/*
@@ -176,6 +128,22 @@ __set_error(const char *file, const char *func, int line, int err)
}
#ifdef _KERNEL
+static void
+__zfs_dbgmsg(char *buf)
+{
+ int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
+ zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
+ zdm->zdm_size = size;
+ zdm->zdm_timestamp = gethrestime_sec();
+ strcpy(zdm->zdm_msg, buf);
+
+ mutex_enter(&zfs_dbgmsgs.pl_lock);
+ procfs_list_add(&zfs_dbgmsgs, zdm);
+ zfs_dbgmsg_size += size;
+ zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
+ mutex_exit(&zfs_dbgmsgs.pl_lock);
+}
+
void
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
{
@@ -244,14 +212,12 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
+/*
+ * Print a "ZFS_DBGMSG(<tag>):" header followed by each message currently on
+ * the zfs_dbgmsgs list, one per line, using printf(). The list is walked
+ * with zfs_dbgmsgs.pl_lock held.
+ */
void
zfs_dbgmsg_print(const char *tag)
{
- zfs_dbgmsg_t *zdm;
-
(void) printf("ZFS_DBGMSG(%s):\n", tag);
- mutex_enter(&zfs_dbgmsgs_lock);
- for (zdm = list_head(&zfs_dbgmsgs); zdm;
- zdm = list_next(&zfs_dbgmsgs, zdm))
+ mutex_enter(&zfs_dbgmsgs.pl_lock);
+ for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL;
+ zdm = list_next(&zfs_dbgmsgs.pl_list, zdm))
(void) printf("%s\n", zdm->zdm_msg);
- mutex_exit(&zfs_dbgmsgs_lock);
+ mutex_exit(&zfs_dbgmsgs.pl_lock);
}
#endif /* _KERNEL */
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 4b41c3f74..95e70f043 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -584,10 +584,6 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos',
post =
tags = ['functional', 'inuse']
-[tests/functional/kstat]
-tests = ['state']
-tags = ['functional', 'kstat']
-
[tests/functional/large_files]
tests = ['large_files_001_pos', 'large_files_002_pos']
tags = ['functional', 'large_files']
@@ -672,6 +668,11 @@ tags = ['functional', 'poolversion']
tests = ['privilege_001_pos', 'privilege_002_pos']
tags = ['functional', 'privilege']
+[tests/functional/procfs]
+tests = ['procfs_list_basic', 'procfs_list_concurrent_readers',
+ 'procfs_list_stale_read', 'pool_state']
+tags = ['functional', 'procfs']
+
[tests/functional/projectquota]
tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
'projectquota_001_pos', 'projectquota_002_pos', 'projectquota_003_pos',
diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
index e0a4aca99..961a34027 100644
--- a/tests/zfs-tests/tests/functional/Makefile.am
+++ b/tests/zfs-tests/tests/functional/Makefile.am
@@ -29,7 +29,6 @@ SUBDIRS = \
inheritance \
inuse \
io \
- kstat \
large_files \
largest_pool \
libzfs \
@@ -48,6 +47,7 @@ SUBDIRS = \
pool_names \
poolversion \
privilege \
+ procfs \
projectquota \
quota \
raidz \
diff --git a/tests/zfs-tests/tests/functional/kstat/Makefile.am b/tests/zfs-tests/tests/functional/kstat/Makefile.am
deleted file mode 100644
index 8ad83ec3e..000000000
--- a/tests/zfs-tests/tests/functional/kstat/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat
-dist_pkgdata_SCRIPTS = \
- setup.ksh \
- cleanup.ksh \
- state.ksh
diff --git a/tests/zfs-tests/tests/functional/procfs/Makefile.am b/tests/zfs-tests/tests/functional/procfs/Makefile.am
new file mode 100644
index 000000000..a7f022d9f
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/procfs/Makefile.am
@@ -0,0 +1,8 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/procfs
+dist_pkgdata_SCRIPTS = \
+ setup.ksh \
+ cleanup.ksh \
+ procfs_list_basic.ksh \
+ procfs_list_concurrent_readers.ksh \
+ procfs_list_stale_read.ksh \
+ pool_state.ksh
diff --git a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh b/tests/zfs-tests/tests/functional/procfs/cleanup.ksh
index 8a212ce37..8fe46577e 100755
--- a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh
+++ b/tests/zfs-tests/tests/functional/procfs/cleanup.ksh
@@ -19,8 +19,9 @@
#
# CDDL HEADER END
#
+
#
-# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
diff --git a/tests/zfs-tests/tests/functional/kstat/state.ksh b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
index 3c29266e5..3c29266e5 100755
--- a/tests/zfs-tests/tests/functional/kstat/state.ksh
+++ b/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh
new file mode 100755
index 000000000..c9eff3649
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_basic.ksh
@@ -0,0 +1,95 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test that we can read from and write to a file in procfs whose contents are
+# backed by a linked list.
+#
+# STRATEGY:
+# 1. Take some snapshots of a filesystem, which will cause some messages to be
+# written to the zfs dbgmsgs.
+# 2. Read the dbgmsgs via procfs and verify that the expected messages are
+# present.
+# 3. Write to the dbgmsgs file to clear the messages.
+# 4. Read the dbgmsgs again, and make sure the messages are no longer present.
+#
+
+#
+# Exit handler: destroy $FS (recursively) if it still exists.
+#
+function cleanup
+{
+	# Nothing to do when the dataset is already gone; a bare return
+	# hands back datasetexists' status.
+	datasetexists $FS || return
+	log_must zfs destroy -r $FS
+}
+
+#
+# Count the lines on stdin that contain a "command: zfs snapshot" record for
+# $FS@testsnapshot* and assert, via log_must, that the count equals $1.
+#
+# $1 - expected number of matching lines
+#
+function count_snap_cmds
+{
+	typeset expected_count=$1
+	typeset count
+
+	# grep -c counts matching lines directly; typeset keeps the result
+	# local instead of leaking a global "count" into the test script.
+	count=$(grep -c "command: zfs snapshot $FS@testsnapshot")
+	log_must eval "[[ $count -eq $expected_count ]]"
+}
+
+typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
+typeset -r FS=$TESTPOOL/fs
+
+log_onexit cleanup
+
+# Clear out old messages
+echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG"
+
+log_must zfs create $FS
+for i in {1..20}; do
+	log_must zfs snapshot "$FS@testsnapshot$i"
+done
+log_must zpool sync $TESTPOOL
+
+#
+# Read the debug message file in small chunks to make sure that the read is
+# split up into multiple syscalls. This tests that when a syscall begins we
+# correctly pick up in the list of messages where the previous syscall left
+# off. The size of the read can affect how many bytes the seq_file code has
+# left in its internal buffer, which in turn can affect the relative pos that
+# the seq_file code picks up at when the next read starts. Try a few
+# different size reads to make sure we can handle each case.
+#
+# Check that the file has the right contents by grepping for some of the
+# messages that we expect to be present.
+#
+for chunk_sz in {1,64,256,1024,4096}; do
+	dd if=$ZFS_DBGMSG bs=$chunk_sz | count_snap_cmds 20
+done
+
+# Clear out old messages and check that they really are gone
+echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG"
+count_snap_cmds 0 <$ZFS_DBGMSG
+#
+# Even though we don't expect any messages in the file, reading should still
+# succeed.
+#
+log_must cat $ZFS_DBGMSG
+
+log_pass "Basic reading/writing of procfs file backed by linked list successful"
diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh
new file mode 100755
index 000000000..473de5c84
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_concurrent_readers.ksh
@@ -0,0 +1,82 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Make sure that interleaving reads from different readers does not affect the
+# results that are returned.
+#
+# STRATEGY:
+# 1. Make sure a few debug messages have been logged.
+# 2. Open the procfs file and start reading from it.
+# 3. Open the file again, and read its entire contents.
+# 4. Resume reading from the first instance.
+# 5. Check that the contents read by the two instances are identical.
+#
+
+#
+# Exit handler: remove whichever temporary output files were created and
+# destroy $FS (recursively) if it still exists.
+#
+function cleanup
+{
+	if [[ -n $msgs1 ]]; then
+		log_must rm $msgs1
+	fi
+	if [[ -n $msgs2 ]]; then
+		log_must rm $msgs2
+	fi
+
+	# A bare return hands back datasetexists' status when there is
+	# nothing left to destroy.
+	datasetexists $FS || return
+	log_must zfs destroy -r $FS
+}
+
+typeset -r ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
+typeset -r FS=$TESTPOOL/fs
+typeset msgs1 msgs2
+
+log_onexit cleanup
+
+# Clear out old messages
+echo 0 >$ZFS_DBGMSG || log_fail "failed to write to $ZFS_DBGMSG"
+
+# Add some new messages
+log_must zfs create $FS
+for i in {1..20}; do
+ log_must zfs snapshot "$FS@testsnapshot$i"
+done
+log_must zpool sync $TESTPOOL
+
+# Scratch files for the two readers' output; removed again by cleanup.
+msgs1=$(mktemp) || log_fail
+msgs2=$(mktemp) || log_fail
+
+#
+# Start reading file, pause and read it from another process, and then finish
+# reading.
+#
+# The redirections on the group open $ZFS_DBGMSG once and share that open
+# file between dd and the final cat, so the final cat resumes at the offset
+# where dd stopped and both write into $msgs2. The middle cat opens the file
+# by name, giving it an independent reader whose full output goes to $msgs1.
+#
+{ dd bs=512 count=4; cat $ZFS_DBGMSG >$msgs1; cat; } <$ZFS_DBGMSG >$msgs2
+
+#
+# Truncate the result of the read that completed second in case it picked up an
+# extra message that was logged after the first read completed.
+#
+log_must truncate -s $(stat -c "%s" $msgs1) $msgs2
+
+log_must diff $msgs1 $msgs2
+
+log_pass "Concurrent readers receive identical results"
diff --git a/tests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh b/tests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh
new file mode 100755
index 000000000..c363e7f8b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/procfs/procfs_list_stale_read.ksh
@@ -0,0 +1,98 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Make sure errors caused by messages being dropped from the list backing the
+# procfs file are handled gracefully.
+#
+# STRATEGY:
+# 1. Make sure a few entries have been logged.
+# 2. Open the procfs file and start reading from it.
+# 3. Write to the file to cause its contents to be dropped.
+# 4. Resume reading from the first instance, and check that the expected
+# error is received.
+# 5. Repeat steps 1-4, except instead of dropping all the messages by writing
+# to the file, cause enough new messages to be written that the old messages
+# are dropped.
+#
+
+#
+# Exit handler: restore the zfs_txg_history module parameter to the value it
+# had when the test started.
+#
+function cleanup
+{
+	#
+	# The handler is registered before the original value is read, so an
+	# early failure can get here with nothing saved. Writing an empty
+	# value to the parameter would be wrong, so there is nothing to
+	# restore in that case.
+	#
+	[[ -n $default_max_entries ]] || return 0
+
+	echo $default_max_entries >$MAX_ENTRIES_PARAM || log_fail
+}
+
+#
+# Run "zpool sync $TESTPOOL" $1 times, failing the test (via log_must) if any
+# sync fails. Always returns 0 otherwise.
+#
+# $1 - number of syncs to perform
+#
+function sync_n
+{
+	typeset -i count=$1
+	typeset -i i
+
+	#
+	# Use a local arithmetic counter rather than a {1..$1} brace range:
+	# the range counts downward for 0 ({1..0} expands to "1 0"), and the
+	# untyped loop variable would leak into the caller's scope.
+	#
+	for ((i = 0; i < count; i++)); do
+		log_must zpool sync $TESTPOOL
+	done
+	return 0
+}
+
+#
+# Run one stale-read scenario against $TXG_HIST.
+#
+# $1 - shell command, run through eval while the file is held open part-way
+#      through a read, that is expected to invalidate the reader's position.
+#
+# The file is cleared and repopulated with sync_n 20, up to 4 x 512 bytes are
+# consumed with dd, $1 is run, and the remainder is read with cat on the same
+# open file; cat's error output must contain "Input/output error".
+#
+function do_test
+{
+ typeset cmd=$1
+
+ # Clear out old entries
+ echo 0 >$TXG_HIST || log_fail
+
+ # Add some new entries
+ sync_n 20
+
+ # Confirm that there actually is something in the file.
+ [[ $(wc -l <$TXG_HIST) -ge 20 ]] || log_fail "expected more entries"
+
+ #
+ # Start reading file, pause and run a command that will cause the
+ # current offset into the file to become invalid, and then try to
+ # finish reading.
+ #
+ # All three commands inherit the group's single open of $TXG_HIST as
+ # stdin, so cat continues from wherever dd stopped. For cat, "2>&1"
+ # comes before ">/dev/null" so that its stderr goes down the pipe to
+ # grep while its stdout is discarded.
+ #
+ {
+ log_must dd bs=512 count=4 >/dev/null
+ log_must eval "$cmd"
+ cat 2>&1 >/dev/null | log_must grep "Input/output error"
+ } <$TXG_HIST
+}
+
+typeset -r TXG_HIST=/proc/spl/kstat/zfs/$TESTPOOL/txgs
+typeset MAX_ENTRIES_PARAM=/sys/module/zfs/parameters/zfs_txg_history
+typeset default_max_entries
+
+log_onexit cleanup
+
+#
+# Remember the current zfs_txg_history setting so cleanup can restore it, then
+# set it to 50. NOTE(review): presumably this caps the number of txg history
+# entries kept, so that the 60 syncs in the second scenario below displace
+# the 20 entries do_test creates first -- confirm against the module
+# parameter's documentation.
+#
+default_max_entries=$(cat $MAX_ENTRIES_PARAM) || log_fail
+echo 50 >$MAX_ENTRIES_PARAM || log_fail
+
+# Scenario 1: invalidate the reader by clearing all of the existing entries.
+do_test "echo 0 >$TXG_HIST"
+
+#
+# Scenario 2: add enough new entries to the list that all of the old ones are
+# dropped.
+#
+do_test "sync_n 60"
+
+log_pass "Attempting to read dropped message returns expected error"
diff --git a/tests/zfs-tests/tests/functional/kstat/setup.ksh b/tests/zfs-tests/tests/functional/procfs/setup.ksh
index 57717a096..b3812dbdc 100755
--- a/tests/zfs-tests/tests/functional/kstat/setup.ksh
+++ b/tests/zfs-tests/tests/functional/procfs/setup.ksh
@@ -19,16 +19,16 @@
#
# CDDL HEADER END
#
+
#
-# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
if ! is_linux ; then
- log_unsupported "/proc/spl/kstat/<pool>/health only supported on Linux"
+ log_unsupported "procfs is only used on Linux"
fi
default_mirror_setup $DISKS
-
log_pass