author    | Brian Behlendorf <[email protected]> | 2009-01-15 10:44:54 -0800
committer | Brian Behlendorf <[email protected]> | 2009-01-15 10:44:54 -0800
commit    | 617d5a673cd16aa91fa9668b94cc385094fae852 (patch)
tree      | 37c7e043f3599d458a3aa0e763363853c298fba3 /module
parent    | f6a19c0d37992755ed6b1b50344047537a1efe5c (diff)
Rename modules to module and update references
Diffstat (limited to 'module')
34 files changed, 13179 insertions, 0 deletions
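Among the files added below is a Solaris-style condition-variable shim (module/spl/spl-condvar.c) built on Linux wait queues, together with the mutex, kmem, taskq, and debug support it depends on. For context only, here is a minimal, hypothetical sketch of the caller-side pattern those primitives are meant to support. It is not part of this commit; it assumes the usual Solaris KPI names (cv_init/cv_wait/cv_signal, mutex_enter/mutex_exit) that the SPL headers map onto the __cv_*/mutex_* functions in the diff, and the example names (ex_lock, ex_cv, ex_ready) are invented for illustration.

```c
/*
 * Hypothetical consumer of the SPL condvar/mutex primitives added in
 * this commit; an illustrative sketch only, assuming the standard
 * Solaris KPI names provided by the SPL's sys/mutex.h and sys/condvar.h.
 */
#include <sys/mutex.h>
#include <sys/condvar.h>

static kmutex_t ex_lock;
static kcondvar_t ex_cv;
static int ex_ready;

static void ex_setup(void)
{
	mutex_init(&ex_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ex_cv, NULL, CV_DEFAULT, NULL);
}

/* Producer: set the flag and wake one waiter. */
static void ex_post(void)
{
	mutex_enter(&ex_lock);
	ex_ready = 1;
	cv_signal(&ex_cv);
	mutex_exit(&ex_lock);
}

/* Consumer: sleep until the flag is set. cv_wait() drops and
 * re-acquires the mutex around the sleep, matching the
 * prepare_to_wait()/mutex_exit()/schedule()/mutex_enter() sequence
 * in __cv_wait() below. */
static void ex_block(void)
{
	mutex_enter(&ex_lock);
	while (!ex_ready)
		cv_wait(&ex_cv, &ex_lock);
	mutex_exit(&ex_lock);
}
```

The `while` loop re-checking the predicate matters: waiters are queued exclusively, so cv_signal() makes only one of them runnable, and the condition must be re-tested after the mutex is re-acquired before proceeding.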
diff --git a/module/Makefile.in b/module/Makefile.in new file mode 100644 index 000000000..86f8951ae --- /dev/null +++ b/module/Makefile.in @@ -0,0 +1,10 @@ +subdir-m += spl +subdir-m += splat + +all: + $(MAKE) -C @LINUX@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ modules + +install uninstall clean distclean maintainer-clean distdir: + $(MAKE) -C @LINUX@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ $@ + +check: diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in new file mode 100644 index 000000000..f92945b00 --- /dev/null +++ b/module/spl/Makefile.in @@ -0,0 +1,51 @@ +# Makefile.in for spl kernel module + +MODULES := spl +DISTFILES = Makefile.in \ + spl-kmem.c spl-rwlock.c spl-taskq.c \ + spl-thread.c spl-generic.c +EXTRA_CFLAGS = @KERNELCPPFLAGS@ + +# Solaris porting layer module +obj-m := spl.o + +spl-objs += spl-debug.o +spl-objs += spl-proc.o +spl-objs += spl-kmem.o +spl-objs += spl-thread.o +spl-objs += spl-taskq.o +spl-objs += spl-rwlock.o +spl-objs += spl-vnode.o +spl-objs += spl-err.o +spl-objs += spl-time.o +spl-objs += spl-kobj.o +spl-objs += spl-module.o +spl-objs += spl-generic.o +spl-objs += spl-atomic.o +spl-objs += spl-mutex.o +spl-objs += spl-kstat.o +spl-objs += spl-condvar.o + +splmodule := spl.ko +splmoduledir := @kmoduledir@/kernel/lib/ + +install: + mkdir -p $(DESTDIR)$(splmoduledir) + $(INSTALL) -m 644 $(splmodule) $(DESTDIR)$(splmoduledir)/$(splmodule) + -/sbin/depmod -a + +uninstall: + rm -f $(DESTDIR)$(splmoduledir)/$(splmodule) + -/sbin/depmod -a + +clean: + -rm -f $(splmodule) *.o .*.cmd *.mod.c *.ko *.s */*.o + +distclean: clean + rm -f Makefile + rm -rf .tmp_versions + +maintainer-clean: distclean + +distdir: $(DISTFILES) + cp -p $(DISTFILES) $(distdir) diff --git a/module/spl/spl-atomic.c b/module/spl/spl-atomic.c new file mode 100644 index 000000000..40cdb06cc --- /dev/null +++ b/module/spl/spl-atomic.c @@ -0,0 +1,40 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/atomic.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_ATOMIC + +/* Global atomic lock declarations */ +spinlock_t atomic64_lock = SPIN_LOCK_UNLOCKED; +spinlock_t atomic32_lock = SPIN_LOCK_UNLOCKED; + +EXPORT_SYMBOL(atomic64_lock); +EXPORT_SYMBOL(atomic32_lock); diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c new file mode 100644 index 000000000..163f2a1de --- /dev/null +++ b/module/spl/spl-condvar.c @@ -0,0 +1,201 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. 
+ * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/condvar.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_CONDVAR + +void +__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) +{ + int flags = KM_SLEEP; + + ENTRY; + ASSERT(cvp); + ASSERT(name); + ASSERT(type == CV_DEFAULT); + ASSERT(arg == NULL); + + cvp->cv_magic = CV_MAGIC; + init_waitqueue_head(&cvp->cv_event); + spin_lock_init(&cvp->cv_lock); + atomic_set(&cvp->cv_waiters, 0); + cvp->cv_mutex = NULL; + cvp->cv_name = NULL; + cvp->cv_name_size = strlen(name) + 1; + + /* We may be called when there is a non-zero preempt_count or + * interrupts are disabled is which case we must not sleep. + */ + if (current_thread_info()->preempt_count || irqs_disabled()) + flags = KM_NOSLEEP; + + cvp->cv_name = kmem_alloc(cvp->cv_name_size, flags); + if (cvp->cv_name) + strcpy(cvp->cv_name, name); + + EXIT; +} +EXPORT_SYMBOL(__cv_init); + +void +__cv_destroy(kcondvar_t *cvp) +{ + ENTRY; + ASSERT(cvp); + ASSERT(cvp->cv_magic == CV_MAGIC); + spin_lock(&cvp->cv_lock); + ASSERT(atomic_read(&cvp->cv_waiters) == 0); + ASSERT(!waitqueue_active(&cvp->cv_event)); + + if (cvp->cv_name) + kmem_free(cvp->cv_name, cvp->cv_name_size); + + spin_unlock(&cvp->cv_lock); + memset(cvp, CV_POISON, sizeof(*cvp)); + EXIT; +} +EXPORT_SYMBOL(__cv_destroy); + +void +__cv_wait(kcondvar_t *cvp, kmutex_t *mp) +{ + DEFINE_WAIT(wait); + ENTRY; + + ASSERT(cvp); + ASSERT(mp); + ASSERT(cvp->cv_magic == CV_MAGIC); + spin_lock(&cvp->cv_lock); + ASSERT(mutex_owned(mp)); + + if (cvp->cv_mutex == NULL) + cvp->cv_mutex = mp; + + /* Ensure the same mutex is used by all callers */ + ASSERT(cvp->cv_mutex == mp); + spin_unlock(&cvp->cv_lock); + + prepare_to_wait_exclusive(&cvp->cv_event, &wait, + TASK_UNINTERRUPTIBLE); + atomic_inc(&cvp->cv_waiters); + + /* Mutex should be dropped after prepare_to_wait() this + * ensures we're linked in to the waiters list and avoids the + * race where 'cvp->cv_waiters > 0' but the list is empty. */ + mutex_exit(mp); + schedule(); + mutex_enter(mp); + + atomic_dec(&cvp->cv_waiters); + finish_wait(&cvp->cv_event, &wait); + EXIT; +} +EXPORT_SYMBOL(__cv_wait); + +/* 'expire_time' argument is an absolute wall clock time in jiffies. + * Return value is time left (expire_time - now) or -1 if timeout occurred. 
+ */ +clock_t +__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time) +{ + DEFINE_WAIT(wait); + clock_t time_left; + ENTRY; + + ASSERT(cvp); + ASSERT(mp); + ASSERT(cvp->cv_magic == CV_MAGIC); + spin_lock(&cvp->cv_lock); + ASSERT(mutex_owned(mp)); + + if (cvp->cv_mutex == NULL) + cvp->cv_mutex = mp; + + /* Ensure the same mutex is used by all callers */ + ASSERT(cvp->cv_mutex == mp); + spin_unlock(&cvp->cv_lock); + + /* XXX - Does not handle jiffie wrap properly */ + time_left = expire_time - jiffies; + if (time_left <= 0) + RETURN(-1); + + prepare_to_wait_exclusive(&cvp->cv_event, &wait, + TASK_UNINTERRUPTIBLE); + atomic_inc(&cvp->cv_waiters); + + /* Mutex should be dropped after prepare_to_wait() this + * ensures we're linked in to the waiters list and avoids the + * race where 'cvp->cv_waiters > 0' but the list is empty. */ + mutex_exit(mp); + time_left = schedule_timeout(time_left); + mutex_enter(mp); + + atomic_dec(&cvp->cv_waiters); + finish_wait(&cvp->cv_event, &wait); + + RETURN(time_left > 0 ? time_left : -1); +} +EXPORT_SYMBOL(__cv_timedwait); + +void +__cv_signal(kcondvar_t *cvp) +{ + ENTRY; + ASSERT(cvp); + ASSERT(cvp->cv_magic == CV_MAGIC); + + /* All waiters are added with WQ_FLAG_EXCLUSIVE so only one + * waiter will be set runable with each call to wake_up(). + * Additionally wake_up() holds a spin_lock assoicated with + * the wait queue to ensure we don't race waking up processes. */ + if (atomic_read(&cvp->cv_waiters) > 0) + wake_up(&cvp->cv_event); + + EXIT; +} +EXPORT_SYMBOL(__cv_signal); + +void +__cv_broadcast(kcondvar_t *cvp) +{ + ASSERT(cvp); + ASSERT(cvp->cv_magic == CV_MAGIC); + ENTRY; + + /* Wake_up_all() will wake up all waiters even those which + * have the WQ_FLAG_EXCLUSIVE flag set. */ + if (atomic_read(&cvp->cv_waiters) > 0) + wake_up_all(&cvp->cv_event); + + EXIT; +} +EXPORT_SYMBOL(__cv_broadcast); diff --git a/module/spl/spl-debug.c b/module/spl/spl-debug.c new file mode 100644 index 000000000..df22cbc48 --- /dev/null +++ b/module/spl/spl-debug.c @@ -0,0 +1,1276 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * This file was originally part of Lustre, http://www.lustre.org. + * but has subsequently been adapted for use in the SPL in + * accordance with the GPL. + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Zach Brown <[email protected]> + * Phil Schwan <[email protected]> + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/kmod.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/kthread.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <sys/sysmacros.h> +#include <sys/proc.h> +#include <sys/debug.h> +#include <spl-ctl.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_DEBUG + +unsigned long spl_debug_subsys = ~0; +EXPORT_SYMBOL(spl_debug_subsys); +module_param(spl_debug_subsys, long, 0644); +MODULE_PARM_DESC(spl_debug_subsys, "Subsystem debugging level mask."); + +unsigned long spl_debug_mask = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE); +EXPORT_SYMBOL(spl_debug_mask); +module_param(spl_debug_mask, long, 0644); +MODULE_PARM_DESC(spl_debug_mask, "Debugging level mask."); + +unsigned long spl_debug_printk = D_CANTMASK; +EXPORT_SYMBOL(spl_debug_printk); +module_param(spl_debug_printk, long, 0644); +MODULE_PARM_DESC(spl_debug_printk, "Console printk level mask."); + +int spl_debug_mb = -1; +EXPORT_SYMBOL(spl_debug_mb); +module_param(spl_debug_mb, int, 0644); +MODULE_PARM_DESC(spl_debug_mb, "Total debug buffer size."); + +unsigned int spl_debug_binary = 1; +EXPORT_SYMBOL(spl_debug_binary); + +unsigned int spl_debug_catastrophe; +EXPORT_SYMBOL(spl_debug_catastrophe); + +unsigned int spl_debug_panic_on_bug = 1; +EXPORT_SYMBOL(spl_debug_panic_on_bug); +module_param(spl_debug_panic_on_bug, int, 0644); +MODULE_PARM_DESC(spl_debug_panic_on_bug, "Panic on BUG"); + +static char spl_debug_file_name[PATH_MAX]; +char spl_debug_file_path[PATH_MAX] = "/var/dumps/spl-log"; + +unsigned int spl_console_ratelimit = 1; +EXPORT_SYMBOL(spl_console_ratelimit); + +long spl_console_max_delay; +EXPORT_SYMBOL(spl_console_max_delay); + +long spl_console_min_delay; +EXPORT_SYMBOL(spl_console_min_delay); + +unsigned int spl_console_backoff = SPL_DEFAULT_BACKOFF; +EXPORT_SYMBOL(spl_console_backoff); + +unsigned int spl_debug_stack; +EXPORT_SYMBOL(spl_debug_stack); + +static int spl_panic_in_progress; + +union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS] __cacheline_aligned; +char *trace_console_buffers[NR_CPUS][3]; +struct rw_semaphore trace_sem; +atomic_t trace_tage_allocated = ATOMIC_INIT(0); + +static int spl_debug_dump_all_pages(dumplog_priv_t *dp, char *); +static void trace_fini(void); + + +/* Memory percentage breakdown by type */ +static unsigned int pages_factor[TCD_TYPE_MAX] = { + 80, /* 80% pages for TCD_TYPE_PROC */ + 10, /* 10% pages for TCD_TYPE_SOFTIRQ */ + 10 /* 10% pages for TCD_TYPE_IRQ */ +}; + +const char * +spl_debug_subsys2str(int subsys) +{ + switch (subsys) { + default: + return NULL; + case S_UNDEFINED: + return "undefined"; + case S_ATOMIC: + return "atomic"; + case S_KOBJ: + return "kobj"; + case S_VNODE: + return "vnode"; + case S_TIME: + return "time"; + case S_RWLOCK: + return "rwlock"; + case S_THREAD: + return "thread"; + case S_CONDVAR: + return "condvar"; + case S_MUTEX: + return "mutex"; + case S_RNG: + return "rng"; + case S_TASKQ: + return "taskq"; + case S_KMEM: + return "kmem"; + case S_DEBUG: + return "debug"; + case S_GENERIC: + return "generic"; + case S_PROC: + return "proc"; + case S_MODULE: + return "module"; + } +} + +const char * +spl_debug_dbg2str(int debug) +{ + switch (debug) { + default: + return NULL; + case D_TRACE: + return "trace"; + case D_INFO: + return "info"; + case D_WARNING: + return "warning"; + case D_ERROR: + return "error"; + case D_EMERG: + return "emerg"; + 
case D_CONSOLE: + return "console"; + case D_IOCTL: + return "ioctl"; + case D_DPRINTF: + return "dprintf"; + case D_OTHER: + return "other"; + } +} + +int +spl_debug_mask2str(char *str, int size, unsigned long mask, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : + spl_debug_dbg2str; + const char *token; + int i, bit, len = 0; + + if (mask == 0) { /* "0" */ + if (size > 0) + str[0] = '0'; + len = 1; + } else { /* space-separated tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + if ((mask & bit) == 0) + continue; + + token = fn(bit); + if (token == NULL) /* unused bit */ + continue; + + if (len > 0) { /* separator? */ + if (len < size) + str[len] = ' '; + len++; + } + + while (*token != 0) { + if (len < size) + str[len] = *token; + token++; + len++; + } + } + } + + /* terminate 'str' */ + if (len < size) + str[len] = 0; + else + str[size - 1] = 0; + + return len; +} + +static int +spl_debug_token2mask(int *mask, const char *str, int len, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : + spl_debug_dbg2str; + const char *token; + int i, j, bit; + + /* match against known tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + token = fn(bit); + if (token == NULL) /* unused? */ + continue; + + /* strcasecmp */ + for (j = 0; ; j++) { + if (j == len) { /* end of token */ + if (token[j] == 0) { + *mask = bit; + return 0; + } + break; + } + + if (token[j] == 0) + break; + + if (str[j] == token[j]) + continue; + + if (str[j] < 'A' || 'Z' < str[j]) + break; + + if (str[j] - 'A' + 'a' != token[j]) + break; + } + } + + return -EINVAL; /* no match */ +} + +int +spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys) +{ + char op = 0; + int m = 0, matched, n, t; + + /* Allow a number for backwards compatibility */ + for (n = strlen(str); n > 0; n--) + if (!isspace(str[n-1])) + break; + matched = n; + + if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && matched == n) { + *mask = m; + return 0; + } + + /* <str> must be a list of debug tokens or numbers separated by + * whitespace and optionally an operator ('+' or '-'). If an operator + * appears first in <str>, '*mask' is used as the starting point + * (relative), otherwise 0 is used (absolute). An operator applies to + * all following tokens up to the next operator. 
*/ + matched = 0; + while (*str != 0) { + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) + break; + + if (*str == '+' || *str == '-') { + op = *str++; + + /* op on first token == relative */ + if (!matched) + m = *mask; + + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) /* trailing op */ + return -EINVAL; + } + + /* find token length */ + for (n = 0; str[n] != 0 && !isspace(str[n]); n++); + + /* match token */ + if (spl_debug_token2mask(&t, str, n, is_subsys) != 0) + return -EINVAL; + + matched = 1; + if (op == '-') + m &= ~t; + else + m |= t; + + str += n; + } + + if (!matched) + return -EINVAL; + + *mask = m; + return 0; +} + +static void +spl_debug_dumplog_internal(dumplog_priv_t *dp) +{ + void *journal_info; + + journal_info = current->journal_info; + current->journal_info = NULL; + + snprintf(spl_debug_file_name, sizeof(spl_debug_file_path) - 1, + "%s.%ld.%ld", spl_debug_file_path, + get_seconds(), (long)dp->dp_pid); + printk(KERN_ALERT "SPL: dumping log to %s\n", spl_debug_file_name); + spl_debug_dump_all_pages(dp, spl_debug_file_name); + + current->journal_info = journal_info; +} + +static int +spl_debug_dumplog_thread(void *arg) +{ + dumplog_priv_t *dp = (dumplog_priv_t *)arg; + + spl_debug_dumplog_internal(dp); + atomic_set(&dp->dp_done, 1); + wake_up(&dp->dp_waitq); + complete_and_exit(NULL, 0); + + return 0; /* Unreachable */ +} + +/* When flag is set do not use a new thread for the debug dump */ +int +spl_debug_dumplog(int flags) +{ + struct task_struct *tsk; + dumplog_priv_t dp; + + init_waitqueue_head(&dp.dp_waitq); + dp.dp_pid = current->pid; + dp.dp_flags = flags; + atomic_set(&dp.dp_done, 0); + + if (dp.dp_flags & DL_NOTHREAD) { + spl_debug_dumplog_internal(&dp); + } else { + + tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug"); + if (tsk == NULL) + return -ENOMEM; + + wake_up_process(tsk); + wait_event(dp.dp_waitq, atomic_read(&dp.dp_done)); + } + + return 0; +} +EXPORT_SYMBOL(spl_debug_dumplog); + +static char * +trace_get_console_buffer(void) +{ + int cpu = get_cpu(); + int idx; + + if (in_irq()) { + idx = 0; + } else if (in_softirq()) { + idx = 1; + } else { + idx = 2; + } + + return trace_console_buffers[cpu][idx]; +} + +static void +trace_put_console_buffer(char *buffer) +{ + put_cpu(); +} + +static int +trace_lock_tcd(struct trace_cpu_data *tcd) +{ + __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); + + spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); + + return 1; +} + +static void +trace_unlock_tcd(struct trace_cpu_data *tcd) +{ + __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); + + spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); +} + +static struct trace_cpu_data * +trace_get_tcd(void) +{ + int cpu; + struct trace_cpu_data *tcd; + + cpu = get_cpu(); + if (in_irq()) + tcd = &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd; + else if (in_softirq()) + tcd = &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd; + else + tcd = &(*trace_data[TCD_TYPE_PROC])[cpu].tcd; + + trace_lock_tcd(tcd); + + return tcd; +} + +static void +trace_put_tcd (struct trace_cpu_data *tcd) +{ + trace_unlock_tcd(tcd); + + put_cpu(); +} + +static void +trace_set_debug_header(struct spl_debug_header *header, int subsys, + int mask, const int line, unsigned long stack) +{ + struct timeval tv; + + do_gettimeofday(&tv); + + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = current->pid; + 
header->ph_line_num = line; + + return; +} + +static void +trace_print_to_console(struct spl_debug_header *hdr, int mask, const char *buf, + int len, const char *file, const char *fn) +{ + char *prefix = "SPL", *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = "SPLError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "SPLError"; + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = "SPL"; + ptype = KERN_WARNING; + } else if ((mask & (D_CONSOLE | spl_debug_printk)) != 0) { + prefix = "SPL"; + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, + hdr->ph_pid, hdr->ph_stack, file, + hdr->ph_line_num, fn, len, buf); + } + + return; +} + +static int +trace_max_debug_mb(void) +{ + return MAX(512, ((num_physpages >> (20 - PAGE_SHIFT)) * 80) / 100); +} + +static struct trace_page * +tage_alloc(int gfp) +{ + struct page *page; + struct trace_page *tage; + + page = alloc_pages(gfp | __GFP_NOWARN, 0); + if (page == NULL) + return NULL; + + tage = kmalloc(sizeof(*tage), gfp); + if (tage == NULL) { + __free_pages(page, 0); + return NULL; + } + + tage->page = page; + atomic_inc(&trace_tage_allocated); + + return tage; +} + +static void +tage_free(struct trace_page *tage) +{ + __ASSERT(tage != NULL); + __ASSERT(tage->page != NULL); + + __free_pages(tage->page, 0); + kfree(tage); + atomic_dec(&trace_tage_allocated); +} + +static struct trace_page * +tage_from_list(struct list_head *list) +{ + return list_entry(list, struct trace_page, linkage); +} + +static void +tage_to_tail(struct trace_page *tage, struct list_head *queue) +{ + __ASSERT(tage != NULL); + __ASSERT(queue != NULL); + + list_move_tail(&tage->linkage, queue); +} + +/* try to return a page that has 'len' bytes left at the end */ +static struct trace_page * +trace_get_tage_try(struct trace_cpu_data *tcd, unsigned long len) +{ + struct trace_page *tage; + + if (tcd->tcd_cur_pages > 0) { + __ASSERT(!list_empty(&tcd->tcd_pages)); + tage = tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= PAGE_SIZE) + return tage; + } + + if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { + if (tcd->tcd_cur_stock_pages > 0) { + tage = tage_from_list(tcd->tcd_stock_pages.prev); + tcd->tcd_cur_stock_pages--; + list_del_init(&tage->linkage); + } else { + tage = tage_alloc(GFP_ATOMIC); + if (tage == NULL) { + printk(KERN_WARNING + "failure to allocate a tage (%ld)\n", + tcd->tcd_cur_pages); + return NULL; + } + } + + tage->used = 0; + tage->cpu = smp_processor_id(); + tage->type = tcd->tcd_type; + list_add_tail(&tage->linkage, &tcd->tcd_pages); + tcd->tcd_cur_pages++; + + return tage; + } + + return NULL; +} + +/* return a page that has 'len' bytes left at the end */ +static struct trace_page * +trace_get_tage(struct trace_cpu_data *tcd, unsigned long len) +{ + struct trace_page *tage; + + __ASSERT(len <= PAGE_SIZE); + + tage = trace_get_tage_try(tcd, len); + if (tage) + return tage; + + if (tcd->tcd_cur_pages > 0) { + tage = tage_from_list(tcd->tcd_pages.next); + tage->used = 0; + tage_to_tail(tage, &tcd->tcd_pages); + } + + return tage; +} + +int +spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, const char *format2, ...) 
+{ + struct trace_cpu_data *tcd = NULL; + struct spl_debug_header header = { 0, }; + struct trace_page *tage; + /* string_buf is used only if tcd != NULL, and is always set then */ + char *string_buf = NULL; + char *debug_buf; + int known_size; + int needed = 85; /* average message length */ + int max_nob; + va_list ap; + int i; + int remain; + + if (strchr(file, '/')) + file = strrchr(file, '/') + 1; + + trace_set_debug_header(&header, subsys, mask, line, CDEBUG_STACK()); + + tcd = trace_get_tcd(); + if (tcd == NULL) + goto console; + + if (tcd->tcd_shutting_down) { + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + known_size = strlen(file) + 1; + if (fn) + known_size += strlen(fn) + 1; + + if (spl_debug_binary) + known_size += sizeof(header); + + /* '2' used because vsnprintf returns real size required for output + * _without_ terminating NULL. */ + for (i = 0; i < 2; i++) { + tage = trace_get_tage(tcd, needed + known_size + 1); + if (tage == NULL) { + if (needed + known_size > PAGE_SIZE) + mask |= D_ERROR; + + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + string_buf = (char *)page_address(tage->page) + + tage->used + known_size; + + max_nob = PAGE_SIZE - tage->used - known_size; + if (max_nob <= 0) { + printk(KERN_EMERG "negative max_nob: %i\n", max_nob); + mask |= D_ERROR; + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, max_nob, format1, ap); + va_end(ap); + } + + if (format2) { + remain = max_nob - needed; + if (remain < 0) + remain = 0; + + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + + if (needed < max_nob) + break; + } + + if (unlikely(*(string_buf + needed - 1) != '\n')) + printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", + file, line, fn); + + header.ph_len = known_size + needed; + debug_buf = (char *)page_address(tage->page) + tage->used; + + if (spl_debug_binary) { + memcpy(debug_buf, &header, sizeof(header)); + tage->used += sizeof(header); + debug_buf += sizeof(header); + } + + strcpy(debug_buf, file); + tage->used += strlen(file) + 1; + debug_buf += strlen(file) + 1; + + if (fn) { + strcpy(debug_buf, fn); + tage->used += strlen(fn) + 1; + debug_buf += strlen(fn) + 1; + } + + __ASSERT(debug_buf == string_buf); + + tage->used += needed; + __ASSERT (tage->used <= PAGE_SIZE); + +console: + if ((mask & spl_debug_printk) == 0) { + /* no console output requested */ + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (cdls != NULL) { + if (spl_console_ratelimit && cdls->cdls_next != 0 && + !time_before(cdls->cdls_next, jiffies)) { + /* skipping a console message */ + cdls->cdls_count++; + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (time_before(cdls->cdls_next + spl_console_max_delay + + (10 * HZ), jiffies)) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= spl_console_backoff * 4; + } else { + cdls->cdls_delay *= spl_console_backoff; + + if (cdls->cdls_delay < spl_console_min_delay) + cdls->cdls_delay = spl_console_min_delay; + else if (cdls->cdls_delay > spl_console_max_delay) + cdls->cdls_delay = spl_console_max_delay; + } + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1; + } + + if (tcd != NULL) { + trace_print_to_console(&header, mask, string_buf, needed, file, fn); + trace_put_tcd(tcd); + } else { + string_buf = trace_get_console_buffer(); + + needed = 0; + if (format1 != NULL) { 
+ va_copy(ap, args); + needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap); + va_end(ap); + } + if (format2 != NULL) { + remain = TRACE_CONSOLE_BUFFER_SIZE - needed; + if (remain > 0) { + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + } + trace_print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + } + + if (cdls != NULL && cdls->cdls_count != 0) { + string_buf = trace_get_console_buffer(); + + needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, + "Skipped %d previous similar message%s\n", + cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); + + trace_print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + cdls->cdls_count = 0; + } + + return 0; +} +EXPORT_SYMBOL(spl_debug_vmsg); + +/* Do the collect_pages job on a single CPU: assumes that all other + * CPUs have been stopped during a panic. If this isn't true for + * some arch, this will have to be implemented separately in each arch. + */ +static void +collect_pages_from_single_cpu(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + int i, j; + + tcd_for_each(tcd, i, j) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + } +} + +static void +collect_pages_on_all_cpus(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + int i, cpu; + + spin_lock(&pc->pc_lock); + for_each_possible_cpu(cpu) { + tcd_for_each_type_lock(tcd, i, cpu) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + } + } + spin_unlock(&pc->pc_lock); +} + +static void +collect_pages(dumplog_priv_t *dp, struct page_collection *pc) +{ + INIT_LIST_HEAD(&pc->pc_pages); + + if (spl_panic_in_progress || dp->dp_flags & DL_SINGLE_CPU) + collect_pages_from_single_cpu(pc); + else + collect_pages_on_all_cpus(pc); +} + +static void +put_pages_back_on_all_cpus(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + struct list_head *cur_head; + struct trace_page *tage; + struct trace_page *tmp; + int i, cpu; + + spin_lock(&pc->pc_lock); + + for_each_possible_cpu(cpu) { + tcd_for_each_type_lock(tcd, i, cpu) { + cur_head = tcd->tcd_pages.next; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, + linkage) { + + __ASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != cpu || tage->type != i) + continue; + + tage_to_tail(tage, cur_head); + tcd->tcd_cur_pages++; + } + } + } + + spin_unlock(&pc->pc_lock); +} + +static void +put_pages_back(struct page_collection *pc) +{ + if (!spl_panic_in_progress) + put_pages_back_on_all_cpus(pc); +} + +static struct file * +trace_filp_open (const char *name, int flags, int mode, int *err) +{ + struct file *filp = NULL; + int rc; + + filp = filp_open(name, flags, mode); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + printk(KERN_ERR "SPL: Can't open %s file: %d\n", name, rc); + if (err) + *err = rc; + filp = NULL; + } + return filp; +} + +#define trace_filp_write(fp, b, s, p) (fp)->f_op->write((fp), (b), (s), p) +#define trace_filp_fsync(fp) (fp)->f_op->fsync((fp),(fp)->f_dentry,1) +#define trace_filp_close(f) filp_close(f, NULL) +#define trace_filp_poff(f) (&(f)->f_pos) + +static int +spl_debug_dump_all_pages(dumplog_priv_t *dp, char *filename) +{ + struct page_collection pc; + struct file *filp; + struct trace_page *tage; + struct trace_page *tmp; + mm_segment_t oldfs; + int rc = 0; + + down_write(&trace_sem); + + filp = trace_filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, + 
0600, &rc); + if (filp == NULL) { + if (rc != -EEXIST) + printk(KERN_ERR "SPL: Can't open %s for dump: %d\n", + filename, rc); + goto out; + } + + spin_lock_init(&pc.pc_lock); + collect_pages(dp, &pc); + if (list_empty(&pc.pc_pages)) { + rc = 0; + goto close; + } + + oldfs = get_fs(); + set_fs(get_ds()); + + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + + rc = trace_filp_write(filp, page_address(tage->page), + tage->used, trace_filp_poff(filp)); + if (rc != (int)tage->used) { + printk(KERN_WARNING "SPL: Wanted to write %u " + "but wrote %d\n", tage->used, rc); + put_pages_back(&pc); + __ASSERT(list_empty(&pc.pc_pages)); + break; + } + list_del(&tage->linkage); + tage_free(tage); + } + + set_fs(oldfs); + + rc = trace_filp_fsync(filp); + if (rc) + printk(KERN_ERR "SPL: Unable to sync: %d\n", rc); + close: + trace_filp_close(filp); + out: + up_write(&trace_sem); + + return rc; +} + +static void +spl_debug_flush_pages(void) +{ + dumplog_priv_t dp; + struct page_collection pc; + struct trace_page *tage; + struct trace_page *tmp; + + spin_lock_init(&pc.pc_lock); + init_waitqueue_head(&dp.dp_waitq); + dp.dp_pid = current->pid; + dp.dp_flags = 0; + atomic_set(&dp.dp_done, 0); + + collect_pages(&dp, &pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + list_del(&tage->linkage); + tage_free(tage); + } +} + +unsigned long +spl_debug_set_mask(unsigned long mask) { + spl_debug_mask = mask; + return 0; +} +EXPORT_SYMBOL(spl_debug_set_mask); + +unsigned long +spl_debug_get_mask(void) { + return spl_debug_mask; +} +EXPORT_SYMBOL(spl_debug_get_mask); + +unsigned long +spl_debug_set_subsys(unsigned long subsys) { + spl_debug_subsys = subsys; + return 0; +} +EXPORT_SYMBOL(spl_debug_set_subsys); + +unsigned long +spl_debug_get_subsys(void) { + return spl_debug_subsys; +} +EXPORT_SYMBOL(spl_debug_get_subsys); + +int +spl_debug_set_mb(int mb) +{ + int i, j, pages; + int limit = trace_max_debug_mb(); + struct trace_cpu_data *tcd; + + if (mb < num_possible_cpus()) { + printk(KERN_ERR "SPL: Refusing to set debug buffer size to " + "%dMB - lower limit is %d\n", mb, num_possible_cpus()); + return -EINVAL; + } + + if (mb > limit) { + printk(KERN_ERR "SPL: Refusing to set debug buffer size to " + "%dMB - upper limit is %d\n", mb, limit); + return -EINVAL; + } + + mb /= num_possible_cpus(); + pages = mb << (20 - PAGE_SHIFT); + + down_write(&trace_sem); + + tcd_for_each(tcd, i, j) + tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; + + up_write(&trace_sem); + + return 0; +} +EXPORT_SYMBOL(spl_debug_set_mb); + +int +spl_debug_get_mb(void) +{ + int i, j; + struct trace_cpu_data *tcd; + int total_pages = 0; + + down_read(&trace_sem); + + tcd_for_each(tcd, i, j) + total_pages += tcd->tcd_max_pages; + + up_read(&trace_sem); + + return (total_pages >> (20 - PAGE_SHIFT)) + 1; +} +EXPORT_SYMBOL(spl_debug_get_mb); + +void spl_debug_dumpstack(struct task_struct *tsk) +{ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + + printk(KERN_ERR "SPL: Showing stack for process %d\n", tsk->pid); + dump_stack(); +} +EXPORT_SYMBOL(spl_debug_dumpstack); + +void spl_debug_bug(char *file, const char *func, const int line, int flags) +{ + spl_debug_catastrophe = 1; + spl_debug_msg(NULL, 0, D_EMERG, file, func, line, "SBUG\n"); + + if (in_interrupt()) { + panic("SBUG in interrupt.\n"); + /* not reached */ + } + + /* Ensure all debug pages and dumped by current cpu */ + if (spl_debug_panic_on_bug) + 
spl_panic_in_progress = 1; + +#ifdef DEBUG + spl_debug_dumpstack(NULL); + spl_debug_dumplog(flags); +#endif + + if (spl_debug_panic_on_bug) + panic("SBUG"); + + set_task_state(current, TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +EXPORT_SYMBOL(spl_debug_bug); + +int +spl_debug_clear_buffer(void) +{ + spl_debug_flush_pages(); + return 0; +} +EXPORT_SYMBOL(spl_debug_clear_buffer); + +int +spl_debug_mark_buffer(char *text) +{ + CDEBUG(D_WARNING, "*************************************\n"); + CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); + CDEBUG(D_WARNING, "*************************************\n"); + + return 0; +} +EXPORT_SYMBOL(spl_debug_mark_buffer); + +static int +trace_init(int max_pages) +{ + struct trace_cpu_data *tcd; + int i, j; + + init_rwsem(&trace_sem); + + /* initialize trace_data */ + memset(trace_data, 0, sizeof(trace_data)); + for (i = 0; i < TCD_TYPE_MAX; i++) { + trace_data[i] = kmalloc(sizeof(union trace_data_union) * + NR_CPUS, GFP_KERNEL); + if (trace_data[i] == NULL) + goto out; + } + + tcd_for_each(tcd, i, j) { + spin_lock_init(&tcd->tcd_lock); + tcd->tcd_pages_factor = pages_factor[i]; + tcd->tcd_type = i; + tcd->tcd_cpu = j; + INIT_LIST_HEAD(&tcd->tcd_pages); + INIT_LIST_HEAD(&tcd->tcd_stock_pages); + tcd->tcd_cur_pages = 0; + tcd->tcd_cur_stock_pages = 0; + tcd->tcd_max_pages = (max_pages * pages_factor[i]) / 100; + tcd->tcd_shutting_down = 0; + } + + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < 3; j++) { + trace_console_buffers[i][j] = + kmalloc(TRACE_CONSOLE_BUFFER_SIZE, + GFP_KERNEL); + + if (trace_console_buffers[i][j] == NULL) + goto out; + } + } + + return 0; +out: + trace_fini(); + printk(KERN_ERR "SPL: Insufficient memory for debug logs\n"); + return -ENOMEM; +} + +int +debug_init(void) +{ + int rc, max = spl_debug_mb; + + spl_console_max_delay = SPL_DEFAULT_MAX_DELAY; + spl_console_min_delay = SPL_DEFAULT_MIN_DELAY; + + /* If spl_debug_mb is set to an invalid value or uninitialized + * then just make the total buffers smp_num_cpus TCD_MAX_PAGES */ + if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || + max >= 512 || max < 0) { + max = TCD_MAX_PAGES; + } else { + max = (max / num_online_cpus()) << (20 - PAGE_SHIFT); + } + + rc = trace_init(max); + if (rc) + return rc; + + return rc; +} + +static void +trace_cleanup_on_all_cpus(void) +{ + struct trace_cpu_data *tcd; + struct trace_page *tage; + struct trace_page *tmp; + int i, cpu; + + for_each_possible_cpu(cpu) { + tcd_for_each_type_lock(tcd, i, cpu) { + tcd->tcd_shutting_down = 1; + + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, + linkage) { + __ASSERT_TAGE_INVARIANT(tage); + + list_del(&tage->linkage); + tage_free(tage); + } + tcd->tcd_cur_pages = 0; + } + } +} + +static void +trace_fini(void) +{ + int i, j; + + trace_cleanup_on_all_cpus(); + + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < 3; j++) { + if (trace_console_buffers[i][j] != NULL) { + kfree(trace_console_buffers[i][j]); + trace_console_buffers[i][j] = NULL; + } + } + } + + for (i = 0; trace_data[i] != NULL; i++) { + kfree(trace_data[i]); + trace_data[i] = NULL; + } +} + +void +debug_fini(void) +{ + trace_fini(); +} diff --git a/module/spl/spl-err.c b/module/spl/spl-err.c new file mode 100644 index 000000000..c4508dfa2 --- /dev/null +++ b/module/spl/spl-err.c @@ -0,0 +1,78 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/sysmacros.h> +#include <sys/cmn_err.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_GENERIC + +#ifndef NDEBUG +static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; +static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; +#endif + +void +vpanic(const char *fmt, va_list ap) +{ + char msg[MAXMSGLEN]; + + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + panic(msg); +} /* vpanic() */ +EXPORT_SYMBOL(vpanic); + +void +cmn_err(int ce, const char *fmt, ...) +{ + char msg[MAXMSGLEN]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + va_end(ap); + + CERROR("%s", msg); +} /* cmn_err() */ +EXPORT_SYMBOL(cmn_err); + +void +vcmn_err(int ce, const char *fmt, va_list ap) +{ + char msg[MAXMSGLEN]; + + if (ce == CE_PANIC) + vpanic(fmt, ap); + + if (ce != CE_NOTE) { /* suppress noise in stress testing */ + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + CERROR("%s%s%s", ce_prefix[ce], msg, ce_suffix[ce]); + } +} /* vcmn_err() */ +EXPORT_SYMBOL(vcmn_err); diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c new file mode 100644 index 000000000..c09d9d4e3 --- /dev/null +++ b/module/spl/spl-generic.c @@ -0,0 +1,328 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/sysmacros.h> +#include <sys/vmsystm.h> +#include <sys/vnode.h> +#include <sys/kmem.h> +#include <sys/mutex.h> +#include <sys/taskq.h> +#include <sys/debug.h> +#include <sys/proc.h> +#include <sys/kstat.h> +#include <sys/utsname.h> +#include <linux/kmod.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_GENERIC + +char spl_version[16] = "SPL v" VERSION; + +long spl_hostid = 0; +EXPORT_SYMBOL(spl_hostid); + +char hw_serial[11] = "<none>"; +EXPORT_SYMBOL(hw_serial); + +int p0 = 0; +EXPORT_SYMBOL(p0); + +vmem_t *zio_alloc_arena = NULL; +EXPORT_SYMBOL(zio_alloc_arena); + +int +highbit(unsigned long i) +{ + register int h = 1; + ENTRY; + + if (i == 0) + RETURN(0); +#if BITS_PER_LONG == 64 + if (i & 0xffffffff00000000ul) { + h += 32; i >>= 32; + } +#endif + if (i & 0xffff0000) { + h += 16; i >>= 16; + } + if (i & 0xff00) { + h += 8; i >>= 8; + } + if (i & 0xf0) { + h += 4; i >>= 4; + } + if (i & 0xc) { + h += 2; i >>= 2; + } + if (i & 0x2) { + h += 1; + } + RETURN(h); +} +EXPORT_SYMBOL(highbit); + +/* + * Implementation of 64 bit division for 32-bit machines. + */ +#if BITS_PER_LONG == 32 +uint64_t __udivdi3(uint64_t dividend, uint64_t divisor) +{ +#ifdef HAVE_DIV64_64 + return div64_64(dividend, divisor); +#else + /* Taken from a 2.6.24 kernel. */ + uint32_t high, d; + + high = divisor >> 32; + if (high) { + unsigned int shift = fls(high); + + d = divisor >> shift; + dividend >>= shift; + } else + d = divisor; + + do_div(dividend, d); + + return dividend; +#endif +} +EXPORT_SYMBOL(__udivdi3); + +/* + * Implementation of 64 bit modulo for 32-bit machines. + */ +uint64_t __umoddi3(uint64_t dividend, uint64_t divisor) +{ + return dividend - divisor * (dividend / divisor); +} +EXPORT_SYMBOL(__umoddi3); +#endif + +/* NOTE: The strtoxx behavior is solely based on my reading of the Solaris + * ddi_strtol(9F) man page. I have not verified the behavior of these + * functions against their Solaris counterparts. It is possible that I + * may have misinterpretted the man page or the man page is incorrect. + */ +int ddi_strtoul(const char *, char **, int, unsigned long *); +int ddi_strtol(const char *, char **, int, long *); +int ddi_strtoull(const char *, char **, int, unsigned long long *); +int ddi_strtoll(const char *, char **, int, long long *); + +#define define_ddi_strtoux(type, valtype) \ +int ddi_strtou##type(const char *str, char **endptr, \ + int base, valtype *result) \ +{ \ + valtype last_value, value = 0; \ + char *ptr = (char *)str; \ + int flag = 1, digit; \ + \ + if (strlen(ptr) == 0) \ + return EINVAL; \ + \ + /* Auto-detect base based on prefix */ \ + if (!base) { \ + if (str[0] == '0') { \ + if (tolower(str[1])=='x' && isxdigit(str[2])) { \ + base = 16; /* hex */ \ + ptr += 2; \ + } else if (str[1] >= '0' && str[1] < 8) { \ + base = 8; /* octal */ \ + ptr += 1; \ + } else { \ + return EINVAL; \ + } \ + } else { \ + base = 10; /* decimal */ \ + } \ + } \ + \ + while (1) { \ + if (isdigit(*ptr)) \ + digit = *ptr - '0'; \ + else if (isalpha(*ptr)) \ + digit = tolower(*ptr) - 'a' + 10; \ + else \ + break; \ + \ + if (digit >= base) \ + break; \ + \ + last_value = value; \ + value = value * base + digit; \ + if (last_value > value) /* Overflow */ \ + return ERANGE; \ + \ + flag = 1; \ + ptr++; \ + } \ + \ + if (flag) \ + *result = value; \ + \ + if (endptr) \ + *endptr = (char *)(flag ? 
ptr : str); \ + \ + return 0; \ +} \ + +#define define_ddi_strtox(type, valtype) \ +int ddi_strto##type(const char *str, char **endptr, \ + int base, valtype *result) \ +{ \ + int rc; \ + \ + if (*str == '-') { \ + rc = ddi_strtou##type(str + 1, endptr, base, result); \ + if (!rc) { \ + if (*endptr == str + 1) \ + *endptr = (char *)str; \ + else \ + *result = -*result; \ + } \ + } else { \ + rc = ddi_strtou##type(str, endptr, base, result); \ + } \ + \ + return rc; \ +} + +define_ddi_strtoux(l, unsigned long) +define_ddi_strtox(l, long) +define_ddi_strtoux(ll, unsigned long long) +define_ddi_strtox(ll, long long) + +EXPORT_SYMBOL(ddi_strtoul); +EXPORT_SYMBOL(ddi_strtol); +EXPORT_SYMBOL(ddi_strtoll); +EXPORT_SYMBOL(ddi_strtoull); + +struct new_utsname *__utsname(void) +{ +#ifdef HAVE_INIT_UTSNAME + return init_utsname(); +#else + return &system_utsname; +#endif +} +EXPORT_SYMBOL(__utsname); + +static int +set_hostid(void) +{ + char sh_path[] = "/bin/sh"; + char *argv[] = { sh_path, + "-c", + "/usr/bin/hostid >/proc/sys/kernel/spl/hostid", + NULL }; + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + /* Doing address resolution in the kernel is tricky and just + * not a good idea in general. So to set the proper 'hw_serial' + * use the usermodehelper support to ask '/bin/sh' to run + * '/usr/bin/hostid' and redirect the result to /proc/sys/spl/hostid + * for us to use. It's a horific solution but it will do for now. + */ + return call_usermodehelper(sh_path, argv, envp, 1); +} + +static int __init spl_init(void) +{ + int rc = 0; + + if ((rc = debug_init())) + return rc; + + if ((rc = spl_kmem_init())) + GOTO(out , rc); + + if ((rc = spl_mutex_init())) + GOTO(out2 , rc); + + if ((rc = spl_taskq_init())) + GOTO(out3, rc); + + if ((rc = vn_init())) + GOTO(out4, rc); + + if ((rc = proc_init())) + GOTO(out5, rc); + + if ((rc = kstat_init())) + GOTO(out6, rc); + + if ((rc = set_hostid())) + GOTO(out7, rc = -EADDRNOTAVAIL); + + printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION); + RETURN(rc); +out7: + kstat_fini(); +out6: + proc_fini(); +out5: + vn_fini(); +out4: + spl_taskq_fini(); +out3: + spl_mutex_fini(); +out2: + spl_kmem_fini(); +out: + debug_fini(); + + printk("SPL: Failed to Load Solaris Porting Layer v%s, " + "rc = %d\n", VERSION, rc); + return rc; +} + +static void spl_fini(void) +{ + ENTRY; + + printk("SPL: Unloaded Solaris Porting Layer v%s\n", VERSION); + kstat_fini(); + proc_fini(); + vn_fini(); + spl_taskq_fini(); + spl_mutex_fini(); + spl_kmem_fini(); + debug_fini(); +} + +module_init(spl_init); +module_exit(spl_fini); + +MODULE_AUTHOR("Lawrence Livermore National Labs"); +MODULE_DESCRIPTION("Solaris Porting Layer"); +MODULE_LICENSE("GPL"); diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c new file mode 100644 index 000000000..e580fbbdd --- /dev/null +++ b/module/spl/spl-kmem.c @@ -0,0 +1,1452 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/kmem.h> + +#ifdef DEBUG_SUBSYSTEM +# undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_KMEM + +/* + * Memory allocation interfaces and debugging for basic kmem_* + * and vmem_* style memory allocation. When DEBUG_KMEM is enable + * all allocations will be tracked when they are allocated and + * freed. When the SPL module is unload a list of all leaked + * addresses and where they were allocated will be dumped to the + * console. Enabling this feature has a significant impant on + * performance but it makes finding memory leaks staight forward. + */ +#ifdef DEBUG_KMEM +/* Shim layer memory accounting */ +atomic64_t kmem_alloc_used = ATOMIC64_INIT(0); +unsigned long long kmem_alloc_max = 0; +atomic64_t vmem_alloc_used = ATOMIC64_INIT(0); +unsigned long long vmem_alloc_max = 0; +int kmem_warning_flag = 1; + +EXPORT_SYMBOL(kmem_alloc_used); +EXPORT_SYMBOL(kmem_alloc_max); +EXPORT_SYMBOL(vmem_alloc_used); +EXPORT_SYMBOL(vmem_alloc_max); +EXPORT_SYMBOL(kmem_warning_flag); + +# ifdef DEBUG_KMEM_TRACKING + +/* XXX - Not to surprisingly with debugging enabled the xmem_locks are very + * highly contended particularly on xfree(). If we want to run with this + * detailed debugging enabled for anything other than debugging we need to + * minimize the contention by moving to a lock per xmem_table entry model. + */ + +# define KMEM_HASH_BITS 10 +# define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS) + +# define VMEM_HASH_BITS 10 +# define VMEM_TABLE_SIZE (1 << VMEM_HASH_BITS) + +typedef struct kmem_debug { + struct hlist_node kd_hlist; /* Hash node linkage */ + struct list_head kd_list; /* List of all allocations */ + void *kd_addr; /* Allocation pointer */ + size_t kd_size; /* Allocation size */ + const char *kd_func; /* Allocation function */ + int kd_line; /* Allocation line */ +} kmem_debug_t; + +spinlock_t kmem_lock; +struct hlist_head kmem_table[KMEM_TABLE_SIZE]; +struct list_head kmem_list; + +spinlock_t vmem_lock; +struct hlist_head vmem_table[VMEM_TABLE_SIZE]; +struct list_head vmem_list; + +EXPORT_SYMBOL(kmem_lock); +EXPORT_SYMBOL(kmem_table); +EXPORT_SYMBOL(kmem_list); + +EXPORT_SYMBOL(vmem_lock); +EXPORT_SYMBOL(vmem_table); +EXPORT_SYMBOL(vmem_list); +# endif + +int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); } +#else +int kmem_set_warning(int flag) { return 0; } +#endif +EXPORT_SYMBOL(kmem_set_warning); + +/* + * Slab allocation interfaces + * + * While the Linux slab implementation was inspired by the Solaris + * implemenation I cannot use it to emulate the Solaris APIs. I + * require two features which are not provided by the Linux slab. + * + * 1) Constructors AND destructors. Recent versions of the Linux + * kernel have removed support for destructors. This is a deal + * breaker for the SPL which contains particularly expensive + * initializers for mutex's, condition variables, etc. We also + * require a minimal level of cleanup for these data types unlike + * many Linux data type which do need to be explicitly destroyed. + * + * 2) Virtual address space backed slab. 
Callers of the Solaris slab + * expect it to work well for both small are very large allocations. + * Because of memory fragmentation the Linux slab which is backed + * by kmalloc'ed memory performs very badly when confronted with + * large numbers of large allocations. Basing the slab on the + * virtual address space removes the need for contigeous pages + * and greatly improve performance for large allocations. + * + * For these reasons, the SPL has its own slab implementation with + * the needed features. It is not as highly optimized as either the + * Solaris or Linux slabs, but it should get me most of what is + * needed until it can be optimized or obsoleted by another approach. + * + * One serious concern I do have about this method is the relatively + * small virtual address space on 32bit arches. This will seriously + * constrain the size of the slab caches and their performance. + * + * XXX: Implement work requests to keep an eye on each cache and + * shrink them via spl_slab_reclaim() when they are wasting lots + * of space. Currently this process is driven by the reapers. + * + * XXX: Improve the partial slab list by carefully maintaining a + * strict ordering of fullest to emptiest slabs based on + * the slab reference count. This gaurentees the when freeing + * slabs back to the system we need only linearly traverse the + * last N slabs in the list to discover all the freeable slabs. + * + * XXX: NUMA awareness for optionally allocating memory close to a + * particular core. This can be adventageous if you know the slab + * object will be short lived and primarily accessed from one core. + * + * XXX: Slab coloring may also yield performance improvements and would + * be desirable to implement. + * + * XXX: Proper hardware cache alignment would be good too. + */ + +struct list_head spl_kmem_cache_list; /* List of caches */ +struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ + +static int spl_cache_flush(spl_kmem_cache_t *skc, + spl_kmem_magazine_t *skm, int flush); + +#ifdef HAVE_SET_SHRINKER +static struct shrinker *spl_kmem_cache_shrinker; +#else +static int spl_kmem_cache_generic_shrinker(int nr_to_scan, + unsigned int gfp_mask); +static struct shrinker spl_kmem_cache_shrinker = { + .shrink = spl_kmem_cache_generic_shrinker, + .seeks = KMC_DEFAULT_SEEKS, +}; +#endif + +#ifdef DEBUG_KMEM +# ifdef DEBUG_KMEM_TRACKING + +static kmem_debug_t * +kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, + void *addr) +{ + struct hlist_head *head; + struct hlist_node *node; + struct kmem_debug *p; + unsigned long flags; + ENTRY; + + spin_lock_irqsave(lock, flags); + + head = &table[hash_ptr(addr, bits)]; + hlist_for_each_entry_rcu(p, node, head, kd_hlist) { + if (p->kd_addr == addr) { + hlist_del_init(&p->kd_hlist); + list_del_init(&p->kd_list); + spin_unlock_irqrestore(lock, flags); + return p; + } + } + + spin_unlock_irqrestore(lock, flags); + + RETURN(NULL); +} + +void * +kmem_alloc_track(size_t size, int flags, const char *func, int line, + int node_alloc, int node) +{ + void *ptr = NULL; + kmem_debug_t *dptr; + unsigned long irq_flags; + ENTRY; + + dptr = (kmem_debug_t *) kmalloc(sizeof(kmem_debug_t), + flags & ~__GFP_ZERO); + + if (dptr == NULL) { + CWARN("kmem_alloc(%ld, 0x%x) debug failed\n", + sizeof(kmem_debug_t), flags); + } else { + /* Marked unlikely because we should never be doing this, + * we tolerate to up 2 pages but a single page is best. 
*/ + if (unlikely((size) > (PAGE_SIZE * 2)) && kmem_warning_flag) + CWARN("Large kmem_alloc(%llu, 0x%x) (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + + /* We use kstrdup() below because the string pointed to by + * __FUNCTION__ might not be available by the time we want + * to print it since the module might have been unloaded. */ + dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO); + if (unlikely(dptr->kd_func == NULL)) { + kfree(dptr); + CWARN("kstrdup() failed in kmem_alloc(%llu, 0x%x) " + "(%lld/%llu)\n", (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + goto out; + } + + /* Use the correct allocator */ + if (node_alloc) { + ASSERT(!(flags & __GFP_ZERO)); + ptr = kmalloc_node(size, flags, node); + } else if (flags & __GFP_ZERO) { + ptr = kzalloc(size, flags & ~__GFP_ZERO); + } else { + ptr = kmalloc(size, flags); + } + + if (unlikely(ptr == NULL)) { + kfree(dptr->kd_func); + kfree(dptr); + CWARN("kmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + goto out; + } + + atomic64_add(size, &kmem_alloc_used); + if (unlikely(atomic64_read(&kmem_alloc_used) > + kmem_alloc_max)) + kmem_alloc_max = + atomic64_read(&kmem_alloc_used); + + INIT_HLIST_NODE(&dptr->kd_hlist); + INIT_LIST_HEAD(&dptr->kd_list); + + dptr->kd_addr = ptr; + dptr->kd_size = size; + dptr->kd_line = line; + + spin_lock_irqsave(&kmem_lock, irq_flags); + hlist_add_head_rcu(&dptr->kd_hlist, + &kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]); + list_add_tail(&dptr->kd_list, &kmem_list); + spin_unlock_irqrestore(&kmem_lock, irq_flags); + + CDEBUG_LIMIT(D_INFO, "kmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, + ptr, atomic64_read(&kmem_alloc_used), + kmem_alloc_max); + } +out: + RETURN(ptr); +} +EXPORT_SYMBOL(kmem_alloc_track); + +void +kmem_free_track(void *ptr, size_t size) +{ + kmem_debug_t *dptr; + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr); + + ASSERT(dptr); /* Must exist in hash due to kmem_alloc() */ + + /* Size must match */ + ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " + "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size, + (unsigned long long) size, dptr->kd_func, dptr->kd_line); + + atomic64_sub(size, &kmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&kmem_alloc_used), + kmem_alloc_max); + + kfree(dptr->kd_func); + + memset(dptr, 0x5a, sizeof(kmem_debug_t)); + kfree(dptr); + + memset(ptr, 0x5a, size); + kfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(kmem_free_track); + +void * +vmem_alloc_track(size_t size, int flags, const char *func, int line) +{ + void *ptr = NULL; + kmem_debug_t *dptr; + unsigned long irq_flags; + ENTRY; + + ASSERT(flags & KM_SLEEP); + + dptr = (kmem_debug_t *) kmalloc(sizeof(kmem_debug_t), flags); + if (dptr == NULL) { + CWARN("vmem_alloc(%ld, 0x%x) debug failed\n", + sizeof(kmem_debug_t), flags); + } else { + /* We use kstrdup() below because the string pointed to by + * __FUNCTION__ might not be available by the time we want + * to print it, since the module might have been unloaded. 
*/ + dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO); + if (unlikely(dptr->kd_func == NULL)) { + kfree(dptr); + CWARN("kstrdup() failed in vmem_alloc(%llu, 0x%x) " + "(%lld/%llu)\n", (unsigned long long) size, flags, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + goto out; + } + + ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO, + PAGE_KERNEL); + + if (unlikely(ptr == NULL)) { + kfree(dptr->kd_func); + kfree(dptr); + CWARN("vmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + goto out; + } + + if (flags & __GFP_ZERO) + memset(ptr, 0, size); + + atomic64_add(size, &vmem_alloc_used); + if (unlikely(atomic64_read(&vmem_alloc_used) > + vmem_alloc_max)) + vmem_alloc_max = + atomic64_read(&vmem_alloc_used); + + INIT_HLIST_NODE(&dptr->kd_hlist); + INIT_LIST_HEAD(&dptr->kd_list); + + dptr->kd_addr = ptr; + dptr->kd_size = size; + dptr->kd_line = line; + + spin_lock_irqsave(&vmem_lock, irq_flags); + hlist_add_head_rcu(&dptr->kd_hlist, + &vmem_table[hash_ptr(ptr, VMEM_HASH_BITS)]); + list_add_tail(&dptr->kd_list, &vmem_list); + spin_unlock_irqrestore(&vmem_lock, irq_flags); + + CDEBUG_LIMIT(D_INFO, "vmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, + ptr, atomic64_read(&vmem_alloc_used), + vmem_alloc_max); + } +out: + RETURN(ptr); +} +EXPORT_SYMBOL(vmem_alloc_track); + +void +vmem_free_track(void *ptr, size_t size) +{ + kmem_debug_t *dptr; + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + dptr = kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr); + ASSERT(dptr); /* Must exist in hash due to vmem_alloc() */ + + /* Size must match */ + ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " + "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size, + (unsigned long long) size, dptr->kd_func, dptr->kd_line); + + atomic64_sub(size, &vmem_alloc_used); + CDEBUG_LIMIT(D_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&vmem_alloc_used), + vmem_alloc_max); + + kfree(dptr->kd_func); + + memset(dptr, 0x5a, sizeof(kmem_debug_t)); + kfree(dptr); + + memset(ptr, 0x5a, size); + vfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(vmem_free_track); + +# else /* DEBUG_KMEM_TRACKING */ + +void * +kmem_alloc_debug(size_t size, int flags, const char *func, int line, + int node_alloc, int node) +{ + void *ptr; + ENTRY; + + /* Marked unlikely because we should never be doing this, + * we tolerate to up 2 pages but a single page is best. 
*/ + if (unlikely(size > (PAGE_SIZE * 2)) && kmem_warning_flag) + CWARN("Large kmem_alloc(%llu, 0x%x) (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + + /* Use the correct allocator */ + if (node_alloc) { + ASSERT(!(flags & __GFP_ZERO)); + ptr = kmalloc_node(size, flags, node); + } else if (flags & __GFP_ZERO) { + ptr = kzalloc(size, flags & (~__GFP_ZERO)); + } else { + ptr = kmalloc(size, flags); + } + + if (ptr == NULL) { + CWARN("kmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + } else { + atomic64_add(size, &kmem_alloc_used); + if (unlikely(atomic64_read(&kmem_alloc_used) > kmem_alloc_max)) + kmem_alloc_max = atomic64_read(&kmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "kmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, ptr, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + } + RETURN(ptr); +} +EXPORT_SYMBOL(kmem_alloc_debug); + +void +kmem_free_debug(void *ptr, size_t size) +{ + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + atomic64_sub(size, &kmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&kmem_alloc_used), + kmem_alloc_max); + + memset(ptr, 0x5a, size); + kfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(kmem_free_debug); + +void * +vmem_alloc_debug(size_t size, int flags, const char *func, int line) +{ + void *ptr; + ENTRY; + + ASSERT(flags & KM_SLEEP); + + ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO, + PAGE_KERNEL); + if (ptr == NULL) { + CWARN("vmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + } else { + if (flags & __GFP_ZERO) + memset(ptr, 0, size); + + atomic64_add(size, &vmem_alloc_used); + + if (unlikely(atomic64_read(&vmem_alloc_used) > vmem_alloc_max)) + vmem_alloc_max = atomic64_read(&vmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "vmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, ptr, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + } + + RETURN(ptr); +} +EXPORT_SYMBOL(vmem_alloc_debug); + +void +vmem_free_debug(void *ptr, size_t size) +{ + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + atomic64_sub(size, &vmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&vmem_alloc_used), + vmem_alloc_max); + + memset(ptr, 0x5a, size); + vfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(vmem_free_debug); + +# endif /* DEBUG_KMEM_TRACKING */ +#endif /* DEBUG_KMEM */ + +static void * +kv_alloc(spl_kmem_cache_t *skc, int size, int flags) +{ + void *ptr; + + if (skc->skc_flags & KMC_KMEM) { + if (size > (2 * PAGE_SIZE)) { + ptr = (void *)__get_free_pages(flags, get_order(size)); + } else + ptr = kmem_alloc(size, flags); + } else { + ptr = vmem_alloc(size, flags); + } + + return ptr; +} + +static void +kv_free(spl_kmem_cache_t *skc, void *ptr, int size) +{ + if (skc->skc_flags & KMC_KMEM) { + if (size > (2 * PAGE_SIZE)) + free_pages((unsigned long)ptr, get_order(size)); + else + kmem_free(ptr, size); + } else { + vmem_free(ptr, size); + } +} + +static spl_kmem_slab_t * +spl_slab_alloc(spl_kmem_cache_t *skc, int flags) +{ + spl_kmem_slab_t *sks; + spl_kmem_obj_t *sko, *n; + void *base, *obj; + int i, size, rc = 0; + + /* It's important that we pack 
the spl_kmem_obj_t structure + * and the actual objects in to one large address space + * to minimize the number of calls to the allocator. It + * is far better to do a few large allocations and then + * subdivide it ourselves. Now which allocator we use + * requires balancling a few trade offs. + * + * For small objects we use kmem_alloc() because as long + * as you are only requesting a small number of pages + * (ideally just one) its cheap. However, when you start + * requesting multiple pages kmem_alloc() get increasingly + * expensive since it requires contigeous pages. For this + * reason we shift to vmem_alloc() for slabs of large + * objects which removes the need for contigeous pages. + * We do not use vmem_alloc() in all cases because there + * is significant locking overhead in __get_vm_area_node(). + * This function takes a single global lock when aquiring + * an available virtual address range which serialize all + * vmem_alloc()'s for all slab caches. Using slightly + * different allocation functions for small and large + * objects should give us the best of both worlds. + * + * sks struct: sizeof(spl_kmem_slab_t) + * obj data: skc->skc_obj_size + * obj struct: sizeof(spl_kmem_obj_t) + * <N obj data + obj structs> + * + * XXX: It would probably be a good idea to more carefully + * align these data structures in memory. + */ + base = kv_alloc(skc, skc->skc_slab_size, flags); + if (base == NULL) + RETURN(NULL); + + sks = (spl_kmem_slab_t *)base; + sks->sks_magic = SKS_MAGIC; + sks->sks_objs = skc->skc_slab_objs; + sks->sks_age = jiffies; + sks->sks_cache = skc; + INIT_LIST_HEAD(&sks->sks_list); + INIT_LIST_HEAD(&sks->sks_free_list); + sks->sks_ref = 0; + size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size; + + for (i = 0; i < sks->sks_objs; i++) { + if (skc->skc_flags & KMC_OFFSLAB) { + obj = kv_alloc(skc, size, flags); + if (!obj) + GOTO(out, rc = -ENOMEM); + } else { + obj = base + sizeof(spl_kmem_slab_t) + i * size; + } + + sko = obj + skc->skc_obj_size; + sko->sko_addr = obj; + sko->sko_magic = SKO_MAGIC; + sko->sko_slab = sks; + INIT_LIST_HEAD(&sko->sko_list); + list_add_tail(&sko->sko_list, &sks->sks_free_list); + } + + list_for_each_entry(sko, &sks->sks_free_list, sko_list) + if (skc->skc_ctor) + skc->skc_ctor(sko->sko_addr, skc->skc_private, flags); +out: + if (rc) { + if (skc->skc_flags & KMC_OFFSLAB) + list_for_each_entry_safe(sko,n,&sks->sks_free_list,sko_list) + kv_free(skc, sko->sko_addr, size); + + kv_free(skc, base, skc->skc_slab_size); + sks = NULL; + } + + RETURN(sks); +} + +/* Removes slab from complete or partial list, so it must + * be called with the 'skc->skc_lock' held. 
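 *
 * Note that because every object is laid out as <obj data><spl_kmem_obj_t>,
 * the per-object bookkeeping structure is always recoverable from a bare
 * object pointer with simple address arithmetic, as spl_cache_shrink()
 * does when an object is returned:
 *
 *   sko = obj + skc->skc_obj_size;
 *   sks = sko->sko_slab;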
+ */ +static void +spl_slab_free(spl_kmem_slab_t *sks) { + spl_kmem_cache_t *skc; + spl_kmem_obj_t *sko, *n; + int size; + ENTRY; + + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(sks->sks_ref == 0); + + skc = sks->sks_cache; + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(spin_is_locked(&skc->skc_lock)); + + skc->skc_obj_total -= sks->sks_objs; + skc->skc_slab_total--; + list_del(&sks->sks_list); + size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size; + + /* Run destructors slab is being released */ + list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { + ASSERT(sko->sko_magic == SKO_MAGIC); + + if (skc->skc_dtor) + skc->skc_dtor(sko->sko_addr, skc->skc_private); + + if (skc->skc_flags & KMC_OFFSLAB) + kv_free(skc, sko->sko_addr, size); + } + + kv_free(skc, sks, skc->skc_slab_size); + EXIT; +} + +static int +__spl_slab_reclaim(spl_kmem_cache_t *skc) +{ + spl_kmem_slab_t *sks, *m; + int rc = 0; + ENTRY; + + ASSERT(spin_is_locked(&skc->skc_lock)); + /* + * Free empty slabs which have not been touched in skc_delay + * seconds. This delay time is important to avoid thrashing. + * Empty slabs will be at the end of the skc_partial_list. + */ + list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list, + sks_list) { + if (sks->sks_ref > 0) + break; + + if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) { + spl_slab_free(sks); + rc++; + } + } + + /* Returns number of slabs reclaimed */ + RETURN(rc); +} + +static int +spl_slab_reclaim(spl_kmem_cache_t *skc) +{ + int rc; + ENTRY; + + spin_lock(&skc->skc_lock); + rc = __spl_slab_reclaim(skc); + spin_unlock(&skc->skc_lock); + + RETURN(rc); +} + +static int +spl_magazine_size(spl_kmem_cache_t *skc) +{ + int size; + ENTRY; + + /* Guesses for reasonable magazine sizes, they + * should really adapt based on observed usage. 
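+	 * For example, assuming 4K pages: objects of up to 1K get a 512
+	 * entry per-cpu magazine, an 8K object gets 64 entries, and a 2MB
+	 * object only 4.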
*/ + if (skc->skc_obj_size > (PAGE_SIZE * 256)) + size = 4; + else if (skc->skc_obj_size > (PAGE_SIZE * 32)) + size = 16; + else if (skc->skc_obj_size > (PAGE_SIZE)) + size = 64; + else if (skc->skc_obj_size > (PAGE_SIZE / 4)) + size = 128; + else + size = 512; + + RETURN(size); +} + +static spl_kmem_magazine_t * +spl_magazine_alloc(spl_kmem_cache_t *skc, int node) +{ + spl_kmem_magazine_t *skm; + int size = sizeof(spl_kmem_magazine_t) + + sizeof(void *) * skc->skc_mag_size; + ENTRY; + + skm = kmem_alloc_node(size, GFP_KERNEL, node); + if (skm) { + skm->skm_magic = SKM_MAGIC; + skm->skm_avail = 0; + skm->skm_size = skc->skc_mag_size; + skm->skm_refill = skc->skc_mag_refill; + if (!(skc->skc_flags & KMC_NOTOUCH)) + skm->skm_age = jiffies; + } + + RETURN(skm); +} + +static void +spl_magazine_free(spl_kmem_magazine_t *skm) +{ + int size = sizeof(spl_kmem_magazine_t) + + sizeof(void *) * skm->skm_size; + + ENTRY; + ASSERT(skm->skm_magic == SKM_MAGIC); + ASSERT(skm->skm_avail == 0); + + kmem_free(skm, size); + EXIT; +} + +static int +spl_magazine_create(spl_kmem_cache_t *skc) +{ + int i; + ENTRY; + + skc->skc_mag_size = spl_magazine_size(skc); + skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; + + for_each_online_cpu(i) { + skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i)); + if (!skc->skc_mag[i]) { + for (i--; i >= 0; i--) + spl_magazine_free(skc->skc_mag[i]); + + RETURN(-ENOMEM); + } + } + + RETURN(0); +} + +static void +spl_magazine_destroy(spl_kmem_cache_t *skc) +{ + spl_kmem_magazine_t *skm; + int i; + ENTRY; + + for_each_online_cpu(i) { + skm = skc->skc_mag[i]; + (void)spl_cache_flush(skc, skm, skm->skm_avail); + spl_magazine_free(skm); + } + + EXIT; +} + +spl_kmem_cache_t * +spl_kmem_cache_create(char *name, size_t size, size_t align, + spl_kmem_ctor_t ctor, + spl_kmem_dtor_t dtor, + spl_kmem_reclaim_t reclaim, + void *priv, void *vmp, int flags) +{ + spl_kmem_cache_t *skc; + uint32_t slab_max, slab_size, slab_objs; + int rc, kmem_flags = KM_SLEEP; + ENTRY; + + ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags); + ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags); + ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags); + + /* We may be called when there is a non-zero preempt_count or + * interrupts are disabled is which case we must not sleep. + */ + if (current_thread_info()->preempt_count || irqs_disabled()) + kmem_flags = KM_NOSLEEP; + + /* Allocate new cache memory and initialize. 
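+	 *
+	 * A typical consumer of this interface looks roughly like the
+	 * following sketch, where the cache name, my_obj_t, my_ctor and
+	 * my_dtor are all placeholders:
+	 *
+	 *   skc = spl_kmem_cache_create("my_cache", sizeof (my_obj_t), 0,
+	 *                               my_ctor, my_dtor, NULL, NULL, NULL, 0);
+	 *   obj = spl_kmem_cache_alloc(skc, KM_SLEEP);
+	 *   ...
+	 *   spl_kmem_cache_free(skc, obj);
+	 *   spl_kmem_cache_destroy(skc);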
*/ + skc = (spl_kmem_cache_t *)kmem_zalloc(sizeof(*skc), kmem_flags); + if (skc == NULL) + RETURN(NULL); + + skc->skc_magic = SKC_MAGIC; + skc->skc_name_size = strlen(name) + 1; + skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags); + if (skc->skc_name == NULL) { + kmem_free(skc, sizeof(*skc)); + RETURN(NULL); + } + strncpy(skc->skc_name, name, skc->skc_name_size); + + skc->skc_ctor = ctor; + skc->skc_dtor = dtor; + skc->skc_reclaim = reclaim; + skc->skc_private = priv; + skc->skc_vmp = vmp; + skc->skc_flags = flags; + skc->skc_obj_size = size; + skc->skc_delay = SPL_KMEM_CACHE_DELAY; + + INIT_LIST_HEAD(&skc->skc_list); + INIT_LIST_HEAD(&skc->skc_complete_list); + INIT_LIST_HEAD(&skc->skc_partial_list); + spin_lock_init(&skc->skc_lock); + skc->skc_slab_fail = 0; + skc->skc_slab_create = 0; + skc->skc_slab_destroy = 0; + skc->skc_slab_total = 0; + skc->skc_slab_alloc = 0; + skc->skc_slab_max = 0; + skc->skc_obj_total = 0; + skc->skc_obj_alloc = 0; + skc->skc_obj_max = 0; + + /* If none passed select a cache type based on object size */ + if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) { + if (skc->skc_obj_size < (PAGE_SIZE / 8)) { + skc->skc_flags |= KMC_KMEM; + } else { + skc->skc_flags |= KMC_VMEM; + } + } + + /* Size slabs properly so ensure they are not too large */ + slab_max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE; + if (skc->skc_flags & KMC_OFFSLAB) { + skc->skc_slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; + skc->skc_slab_size = sizeof(spl_kmem_slab_t); + ASSERT(skc->skc_obj_size < slab_max); + } else { + slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB + 1; + + do { + slab_objs--; + slab_size = sizeof(spl_kmem_slab_t) + slab_objs * + (skc->skc_obj_size+sizeof(spl_kmem_obj_t)); + } while (slab_size > slab_max); + + skc->skc_slab_objs = slab_objs; + skc->skc_slab_size = slab_size; + } + + rc = spl_magazine_create(skc); + if (rc) { + kmem_free(skc->skc_name, skc->skc_name_size); + kmem_free(skc, sizeof(*skc)); + RETURN(NULL); + } + + down_write(&spl_kmem_cache_sem); + list_add_tail(&skc->skc_list, &spl_kmem_cache_list); + up_write(&spl_kmem_cache_sem); + + RETURN(skc); +} +EXPORT_SYMBOL(spl_kmem_cache_create); + +void +spl_kmem_cache_destroy(spl_kmem_cache_t *skc) +{ + spl_kmem_slab_t *sks, *m; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + down_write(&spl_kmem_cache_sem); + list_del_init(&skc->skc_list); + up_write(&spl_kmem_cache_sem); + + spl_magazine_destroy(skc); + spin_lock(&skc->skc_lock); + + /* Validate there are no objects in use and free all the + * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. 
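+	 * It is the caller's responsibility to have returned every
+	 * outstanding object with spl_kmem_cache_free() before destroying
+	 * the cache; the ASSERTs below will trip otherwise.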
*/ + ASSERT(list_empty(&skc->skc_complete_list)); + ASSERT(skc->skc_slab_alloc == 0); + ASSERT(skc->skc_obj_alloc == 0); + + list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) + spl_slab_free(sks); + + ASSERT(skc->skc_slab_total == 0); + ASSERT(skc->skc_obj_total == 0); + + kmem_free(skc->skc_name, skc->skc_name_size); + spin_unlock(&skc->skc_lock); + + kmem_free(skc, sizeof(*skc)); + + EXIT; +} +EXPORT_SYMBOL(spl_kmem_cache_destroy); + +static void * +spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) +{ + spl_kmem_obj_t *sko; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(spin_is_locked(&skc->skc_lock)); + + sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list); + ASSERT(sko->sko_magic == SKO_MAGIC); + ASSERT(sko->sko_addr != NULL); + + /* Remove from sks_free_list */ + list_del_init(&sko->sko_list); + + sks->sks_age = jiffies; + sks->sks_ref++; + skc->skc_obj_alloc++; + + /* Track max obj usage statistics */ + if (skc->skc_obj_alloc > skc->skc_obj_max) + skc->skc_obj_max = skc->skc_obj_alloc; + + /* Track max slab usage statistics */ + if (sks->sks_ref == 1) { + skc->skc_slab_alloc++; + + if (skc->skc_slab_alloc > skc->skc_slab_max) + skc->skc_slab_max = skc->skc_slab_alloc; + } + + return sko->sko_addr; +} + +/* No available objects create a new slab. Since this is an + * expensive operation we do it without holding the spinlock + * and only briefly aquire it when we link in the fully + * allocated and constructed slab. + */ +static spl_kmem_slab_t * +spl_cache_grow(spl_kmem_cache_t *skc, int flags) +{ + spl_kmem_slab_t *sks; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + if (flags & __GFP_WAIT) { + flags |= __GFP_NOFAIL; + local_irq_enable(); + might_sleep(); + } + + sks = spl_slab_alloc(skc, flags); + if (sks == NULL) { + if (flags & __GFP_WAIT) + local_irq_disable(); + + RETURN(NULL); + } + + if (flags & __GFP_WAIT) + local_irq_disable(); + + /* Link the new empty slab in to the end of skc_partial_list */ + spin_lock(&skc->skc_lock); + skc->skc_slab_total++; + skc->skc_obj_total += sks->sks_objs; + list_add_tail(&sks->sks_list, &skc->skc_partial_list); + spin_unlock(&skc->skc_lock); + + RETURN(sks); +} + +static int +spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) +{ + spl_kmem_slab_t *sks; + int rc = 0, refill; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(skm->skm_magic == SKM_MAGIC); + + /* XXX: Check for refill bouncing by age perhaps */ + refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); + + spin_lock(&skc->skc_lock); + + while (refill > 0) { + /* No slabs available we must grow the cache */ + if (list_empty(&skc->skc_partial_list)) { + spin_unlock(&skc->skc_lock); + + sks = spl_cache_grow(skc, flags); + if (!sks) + GOTO(out, rc); + + /* Rescheduled to different CPU skm is not local */ + if (skm != skc->skc_mag[smp_processor_id()]) + GOTO(out, rc); + + /* Potentially rescheduled to the same CPU but + * allocations may have occured from this CPU while + * we were sleeping so recalculate max refill. */ + refill = MIN(refill, skm->skm_size - skm->skm_avail); + + spin_lock(&skc->skc_lock); + continue; + } + + /* Grab the next available slab */ + sks = list_entry((&skc->skc_partial_list)->next, + spl_kmem_slab_t, sks_list); + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(sks->sks_ref < sks->sks_objs); + ASSERT(!list_empty(&sks->sks_free_list)); + + /* Consume as many objects as needed to refill the requested + * cache. 
We must also be careful not to overfill it. */ + while (sks->sks_ref < sks->sks_objs && refill-- > 0 && ++rc) { + ASSERT(skm->skm_avail < skm->skm_size); + ASSERT(rc < skm->skm_size); + skm->skm_objs[skm->skm_avail++]=spl_cache_obj(skc,sks); + } + + /* Move slab to skc_complete_list when full */ + if (sks->sks_ref == sks->sks_objs) { + list_del(&sks->sks_list); + list_add(&sks->sks_list, &skc->skc_complete_list); + } + } + + spin_unlock(&skc->skc_lock); +out: + /* Returns the number of entries added to cache */ + RETURN(rc); +} + +static void +spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) +{ + spl_kmem_slab_t *sks = NULL; + spl_kmem_obj_t *sko = NULL; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(spin_is_locked(&skc->skc_lock)); + + sko = obj + skc->skc_obj_size; + ASSERT(sko->sko_magic == SKO_MAGIC); + + sks = sko->sko_slab; + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(sks->sks_cache == skc); + list_add(&sko->sko_list, &sks->sks_free_list); + + sks->sks_age = jiffies; + sks->sks_ref--; + skc->skc_obj_alloc--; + + /* Move slab to skc_partial_list when no longer full. Slabs + * are added to the head to keep the partial list is quasi-full + * sorted order. Fuller at the head, emptier at the tail. */ + if (sks->sks_ref == (sks->sks_objs - 1)) { + list_del(&sks->sks_list); + list_add(&sks->sks_list, &skc->skc_partial_list); + } + + /* Move emply slabs to the end of the partial list so + * they can be easily found and freed during reclamation. */ + if (sks->sks_ref == 0) { + list_del(&sks->sks_list); + list_add_tail(&sks->sks_list, &skc->skc_partial_list); + skc->skc_slab_alloc--; + } + + EXIT; +} + +static int +spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) +{ + int i, count = MIN(flush, skm->skm_avail); + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(skm->skm_magic == SKM_MAGIC); + + spin_lock(&skc->skc_lock); + + for (i = 0; i < count; i++) + spl_cache_shrink(skc, skm->skm_objs[i]); + +// __spl_slab_reclaim(skc); + skm->skm_avail -= count; + memmove(skm->skm_objs, &(skm->skm_objs[count]), + sizeof(void *) * skm->skm_avail); + + spin_unlock(&skc->skc_lock); + + RETURN(count); +} + +void * +spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) +{ + spl_kmem_magazine_t *skm; + unsigned long irq_flags; + void *obj = NULL; + int id; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(flags & KM_SLEEP); /* XXX: KM_NOSLEEP not yet supported */ + local_irq_save(irq_flags); + +restart: + /* Safe to update per-cpu structure without lock, but + * in the restart case we must be careful to reaquire + * the local magazine since this may have changed + * when we need to grow the cache. */ + id = smp_processor_id(); + ASSERTF(id < 4, "cache=%p smp_processor_id=%d\n", skc, id); + skm = skc->skc_mag[smp_processor_id()]; + ASSERTF(skm->skm_magic == SKM_MAGIC, "%x != %x: %s/%p/%p %x/%x/%x\n", + skm->skm_magic, SKM_MAGIC, skc->skc_name, skc, skm, + skm->skm_size, skm->skm_refill, skm->skm_avail); + + if (likely(skm->skm_avail)) { + /* Object available in CPU cache, use it */ + obj = skm->skm_objs[--skm->skm_avail]; + if (!(skc->skc_flags & KMC_NOTOUCH)) + skm->skm_age = jiffies; + } else { + /* Per-CPU cache empty, directly allocate from + * the slab and refill the per-CPU cache. 
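+		 * Note the jump back to 'restart' below rather than
+		 * consuming the refilled objects directly: growing the
+		 * cache may have slept, so we may now be running on a
+		 * different CPU and must look up the local magazine again.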
*/ + (void)spl_cache_refill(skc, skm, flags); + GOTO(restart, obj = NULL); + } + + local_irq_restore(irq_flags); + ASSERT(obj); + + /* Pre-emptively migrate object to CPU L1 cache */ + prefetchw(obj); + + RETURN(obj); +} +EXPORT_SYMBOL(spl_kmem_cache_alloc); + +void +spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) +{ + spl_kmem_magazine_t *skm; + unsigned long flags; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + local_irq_save(flags); + + /* Safe to update per-cpu structure without lock, but + * no remote memory allocation tracking is being performed + * it is entirely possible to allocate an object from one + * CPU cache and return it to another. */ + skm = skc->skc_mag[smp_processor_id()]; + ASSERT(skm->skm_magic == SKM_MAGIC); + + /* Per-CPU cache full, flush it to make space */ + if (unlikely(skm->skm_avail >= skm->skm_size)) + (void)spl_cache_flush(skc, skm, skm->skm_refill); + + /* Available space in cache, use it */ + skm->skm_objs[skm->skm_avail++] = obj; + + local_irq_restore(flags); + + EXIT; +} +EXPORT_SYMBOL(spl_kmem_cache_free); + +static int +spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask) +{ + spl_kmem_cache_t *skc; + + /* Under linux a shrinker is not tightly coupled with a slab + * cache. In fact linux always systematically trys calling all + * registered shrinker callbacks until its target reclamation level + * is reached. Because of this we only register one shrinker + * function in the shim layer for all slab caches. And we always + * attempt to shrink all caches when this generic shrinker is called. + */ + down_read(&spl_kmem_cache_sem); + + list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) + spl_kmem_cache_reap_now(skc); + + up_read(&spl_kmem_cache_sem); + + /* XXX: Under linux we should return the remaining number of + * entries in the cache. We should do this as well. + */ + return 1; +} + +void +spl_kmem_cache_reap_now(spl_kmem_cache_t *skc) +{ + spl_kmem_magazine_t *skm; + int i; + ENTRY; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + if (skc->skc_reclaim) + skc->skc_reclaim(skc->skc_private); + + /* Ensure per-CPU caches which are idle gradually flush */ + for_each_online_cpu(i) { + skm = skc->skc_mag[i]; + + if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ)) + (void)spl_cache_flush(skc, skm, skm->skm_refill); + } + + spl_slab_reclaim(skc); + + EXIT; +} +EXPORT_SYMBOL(spl_kmem_cache_reap_now); + +void +spl_kmem_reap(void) +{ + spl_kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL); +} +EXPORT_SYMBOL(spl_kmem_reap); + +#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING) +static char * +spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) +{ + int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size; + int i, flag = 1; + + ASSERT(str != NULL && len >= 17); + memset(str, 0, len); + + /* Check for a fully printable string, and while we are at + * it place the printable characters in the passed buffer. */ + for (i = 0; i < size; i++) { + str[i] = ((char *)(kd->kd_addr))[i]; + if (isprint(str[i])) { + continue; + } else { + /* Minimum number of printable characters found + * to make it worthwhile to print this as ascii. 
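+		 * With the minimum of 8 passed in by spl_kmem_fini_tracking()
+		 * a leaked buffer whose leading bytes are printable text is
+		 * reported as that text, while anything else falls back to a
+		 * hex dump of its first bytes.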
*/ + if (i > min) + break; + + flag = 0; + break; + } + } + + if (!flag) { + sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x", + *((uint8_t *)kd->kd_addr), + *((uint8_t *)kd->kd_addr + 2), + *((uint8_t *)kd->kd_addr + 4), + *((uint8_t *)kd->kd_addr + 6), + *((uint8_t *)kd->kd_addr + 8), + *((uint8_t *)kd->kd_addr + 10), + *((uint8_t *)kd->kd_addr + 12), + *((uint8_t *)kd->kd_addr + 14)); + } + + return str; +} + +static int +spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size) +{ + int i; + ENTRY; + + spin_lock_init(lock); + INIT_LIST_HEAD(list); + + for (i = 0; i < size; i++) + INIT_HLIST_HEAD(&kmem_table[i]); + + RETURN(0); +} + +static void +spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) +{ + unsigned long flags; + kmem_debug_t *kd; + char str[17]; + ENTRY; + + spin_lock_irqsave(lock, flags); + if (!list_empty(list)) + printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address", + "size", "data", "func", "line"); + + list_for_each_entry(kd, list, kd_list) + printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", kd->kd_addr, + kd->kd_size, spl_sprintf_addr(kd, str, 17, 8), + kd->kd_func, kd->kd_line); + + spin_unlock_irqrestore(lock, flags); + EXIT; +} +#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ +#define spl_kmem_init_tracking(list, lock, size) +#define spl_kmem_fini_tracking(list, lock) +#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ + +int +spl_kmem_init(void) +{ + int rc = 0; + ENTRY; + + init_rwsem(&spl_kmem_cache_sem); + INIT_LIST_HEAD(&spl_kmem_cache_list); + +#ifdef HAVE_SET_SHRINKER + spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS, + spl_kmem_cache_generic_shrinker); + if (spl_kmem_cache_shrinker == NULL) + RETURN(rc = -ENOMEM); +#else + register_shrinker(&spl_kmem_cache_shrinker); +#endif + +#ifdef DEBUG_KMEM + atomic64_set(&kmem_alloc_used, 0); + atomic64_set(&vmem_alloc_used, 0); + + spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE); + spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE); +#endif + RETURN(rc); +} + +void +spl_kmem_fini(void) +{ +#ifdef DEBUG_KMEM + /* Display all unreclaimed memory addresses, including the + * allocation size and the first few bytes of what's located + * at that address to aid in debugging. Performance is not + * a serious concern here since it is module unload time. */ + if (atomic64_read(&kmem_alloc_used) != 0) + CWARN("kmem leaked %ld/%ld bytes\n", + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + + + if (atomic64_read(&vmem_alloc_used) != 0) + CWARN("vmem leaked %ld/%ld bytes\n", + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + + spl_kmem_fini_tracking(&kmem_list, &kmem_lock); + spl_kmem_fini_tracking(&vmem_list, &vmem_lock); +#endif /* DEBUG_KMEM */ + ENTRY; + +#ifdef HAVE_SET_SHRINKER + remove_shrinker(spl_kmem_cache_shrinker); +#else + unregister_shrinker(&spl_kmem_cache_shrinker); +#endif + + EXIT; +} diff --git a/module/spl/spl-kobj.c b/module/spl/spl-kobj.c new file mode 100644 index 000000000..e78cd9244 --- /dev/null +++ b/module/spl/spl-kobj.c @@ -0,0 +1,93 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/kobj.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_KOBJ + +struct _buf * +kobj_open_file(const char *name) +{ + struct _buf *file; + vnode_t *vp; + int rc; + ENTRY; + + file = kmalloc(sizeof(_buf_t), GFP_KERNEL); + if (file == NULL) + RETURN((_buf_t *)-1UL); + + if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) { + kfree(file); + RETURN((_buf_t *)-1UL); + } + + file->vp = vp; + + RETURN(file); +} /* kobj_open_file() */ +EXPORT_SYMBOL(kobj_open_file); + +void +kobj_close_file(struct _buf *file) +{ + ENTRY; + VOP_CLOSE(file->vp, 0, 0, 0, 0, 0); + VN_RELE(file->vp); + kfree(file); + EXIT; +} /* kobj_close_file() */ +EXPORT_SYMBOL(kobj_close_file); + +int +kobj_read_file(struct _buf *file, char *buf, ssize_t size, offset_t off) +{ + ENTRY; + RETURN(vn_rdwr(UIO_READ, file->vp, buf, size, off, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL)); +} /* kobj_read_file() */ +EXPORT_SYMBOL(kobj_read_file); + +int +kobj_get_filesize(struct _buf *file, uint64_t *size) +{ + vattr_t vap; + int rc; + ENTRY; + + rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL); + if (rc) + RETURN(rc); + + *size = vap.va_size; + + RETURN(rc); +} /* kobj_get_filesize() */ +EXPORT_SYMBOL(kobj_get_filesize); diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c new file mode 100644 index 000000000..bb6e9a504 --- /dev/null +++ b/module/spl/spl-kstat.c @@ -0,0 +1,496 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/kstat.h> + +#ifdef DEBUG_KSTAT + +static spinlock_t kstat_lock; +static struct list_head kstat_list; +static kid_t kstat_id; + +static void +kstat_seq_show_headers(struct seq_file *f) +{ + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + + seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n", + ksp->ks_kid, ksp->ks_type, ksp->ks_flags, + ksp->ks_ndata, (int)ksp->ks_data_size, + ksp->ks_crtime, ksp->ks_snaptime); + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + seq_printf(f, "raw data"); + break; + case KSTAT_TYPE_NAMED: + seq_printf(f, "%-31s %-4s %s\n", + "name", "type", "data"); + break; + case KSTAT_TYPE_INTR: + seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n", + "hard", "soft", "watchdog", + "spurious", "multsvc"); + break; + case KSTAT_TYPE_IO: + seq_printf(f, + "%-8s %-8s %-8s %-8s %-8s %-8s " + "%-8s %-8s %-8s %-8s %-8s %-8s\n", + "nread", "nwritten", "reads", "writes", + "wtime", "wlentime", "wupdate", + "rtime", "rlentime", "rupdate", + "wcnt", "rcnt"); + break; + case KSTAT_TYPE_TIMER: + seq_printf(f, + "%-31s %-8s " + "%-8s %-8s %-8s %-8s %-8s\n", + "name", "events", "elapsed", + "min", "max", "start", "stop"); + break; + default: + SBUG(); /* Unreachable */ + } +} + +static int +kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l) +{ + int i, j; + + for (i = 0; ; i++) { + seq_printf(f, "%03x:", i); + + for (j = 0; j < 16; j++) { + if (i * 16 + j >= l) { + seq_printf(f, "\n"); + goto out; + } + + seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]); + } + seq_printf(f, "\n"); + } +out: + return 0; +} + +static int +kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp) +{ + seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type); + + switch (knp->data_type) { + case KSTAT_DATA_CHAR: + knp->value.c[15] = '\0'; /* NULL terminate */ + seq_printf(f, "%-16s", knp->value.c); + break; + /* XXX - We need to be more careful able what tokens are + * used for each arch, for now this is correct for x86_64. 
+ */ + case KSTAT_DATA_INT32: + seq_printf(f, "%d", knp->value.i32); + break; + case KSTAT_DATA_UINT32: + seq_printf(f, "%u", knp->value.ui32); + break; + case KSTAT_DATA_INT64: + seq_printf(f, "%lld", (signed long long)knp->value.i64); + break; + case KSTAT_DATA_UINT64: + seq_printf(f, "%llu", (unsigned long long)knp->value.ui64); + break; + case KSTAT_DATA_LONG: + seq_printf(f, "%ld", knp->value.l); + break; + case KSTAT_DATA_ULONG: + seq_printf(f, "%lu", knp->value.ul); + break; + case KSTAT_DATA_STRING: + KSTAT_NAMED_STR_PTR(knp) + [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0'; + seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp)); + break; + default: + SBUG(); /* Unreachable */ + } + + seq_printf(f, "\n"); + + return 0; +} + +static int +kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip) +{ + seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n", + kip->intrs[KSTAT_INTR_HARD], + kip->intrs[KSTAT_INTR_SOFT], + kip->intrs[KSTAT_INTR_WATCHDOG], + kip->intrs[KSTAT_INTR_SPURIOUS], + kip->intrs[KSTAT_INTR_MULTSVC]); + + return 0; +} + +static int +kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip) +{ + seq_printf(f, + "%-8llu %-8llu %-8u %-8u %-8lld %-8lld " + "%-8lld %-8lld %-8lld %-8lld %-8u %-8u\n", + kip->nread, kip->nwritten, + kip->reads, kip->writes, + kip->wtime, kip->wlentime, kip->wlastupdate, + kip->rtime, kip->wlentime, kip->rlastupdate, + kip->wcnt, kip->rcnt); + + return 0; +} + +static int +kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp) +{ + seq_printf(f, + "%-31s %-8llu %-8lld %-8lld %-8lld %-8lld %-8lld\n", + ktp->name, ktp->num_events, ktp->elapsed_time, + ktp->min_time, ktp->max_time, + ktp->start_time, ktp->stop_time); + + return 0; +} + +static int +kstat_seq_show(struct seq_file *f, void *p) +{ + kstat_t *ksp = (kstat_t *)f->private; + int rc = 0; + + ASSERT(ksp->ks_magic == KS_MAGIC); + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + ASSERT(ksp->ks_ndata == 1); + rc = kstat_seq_show_raw(f, ksp->ks_data, + ksp->ks_data_size); + break; + case KSTAT_TYPE_NAMED: + rc = kstat_seq_show_named(f, (kstat_named_t *)p); + break; + case KSTAT_TYPE_INTR: + rc = kstat_seq_show_intr(f, (kstat_intr_t *)p); + break; + case KSTAT_TYPE_IO: + rc = kstat_seq_show_io(f, (kstat_io_t *)p); + break; + case KSTAT_TYPE_TIMER: + rc = kstat_seq_show_timer(f, (kstat_timer_t *)p); + break; + default: + SBUG(); /* Unreachable */ + } + + return rc; +} + +static void * +kstat_seq_data_addr(kstat_t *ksp, loff_t n) +{ + void *rc = NULL; + ENTRY; + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + rc = ksp->ks_data; + break; + case KSTAT_TYPE_NAMED: + rc = ksp->ks_data + n * sizeof(kstat_named_t); + break; + case KSTAT_TYPE_INTR: + rc = ksp->ks_data + n * sizeof(kstat_intr_t); + break; + case KSTAT_TYPE_IO: + rc = ksp->ks_data + n * sizeof(kstat_io_t); + break; + case KSTAT_TYPE_TIMER: + rc = ksp->ks_data + n * sizeof(kstat_timer_t); + break; + default: + SBUG(); /* Unreachable */ + } + + RETURN(rc); +} + +static void * +kstat_seq_start(struct seq_file *f, loff_t *pos) +{ + loff_t n = *pos; + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + ENTRY; + + spin_lock(&ksp->ks_lock); + ksp->ks_snaptime = gethrtime(); + + if (!n) + kstat_seq_show_headers(f); + + if (n >= ksp->ks_ndata) + RETURN(NULL); + + RETURN(kstat_seq_data_addr(ksp, n)); +} + +static void * +kstat_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + ENTRY; + + ++*pos; + if (*pos >= ksp->ks_ndata) + RETURN(NULL); + + 
RETURN(kstat_seq_data_addr(ksp, *pos)); +} + +static void +kstat_seq_stop(struct seq_file *f, void *v) +{ + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + + spin_unlock(&ksp->ks_lock); +} + +static struct seq_operations kstat_seq_ops = { + .show = kstat_seq_show, + .start = kstat_seq_start, + .next = kstat_seq_next, + .stop = kstat_seq_stop, +}; + +static int +proc_kstat_open(struct inode *inode, struct file *filp) +{ + struct seq_file *f; + int rc; + + rc = seq_open(filp, &kstat_seq_ops); + if (rc) + return rc; + + f = filp->private_data; + f->private = PDE(inode)->data; + + return rc; +} + +static struct file_operations proc_kstat_operations = { + .open = proc_kstat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +kstat_t * +__kstat_create(const char *ks_module, int ks_instance, const char *ks_name, + const char *ks_class, uchar_t ks_type, uint_t ks_ndata, + uchar_t ks_flags) +{ + kstat_t *ksp; + + ASSERT(ks_module); + ASSERT(ks_instance == 0); + ASSERT(ks_name); + ASSERT(!(ks_flags & KSTAT_FLAG_UNSUPPORTED)); + + if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) + ASSERT(ks_ndata == 1); + + ksp = kmem_zalloc(sizeof(*ksp), KM_SLEEP); + if (ksp == NULL) + return ksp; + + spin_lock(&kstat_lock); + ksp->ks_kid = kstat_id; + kstat_id++; + spin_unlock(&kstat_lock); + + ksp->ks_magic = KS_MAGIC; + spin_lock_init(&ksp->ks_lock); + INIT_LIST_HEAD(&ksp->ks_list); + + ksp->ks_crtime = gethrtime(); + ksp->ks_snaptime = ksp->ks_crtime; + strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN); + ksp->ks_instance = ks_instance; + strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN); + strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN); + ksp->ks_type = ks_type; + ksp->ks_flags = ks_flags; + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + ksp->ks_ndata = 1; + ksp->ks_data_size = ks_ndata; + break; + case KSTAT_TYPE_NAMED: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_named_t); + break; + case KSTAT_TYPE_INTR: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_intr_t); + break; + case KSTAT_TYPE_IO: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_io_t); + break; + case KSTAT_TYPE_TIMER: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_timer_t); + break; + default: + SBUG(); /* Unreachable */ + } + + if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) { + ksp->ks_data = NULL; + } else { + ksp->ks_data = kmem_alloc(ksp->ks_data_size, KM_SLEEP); + if (ksp->ks_data == NULL) { + kmem_free(ksp, sizeof(*ksp)); + ksp = NULL; + } + } + + return ksp; +} +EXPORT_SYMBOL(__kstat_create); + +void +__kstat_install(kstat_t *ksp) +{ + struct proc_dir_entry *de_module, *de_name; + kstat_t *tmp; + int rc = 0; + ENTRY; + + spin_lock(&kstat_lock); + + /* Item may only be added to the list once */ + list_for_each_entry(tmp, &kstat_list, ks_list) { + if (tmp == ksp) { + spin_unlock(&kstat_lock); + GOTO(out, rc = -EEXIST); + } + } + + list_add_tail(&ksp->ks_list, &kstat_list); + spin_unlock(&kstat_lock); + + de_module = proc_dir_entry_find(proc_spl_kstat, ksp->ks_module); + if (de_module == NULL) { + de_module = proc_mkdir(ksp->ks_module, proc_spl_kstat); + if (de_module == NULL) + GOTO(out, rc = -EUNATCH); + } + + de_name = create_proc_entry(ksp->ks_name, 0444, de_module); + if (de_name == NULL) + GOTO(out, rc = -EUNATCH); + + spin_lock(&ksp->ks_lock); + ksp->ks_proc = de_name; + de_name->proc_fops = &proc_kstat_operations; + de_name->data = (void *)ksp; + 
spin_unlock(&ksp->ks_lock); +out: + if (rc) { + spin_lock(&kstat_lock); + list_del_init(&ksp->ks_list); + spin_unlock(&kstat_lock); + } + + EXIT; +} +EXPORT_SYMBOL(__kstat_install); + +void +__kstat_delete(kstat_t *ksp) +{ + struct proc_dir_entry *de_module; + + spin_lock(&kstat_lock); + list_del_init(&ksp->ks_list); + spin_unlock(&kstat_lock); + + if (ksp->ks_proc) { + de_module = ksp->ks_proc->parent; + remove_proc_entry(ksp->ks_name, de_module); + + /* Remove top level module directory if it's empty */ + if (proc_dir_entries(de_module) == 0) + remove_proc_entry(de_module->name, de_module->parent); + } + + if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) + kmem_free(ksp->ks_data, ksp->ks_data_size); + + kmem_free(ksp, sizeof(*ksp)); + + return; +} +EXPORT_SYMBOL(__kstat_delete); + +#endif /* DEBUG_KSTAT */ + +int +kstat_init(void) +{ + ENTRY; +#ifdef DEBUG_KSTAT + spin_lock_init(&kstat_lock); + INIT_LIST_HEAD(&kstat_list); + kstat_id = 0; +#endif /* DEBUG_KSTAT */ + RETURN(0); +} + +void +kstat_fini(void) +{ + ENTRY; +#ifdef DEBUG_KSTAT + ASSERT(list_empty(&kstat_list)); +#endif /* DEBUG_KSTAT */ + EXIT; +} + diff --git a/module/spl/spl-module.c b/module/spl/spl-module.c new file mode 100644 index 000000000..c1d030f24 --- /dev/null +++ b/module/spl/spl-module.c @@ -0,0 +1,331 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/sysmacros.h> +#include <sys/sunddi.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_MODULE + +static spinlock_t dev_info_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(dev_info_list); + +static struct dev_info * +get_dev_info(dev_t dev) +{ + struct dev_info *di; + + spin_lock(&dev_info_lock); + + list_for_each_entry(di, &dev_info_list, di_list) + if (di->di_dev == dev) + goto out; + + di = NULL; +out: + spin_unlock(&dev_info_lock); + return di; +} + +static int +mod_generic_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct dev_info *di; + int rc, flags = 0, rvalp = 0; + cred_t *cr = NULL; + + di = get_dev_info(MKDEV(imajor(ino), iminor(ino))); + if (di == NULL) + return EINVAL; + + rc = di->di_ops->devo_cb_ops->cb_ioctl(di->di_dev, + (int)cmd,(intptr_t)arg, + flags, cr, &rvalp); + return rc; +} + +int +__ddi_create_minor_node(dev_info_t *di, char *name, int spec_type, + minor_t minor_num, char *node_type, + int flags, struct module *mod) +{ + struct cdev *cdev; + struct dev_ops *dev_ops; + struct cb_ops *cb_ops; + struct file_operations *fops; + int rc; + ENTRY; + + ASSERT(spec_type == S_IFCHR); + ASSERT(minor_num < di->di_minors); + ASSERT(!strcmp(node_type, DDI_PSEUDO)); + + fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL); + if (fops == NULL) + RETURN(DDI_FAILURE); + + cdev = cdev_alloc(); + if (cdev == NULL) { + kfree(fops); + RETURN(DDI_FAILURE); + } + + cdev->ops = fops; + + mutex_enter(&di->di_lock); + dev_ops = di->di_ops; + ASSERT(dev_ops); + cb_ops = di->di_ops->devo_cb_ops; + ASSERT(cb_ops); + + /* Setup the fops to cb_ops mapping */ + fops->owner = mod; + if (cb_ops->cb_ioctl) + fops->ioctl = mod_generic_ioctl; + +#if 0 + if (cb_ops->cb_open) + fops->open = mod_generic_open; + + if (cb_ops->cb_close) + fops->release = mod_generic_close; + + if (cb_ops->cb_read) + fops->read = mod_generic_read; + + if (cb_ops->cb_write) + fops->write = mod_generic_write; +#endif + /* XXX: Currently unsupported operations */ + ASSERT(cb_ops->cb_open == NULL); + ASSERT(cb_ops->cb_close == NULL); + ASSERT(cb_ops->cb_read == NULL); + ASSERT(cb_ops->cb_write == NULL); + ASSERT(cb_ops->cb_strategy == NULL); + ASSERT(cb_ops->cb_print == NULL); + ASSERT(cb_ops->cb_dump == NULL); + ASSERT(cb_ops->cb_devmap == NULL); + ASSERT(cb_ops->cb_mmap == NULL); + ASSERT(cb_ops->cb_segmap == NULL); + ASSERT(cb_ops->cb_chpoll == NULL); + ASSERT(cb_ops->cb_prop_op == NULL); + ASSERT(cb_ops->cb_str == NULL); + ASSERT(cb_ops->cb_aread == NULL); + ASSERT(cb_ops->cb_awrite == NULL); + + di->di_cdev = cdev; + di->di_flags = flags; + di->di_minor = minor_num; + di->di_dev = MKDEV(di->di_major, di->di_minor); + + rc = cdev_add(cdev, di->di_dev, 1); + if (rc) { + CERROR("Error adding cdev, %d\n", rc); + kfree(fops); + cdev_del(cdev); + mutex_exit(&di->di_lock); + RETURN(DDI_FAILURE); + } + + spin_lock(&dev_info_lock); + list_add(&di->di_list, &dev_info_list); + spin_unlock(&dev_info_lock); + + mutex_exit(&di->di_lock); + + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(__ddi_create_minor_node); + +static void +__ddi_remove_minor_node_locked(dev_info_t *di, char *name) +{ + if (di->di_cdev) { + cdev_del(di->di_cdev); + di->di_cdev = NULL; + } + + spin_lock(&dev_info_lock); + list_del_init(&di->di_list); + spin_unlock(&dev_info_lock); +} + +void +__ddi_remove_minor_node(dev_info_t *di, char *name) +{ + ENTRY; + mutex_enter(&di->di_lock); + __ddi_remove_minor_node_locked(di, name); + mutex_exit(&di->di_lock); + EXIT; +} 
+EXPORT_SYMBOL(__ddi_remove_minor_node); + +int +ddi_quiesce_not_needed(dev_info_t *dip) +{ + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(ddi_quiesce_not_needed); + +#if 0 +static int +mod_generic_open(struct inode *, struct file *) +{ + open(dev_t *devp, int flags, int otyp, cred_t *credp); +} + +static int +mod_generic_close(struct inode *, struct file *) +{ + close(dev_t dev, int flags, int otyp, cred_t *credp); +} + +static ssize_t +mod_generic_read(struct file *, char __user *, size_t, loff_t *) +{ + read(dev_t dev, struct uio *uiop, cred_t *credp); +} + +static ssize_t +mod_generic_write(struct file *, const char __user *, size_t, loff_t *) +{ + write(dev_t dev, struct uio *uiop, cred_t *credp); +} +#endif + +static struct dev_info * +dev_info_alloc(major_t major, minor_t minors, struct dev_ops *ops) { + struct dev_info *di; + + di = kmalloc(sizeof(struct dev_info), GFP_KERNEL); + if (di == NULL) + return NULL; + + mutex_init(&di->di_lock, NULL, MUTEX_DEFAULT, NULL); + INIT_LIST_HEAD(&di->di_list); + di->di_ops = ops; + di->di_class = NULL; + di->di_cdev = NULL; + di->di_major = major; + di->di_minor = 0; + di->di_minors = minors; + di->di_dev = 0; + + return di; +} + +static void +dev_info_free(struct dev_info *di) +{ + mutex_enter(&di->di_lock); + __ddi_remove_minor_node_locked(di, NULL); + mutex_exit(&di->di_lock); + mutex_destroy(&di->di_lock); + kfree(di); +} + +int +__mod_install(struct modlinkage *modlp) +{ + struct modldrv *drv = modlp->ml_modldrv; + struct dev_info *di; + int rc; + ENTRY; + + di = dev_info_alloc(modlp->ml_major, modlp->ml_minors, + drv->drv_dev_ops); + if (di == NULL) + RETURN(ENOMEM); + + /* XXX: Really we need to be calling devo_probe if it's available + * and then calling devo_attach for each device discovered. However + * for now we just call it once and let the app sort it out. + */ + rc = drv->drv_dev_ops->devo_attach(di, DDI_ATTACH); + if (rc != DDI_SUCCESS) { + dev_info_free(di); + RETURN(rc); + } + + drv->drv_dev_info = di; + + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(__mod_install); + +int +__mod_remove(struct modlinkage *modlp) +{ + struct modldrv *drv = modlp->ml_modldrv; + struct dev_info *di = drv->drv_dev_info; + int rc; + ENTRY; + + rc = drv->drv_dev_ops->devo_detach(di, DDI_DETACH); + if (rc != DDI_SUCCESS) + RETURN(rc); + + dev_info_free(di); + drv->drv_dev_info = NULL; + + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(__mod_remove); + +int +ldi_ident_from_mod(struct modlinkage *modlp, ldi_ident_t *lip) +{ + ldi_ident_t li; + ENTRY; + + ASSERT(modlp); + ASSERT(lip); + + li = kmalloc(sizeof(struct ldi_ident), GFP_KERNEL); + if (li == NULL) + RETURN(ENOMEM); + + li->li_dev = MKDEV(modlp->ml_major, 0); + *lip = li; + + RETURN(0); +} +EXPORT_SYMBOL(ldi_ident_from_mod); + +void +ldi_ident_release(ldi_ident_t lip) +{ + ENTRY; + ASSERT(lip); + kfree(lip); + EXIT; +} +EXPORT_SYMBOL(ldi_ident_release); diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c new file mode 100644 index 000000000..e7ec41cf4 --- /dev/null +++ b/module/spl/spl-mutex.c @@ -0,0 +1,309 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/mutex.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_MUTEX + +/* Mutex implementation based on those found in Solaris. This means + * they the MUTEX_DEFAULT type is an adaptive mutex. When calling + * mutex_enter() your process will spin waiting for the lock if it's + * likely the lock will be free'd shortly. If it looks like the + * lock will be held for a longer time we schedule and sleep waiting + * for it. This determination is made by checking if the holder of + * the lock is currently running on cpu or sleeping waiting to be + * scheduled. If the holder is currently running it's likely the + * lock will be shortly dropped. + * + * XXX: This is basically a rough implementation to see if this + * helps our performance. If it does a more careful implementation + * should be done, perhaps in assembly. + */ + +/* 0: Never spin when trying to aquire lock + * -1: Spin until aquired or holder yeilds without dropping lock + * 1-MAX_INT: Spin for N attempts before sleeping for lock + */ +int mutex_spin_max = 0; + +#ifdef DEBUG_MUTEX +int mutex_stats[MUTEX_STATS_SIZE] = { 0 }; +spinlock_t mutex_stats_lock; +struct list_head mutex_stats_list; +#endif + +int +__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc) +{ + int flags = KM_SLEEP; + + ASSERT(mp); + ASSERT(name); + ASSERT(ibc == NULL); + ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */ + + mp->km_name = NULL; + mp->km_name_size = strlen(name) + 1; + + switch (type) { + case MUTEX_DEFAULT: + mp->km_type = MUTEX_ADAPTIVE; + break; + case MUTEX_SPIN: + case MUTEX_ADAPTIVE: + mp->km_type = type; + break; + default: + SBUG(); + } + + /* We may be called when there is a non-zero preempt_count or + * interrupts are disabled is which case we must not sleep. 
+ */ + if (current_thread_info()->preempt_count || irqs_disabled()) + flags = KM_NOSLEEP; + + /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */ + mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags); + if (mp->km_sem == NULL) + return -ENOMEM; + + mp->km_name = kmem_alloc(mp->km_name_size, flags); + if (mp->km_name == NULL) { + kmem_free(mp->km_sem, sizeof(struct semaphore)); + return -ENOMEM; + } + + sema_init(mp->km_sem, 1); + strncpy(mp->km_name, name, mp->km_name_size); + +#ifdef DEBUG_MUTEX + mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags); + if (mp->km_stats == NULL) { + kmem_free(mp->km_name, mp->km_name_size); + kmem_free(mp->km_sem, sizeof(struct semaphore)); + return -ENOMEM; + } + + /* XXX - This appears to be a much more contended lock than I + * would have expected. To run with this debugging enabled and + * get reasonable performance we may need to be more clever and + * do something like hash the mutex ptr on to one of several + * lists to ease this single point of contention. + */ + spin_lock(&mutex_stats_lock); + list_add_tail(&mp->km_list, &mutex_stats_list); + spin_unlock(&mutex_stats_lock); +#endif + mp->km_magic = KM_MAGIC; + mp->km_owner = NULL; + + return 0; +} +EXPORT_SYMBOL(__spl_mutex_init); + +void +__spl_mutex_destroy(kmutex_t *mp) +{ + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + +#ifdef DEBUG_MUTEX + spin_lock(&mutex_stats_lock); + list_del_init(&mp->km_list); + spin_unlock(&mutex_stats_lock); + + kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE); +#endif + kmem_free(mp->km_name, mp->km_name_size); + kmem_free(mp->km_sem, sizeof(struct semaphore)); + + memset(mp, KM_POISON, sizeof(*mp)); +} +EXPORT_SYMBOL(__spl_mutex_destroy); + +/* Return 1 if we acquired the mutex, else zero. */ +int +__mutex_tryenter(kmutex_t *mp) +{ + int rc; + ENTRY; + + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL); + MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL); + + rc = down_trylock(mp->km_sem); + if (rc == 0) { + ASSERT(mp->km_owner == NULL); + mp->km_owner = current; + MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD); + MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD); + } + + RETURN(!rc); +} +EXPORT_SYMBOL(__mutex_tryenter); + +#ifndef HAVE_TASK_CURR +#define task_curr(owner) 0 +#endif + + +static void +mutex_enter_adaptive(kmutex_t *mp) +{ + struct task_struct *owner; + int count = 0; + + /* Lock is not held so we expect to aquire the lock */ + if ((owner = mp->km_owner) == NULL) { + down(mp->km_sem); + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD); + } else { + /* The lock is held by a currently running task which + * we expect will drop the lock before leaving the + * head of the runqueue. So the ideal thing to do + * is spin until we aquire the lock and avoid a + * context switch. However it is also possible the + * task holding the lock yields the processor with + * out dropping lock. In which case, we know it's + * going to be a while so we stop spinning and go + * to sleep waiting for the lock to be available. + * This should strike the optimum balance between + * spinning and sleeping waiting for a lock. 
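+		 *
+		 * For example, with the mutex_spin_max module parameter set
+		 * to 100 a contended adaptive mutex polls down_trylock() up
+		 * to roughly a hundred times while the holder remains on a
+		 * cpu before falling back to sleeping in down().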
+ */ + while (task_curr(owner) && (count <= mutex_spin_max)) { + if (down_trylock(mp->km_sem) == 0) { + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); + GOTO(out, count); + } + count++; + } + + /* The lock is held by a sleeping task so it's going to + * cost us minimally one context switch. We might as + * well sleep and yield the processor to other tasks. + */ + down(mp->km_sem); + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP); + } +out: + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL); +} + +void +__mutex_enter(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + + switch (mp->km_type) { + case MUTEX_SPIN: + while (down_trylock(mp->km_sem)); + MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); + MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); + break; + case MUTEX_ADAPTIVE: + mutex_enter_adaptive(mp); + break; + } + + ASSERT(mp->km_owner == NULL); + mp->km_owner = current; + + EXIT; +} +EXPORT_SYMBOL(__mutex_enter); + +void +__mutex_exit(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + ASSERT(mp->km_owner == current); + mp->km_owner = NULL; + up(mp->km_sem); + EXIT; +} +EXPORT_SYMBOL(__mutex_exit); + +/* Return 1 if mutex is held by current process, else zero. */ +int +__mutex_owned(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + RETURN(mp->km_owner == current); +} +EXPORT_SYMBOL(__mutex_owned); + +/* Return owner if mutex is owned, else NULL. */ +kthread_t * +__spl_mutex_owner(kmutex_t *mp) +{ + ENTRY; + ASSERT(mp); + ASSERT(mp->km_magic == KM_MAGIC); + RETURN(mp->km_owner); +} +EXPORT_SYMBOL(__spl_mutex_owner); + +int +spl_mutex_init(void) +{ + ENTRY; +#ifdef DEBUG_MUTEX + spin_lock_init(&mutex_stats_lock); + INIT_LIST_HEAD(&mutex_stats_list); +#endif + RETURN(0); +} + +void +spl_mutex_fini(void) +{ + ENTRY; +#ifdef DEBUG_MUTEX + ASSERT(list_empty(&mutex_stats_list)); +#endif + EXIT; +} + +module_param(mutex_spin_max, int, 0644); +MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to aquire lock"); diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c new file mode 100644 index 000000000..bf185c60e --- /dev/null +++ b/module/spl/spl-proc.c @@ -0,0 +1,1049 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/proc.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_PROC + +#ifdef DEBUG_KMEM +static unsigned long table_min = 0; +static unsigned long table_max = ~0; +#endif + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *spl_header = NULL; +#endif /* CONFIG_SYSCTL */ + +#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) +static struct proc_dir_entry *proc_spl = NULL; +#ifdef DEBUG_MUTEX +static struct proc_dir_entry *proc_spl_mutex = NULL; +static struct proc_dir_entry *proc_spl_mutex_stats = NULL; +#endif /* DEBUG_MUTEX */ +#ifdef DEBUG_KMEM +static struct proc_dir_entry *proc_spl_kmem = NULL; +static struct proc_dir_entry *proc_spl_kmem_slab = NULL; +#endif /* DEBUG_KMEM */ +#ifdef DEBUG_KSTAT +struct proc_dir_entry *proc_spl_kstat = NULL; +#endif /* DEBUG_KSTAT */ +#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ + +#ifdef HAVE_CTL_UNNUMBERED + +#define CTL_SPL CTL_UNNUMBERED +#define CTL_SPL_DEBUG CTL_UNNUMBERED +#define CTL_SPL_MUTEX CTL_UNNUMBERED +#define CTL_SPL_KMEM CTL_UNNUMBERED +#define CTL_SPL_KSTAT CTL_UNNUMBERED + +#define CTL_VERSION CTL_UNNUMBERED /* Version */ +#define CTL_HOSTID CTL_UNNUMBERED /* Host id by /usr/bin/hostid */ +#define CTL_HW_SERIAL CTL_UNNUMBERED /* HW serial number by hostid */ + +#define CTL_DEBUG_SUBSYS CTL_UNNUMBERED /* Debug subsystem */ +#define CTL_DEBUG_MASK CTL_UNNUMBERED /* Debug mask */ +#define CTL_DEBUG_PRINTK CTL_UNNUMBERED /* All messages to console */ +#define CTL_DEBUG_MB CTL_UNNUMBERED /* Debug buffer size */ +#define CTL_DEBUG_BINARY CTL_UNNUMBERED /* Binary data in buffer */ +#define CTL_DEBUG_CATASTROPHE CTL_UNNUMBERED /* Set if BUG'd or panic'd */ +#define CTL_DEBUG_PANIC_ON_BUG CTL_UNNUMBERED /* Should panic on BUG */ +#define CTL_DEBUG_PATH CTL_UNNUMBERED /* Dump log location */ +#define CTL_DEBUG_DUMP CTL_UNNUMBERED /* Dump debug buffer to file */ +#define CTL_DEBUG_FORCE_BUG CTL_UNNUMBERED /* Hook to force a BUG */ +#define CTL_DEBUG_STACK_SIZE CTL_UNNUMBERED /* Max observed stack size */ + +#define CTL_CONSOLE_RATELIMIT CTL_UNNUMBERED /* Ratelimit console messages */ +#define CTL_CONSOLE_MAX_DELAY_CS CTL_UNNUMBERED /* Max delay skip messages */ +#define CTL_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED /* Init delay skip messages */ +#define CTL_CONSOLE_BACKOFF CTL_UNNUMBERED /* Delay increase factor */ + +#ifdef DEBUG_KMEM +#define CTL_KMEM_KMEMUSED CTL_UNNUMBERED /* Alloc'd kmem bytes */ +#define CTL_KMEM_KMEMMAX CTL_UNNUMBERED /* Max alloc'd by kmem bytes */ +#define CTL_KMEM_VMEMUSED CTL_UNNUMBERED /* Alloc'd vmem bytes */ +#define CTL_KMEM_VMEMMAX CTL_UNNUMBERED /* Max alloc'd by vmem bytes */ +#define CTL_KMEM_ALLOC_FAILED CTL_UNNUMBERED /* Cache allocations failed */ +#endif + +#define CTL_MUTEX_STATS CTL_UNNUMBERED /* Global mutex statistics */ +#define CTL_MUTEX_STATS_PER CTL_UNNUMBERED /* Per mutex statistics */ +#define CTL_MUTEX_SPIN_MAX CTL_UNNUMBERED /* Max mutex spin iterations */ + +#else /* HAVE_CTL_UNNUMBERED */ + +#define CTL_SPL 0x87 +#define CTL_SPL_DEBUG 0x88 +#define CTL_SPL_MUTEX 0x89 +#define CTL_SPL_KMEM 0x90 +#define CTL_SPL_KSTAT 0x91 + +enum { + CTL_VERSION = 1, /* Version */ + CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ + CTL_HW_SERIAL, /* Hardware serial number from hostid */ + + CTL_DEBUG_SUBSYS, /* Debug subsystem */ + CTL_DEBUG_MASK, /* Debug mask */ + CTL_DEBUG_PRINTK, /* Force all messages to console */ + CTL_DEBUG_MB, /* Debug buffer size */ + CTL_DEBUG_BINARY, /* Include binary data in buffer */ + 
CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */ + CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */ + CTL_DEBUG_PATH, /* Dump log location */ + CTL_DEBUG_DUMP, /* Dump debug buffer to file */ + CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */ + CTL_DEBUG_STACK_SIZE, /* Max observed stack size */ + + CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ + CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */ + CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */ + CTL_CONSOLE_BACKOFF, /* Delay increase factor */ + +#ifdef DEBUG_KMEM + CTL_KMEM_KMEMUSED, /* Alloc'd kmem bytes */ + CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ + CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */ + CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ +#endif + + CTL_MUTEX_STATS, /* Global mutex statistics */ + CTL_MUTEX_STATS_PER, /* Per mutex statistics */ + CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */ +}; +#endif /* HAVE_CTL_UNNUMBERED */ + +static int +proc_copyin_string(char *kbuffer, int kbuffer_size, + const char *ubuffer, int ubuffer_size) +{ + int size; + + if (ubuffer_size > kbuffer_size) + return -EOVERFLOW; + + if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size)) + return -EFAULT; + + /* strip trailing whitespace */ + size = strnlen(kbuffer, ubuffer_size); + while (size-- >= 0) + if (!isspace(kbuffer[size])) + break; + + /* empty string */ + if (size < 0) + return -EINVAL; + + /* no space to terminate */ + if (size == kbuffer_size) + return -EOVERFLOW; + + kbuffer[size + 1] = 0; + return 0; +} + +static int +proc_copyout_string(char *ubuffer, int ubuffer_size, + const char *kbuffer, char *append) +{ + /* NB if 'append' != NULL, it's a single character to append to the + * copied out string - usually "\n", for /proc entries and + * (i.e. a terminating zero byte) for sysctl entries + */ + int size = MIN(strlen(kbuffer), ubuffer_size); + + if (copy_to_user(ubuffer, kbuffer, size)) + return -EFAULT; + + if (append != NULL && size < ubuffer_size) { + if (copy_to_user(ubuffer + size, append, 1)) + return -EFAULT; + + size++; + } + + return size; +} + +static int +proc_dobitmasks(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + unsigned long *mask = table->data; + int is_subsys = (mask == &spl_debug_subsys) ? 1 : 0; + int is_printk = (mask == &spl_debug_printk) ? 
1 : 0; + int size = 512, rc; + char *str; + ENTRY; + + str = kmem_alloc(size, KM_SLEEP); + if (str == NULL) + RETURN(-ENOMEM); + + if (write) { + rc = proc_copyin_string(str, size, buffer, *lenp); + if (rc < 0) + RETURN(rc); + + rc = spl_debug_str2mask(mask, str, is_subsys); + /* Always print BUG/ASSERT to console, so keep this mask */ + if (is_printk) + *mask |= D_EMERG; + + *ppos += *lenp; + } else { + rc = spl_debug_mask2str(str, size, *mask, is_subsys); + if (*ppos >= rc) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, + str + *ppos, "\n"); + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + kmem_free(str, size); + RETURN(rc); +} + +static int +proc_debug_mb(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char str[32]; + int rc, len; + ENTRY; + + if (write) { + rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); + if (rc < 0) + RETURN(rc); + + rc = spl_debug_set_mb(simple_strtoul(str, NULL, 0)); + *ppos += *lenp; + } else { + len = snprintf(str, sizeof(str), "%d", spl_debug_get_mb()); + if (*ppos >= len) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n"); + + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + RETURN(rc); +} + +static int +proc_dump_kernel(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + ENTRY; + + if (write) { + spl_debug_dumplog(0); + *ppos += *lenp; + } else { + *lenp = 0; + } + + RETURN(0); +} + +static int +proc_force_bug(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + ENTRY; + + if (write) { + CERROR("Crashing due to forced SBUG\n"); + SBUG(); + /* Unreachable */ + } else { + *lenp = 0; + } + + RETURN(0); +} + +static int +proc_console_max_delay_cs(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, max_delay_cs; + struct ctl_table dummy = *table; + long d; + ENTRY; + + dummy.data = &max_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (write) { + max_delay_cs = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (max_delay_cs <= 0) + RETURN(-EINVAL); + + d = (max_delay_cs * HZ) / 100; + if (d == 0 || d < spl_console_min_delay) + RETURN(-EINVAL); + + spl_console_max_delay = d; + } else { + max_delay_cs = (spl_console_max_delay * 100) / HZ; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +static int +proc_console_min_delay_cs(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, min_delay_cs; + struct ctl_table dummy = *table; + long d; + ENTRY; + + dummy.data = &min_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (write) { + min_delay_cs = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (min_delay_cs <= 0) + RETURN(-EINVAL); + + d = (min_delay_cs * HZ) / 100; + if (d == 0 || d > spl_console_max_delay) + RETURN(-EINVAL); + + spl_console_min_delay = d; + } else { + min_delay_cs = (spl_console_min_delay * 100) / HZ; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +static int +proc_console_backoff(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, backoff; + struct ctl_table dummy = *table; + ENTRY; + + dummy.data = &backoff; + 
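Each of the console delay handlers above, and proc_console_backoff here, uses the same trick: copy the real ctl_table, point .data at a local int so proc_dointvec() handles the user-space copy and parsing, then validate and convert the value locally. A hedged sketch of the pattern with hypothetical names, using the same pre-2.6.32 proc_handler signature as this file:

static long demo_delay;		/* hypothetical tunable, stored in jiffies */

static int
proc_demo_delay_cs(struct ctl_table *table, int write, struct file *filp,
                   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int cs, rc;
	struct ctl_table dummy = *table;

	dummy.data = &cs;			/* expose centiseconds to user space */
	dummy.proc_handler = &proc_dointvec;

	if (write) {
		rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
		if (rc < 0)
			return rc;

		demo_delay = (cs * HZ) / 100;	/* convert to jiffies */
	} else {
		cs = (demo_delay * 100) / HZ;
		rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
	}

	return rc;
}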
dummy.proc_handler = &proc_dointvec; + + if (write) { + backoff = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (backoff <= 0) + RETURN(-EINVAL); + + spl_console_backoff = backoff; + } else { + backoff = spl_console_backoff; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +#ifdef DEBUG_KMEM +static int +proc_doatomic64(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc = 0; + unsigned long min = 0, max = ~0, val; + struct ctl_table dummy = *table; + ENTRY; + + dummy.data = &val; + dummy.proc_handler = &proc_dointvec; + dummy.extra1 = &min; + dummy.extra2 = &max; + + if (write) { + *ppos += *lenp; + } else { + val = atomic64_read((atomic64_t *)table->data); + rc = proc_doulongvec_minmax(&dummy, write, filp, + buffer, lenp, ppos); + } + + RETURN(rc); +} +#endif /* DEBUG_KMEM */ + +static int +proc_dohostid(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int len, rc = 0; + int32_t val; + char *end, str[32]; + ENTRY; + + if (write) { + /* We can't use proc_doulongvec_minmax() in the write + * case hear because hostid while a hex value has no + * leading 0x which confuses the helper function. */ + rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); + if (rc < 0) + RETURN(rc); + + val = simple_strtol(str, &end, 16); + if (str == end) + RETURN(-EINVAL); + + spl_hostid = (long)val; + (void)snprintf(hw_serial, 11, "%u", (val >= 0) ? val : -val); + *ppos += *lenp; + } else { + len = snprintf(str, sizeof(str), "%lx", spl_hostid); + if (*ppos >= len) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n"); + + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + RETURN(rc); +} + +#ifdef DEBUG_MUTEX +static void +mutex_seq_show_headers(struct seq_file *f) +{ + seq_printf(f, "%-36s %-4s %-16s\t" + "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n", + "name", "type", "owner"); +} + +static int +mutex_seq_show(struct seq_file *f, void *p) +{ + kmutex_t *mp = p; + char t = 'X'; + int i; + + ASSERT(mp->km_magic == KM_MAGIC); + + switch (mp->km_type) { + case MUTEX_DEFAULT: t = 'D'; break; + case MUTEX_SPIN: t = 'S'; break; + case MUTEX_ADAPTIVE: t = 'A'; break; + default: + SBUG(); + } + seq_printf(f, "%-36s %c ", mp->km_name, t); + if (mp->km_owner) + seq_printf(f, "%p\t", mp->km_owner); + else + seq_printf(f, "%-16s\t", "<not held>"); + + for (i = 0; i < MUTEX_STATS_SIZE; i++) + seq_printf(f, "%d%c", mp->km_stats[i], + (i + 1 == MUTEX_STATS_SIZE) ? '\n' : '\t'); + + return 0; +} + +static void * +mutex_seq_start(struct seq_file *f, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + ENTRY; + + spin_lock(&mutex_stats_lock); + if (!n) + mutex_seq_show_headers(f); + + p = mutex_stats_list.next; + while (n--) { + p = p->next; + if (p == &mutex_stats_list) + RETURN(NULL); + } + + RETURN(list_entry(p, kmutex_t, km_list)); +} + +static void * +mutex_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + kmutex_t *mp = p; + ENTRY; + + ++*pos; + RETURN((mp->km_list.next == &mutex_stats_list) ? 
+ NULL : list_entry(mp->km_list.next, kmutex_t, km_list)); +} + +static void +mutex_seq_stop(struct seq_file *f, void *v) +{ + spin_unlock(&mutex_stats_lock); +} + +static struct seq_operations mutex_seq_ops = { + .show = mutex_seq_show, + .start = mutex_seq_start, + .next = mutex_seq_next, + .stop = mutex_seq_stop, +}; + +static int +proc_mutex_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &mutex_seq_ops); +} + +static struct file_operations proc_mutex_operations = { + .open = proc_mutex_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* DEBUG_MUTEX */ + +#ifdef DEBUG_KMEM +static void +slab_seq_show_headers(struct seq_file *f) +{ + seq_printf(f, "%-36s\n", "name"); +} + +static int +slab_seq_show(struct seq_file *f, void *p) +{ + spl_kmem_cache_t *skc = p; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + spin_lock(&skc->skc_lock); + seq_printf(f, "%-36s ", skc->skc_name); + seq_printf(f, "%u %u %u - %lu %lu %lu - %lu %lu %lu - %lu %lu %lu\n", + (unsigned)skc->skc_obj_size, + (unsigned)skc->skc_slab_objs, + (unsigned)skc->skc_slab_size, + (long unsigned)skc->skc_slab_fail, + (long unsigned)skc->skc_slab_create, + (long unsigned)skc->skc_slab_destroy, + (long unsigned)skc->skc_slab_total, + (long unsigned)skc->skc_slab_alloc, + (long unsigned)skc->skc_slab_max, + (long unsigned)skc->skc_obj_total, + (long unsigned)skc->skc_obj_alloc, + (long unsigned)skc->skc_obj_max); + + spin_unlock(&skc->skc_lock); + + return 0; +} + +static void * +slab_seq_start(struct seq_file *f, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + ENTRY; + + down_read(&spl_kmem_cache_sem); + if (!n) + slab_seq_show_headers(f); + + p = spl_kmem_cache_list.next; + while (n--) { + p = p->next; + if (p == &spl_kmem_cache_list) + RETURN(NULL); + } + + RETURN(list_entry(p, spl_kmem_cache_t, skc_list)); +} + +static void * +slab_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + spl_kmem_cache_t *skc = p; + ENTRY; + + ++*pos; + RETURN((skc->skc_list.next == &spl_kmem_cache_list) ? 
+ NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list)); +} + +static void +slab_seq_stop(struct seq_file *f, void *v) +{ + up_read(&spl_kmem_cache_sem); +} + +static struct seq_operations slab_seq_ops = { + .show = slab_seq_show, + .start = slab_seq_start, + .next = slab_seq_next, + .stop = slab_seq_stop, +}; + +static int +proc_slab_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &slab_seq_ops); +} + +static struct file_operations proc_slab_operations = { + .open = proc_slab_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* DEBUG_KMEM */ + +static struct ctl_table spl_debug_table[] = { + { + .ctl_name = CTL_DEBUG_SUBSYS, + .procname = "subsystem", + .data = &spl_debug_subsys, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_MASK, + .procname = "mask", + .data = &spl_debug_mask, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_PRINTK, + .procname = "printk", + .data = &spl_debug_printk, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_MB, + .procname = "mb", + .mode = 0644, + .proc_handler = &proc_debug_mb, + }, + { + .ctl_name = CTL_DEBUG_BINARY, + .procname = "binary", + .data = &spl_debug_binary, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_DEBUG_CATASTROPHE, + .procname = "catastrophe", + .data = &spl_debug_catastrophe, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_DEBUG_PANIC_ON_BUG, + .procname = "panic_on_bug", + .data = &spl_debug_panic_on_bug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_DEBUG_PATH, + .procname = "path", + .data = spl_debug_file_path, + .maxlen = sizeof(spl_debug_file_path), + .mode = 0644, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_DEBUG_DUMP, + .procname = "dump", + .mode = 0200, + .proc_handler = &proc_dump_kernel, + }, + { .ctl_name = CTL_DEBUG_FORCE_BUG, + .procname = "force_bug", + .mode = 0200, + .proc_handler = &proc_force_bug, + }, + { + .ctl_name = CTL_CONSOLE_RATELIMIT, + .procname = "console_ratelimit", + .data = &spl_console_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_CONSOLE_MAX_DELAY_CS, + .procname = "console_max_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_max_delay_cs, + }, + { + .ctl_name = CTL_CONSOLE_MIN_DELAY_CS, + .procname = "console_min_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_min_delay_cs, + }, + { + .ctl_name = CTL_CONSOLE_BACKOFF, + .procname = "console_backoff", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_backoff, + }, + { + .ctl_name = CTL_DEBUG_STACK_SIZE, + .procname = "stack_max", + .data = &spl_debug_stack, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + {0}, +}; + +#ifdef DEBUG_MUTEX +static struct ctl_table spl_mutex_table[] = { + { + .ctl_name = CTL_MUTEX_STATS, + .procname = "stats", + .data = &mutex_stats, + .maxlen = sizeof(int) * MUTEX_STATS_SIZE, + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_MUTEX_SPIN_MAX, + .procname = "spin_max", + .data = &mutex_spin_max, + .maxlen = sizeof(int), + .mode = 0644, + 
.proc_handler = &proc_dointvec, + }, + {0}, +}; +#endif /* DEBUG_MUTEX */ + +#ifdef DEBUG_KMEM +static struct ctl_table spl_kmem_table[] = { + { + .ctl_name = CTL_KMEM_KMEMUSED, + .procname = "kmem_used", + .data = &kmem_alloc_used, + .maxlen = sizeof(atomic64_t), + .mode = 0444, + .proc_handler = &proc_doatomic64, + }, + { + .ctl_name = CTL_KMEM_KMEMMAX, + .procname = "kmem_max", + .data = &kmem_alloc_max, + .maxlen = sizeof(unsigned long), + .extra1 = &table_min, + .extra2 = &table_max, + .mode = 0444, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_KMEM_VMEMUSED, + .procname = "vmem_used", + .data = &vmem_alloc_used, + .maxlen = sizeof(atomic64_t), + .mode = 0444, + .proc_handler = &proc_doatomic64, + }, + { + .ctl_name = CTL_KMEM_VMEMMAX, + .procname = "vmem_max", + .data = &vmem_alloc_max, + .maxlen = sizeof(unsigned long), + .extra1 = &table_min, + .extra2 = &table_max, + .mode = 0444, + .proc_handler = &proc_doulongvec_minmax, + }, + {0}, +}; +#endif /* DEBUG_KMEM */ + +#ifdef DEBUG_KSTAT +static struct ctl_table spl_kstat_table[] = { + {0}, +}; +#endif /* DEBUG_KSTAT */ + +static struct ctl_table spl_table[] = { + /* NB No .strategy entries have been provided since + * sysctl(8) prefers to go via /proc for portability. + */ + { + .ctl_name = CTL_VERSION, + .procname = "version", + .data = spl_version, + .maxlen = sizeof(spl_version), + .mode = 0444, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_HOSTID, + .procname = "hostid", + .data = &spl_hostid, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dohostid, + }, + { + .ctl_name = CTL_HW_SERIAL, + .procname = "hw_serial", + .data = hw_serial, + .maxlen = sizeof(hw_serial), + .mode = 0444, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_SPL_DEBUG, + .procname = "debug", + .mode = 0555, + .child = spl_debug_table, + }, +#ifdef DEBUG_MUTEX + { + .ctl_name = CTL_SPL_MUTEX, + .procname = "mutex", + .mode = 0555, + .child = spl_mutex_table, + }, +#endif +#ifdef DEBUG_KMEM + { + .ctl_name = CTL_SPL_KMEM, + .procname = "kmem", + .mode = 0555, + .child = spl_kmem_table, + }, +#endif +#ifdef DEBUG_KSTAT + { + .ctl_name = CTL_SPL_KSTAT, + .procname = "kstat", + .mode = 0555, + .child = spl_kstat_table, + }, +#endif + { 0 }, +}; + +static struct ctl_table spl_dir[] = { + { + .ctl_name = CTL_SPL, + .procname = "spl", + .mode = 0555, + .child = spl_table, + }, + { 0 } +}; + +static struct ctl_table spl_root[] = { + { + .ctl_name = CTL_KERN, + .procname = "kernel", + .mode = 0555, + .child = spl_dir, + }, + { 0 } +}; + +static int +proc_dir_entry_match(int len, const char *name, struct proc_dir_entry *de) +{ + if (de->namelen != len) + return 0; + + return !memcmp(name, de->name, len); +} + +struct proc_dir_entry * +proc_dir_entry_find(struct proc_dir_entry *root, const char *str) +{ + struct proc_dir_entry *de; + + for (de = root->subdir; de; de = de->next) + if (proc_dir_entry_match(strlen(str), str, de)) + return de; + + return NULL; +} + +int +proc_dir_entries(struct proc_dir_entry *root) +{ + struct proc_dir_entry *de; + int i = 0; + + for (de = root->subdir; de; de = de->next) + i++; + + return i; +} + +int +proc_init(void) +{ + int rc = 0; + ENTRY; + +#ifdef CONFIG_SYSCTL + spl_header = spl_register_sysctl_table(spl_root, 0); + if (spl_header == NULL) + RETURN(-EUNATCH); +#endif /* CONFIG_SYSCTL */ + +#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) + proc_spl = proc_mkdir("spl", NULL); + if (proc_spl == NULL) + GOTO(out, rc = -EUNATCH); + 
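The nesting of spl_root -> spl_dir -> spl_table via the .child pointers is what places these knobs under /proc/sys/kernel/spl/. A minimal sketch of the same idiom with hypothetical names; on kernels without HAVE_CTL_UNNUMBERED each entry would also need a numeric .ctl_name, as the #else branch earlier in this file shows:

static int demo_value;

static struct ctl_table demo_table[] = {
	{
		.procname	= "value",
		.data		= &demo_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{0},
};

static struct ctl_table demo_dir[] = {
	{ .procname = "demo", .mode = 0555, .child = demo_table },
	{0},
};

static struct ctl_table demo_root[] = {
	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = demo_dir },
	{0},
};

Registering demo_root, here done through the spl_register_sysctl_table() wrapper (presumably a thin compatibility shim over register_sysctl_table()), exposes /proc/sys/kernel/demo/value.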
+#ifdef DEBUG_MUTEX + proc_spl_mutex = proc_mkdir("mutex", proc_spl); + if (proc_spl_mutex == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_mutex_stats = create_proc_entry("stats_per", 0444, + proc_spl_mutex); + if (proc_spl_mutex_stats == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_mutex_stats->proc_fops = &proc_mutex_operations; +#endif /* DEBUG_MUTEX */ + +#ifdef DEBUG_KMEM + proc_spl_kmem = proc_mkdir("kmem", proc_spl); + if (proc_spl_kmem == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_kmem_slab = create_proc_entry("slab", 0444, proc_spl_kmem); + if (proc_spl_kmem_slab == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_kmem_slab->proc_fops = &proc_slab_operations; +#endif /* DEBUG_KMEM */ + +#ifdef DEBUG_KSTAT + proc_spl_kstat = proc_mkdir("kstat", proc_spl); + if (proc_spl_kstat == NULL) + GOTO(out, rc = -EUNATCH); +#endif /* DEBUG_KSTAT */ + +out: + if (rc) { + remove_proc_entry("kstat", proc_spl); +#ifdef DEBUG_KMEM + remove_proc_entry("slab", proc_spl_kmem); +#endif + remove_proc_entry("kmem", proc_spl); +#ifdef DEBUG_MUTEX + remove_proc_entry("stats_per", proc_spl_mutex); +#endif + remove_proc_entry("mutex", proc_spl); + remove_proc_entry("spl", NULL); +#ifdef CONFIG_SYSCTL + spl_unregister_sysctl_table(spl_header); +#endif /* CONFIG_SYSCTL */ + } +#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ + + RETURN(rc); +} + +void +proc_fini(void) +{ + ENTRY; + +#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) + remove_proc_entry("kstat", proc_spl); +#ifdef DEBUG_KMEM + remove_proc_entry("slab", proc_spl_kmem); +#endif + remove_proc_entry("kmem", proc_spl); +#ifdef DEBUG_MUTEX + remove_proc_entry("stats_per", proc_spl_mutex); +#endif + remove_proc_entry("mutex", proc_spl); + remove_proc_entry("spl", NULL); +#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ + +#ifdef CONFIG_SYSCTL + ASSERT(spl_header != NULL); + spl_unregister_sysctl_table(spl_header); +#endif /* CONFIG_SYSCTL */ + + EXIT; +} diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c new file mode 100644 index 000000000..07fc2aae4 --- /dev/null +++ b/module/spl/spl-rwlock.c @@ -0,0 +1,361 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/rwlock.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_RWLOCK + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +struct rwsem_waiter { + struct list_head list; + struct task_struct *task; + unsigned int flags; +#define RWSEM_WAITING_FOR_READ 0x00000001 +#define RWSEM_WAITING_FOR_WRITE 0x00000002 +}; +/* wake a single writer */ +static struct rw_semaphore * +__rwsem_wake_one_writer_locked(struct rw_semaphore *sem) +{ + struct rwsem_waiter *waiter; + struct task_struct *tsk; + + sem->activity = -1; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + list_del(&waiter->list); + + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + return sem; +} + +/* release a read lock on the semaphore */ +static void +__up_read_locked(struct rw_semaphore *sem) +{ + if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + (void)__rwsem_wake_one_writer_locked(sem); +} + +/* trylock for writing -- returns 1 if successful, 0 if contention */ +static int +__down_write_trylock_locked(struct rw_semaphore *sem) +{ + int ret = 0; + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity = -1; + ret = 1; + } + + return ret; +} +#endif + +void +__rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg) +{ + int flags = KM_SLEEP; + + ASSERT(rwlp); + ASSERT(name); + ASSERT(type == RW_DEFAULT); /* XXX no irq handler use */ + ASSERT(arg == NULL); /* XXX no irq handler use */ + + rwlp->rw_magic = RW_MAGIC; + rwlp->rw_owner = NULL; + rwlp->rw_name = NULL; + rwlp->rw_name_size = strlen(name) + 1; + + /* We may be called when there is a non-zero preempt_count or + * interrupts are disabled is which case we must not sleep. + */ + if (current_thread_info()->preempt_count || irqs_disabled()) + flags = KM_NOSLEEP; + + rwlp->rw_name = kmem_alloc(rwlp->rw_name_size, flags); + if (rwlp->rw_name == NULL) + return; + + init_rwsem(&rwlp->rw_sem); + strcpy(rwlp->rw_name, name); +} +EXPORT_SYMBOL(__rw_init); + +void +__rw_destroy(krwlock_t *rwlp) +{ + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + ASSERT(rwlp->rw_owner == NULL); + spin_lock(&rwlp->rw_sem.wait_lock); + ASSERT(list_empty(&rwlp->rw_sem.wait_list)); + spin_unlock(&rwlp->rw_sem.wait_lock); + + kmem_free(rwlp->rw_name, rwlp->rw_name_size); + + memset(rwlp, RW_POISON, sizeof(krwlock_t)); +} +EXPORT_SYMBOL(__rw_destroy); + +/* Return 0 if the lock could not be obtained without blocking. */ +int +__rw_tryenter(krwlock_t *rwlp, krw_t rw) +{ + int rc = 0; + ENTRY; + + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + switch (rw) { + /* these functions return 1 if success, 0 if contention */ + case RW_READER: + /* Here the Solaris code would return 0 + * if there were any write waiters. Specifically + * thinking about the case where readers may have + * the lock and we would also allow this thread + * to grab the read lock with a writer waiting in the + * queue. This doesn't seem like a correctness + * issue, so just call down_read_trylock() + * for the test. 
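In other words, unlike Solaris, a read tryenter here can succeed even when a writer is queued; only contention on the rwsem itself makes it fail. From the caller's side the interface looks like the following hedged sketch, assuming the usual sys/rwlock.h macros wrap the double-underscore functions (the lock and critical-section contents are hypothetical):

krwlock_t demo_rw;

rw_init(&demo_rw, "demo_rw", RW_DEFAULT, NULL);

if (rw_tryenter(&demo_rw, RW_READER)) {		/* non-zero on success */
	/* ... read-side critical section ... */
	rw_exit(&demo_rw);
}

rw_enter(&demo_rw, RW_WRITER);			/* blocks; records rw_owner */
rw_exit(&demo_rw);				/* rw_owner check selects up_write() */

rw_destroy(&demo_rw);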
We may have to revisit this if + * it becomes an issue */ + rc = down_read_trylock(&rwlp->rw_sem); + break; + case RW_WRITER: + rc = down_write_trylock(&rwlp->rw_sem); + if (rc) { + /* there better not be anyone else + * holding the write lock here */ + ASSERT(rwlp->rw_owner == NULL); + rwlp->rw_owner = current; + } + break; + default: + SBUG(); + } + + RETURN(rc); +} +EXPORT_SYMBOL(__rw_tryenter); + +void +__rw_enter(krwlock_t *rwlp, krw_t rw) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + switch (rw) { + case RW_READER: + /* Here the Solaris code would block + * if there were any write waiters. Specifically + * thinking about the case where readers may have + * the lock and we would also allow this thread + * to grab the read lock with a writer waiting in the + * queue. This doesn't seem like a correctness + * issue, so just call down_read() + * for the test. We may have to revisit this if + * it becomes an issue */ + down_read(&rwlp->rw_sem); + break; + case RW_WRITER: + down_write(&rwlp->rw_sem); + + /* there better not be anyone else + * holding the write lock here */ + ASSERT(rwlp->rw_owner == NULL); + rwlp->rw_owner = current; + break; + default: + SBUG(); + } + EXIT; +} +EXPORT_SYMBOL(__rw_enter); + +void +__rw_exit(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + /* rw_owner is held by current + * thread iff it is a writer */ + if (rwlp->rw_owner == current) { + rwlp->rw_owner = NULL; + up_write(&rwlp->rw_sem); + } else { + up_read(&rwlp->rw_sem); + } + EXIT; +} +EXPORT_SYMBOL(__rw_exit); + +void +__rw_downgrade(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + ASSERT(rwlp->rw_owner == current); + + rwlp->rw_owner = NULL; + downgrade_write(&rwlp->rw_sem); + EXIT; +} +EXPORT_SYMBOL(__rw_downgrade); + +/* Return 0 if unable to perform the upgrade. + * Might be wise to fix the caller + * to acquire the write lock first? + */ +int +__rw_tryupgrade(krwlock_t *rwlp) +{ + int rc = 0; + ENTRY; + + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + spin_lock(&rwlp->rw_sem.wait_lock); + + /* Check if there is anyone waiting for the + * lock. If there is, then we know we should + * not try to upgrade the lock */ + if (!list_empty(&rwlp->rw_sem.wait_list)) { + spin_unlock(&rwlp->rw_sem.wait_lock); + RETURN(0); + } +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + /* Note that activity is protected by + * the wait_lock. Don't try to upgrade + * if there are multiple readers currently + * holding the lock */ + if (rwlp->rw_sem.activity > 1) { +#else + /* Don't try to upgrade + * if there are multiple readers currently + * holding the lock */ + if ((rwlp->rw_sem.count & RWSEM_ACTIVE_MASK) > 1) { +#endif + spin_unlock(&rwlp->rw_sem.wait_lock); + RETURN(0); + } + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + /* Here it should be safe to drop the + * read lock and reacquire it for writing since + * we know there are no waiters */ + __up_read_locked(&rwlp->rw_sem); + + /* returns 1 if success, 0 if contention */ + rc = __down_write_trylock_locked(&rwlp->rw_sem); +#else + /* Here it should be safe to drop the + * read lock and reacquire it for writing since + * we know there are no waiters */ + up_read(&rwlp->rw_sem); + + /* returns 1 if success, 0 if contention */ + rc = down_write_trylock(&rwlp->rw_sem); +#endif + + /* Check if upgrade failed. 
Should not ever happen + * if we got to this point */ + ASSERT(rc); + ASSERT(rwlp->rw_owner == NULL); + rwlp->rw_owner = current; + spin_unlock(&rwlp->rw_sem.wait_lock); + + RETURN(1); +} +EXPORT_SYMBOL(__rw_tryupgrade); + +kthread_t * +__rw_owner(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + RETURN(rwlp->rw_owner); +} +EXPORT_SYMBOL(__rw_owner); + +int +__rw_read_held(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + RETURN(__rw_lock_held(rwlp) && rwlp->rw_owner == NULL); +} +EXPORT_SYMBOL(__rw_read_held); + +int +__rw_write_held(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + RETURN(rwlp->rw_owner == current); +} +EXPORT_SYMBOL(__rw_write_held); + +int +__rw_lock_held(krwlock_t *rwlp) +{ + int rc = 0; + ENTRY; + + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + spin_lock_irq(&(rwlp->rw_sem.wait_lock)); +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + if (rwlp->rw_sem.activity != 0) { +#else + if (rwlp->rw_sem.count != 0) { +#endif + rc = 1; + } + + spin_unlock_irq(&(rwlp->rw_sem.wait_lock)); + + RETURN(rc); +} +EXPORT_SYMBOL(__rw_lock_held); diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c new file mode 100644 index 000000000..799b54839 --- /dev/null +++ b/module/spl/spl-taskq.c @@ -0,0 +1,491 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/taskq.h> +#include <sys/kmem.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_TASKQ + +/* Global system-wide dynamic task queue available for all consumers */ +taskq_t *system_taskq; +EXPORT_SYMBOL(system_taskq); + +typedef struct spl_task { + spinlock_t t_lock; + struct list_head t_list; + taskqid_t t_id; + task_func_t *t_func; + void *t_arg; +} spl_task_t; + +/* NOTE: Must be called with tq->tq_lock held, returns a list_t which + * is not attached to the free, work, or pending taskq lists. 
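A dispatched task therefore cycles through exactly three lists: tq_free_list (cached, idle), tq_pend_list (queued, waiting for a worker), and tq_work_list (currently executing). A condensed restatement of the dispatch side implemented in __taskq_dispatch() further down, with error handling omitted (everything shown runs under tq->tq_lock):

t = task_alloc(tq, TQ_SLEEP);			/* reuse from tq_free_list or kmem_alloc() */
t->t_id = tq->tq_next_id++;			/* monotonically increasing id */
t->t_func = func;
t->t_arg = arg;
list_add_tail(&t->t_list, &tq->tq_pend_list);
wake_up(&tq->tq_work_waitq);			/* a worker moves it to tq_work_list, runs
						 * t_func(t_arg), then task_done() parks it
						 * back on tq_free_list */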
+ */ +static spl_task_t * +task_alloc(taskq_t *tq, uint_t flags) +{ + spl_task_t *t; + int count = 0; + ENTRY; + + ASSERT(tq); + ASSERT(flags & (TQ_SLEEP | TQ_NOSLEEP)); /* One set */ + ASSERT(!((flags & TQ_SLEEP) && (flags & TQ_NOSLEEP))); /* Not both */ + ASSERT(spin_is_locked(&tq->tq_lock)); +retry: + /* Aquire spl_task_t's from free list if available */ + if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) { + t = list_entry(tq->tq_free_list.next, spl_task_t, t_list); + list_del_init(&t->t_list); + RETURN(t); + } + + /* Free list is empty and memory allocs are prohibited */ + if (flags & TQ_NOALLOC) + RETURN(NULL); + + /* Hit maximum spl_task_t pool size */ + if (tq->tq_nalloc >= tq->tq_maxalloc) { + if (flags & TQ_NOSLEEP) + RETURN(NULL); + + /* Sleep periodically polling the free list for an available + * spl_task_t. If a full second passes and we have not found + * one gives up and return a NULL to the caller. */ + if (flags & TQ_SLEEP) { + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + schedule_timeout(HZ / 100); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + if (count < 100) + GOTO(retry, count++); + + RETURN(NULL); + } + + /* Unreachable, TQ_SLEEP xor TQ_NOSLEEP */ + SBUG(); + } + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + t = kmem_alloc(sizeof(spl_task_t), flags & (TQ_SLEEP | TQ_NOSLEEP)); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + if (t) { + spin_lock_init(&t->t_lock); + INIT_LIST_HEAD(&t->t_list); + t->t_id = 0; + t->t_func = NULL; + t->t_arg = NULL; + tq->tq_nalloc++; + } + + RETURN(t); +} + +/* NOTE: Must be called with tq->tq_lock held, expectes the spl_task_t + * to already be removed from the free, work, or pending taskq lists. + */ +static void +task_free(taskq_t *tq, spl_task_t *t) +{ + ENTRY; + + ASSERT(tq); + ASSERT(t); + ASSERT(spin_is_locked(&tq->tq_lock)); + ASSERT(list_empty(&t->t_list)); + + kmem_free(t, sizeof(spl_task_t)); + tq->tq_nalloc--; + + EXIT; +} + +/* NOTE: Must be called with tq->tq_lock held, either destroyes the + * spl_task_t if too many exist or moves it to the free list for later use. + */ +static void +task_done(taskq_t *tq, spl_task_t *t) +{ + ENTRY; + ASSERT(tq); + ASSERT(t); + ASSERT(spin_is_locked(&tq->tq_lock)); + + list_del_init(&t->t_list); + + if (tq->tq_nalloc <= tq->tq_minalloc) { + t->t_id = 0; + t->t_func = NULL; + t->t_arg = NULL; + list_add_tail(&t->t_list, &tq->tq_free_list); + } else { + task_free(tq, t); + } + + EXIT; +} + +/* Taskqid's are handed out in a monotonically increasing fashion per + * taskq_t. We don't handle taskqid wrapping yet, but fortuntely it isi + * a 64-bit value so this is probably never going to happen. The lowest + * pending taskqid is stored in the taskq_t to make it easy for any + * taskq_wait()'ers to know if the tasks they're waiting for have + * completed. Unfortunately, tq_task_lowest is kept up to date is + * a pretty brain dead way, something more clever should be done. + */ +static int +taskq_wait_check(taskq_t *tq, taskqid_t id) +{ + RETURN(tq->tq_lowest_id >= id); +} + +/* Expected to wait for all previously scheduled tasks to complete. We do + * not need to wait for tasked scheduled after this call to complete. In + * otherwords we do not need to drain the entire taskq. 
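From a consumer's point of view that contract looks like the following hedged sketch, assuming the sys/taskq.h wrappers map taskq_dispatch() and taskq_wait_id() onto the double-underscore functions below (my_func and my_arg are hypothetical):

taskqid_t id;

id = taskq_dispatch(tq, my_func, my_arg, TQ_SLEEP);
if (id != 0)					/* 0 means the dispatch failed */
	taskq_wait_id(tq, id);			/* returns once tq_lowest_id passes id,
						 * i.e. this task and all earlier ones ran */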
*/ +void +__taskq_wait_id(taskq_t *tq, taskqid_t id) +{ + ENTRY; + ASSERT(tq); + + wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id)); + + EXIT; +} +EXPORT_SYMBOL(__taskq_wait_id); + +void +__taskq_wait(taskq_t *tq) +{ + taskqid_t id; + ENTRY; + ASSERT(tq); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + id = tq->tq_next_id; + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + __taskq_wait_id(tq, id); + + EXIT; + +} +EXPORT_SYMBOL(__taskq_wait); + +int +__taskq_member(taskq_t *tq, void *t) +{ + int i; + ENTRY; + + ASSERT(tq); + ASSERT(t); + + for (i = 0; i < tq->tq_nthreads; i++) + if (tq->tq_threads[i] == (struct task_struct *)t) + RETURN(1); + + RETURN(0); +} +EXPORT_SYMBOL(__taskq_member); + +taskqid_t +__taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) +{ + spl_task_t *t; + taskqid_t rc = 0; + ENTRY; + + ASSERT(tq); + ASSERT(func); + if (unlikely(in_atomic() && (flags & TQ_SLEEP))) { + CERROR("May schedule while atomic: %s/0x%08x/%d\n", + current->comm, preempt_count(), current->pid); + SBUG(); + } + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + /* Taskq being destroyed and all tasks drained */ + if (!(tq->tq_flags & TQ_ACTIVE)) + GOTO(out, rc = 0); + + /* Do not queue the task unless there is idle thread for it */ + ASSERT(tq->tq_nactive <= tq->tq_nthreads); + if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) + GOTO(out, rc = 0); + + if ((t = task_alloc(tq, flags)) == NULL) + GOTO(out, rc = 0); + + spin_lock(&t->t_lock); + list_add_tail(&t->t_list, &tq->tq_pend_list); + t->t_id = rc = tq->tq_next_id; + tq->tq_next_id++; + t->t_func = func; + t->t_arg = arg; + spin_unlock(&t->t_lock); + + wake_up(&tq->tq_work_waitq); +out: + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + RETURN(rc); +} +EXPORT_SYMBOL(__taskq_dispatch); + +/* NOTE: Must be called with tq->tq_lock held */ +static taskqid_t +taskq_lowest_id(taskq_t *tq) +{ + taskqid_t lowest_id = ~0; + spl_task_t *t; + ENTRY; + + ASSERT(tq); + ASSERT(spin_is_locked(&tq->tq_lock)); + + list_for_each_entry(t, &tq->tq_pend_list, t_list) + if (t->t_id < lowest_id) + lowest_id = t->t_id; + + list_for_each_entry(t, &tq->tq_work_list, t_list) + if (t->t_id < lowest_id) + lowest_id = t->t_id; + + RETURN(lowest_id); +} + +static int +taskq_thread(void *args) +{ + DECLARE_WAITQUEUE(wait, current); + sigset_t blocked; + taskqid_t id; + taskq_t *tq = args; + spl_task_t *t; + ENTRY; + + ASSERT(tq); + current->flags |= PF_NOFREEZE; + + sigfillset(&blocked); + sigprocmask(SIG_BLOCK, &blocked, NULL); + flush_signals(current); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_nthreads++; + wake_up(&tq->tq_wait_waitq); + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { + + add_wait_queue(&tq->tq_work_waitq, &wait); + if (list_empty(&tq->tq_pend_list)) { + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + schedule(); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + } else { + __set_current_state(TASK_RUNNING); + } + + remove_wait_queue(&tq->tq_work_waitq, &wait); + if (!list_empty(&tq->tq_pend_list)) { + t = list_entry(tq->tq_pend_list.next, spl_task_t, t_list); + list_del_init(&t->t_list); + list_add_tail(&t->t_list, &tq->tq_work_list); + tq->tq_nactive++; + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + /* Perform the requested task */ + t->t_func(t->t_arg); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_nactive--; + id = t->t_id; + task_done(tq, t); + + /* Update the lowest 
remaining taskqid yet to run */ + if (tq->tq_lowest_id == id) { + tq->tq_lowest_id = taskq_lowest_id(tq); + ASSERT(tq->tq_lowest_id > id); + } + + wake_up_all(&tq->tq_wait_waitq); + } + + set_current_state(TASK_INTERRUPTIBLE); + + } + + __set_current_state(TASK_RUNNING); + tq->tq_nthreads--; + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + RETURN(0); +} + +taskq_t * +__taskq_create(const char *name, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + taskq_t *tq; + struct task_struct *t; + int rc = 0, i, j = 0; + ENTRY; + + ASSERT(name != NULL); + ASSERT(pri <= maxclsyspri); + ASSERT(minalloc >= 0); + ASSERT(maxalloc <= INT_MAX); + ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */ + + tq = kmem_alloc(sizeof(*tq), KM_SLEEP); + if (tq == NULL) + RETURN(NULL); + + tq->tq_threads = kmem_alloc(nthreads * sizeof(t), KM_SLEEP); + if (tq->tq_threads == NULL) { + kmem_free(tq, sizeof(*tq)); + RETURN(NULL); + } + + spin_lock_init(&tq->tq_lock); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_name = name; + tq->tq_nactive = 0; + tq->tq_nthreads = 0; + tq->tq_pri = pri; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_nalloc = 0; + tq->tq_flags = (flags | TQ_ACTIVE); + tq->tq_next_id = 1; + tq->tq_lowest_id = 1; + INIT_LIST_HEAD(&tq->tq_free_list); + INIT_LIST_HEAD(&tq->tq_work_list); + INIT_LIST_HEAD(&tq->tq_pend_list); + init_waitqueue_head(&tq->tq_work_waitq); + init_waitqueue_head(&tq->tq_wait_waitq); + + if (flags & TASKQ_PREPOPULATE) + for (i = 0; i < minalloc; i++) + task_done(tq, task_alloc(tq, TQ_SLEEP | TQ_NEW)); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + for (i = 0; i < nthreads; i++) { + t = kthread_create(taskq_thread, tq, "%s/%d", name, i); + if (t) { + tq->tq_threads[i] = t; + kthread_bind(t, i % num_online_cpus()); + set_user_nice(t, PRIO_TO_NICE(pri)); + wake_up_process(t); + j++; + } else { + tq->tq_threads[i] = NULL; + rc = 1; + } + } + + /* Wait for all threads to be started before potential destroy */ + wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j); + + if (rc) { + __taskq_destroy(tq); + tq = NULL; + } + + RETURN(tq); +} +EXPORT_SYMBOL(__taskq_create); + +void +__taskq_destroy(taskq_t *tq) +{ + spl_task_t *t; + int i, nthreads; + ENTRY; + + ASSERT(tq); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_flags &= ~TQ_ACTIVE; + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + /* TQ_ACTIVE cleared prevents new tasks being added to pending */ + __taskq_wait(tq); + + nthreads = tq->tq_nthreads; + for (i = 0; i < nthreads; i++) + if (tq->tq_threads[i]) + kthread_stop(tq->tq_threads[i]); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + while (!list_empty(&tq->tq_free_list)) { + t = list_entry(tq->tq_free_list.next, spl_task_t, t_list); + list_del_init(&t->t_list); + task_free(tq, t); + } + + ASSERT(tq->tq_nthreads == 0); + ASSERT(tq->tq_nalloc == 0); + ASSERT(list_empty(&tq->tq_free_list)); + ASSERT(list_empty(&tq->tq_work_list)); + ASSERT(list_empty(&tq->tq_pend_list)); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + kmem_free(tq->tq_threads, nthreads * sizeof(spl_task_t *)); + kmem_free(tq, sizeof(taskq_t)); + + EXIT; +} +EXPORT_SYMBOL(__taskq_destroy); + +int +spl_taskq_init(void) +{ + ENTRY; + + system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, + TASKQ_PREPOPULATE); + if (system_taskq == NULL) + RETURN(1); + + RETURN(0); +} + +void +spl_taskq_fini(void) +{ + ENTRY; + taskq_destroy(system_taskq); + 
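A consumer creating its own queue follows the same pattern as spl_taskq_init() above; a hedged sketch in which the queue name, thread count, alloc limits, and the dispatched function are all illustrative:

taskq_t *tq;

tq = taskq_create("demo_taskq", 4, minclsyspri, 4, 32, TASKQ_PREPOPULATE);
if (tq == NULL)
	return (1);

(void)taskq_dispatch(tq, my_func, my_arg, TQ_SLEEP);
taskq_wait(tq);			/* drain everything dispatched so far */
taskq_destroy(tq);		/* stops the worker threads, frees cached tasks */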
EXIT; +} diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c new file mode 100644 index 000000000..953c5ce7f --- /dev/null +++ b/module/spl/spl-thread.c @@ -0,0 +1,135 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/thread.h> +#include <sys/kmem.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_THREAD + +/* + * Thread interfaces + */ +typedef struct thread_priv_s { + unsigned long tp_magic; /* Magic */ + int tp_name_size; /* Name size */ + char *tp_name; /* Name (without _thread suffix) */ + void (*tp_func)(void *); /* Registered function */ + void *tp_args; /* Args to be passed to function */ + size_t tp_len; /* Len to be passed to function */ + int tp_state; /* State to start thread at */ + pri_t tp_pri; /* Priority to start threat at */ +} thread_priv_t; + +static int +thread_generic_wrapper(void *arg) +{ + thread_priv_t *tp = (thread_priv_t *)arg; + void (*func)(void *); + void *args; + + ASSERT(tp->tp_magic == TP_MAGIC); + func = tp->tp_func; + args = tp->tp_args; + set_current_state(tp->tp_state); + set_user_nice((kthread_t *)get_current(), PRIO_TO_NICE(tp->tp_pri)); + kmem_free(tp->tp_name, tp->tp_name_size); + kmem_free(tp, sizeof(thread_priv_t)); + + if (func) + func(args); + + return 0; +} + +void +__thread_exit(void) +{ + ENTRY; + EXIT; + complete_and_exit(NULL, 0); + /* Unreachable */ +} +EXPORT_SYMBOL(__thread_exit); + +/* thread_create() may block forever if it cannot create a thread or + * allocate memory. This is preferable to returning a NULL which Solaris + * style callers likely never check for... since it can't fail. */ +kthread_t * +__thread_create(caddr_t stk, size_t stksize, thread_func_t func, + const char *name, void *args, size_t len, int *pp, + int state, pri_t pri) +{ + thread_priv_t *tp; + struct task_struct *tsk; + char *p; + ENTRY; + + /* Option pp is simply ignored */ + /* Variable stack size unsupported */ + ASSERT(stk == NULL); + + tp = kmem_alloc(sizeof(thread_priv_t), KM_SLEEP); + if (tp == NULL) + RETURN(NULL); + + tp->tp_magic = TP_MAGIC; + tp->tp_name_size = strlen(name) + 1; + + tp->tp_name = kmem_alloc(tp->tp_name_size, KM_SLEEP); + if (tp->tp_name == NULL) { + kmem_free(tp, sizeof(thread_priv_t)); + RETURN(NULL); + } + + strncpy(tp->tp_name, name, tp->tp_name_size); + + /* Strip trailing "_thread" from passed name which will be the func + * name since the exposed API has no parameter for passing a name. 
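Presumably the sys/thread.h wrapper stringifies the function pointer to supply that name, which is why the "_thread" suffix gets stripped here. A hedged consumer sketch (the worker function is hypothetical, TS_RUN and minclsyspri are assumed to come from the SPL headers per the usual Solaris convention, and the stack and proc arguments are ignored by this implementation):

static void
demo_worker_thread(void *arg)
{
	/* ... long-running work ... */
	thread_exit();		/* terminates the kthread; never returns */
}

kthread_t *t;

t = thread_create(NULL, 0, demo_worker_thread, arg, 0, NULL, TS_RUN, minclsyspri);
if (t == NULL)
	return;			/* only if kthread_create() itself failed */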
+ */ + p = strstr(tp->tp_name, "_thread"); + if (p) + p[0] = '\0'; + + tp->tp_func = func; + tp->tp_args = args; + tp->tp_len = len; + tp->tp_state = state; + tp->tp_pri = pri; + + tsk = kthread_create(thread_generic_wrapper, (void *)tp, tp->tp_name); + if (IS_ERR(tsk)) { + CERROR("Failed to create thread: %ld\n", PTR_ERR(tsk)); + RETURN(NULL); + } + + wake_up_process(tsk); + RETURN((kthread_t *)tsk); +} +EXPORT_SYMBOL(__thread_create); diff --git a/module/spl/spl-time.c b/module/spl/spl-time.c new file mode 100644 index 000000000..88722afe1 --- /dev/null +++ b/module/spl/spl-time.c @@ -0,0 +1,92 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/sysmacros.h> +#include <sys/time.h> + +#ifdef HAVE_MONOTONIC_CLOCK +extern unsigned long long monotonic_clock(void); +#endif + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_TIME + +void +__gethrestime(timestruc_t *ts) +{ + struct timeval tv; + + do_gettimeofday(&tv); + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; +} +EXPORT_SYMBOL(__gethrestime); + +/* Use monotonic_clock() by default. It's faster and is available on older + * kernels, but few architectures have them, so we must fallback to + * do_posix_clock_monotonic_gettime(). + */ +hrtime_t +__gethrtime(void) { +#ifdef HAVE_MONOTONIC_CLOCK + unsigned long long res = monotonic_clock(); + + /* Deal with signed/unsigned mismatch */ + return (hrtime_t)(res & ~(1ULL << 63)); +#else + int64_t j = get_jiffies_64(); + + return j * NSEC_PER_SEC / HZ; +#endif +} +EXPORT_SYMBOL(__gethrtime); + +/* set_normalized_timespec() API changes + * 2.6.0 - 2.6.15: Inline function provided by linux/time.h + * 2.6.16 - 2.6.25: Function prototype defined but not exported + * 2.6.26 - 2.6.x: Function defined and exported + */ +#if !defined(HAVE_SET_NORMALIZED_TIMESPEC_INLINE) && \ + !defined(HAVE_SET_NORMALIZED_TIMESPEC_EXPORT) +void +set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} +EXPORT_SYMBOL(set_normalized_timespec); +#endif diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c new file mode 100644 index 000000000..b19d9f1de --- /dev/null +++ b/module/spl/spl-vnode.c @@ -0,0 +1,678 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/sysmacros.h> +#include <sys/vnode.h> + + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_VNODE + +void *rootdir = NULL; +EXPORT_SYMBOL(rootdir); + +static spl_kmem_cache_t *vn_cache; +static spl_kmem_cache_t *vn_file_cache; + +static spinlock_t vn_file_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(vn_file_list); + +static vtype_t +vn_get_sol_type(umode_t mode) +{ + if (S_ISREG(mode)) + return VREG; + + if (S_ISDIR(mode)) + return VDIR; + + if (S_ISCHR(mode)) + return VCHR; + + if (S_ISBLK(mode)) + return VBLK; + + if (S_ISFIFO(mode)) + return VFIFO; + + if (S_ISLNK(mode)) + return VLNK; + + if (S_ISSOCK(mode)) + return VSOCK; + + if (S_ISCHR(mode)) + return VCHR; + + return VNON; +} /* vn_get_sol_type() */ + +vnode_t * +vn_alloc(int flag) +{ + vnode_t *vp; + ENTRY; + + vp = kmem_cache_alloc(vn_cache, flag); + if (vp != NULL) { + vp->v_file = NULL; + vp->v_type = 0; + } + + RETURN(vp); +} /* vn_alloc() */ +EXPORT_SYMBOL(vn_alloc); + +void +vn_free(vnode_t *vp) +{ + ENTRY; + kmem_cache_free(vn_cache, vp); + EXIT; +} /* vn_free() */ +EXPORT_SYMBOL(vn_free); + +int +vn_open(const char *path, uio_seg_t seg, int flags, int mode, + vnode_t **vpp, int x1, void *x2) +{ + struct file *fp; + struct kstat stat; + int rc, saved_umask = 0; + vnode_t *vp; + ENTRY; + + ASSERT(flags & (FWRITE | FREAD)); + ASSERT(seg == UIO_SYSSPACE); + ASSERT(vpp); + *vpp = NULL; + + if (!(flags & FCREAT) && (flags & FWRITE)) + flags |= FEXCL; + + /* Note for filp_open() the two low bits must be remapped to mean: + * 01 - read-only -> 00 read-only + * 10 - write-only -> 01 write-only + * 11 - read-write -> 10 read-write + */ + flags--; + + if (flags & FCREAT) + saved_umask = xchg(¤t->fs->umask, 0); + + fp = filp_open(path, flags, mode); + + if (flags & FCREAT) + (void)xchg(¤t->fs->umask, saved_umask); + + if (IS_ERR(fp)) + RETURN(-PTR_ERR(fp)); + + rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat); + if (rc) { + filp_close(fp, 0); + RETURN(-rc); + } + + vp = vn_alloc(KM_SLEEP); + if (!vp) { + filp_close(fp, 0); + RETURN(ENOMEM); + } + + mutex_enter(&vp->v_lock); + vp->v_type = vn_get_sol_type(stat.mode); + vp->v_file = fp; + *vpp = vp; + mutex_exit(&vp->v_lock); + + RETURN(0); +} /* vn_open() */ +EXPORT_SYMBOL(vn_open); + +int +vn_openat(const char *path, uio_seg_t seg, int flags, int mode, + vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd) +{ + char *realpath; + int len, rc; + ENTRY; + + ASSERT(vp == rootdir); + + len = strlen(path) + 2; + realpath = kmalloc(len, GFP_KERNEL); + if (!realpath) + RETURN(ENOMEM); + + (void)snprintf(realpath, len, "/%s", 
path); + rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2); + kfree(realpath); + + RETURN(rc); +} /* vn_openat() */ +EXPORT_SYMBOL(vn_openat); + +int +vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off, + uio_seg_t seg, int x1, rlim64_t x2, void *x3, ssize_t *residp) +{ + loff_t offset; + mm_segment_t saved_fs; + struct file *fp; + int rc; + ENTRY; + + ASSERT(uio == UIO_WRITE || uio == UIO_READ); + ASSERT(vp); + ASSERT(vp->v_file); + ASSERT(seg == UIO_SYSSPACE); + ASSERT(x1 == 0); + ASSERT(x2 == RLIM64_INFINITY); + + offset = off; + fp = vp->v_file; + + /* Writable user data segment must be briefly increased for this + * process so we can use the user space read call paths to write + * in to memory allocated by the kernel. */ + saved_fs = get_fs(); + set_fs(get_ds()); + + if (uio & UIO_WRITE) + rc = vfs_write(fp, addr, len, &offset); + else + rc = vfs_read(fp, addr, len, &offset); + + set_fs(saved_fs); + + if (rc < 0) + RETURN(-rc); + + if (residp) { + *residp = len - rc; + } else { + if (rc != len) + RETURN(EIO); + } + + RETURN(0); +} /* vn_rdwr() */ +EXPORT_SYMBOL(vn_rdwr); + +int +vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4) +{ + int rc; + ENTRY; + + ASSERT(vp); + ASSERT(vp->v_file); + + rc = filp_close(vp->v_file, 0); + vn_free(vp); + + RETURN(-rc); +} /* vn_close() */ +EXPORT_SYMBOL(vn_close); + +/* vn_seek() does not actually seek it only performs bounds checking on the + * proposed seek. We perform minimal checking and allow vn_rdwr() to catch + * anything more serious. */ +int +vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) +{ + return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); +} +EXPORT_SYMBOL(vn_seek); + +static struct dentry * +vn_lookup_hash(struct nameidata *nd) +{ + return lookup_one_len(nd->last.name, nd->nd_dentry, nd->last.len); +} /* lookup_hash() */ + +static void +vn_path_release(struct nameidata *nd) +{ + dput(nd->nd_dentry); + mntput(nd->nd_mnt); +} + +/* Modified do_unlinkat() from linux/fs/namei.c, only uses exported symbols */ +int +vn_remove(const char *path, uio_seg_t seg, int flags) +{ + struct dentry *dentry; + struct nameidata nd; + struct inode *inode = NULL; + int rc = 0; + ENTRY; + + ASSERT(seg == UIO_SYSSPACE); + ASSERT(flags == RMFILE); + + rc = path_lookup(path, LOOKUP_PARENT, &nd); + if (rc) + GOTO(exit, rc); + + rc = -EISDIR; + if (nd.last_type != LAST_NORM) + GOTO(exit1, rc); + +#ifdef HAVE_INODE_I_MUTEX + mutex_lock_nested(&nd.nd_dentry->d_inode->i_mutex, I_MUTEX_PARENT); +#else + down(&nd.nd_dentry->d_inode->i_sem); +#endif + dentry = vn_lookup_hash(&nd); + rc = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + /* Why not before? Because we want correct rc value */ + if (nd.last.name[nd.last.len]) + GOTO(slashes, rc); + + inode = dentry->d_inode; + if (inode) + atomic_inc(&inode->i_count); + rc = vfs_unlink(nd.nd_dentry->d_inode, dentry); +exit2: + dput(dentry); + } +#ifdef HAVE_INODE_I_MUTEX + mutex_unlock(&nd.nd_dentry->d_inode->i_mutex); +#else + up(&nd.nd_dentry->d_inode->i_sem); +#endif + if (inode) + iput(inode); /* truncate the inode here */ +exit1: + vn_path_release(&nd); +exit: + RETURN(-rc); + +slashes: + rc = !dentry->d_inode ? -ENOENT : + S_ISDIR(dentry->d_inode->i_mode) ? 
-EISDIR : -ENOTDIR; + GOTO(exit2, rc); +} /* vn_remove() */ +EXPORT_SYMBOL(vn_remove); + +/* Modified do_rename() from linux/fs/namei.c, only uses exported symbols */ +int +vn_rename(const char *oldname, const char *newname, int x1) +{ + struct dentry * old_dir, * new_dir; + struct dentry * old_dentry, *new_dentry; + struct dentry * trap; + struct nameidata oldnd, newnd; + int rc = 0; + ENTRY; + + rc = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + if (rc) + GOTO(exit, rc); + + rc = path_lookup(newname, LOOKUP_PARENT, &newnd); + if (rc) + GOTO(exit1, rc); + + rc = -EXDEV; + if (oldnd.nd_mnt != newnd.nd_mnt) + GOTO(exit2, rc); + + old_dir = oldnd.nd_dentry; + rc = -EBUSY; + if (oldnd.last_type != LAST_NORM) + GOTO(exit2, rc); + + new_dir = newnd.nd_dentry; + if (newnd.last_type != LAST_NORM) + GOTO(exit2, rc); + + trap = lock_rename(new_dir, old_dir); + + old_dentry = vn_lookup_hash(&oldnd); + + rc = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + GOTO(exit3, rc); + + /* source must exist */ + rc = -ENOENT; + if (!old_dentry->d_inode) + GOTO(exit4, rc); + + /* unless the source is a directory trailing slashes give -ENOTDIR */ + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + rc = -ENOTDIR; + if (oldnd.last.name[oldnd.last.len]) + GOTO(exit4, rc); + if (newnd.last.name[newnd.last.len]) + GOTO(exit4, rc); + } + + /* source should not be ancestor of target */ + rc = -EINVAL; + if (old_dentry == trap) + GOTO(exit4, rc); + + new_dentry = vn_lookup_hash(&newnd); + rc = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + GOTO(exit4, rc); + + /* target should not be an ancestor of source */ + rc = -ENOTEMPTY; + if (new_dentry == trap) + GOTO(exit5, rc); + + rc = vfs_rename(old_dir->d_inode, old_dentry, + new_dir->d_inode, new_dentry); +exit5: + dput(new_dentry); +exit4: + dput(old_dentry); +exit3: + unlock_rename(new_dir, old_dir); +exit2: + vn_path_release(&newnd); +exit1: + vn_path_release(&oldnd); +exit: + RETURN(-rc); +} +EXPORT_SYMBOL(vn_rename); + +int +vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4) +{ + struct file *fp; + struct kstat stat; + int rc; + ENTRY; + + ASSERT(vp); + ASSERT(vp->v_file); + ASSERT(vap); + + fp = vp->v_file; + + rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat); + if (rc) + RETURN(-rc); + + vap->va_type = vn_get_sol_type(stat.mode); + vap->va_mode = stat.mode; + vap->va_uid = stat.uid; + vap->va_gid = stat.gid; + vap->va_fsid = 0; + vap->va_nodeid = stat.ino; + vap->va_nlink = stat.nlink; + vap->va_size = stat.size; + vap->va_blocksize = stat.blksize; + vap->va_atime.tv_sec = stat.atime.tv_sec; + vap->va_atime.tv_usec = stat.atime.tv_nsec / NSEC_PER_USEC; + vap->va_mtime.tv_sec = stat.mtime.tv_sec; + vap->va_mtime.tv_usec = stat.mtime.tv_nsec / NSEC_PER_USEC; + vap->va_ctime.tv_sec = stat.ctime.tv_sec; + vap->va_ctime.tv_usec = stat.ctime.tv_nsec / NSEC_PER_USEC; + vap->va_rdev = stat.rdev; + vap->va_blocks = stat.blocks; + + RETURN(0); +} +EXPORT_SYMBOL(vn_getattr); + +int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4) +{ + int datasync = 0; + ENTRY; + + ASSERT(vp); + ASSERT(vp->v_file); + + if (flags & FDSYNC) + datasync = 1; + + RETURN(-file_fsync(vp->v_file, vp->v_file->f_dentry, datasync)); +} /* vn_fsync() */ +EXPORT_SYMBOL(vn_fsync); + +/* Function must be called while holding the vn_file_lock */ +static file_t * +file_find(int fd) +{ + file_t *fp; + + ASSERT(spin_is_locked(&vn_file_lock)); + + list_for_each_entry(fp, &vn_file_list, f_list) { + if (fd == fp->f_fd) { + ASSERT(atomic_read(&fp->f_ref) != 0); + return fp; + } + } + + 
return NULL; +} /* file_find() */ + +file_t * +vn_getf(int fd) +{ + struct kstat stat; + struct file *lfp; + file_t *fp; + vnode_t *vp; + int rc = 0; + ENTRY; + + /* Already open just take an extra reference */ + spin_lock(&vn_file_lock); + + fp = file_find(fd); + if (fp) { + atomic_inc(&fp->f_ref); + spin_unlock(&vn_file_lock); + RETURN(fp); + } + + spin_unlock(&vn_file_lock); + + /* File was not yet opened create the object and setup */ + fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP); + if (fp == NULL) + GOTO(out, rc); + + mutex_enter(&fp->f_lock); + + fp->f_fd = fd; + fp->f_offset = 0; + atomic_inc(&fp->f_ref); + + lfp = fget(fd); + if (lfp == NULL) + GOTO(out_mutex, rc); + + vp = vn_alloc(KM_SLEEP); + if (vp == NULL) + GOTO(out_fget, rc); + + if (vfs_getattr(lfp->f_vfsmnt, lfp->f_dentry, &stat)) + GOTO(out_vnode, rc); + + mutex_enter(&vp->v_lock); + vp->v_type = vn_get_sol_type(stat.mode); + vp->v_file = lfp; + mutex_exit(&vp->v_lock); + + fp->f_vnode = vp; + fp->f_file = lfp; + + /* Put it on the tracking list */ + spin_lock(&vn_file_lock); + list_add(&fp->f_list, &vn_file_list); + spin_unlock(&vn_file_lock); + + mutex_exit(&fp->f_lock); + RETURN(fp); + +out_vnode: + vn_free(vp); +out_fget: + fput(lfp); +out_mutex: + mutex_exit(&fp->f_lock); + kmem_cache_free(vn_file_cache, fp); +out: + RETURN(NULL); +} /* getf() */ +EXPORT_SYMBOL(getf); + +static void releasef_locked(file_t *fp) +{ + ASSERT(fp->f_file); + ASSERT(fp->f_vnode); + + /* Unlinked from list, no refs, safe to free outside mutex */ + fput(fp->f_file); + vn_free(fp->f_vnode); + + kmem_cache_free(vn_file_cache, fp); +} + +void +vn_releasef(int fd) +{ + file_t *fp; + ENTRY; + + spin_lock(&vn_file_lock); + fp = file_find(fd); + if (fp) { + atomic_dec(&fp->f_ref); + if (atomic_read(&fp->f_ref) > 0) { + spin_unlock(&vn_file_lock); + EXIT; + return; + } + + list_del(&fp->f_list); + releasef_locked(fp); + } + spin_unlock(&vn_file_lock); + + EXIT; + return; +} /* releasef() */ +EXPORT_SYMBOL(releasef); + +static int +vn_cache_constructor(void *buf, void *cdrarg, int kmflags) +{ + struct vnode *vp = buf; + + mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL); + + return (0); +} /* vn_cache_constructor() */ + +static void +vn_cache_destructor(void *buf, void *cdrarg) +{ + struct vnode *vp = buf; + + mutex_destroy(&vp->v_lock); +} /* vn_cache_destructor() */ + +static int +vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags) +{ + file_t *fp = buf; + + atomic_set(&fp->f_ref, 0); + mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL); + INIT_LIST_HEAD(&fp->f_list); + + return (0); +} /* file_cache_constructor() */ + +static void +vn_file_cache_destructor(void *buf, void *cdrarg) +{ + file_t *fp = buf; + + mutex_destroy(&fp->f_lock); +} /* vn_file_cache_destructor() */ + +int +vn_init(void) +{ + ENTRY; + vn_cache = kmem_cache_create("spl_vn_cache", + sizeof(struct vnode), 64, + vn_cache_constructor, + vn_cache_destructor, + NULL, NULL, NULL, 0); + + vn_file_cache = kmem_cache_create("spl_vn_file_cache", + sizeof(file_t), 64, + vn_file_cache_constructor, + vn_file_cache_destructor, + NULL, NULL, NULL, 0); + RETURN(0); +} /* vn_init() */ + +void +vn_fini(void) +{ + file_t *fp, *next_fp; + int leaked = 0; + ENTRY; + + spin_lock(&vn_file_lock); + + list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) { + list_del(&fp->f_list); + releasef_locked(fp); + leaked++; + } + + kmem_cache_destroy(vn_file_cache); + vn_file_cache = NULL; + spin_unlock(&vn_file_lock); + + if (leaked > 0) + CWARN("Warning %d files leaked\n", leaked); + + 
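	/* Editor's note, not part of this commit: at this point any file_t
	 * entries still on vn_file_list have been force-released and
	 * vn_file_cache has already been destroyed under vn_file_lock;
	 * only the vnode cache itself remains to be torn down below. */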
kmem_cache_destroy(vn_cache); + + EXIT; + return; +} /* vn_fini() */ diff --git a/module/splat/Makefile.in b/module/splat/Makefile.in new file mode 100644 index 000000000..33b2865c1 --- /dev/null +++ b/module/splat/Makefile.in @@ -0,0 +1,47 @@ +# Makefile.in for splat kernel module + +MODULES := splat +DISTFILES = Makefile.in *.c *.h +EXTRA_CFLAGS = @KERNELCPPFLAGS@ + +# Solaris Porting LAyer Tests +obj-m := splat.o + +splat-objs += splat-ctl.o +splat-objs += splat-kmem.o +splat-objs += splat-taskq.o +splat-objs += splat-random.o +splat-objs += splat-mutex.o +splat-objs += splat-condvar.o +splat-objs += splat-thread.o +splat-objs += splat-rwlock.o +splat-objs += splat-time.o +splat-objs += splat-vnode.o +splat-objs += splat-kobj.o +splat-objs += splat-atomic.o +splat-objs += splat-list.o +splat-objs += splat-generic.o + +splatmodule := splat.ko +splatmoduledir := @kmoduledir@/kernel/lib/ + +install: + mkdir -p $(DESTDIR)$(splatmoduledir) + $(INSTALL) -m 644 $(splatmodule) $(DESTDIR)$(splatmoduledir)/$(splatmodule) + -/sbin/depmod -a + +uninstall: + rm -f $(DESTDIR)$(splatmoduledir)/$(splatmodule) + -/sbin/depmod -a + +clean: + -rm -f $(splmodule) *.o .*.cmd *.mod.c *.ko *.s */*.o + +distclean: clean + rm -f Makefile + rm -rf .tmp_versions + +maintainer-clean: distclean + +distdir: $(DISTFILES) + cp -p $(DISTFILES) $(distdir) diff --git a/module/splat/splat-atomic.c b/module/splat/splat-atomic.c new file mode 100644 index 000000000..cc947d095 --- /dev/null +++ b/module/splat/splat-atomic.c @@ -0,0 +1,226 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_ATOMIC 0x0b00 +#define SPLAT_ATOMIC_NAME "atomic" +#define SPLAT_ATOMIC_DESC "Kernel Atomic Tests" + +#define SPLAT_ATOMIC_TEST1_ID 0x0b01 +#define SPLAT_ATOMIC_TEST1_NAME "64-bit" +#define SPLAT_ATOMIC_TEST1_DESC "Validate 64-bit atomic ops" + +#define SPLAT_ATOMIC_TEST_MAGIC 0x43435454UL +#define SPLAT_ATOMIC_INIT_VALUE 10000000UL + +typedef enum { + SPLAT_ATOMIC_INC_64 = 0, + SPLAT_ATOMIC_DEC_64 = 1, + SPLAT_ATOMIC_ADD_64 = 2, + SPLAT_ATOMIC_SUB_64 = 3, + SPLAT_ATOMIC_ADD_64_NV = 4, + SPLAT_ATOMIC_SUB_64_NV = 5, + SPLAT_ATOMIC_COUNT_64 = 6 +} atomic_op_t; + +typedef struct atomic_priv { + unsigned long ap_magic; + struct file *ap_file; + spinlock_t ap_lock; + wait_queue_head_t ap_waitq; + volatile uint64_t ap_atomic; + volatile uint64_t ap_atomic_exited; + atomic_op_t ap_op; + +} atomic_priv_t; + +static void +splat_atomic_work(void *priv) +{ + atomic_priv_t *ap; + atomic_op_t op; + int i; + + ap = (atomic_priv_t *)priv; + ASSERT(ap->ap_magic == SPLAT_ATOMIC_TEST_MAGIC); + + spin_lock(&ap->ap_lock); + op = ap->ap_op; + wake_up(&ap->ap_waitq); + spin_unlock(&ap->ap_lock); + + splat_vprint(ap->ap_file, SPLAT_ATOMIC_TEST1_NAME, + "Thread %d successfully started: %lu/%lu\n", op, + (long unsigned)ap->ap_atomic, + (long unsigned)ap->ap_atomic_exited); + + for (i = 0; i < SPLAT_ATOMIC_INIT_VALUE / 10; i++) { + + /* Periodically sleep to mix up the ordering */ + if ((i % (SPLAT_ATOMIC_INIT_VALUE / 100)) == 0) { + splat_vprint(ap->ap_file, SPLAT_ATOMIC_TEST1_NAME, + "Thread %d sleeping: %lu/%lu\n", op, + (long unsigned)ap->ap_atomic, + (long unsigned)ap->ap_atomic_exited); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 100); + } + + switch (op) { + case SPLAT_ATOMIC_INC_64: + atomic_inc_64(&ap->ap_atomic); + break; + case SPLAT_ATOMIC_DEC_64: + atomic_dec_64(&ap->ap_atomic); + break; + case SPLAT_ATOMIC_ADD_64: + atomic_add_64(&ap->ap_atomic, 3); + break; + case SPLAT_ATOMIC_SUB_64: + atomic_sub_64(&ap->ap_atomic, 3); + break; + case SPLAT_ATOMIC_ADD_64_NV: + atomic_add_64_nv(&ap->ap_atomic, 5); + break; + case SPLAT_ATOMIC_SUB_64_NV: + atomic_sub_64_nv(&ap->ap_atomic, 5); + break; + default: + SBUG(); + } + } + + atomic_inc_64(&ap->ap_atomic_exited); + + splat_vprint(ap->ap_file, SPLAT_ATOMIC_TEST1_NAME, + "Thread %d successfully exited: %lu/%lu\n", op, + (long unsigned)ap->ap_atomic, + (long unsigned)ap->ap_atomic_exited); + + wake_up(&ap->ap_waitq); + thread_exit(); +} + +static int +splat_atomic_test1_cond(atomic_priv_t *ap, int started) +{ + return (ap->ap_atomic_exited == started); +} + +static int +splat_atomic_test1(struct file *file, void *arg) +{ + atomic_priv_t ap; + DEFINE_WAIT(wait); + kthread_t *thr; + int i, rc = 0; + + ap.ap_magic = SPLAT_ATOMIC_TEST_MAGIC; + ap.ap_file = file; + spin_lock_init(&ap.ap_lock); + init_waitqueue_head(&ap.ap_waitq); + ap.ap_atomic = SPLAT_ATOMIC_INIT_VALUE; + ap.ap_atomic_exited = 0; + + for (i = 0; i < SPLAT_ATOMIC_COUNT_64; i++) { + spin_lock(&ap.ap_lock); + ap.ap_op = i; + + thr = (kthread_t *)thread_create(NULL, 0, splat_atomic_work, + &ap, 0, &p0, TS_RUN, + minclsyspri); + if (thr == NULL) { + rc = -ESRCH; + spin_unlock(&ap.ap_lock); + break; + } + + /* Prepare to wait, the new thread will wake us once it + * has made a copy of the unique private passed data */ + prepare_to_wait(&ap.ap_waitq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&ap.ap_lock); + schedule(); + } + + wait_event_interruptible(ap.ap_waitq, splat_atomic_test1_cond(&ap, i)); + + if (rc) { + 
splat_vprint(file, SPLAT_ATOMIC_TEST1_NAME, "Only started " + "%d/%d test threads\n", i, SPLAT_ATOMIC_COUNT_64); + return rc; + } + + if (ap.ap_atomic != SPLAT_ATOMIC_INIT_VALUE) { + splat_vprint(file, SPLAT_ATOMIC_TEST1_NAME, + "Final value %lu does not match initial value %lu\n", + (long unsigned)ap.ap_atomic, SPLAT_ATOMIC_INIT_VALUE); + return -EINVAL; + } + + splat_vprint(file, SPLAT_ATOMIC_TEST1_NAME, + "Success initial and final values match, %lu == %lu\n", + (long unsigned)ap.ap_atomic, SPLAT_ATOMIC_INIT_VALUE); + + return 0; +} + +splat_subsystem_t * +splat_atomic_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_ATOMIC_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_ATOMIC_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_ATOMIC; + + SPLAT_TEST_INIT(sub, SPLAT_ATOMIC_TEST1_NAME, SPLAT_ATOMIC_TEST1_DESC, + SPLAT_ATOMIC_TEST1_ID, splat_atomic_test1); + + return sub; +} + +void +splat_atomic_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_ATOMIC_TEST1_ID); + + kfree(sub); +} + +int +splat_atomic_id(void) { + return SPLAT_SUBSYSTEM_ATOMIC; +} diff --git a/module/splat/splat-condvar.c b/module/splat/splat-condvar.c new file mode 100644 index 000000000..276798818 --- /dev/null +++ b/module/splat/splat-condvar.c @@ -0,0 +1,479 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_CONDVAR 0x0500 +#define SPLAT_CONDVAR_NAME "condvar" +#define SPLAT_CONDVAR_DESC "Kernel Condition Variable Tests" + +#define SPLAT_CONDVAR_TEST1_ID 0x0501 +#define SPLAT_CONDVAR_TEST1_NAME "signal1" +#define SPLAT_CONDVAR_TEST1_DESC "Wake a single thread, cv_wait()/cv_signal()" + +#define SPLAT_CONDVAR_TEST2_ID 0x0502 +#define SPLAT_CONDVAR_TEST2_NAME "broadcast1" +#define SPLAT_CONDVAR_TEST2_DESC "Wake all threads, cv_wait()/cv_broadcast()" + +#define SPLAT_CONDVAR_TEST3_ID 0x0503 +#define SPLAT_CONDVAR_TEST3_NAME "signal2" +#define SPLAT_CONDVAR_TEST3_DESC "Wake a single thread, cv_wait_timeout()/cv_signal()" + +#define SPLAT_CONDVAR_TEST4_ID 0x0504 +#define SPLAT_CONDVAR_TEST4_NAME "broadcast2" +#define SPLAT_CONDVAR_TEST4_DESC "Wake all threads, cv_wait_timeout()/cv_broadcast()" + +#define SPLAT_CONDVAR_TEST5_ID 0x0505 +#define SPLAT_CONDVAR_TEST5_NAME "timeout" +#define SPLAT_CONDVAR_TEST5_DESC "Timeout thread, cv_wait_timeout()" + +#define SPLAT_CONDVAR_TEST_MAGIC 0x115599DDUL +#define SPLAT_CONDVAR_TEST_NAME "condvar_test" +#define SPLAT_CONDVAR_TEST_COUNT 8 + +typedef struct condvar_priv { + unsigned long cv_magic; + struct file *cv_file; + kcondvar_t cv_condvar; + kmutex_t cv_mtx; +} condvar_priv_t; + +typedef struct condvar_thr { + int ct_id; + const char *ct_name; + condvar_priv_t *ct_cvp; + int ct_rc; +} condvar_thr_t; + +int +splat_condvar_test12_thread(void *arg) +{ + condvar_thr_t *ct = (condvar_thr_t *)arg; + condvar_priv_t *cv = ct->ct_cvp; + char name[16]; + + ASSERT(cv->cv_magic == SPLAT_CONDVAR_TEST_MAGIC); + snprintf(name, sizeof(name),"%s%d",SPLAT_CONDVAR_TEST_NAME,ct->ct_id); + daemonize(name); + + mutex_enter(&cv->cv_mtx); + splat_vprint(cv->cv_file, ct->ct_name, + "%s thread sleeping with %d waiters\n", + name, atomic_read(&cv->cv_condvar.cv_waiters)); + cv_wait(&cv->cv_condvar, &cv->cv_mtx); + splat_vprint(cv->cv_file, ct->ct_name, + "%s thread woken %d waiters remain\n", + name, atomic_read(&cv->cv_condvar.cv_waiters)); + mutex_exit(&cv->cv_mtx); + + return 0; +} + +static int +splat_condvar_test1(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_CONDVAR_TEST_COUNT]; + condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; + condvar_priv_t cv; + + cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; + cv.cv_file = file; + mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); + cv_init(&cv.cv_condvar, SPLAT_CONDVAR_TEST_NAME, CV_DEFAULT, NULL); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. 
*/ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + ct[i].ct_cvp = &cv; + ct[i].ct_id = i; + ct[i].ct_name = SPLAT_CONDVAR_TEST1_NAME; + ct[i].ct_rc = 0; + + pids[i] = kernel_thread(splat_condvar_test12_thread, &ct[i], 0); + if (pids[i] >= 0) + count++; + } + + /* Wait until all threads are waiting on the condition variable */ + while (atomic_read(&cv.cv_condvar.cv_waiters) != count) + schedule(); + + /* Wake a single thread at a time, wait until it exits */ + for (i = 1; i <= count; i++) { + cv_signal(&cv.cv_condvar); + + while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i)) + schedule(); + + /* Correct behavior 1 thread woken */ + if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i)) + continue; + + splat_vprint(file, SPLAT_CONDVAR_TEST1_NAME, "Attempted to " + "wake %d thread but work %d threads woke\n", + 1, count - atomic_read(&cv.cv_condvar.cv_waiters)); + rc = -EINVAL; + break; + } + + if (!rc) + splat_vprint(file, SPLAT_CONDVAR_TEST1_NAME, "Correctly woke " + "%d sleeping threads %d at a time\n", count, 1); + + /* Wait until that last nutex is dropped */ + while (mutex_owner(&cv.cv_mtx)) + schedule(); + + /* Wake everything for the failure case */ + cv_broadcast(&cv.cv_condvar); + cv_destroy(&cv.cv_condvar); + mutex_destroy(&cv.cv_mtx); + + return rc; +} + +static int +splat_condvar_test2(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_CONDVAR_TEST_COUNT]; + condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; + condvar_priv_t cv; + + cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; + cv.cv_file = file; + mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); + cv_init(&cv.cv_condvar, SPLAT_CONDVAR_TEST_NAME, CV_DEFAULT, NULL); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. */ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + ct[i].ct_cvp = &cv; + ct[i].ct_id = i; + ct[i].ct_name = SPLAT_CONDVAR_TEST2_NAME; + ct[i].ct_rc = 0; + + pids[i] = kernel_thread(splat_condvar_test12_thread, &ct[i], 0); + if (pids[i] > 0) + count++; + } + + /* Wait until all threads are waiting on the condition variable */ + while (atomic_read(&cv.cv_condvar.cv_waiters) != count) + schedule(); + + /* Wake all threads waiting on the condition variable */ + cv_broadcast(&cv.cv_condvar); + + /* Wait until all threads have exited */ + while ((atomic_read(&cv.cv_condvar.cv_waiters) > 0) || mutex_owner(&cv.cv_mtx)) + schedule(); + + splat_vprint(file, SPLAT_CONDVAR_TEST2_NAME, "Correctly woke all " + "%d sleeping threads at once\n", count); + + /* Wake everything for the failure case */ + cv_destroy(&cv.cv_condvar); + mutex_destroy(&cv.cv_mtx); + + return rc; +} + +int +splat_condvar_test34_thread(void *arg) +{ + condvar_thr_t *ct = (condvar_thr_t *)arg; + condvar_priv_t *cv = ct->ct_cvp; + char name[16]; + clock_t rc; + + ASSERT(cv->cv_magic == SPLAT_CONDVAR_TEST_MAGIC); + snprintf(name, sizeof(name), "%s%d", SPLAT_CONDVAR_TEST_NAME, ct->ct_id); + daemonize(name); + + mutex_enter(&cv->cv_mtx); + splat_vprint(cv->cv_file, ct->ct_name, + "%s thread sleeping with %d waiters\n", + name, atomic_read(&cv->cv_condvar.cv_waiters)); + + /* Sleep no longer than 3 seconds, for this test we should + * actually never sleep that long without being woken up. 
*/ + rc = cv_timedwait(&cv->cv_condvar, &cv->cv_mtx, lbolt + HZ * 3); + if (rc == -1) { + ct->ct_rc = -ETIMEDOUT; + splat_vprint(cv->cv_file, ct->ct_name, "%s thread timed out, " + "should have been woken\n", name); + } else { + splat_vprint(cv->cv_file, ct->ct_name, + "%s thread woken %d waiters remain\n", + name, atomic_read(&cv->cv_condvar.cv_waiters)); + } + + mutex_exit(&cv->cv_mtx); + + return 0; +} + +static int +splat_condvar_test3(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_CONDVAR_TEST_COUNT]; + condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; + condvar_priv_t cv; + + cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; + cv.cv_file = file; + mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); + cv_init(&cv.cv_condvar, SPLAT_CONDVAR_TEST_NAME, CV_DEFAULT, NULL); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. */ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + ct[i].ct_cvp = &cv; + ct[i].ct_id = i; + ct[i].ct_name = SPLAT_CONDVAR_TEST3_NAME; + ct[i].ct_rc = 0; + + pids[i] = kernel_thread(splat_condvar_test34_thread, &ct[i], 0); + if (pids[i] >= 0) + count++; + } + + /* Wait until all threads are waiting on the condition variable */ + while (atomic_read(&cv.cv_condvar.cv_waiters) != count) + schedule(); + + /* Wake a single thread at a time, wait until it exits */ + for (i = 1; i <= count; i++) { + cv_signal(&cv.cv_condvar); + + while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i)) + schedule(); + + /* Correct behavior 1 thread woken */ + if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i)) + continue; + + splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Attempted to " + "wake %d thread but work %d threads woke\n", + 1, count - atomic_read(&cv.cv_condvar.cv_waiters)); + rc = -EINVAL; + break; + } + + /* Validate no waiting thread timed out early */ + for (i = 0; i < count; i++) + if (ct[i].ct_rc) + rc = ct[i].ct_rc; + + if (!rc) + splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Correctly woke " + "%d sleeping threads %d at a time\n", count, 1); + + /* Wait until that last nutex is dropped */ + while (mutex_owner(&cv.cv_mtx)) + schedule(); + + /* Wake everything for the failure case */ + cv_broadcast(&cv.cv_condvar); + cv_destroy(&cv.cv_condvar); + mutex_destroy(&cv.cv_mtx); + + return rc; +} + +static int +splat_condvar_test4(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_CONDVAR_TEST_COUNT]; + condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; + condvar_priv_t cv; + + cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; + cv.cv_file = file; + mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); + cv_init(&cv.cv_condvar, SPLAT_CONDVAR_TEST_NAME, CV_DEFAULT, NULL); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. 
*/ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + ct[i].ct_cvp = &cv; + ct[i].ct_id = i; + ct[i].ct_name = SPLAT_CONDVAR_TEST3_NAME; + ct[i].ct_rc = 0; + + pids[i] = kernel_thread(splat_condvar_test34_thread, &ct[i], 0); + if (pids[i] >= 0) + count++; + } + + /* Wait until all threads are waiting on the condition variable */ + while (atomic_read(&cv.cv_condvar.cv_waiters) != count) + schedule(); + + /* Wake a single thread at a time, wait until it exits */ + for (i = 1; i <= count; i++) { + cv_signal(&cv.cv_condvar); + + while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i)) + schedule(); + + /* Correct behavior 1 thread woken */ + if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i)) + continue; + + splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Attempted to " + "wake %d thread but work %d threads woke\n", + 1, count - atomic_read(&cv.cv_condvar.cv_waiters)); + rc = -EINVAL; + break; + } + + /* Validate no waiting thread timed out early */ + for (i = 0; i < count; i++) + if (ct[i].ct_rc) + rc = ct[i].ct_rc; + + if (!rc) + splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Correctly woke " + "%d sleeping threads %d at a time\n", count, 1); + + /* Wait until that last nutex is dropped */ + while (mutex_owner(&cv.cv_mtx)) + schedule(); + + /* Wake everything for the failure case */ + cv_broadcast(&cv.cv_condvar); + cv_destroy(&cv.cv_condvar); + mutex_destroy(&cv.cv_mtx); + + return rc; +} + +static int +splat_condvar_test5(struct file *file, void *arg) +{ + kcondvar_t condvar; + kmutex_t mtx; + clock_t time_left, time_before, time_after, time_delta; + int64_t whole_delta; + int32_t remain_delta; + int rc = 0; + + mutex_init(&mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); + cv_init(&condvar, SPLAT_CONDVAR_TEST_NAME, CV_DEFAULT, NULL); + + splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, "Thread going to sleep for " + "%d second and expecting to be woken by timeout\n", 1); + + /* Allow a 1 second timeout, plenty long to validate correctness. 
*/ + time_before = lbolt; + mutex_enter(&mtx); + time_left = cv_timedwait(&condvar, &mtx, lbolt + HZ); + mutex_exit(&mtx); + time_after = lbolt; + time_delta = time_after - time_before; /* XXX - Handle jiffie wrap */ + whole_delta = time_delta; + remain_delta = do_div(whole_delta, HZ); + + if (time_left == -1) { + if (time_delta >= HZ) { + splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, + "Thread correctly timed out and was asleep " + "for %d.%d seconds (%d second min)\n", + (int)whole_delta, remain_delta, 1); + } else { + splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, + "Thread correctly timed out but was only " + "asleep for %d.%d seconds (%d second " + "min)\n", (int)whole_delta, remain_delta, 1); + rc = -ETIMEDOUT; + } + } else { + splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, + "Thread exited after only %d.%d seconds, it " + "did not hit the %d second timeout\n", + (int)whole_delta, remain_delta, 1); + rc = -ETIMEDOUT; + } + + cv_destroy(&condvar); + mutex_destroy(&mtx); + + return rc; +} + +splat_subsystem_t * +splat_condvar_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_CONDVAR_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_CONDVAR_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_CONDVAR; + + SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST1_NAME, SPLAT_CONDVAR_TEST1_DESC, + SPLAT_CONDVAR_TEST1_ID, splat_condvar_test1); + SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST2_NAME, SPLAT_CONDVAR_TEST2_DESC, + SPLAT_CONDVAR_TEST2_ID, splat_condvar_test2); + SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST3_NAME, SPLAT_CONDVAR_TEST3_DESC, + SPLAT_CONDVAR_TEST3_ID, splat_condvar_test3); + SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST4_NAME, SPLAT_CONDVAR_TEST4_DESC, + SPLAT_CONDVAR_TEST4_ID, splat_condvar_test4); + SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST5_NAME, SPLAT_CONDVAR_TEST5_DESC, + SPLAT_CONDVAR_TEST5_ID, splat_condvar_test5); + + return sub; +} + +void +splat_condvar_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST5_ID); + SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST1_ID); + + kfree(sub); +} + +int +splat_condvar_id(void) { + return SPLAT_SUBSYSTEM_CONDVAR; +} diff --git a/module/splat/splat-ctl.c b/module/splat/splat-ctl.c new file mode 100644 index 000000000..d7aed023c --- /dev/null +++ b/module/splat/splat-ctl.c @@ -0,0 +1,682 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * My intent is to create a loadable 'splat' (Solaris Porting LAyer + * Tests) module which can be used as an access point to run + * in kernel Solaris ABI regression tests. This provides a + * nice mechanism to validate the shim primates are working properly. + * + * The basic design is the splat module is that it is constructed of + * various splat_* source files each of which contains regression tests. + * For example the splat_linux_kmem.c file contains tests for validating + * kmem correctness. When the splat module is loaded splat_*_init() + * will be called for each subsystems tests, similarly splat_*_fini() is + * called when the splat module is removed. Each test can then be + * run by making an ioctl() call from a userspace control application + * to pick the subsystem and test which should be run. + */ + +#include "splat-internal.h" + +static spl_class *splat_class; +static spl_device *splat_device; +static struct list_head splat_module_list; +static spinlock_t splat_module_lock; + +static int +splat_open(struct inode *inode, struct file *file) +{ + unsigned int minor = iminor(inode); + splat_info_t *info; + + if (minor >= SPLAT_MINORS) + return -ENXIO; + + info = (splat_info_t *)kmalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return -ENOMEM; + + spin_lock_init(&info->info_lock); + info->info_size = SPLAT_INFO_BUFFER_SIZE; + info->info_buffer = (char *)vmalloc(SPLAT_INFO_BUFFER_SIZE); + if (info->info_buffer == NULL) { + kfree(info); + return -ENOMEM; + } + + info->info_head = info->info_buffer; + file->private_data = (void *)info; + + return 0; +} + +static int +splat_release(struct inode *inode, struct file *file) +{ + unsigned int minor = iminor(inode); + splat_info_t *info = (splat_info_t *)file->private_data; + + if (minor >= SPLAT_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + vfree(info->info_buffer); + kfree(info); + + return 0; +} + +static int +splat_buffer_clear(struct file *file, splat_cfg_t *kcfg, unsigned long arg) +{ + splat_info_t *info = (splat_info_t *)file->private_data; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + memset(info->info_buffer, 0, info->info_size); + info->info_head = info->info_buffer; + spin_unlock(&info->info_lock); + + return 0; +} + +static int +splat_buffer_size(struct file *file, splat_cfg_t *kcfg, unsigned long arg) +{ + splat_info_t *info = (splat_info_t *)file->private_data; + char *buf; + int min, size, rc = 0; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + if (kcfg->cfg_arg1 > 0) { + + size = kcfg->cfg_arg1; + buf = (char *)vmalloc(size); + if (buf == NULL) { + rc = -ENOMEM; + goto out; + } + + /* Zero fill and truncate contents when coping buffer */ + min = ((size < info->info_size) ? 
size : info->info_size); + memset(buf, 0, size); + memcpy(buf, info->info_buffer, min); + vfree(info->info_buffer); + info->info_size = size; + info->info_buffer = buf; + info->info_head = info->info_buffer; + } + + kcfg->cfg_rc1 = info->info_size; + + if (copy_to_user((struct splat_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) + rc = -EFAULT; +out: + spin_unlock(&info->info_lock); + + return rc; +} + + +static splat_subsystem_t * +splat_subsystem_find(int id) { + splat_subsystem_t *sub; + + spin_lock(&splat_module_lock); + list_for_each_entry(sub, &splat_module_list, subsystem_list) { + if (id == sub->desc.id) { + spin_unlock(&splat_module_lock); + return sub; + } + } + spin_unlock(&splat_module_lock); + + return NULL; +} + +static int +splat_subsystem_count(splat_cfg_t *kcfg, unsigned long arg) +{ + splat_subsystem_t *sub; + int i = 0; + + spin_lock(&splat_module_lock); + list_for_each_entry(sub, &splat_module_list, subsystem_list) + i++; + + spin_unlock(&splat_module_lock); + kcfg->cfg_rc1 = i; + + if (copy_to_user((struct splat_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) + return -EFAULT; + + return 0; +} + +static int +splat_subsystem_list(splat_cfg_t *kcfg, unsigned long arg) +{ + splat_subsystem_t *sub; + splat_cfg_t *tmp; + int size, i = 0; + + /* Structure will be sized large enough for N subsystem entries + * which is passed in by the caller. On exit the number of + * entries filled in with valid subsystems will be stored in + * cfg_rc1. If the caller does not provide enough entries + * for all subsystems we will truncate the list to avoid overrun. + */ + size = sizeof(*tmp) + kcfg->cfg_data.splat_subsystems.size * + sizeof(splat_user_t); + tmp = kmalloc(size, GFP_KERNEL); + if (tmp == NULL) + return -ENOMEM; + + /* Local 'tmp' is used as the structure copied back to user space */ + memset(tmp, 0, size); + memcpy(tmp, kcfg, sizeof(*kcfg)); + + spin_lock(&splat_module_lock); + list_for_each_entry(sub, &splat_module_list, subsystem_list) { + strncpy(tmp->cfg_data.splat_subsystems.descs[i].name, + sub->desc.name, SPLAT_NAME_SIZE); + strncpy(tmp->cfg_data.splat_subsystems.descs[i].desc, + sub->desc.desc, SPLAT_DESC_SIZE); + tmp->cfg_data.splat_subsystems.descs[i].id = sub->desc.id; + + /* Truncate list if we are about to overrun alloc'ed memory */ + if ((i++) == kcfg->cfg_data.splat_subsystems.size) + break; + } + spin_unlock(&splat_module_lock); + tmp->cfg_rc1 = i; + + if (copy_to_user((struct splat_cfg_t __user *)arg, tmp, size)) { + kfree(tmp); + return -EFAULT; + } + + kfree(tmp); + return 0; +} + +static int +splat_test_count(splat_cfg_t *kcfg, unsigned long arg) +{ + splat_subsystem_t *sub; + splat_test_t *test; + int i = 0; + + /* Subsystem ID passed as arg1 */ + sub = splat_subsystem_find(kcfg->cfg_arg1); + if (sub == NULL) + return -EINVAL; + + spin_lock(&(sub->test_lock)); + list_for_each_entry(test, &(sub->test_list), test_list) + i++; + + spin_unlock(&(sub->test_lock)); + kcfg->cfg_rc1 = i; + + if (copy_to_user((struct splat_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) + return -EFAULT; + + return 0; +} + +static int +splat_test_list(splat_cfg_t *kcfg, unsigned long arg) +{ + splat_subsystem_t *sub; + splat_test_t *test; + splat_cfg_t *tmp; + int size, i = 0; + + /* Subsystem ID passed as arg1 */ + sub = splat_subsystem_find(kcfg->cfg_arg1); + if (sub == NULL) + return -EINVAL; + + /* Structure will be sized large enough for N test entries + * which is passed in by the caller. On exit the number of + * entries filled in with valid tests will be stored in + * cfg_rc1. 
If the caller does not provide enough entries + * for all tests we will truncate the list to avoid overrun. + */ + size = sizeof(*tmp)+kcfg->cfg_data.splat_tests.size*sizeof(splat_user_t); + tmp = kmalloc(size, GFP_KERNEL); + if (tmp == NULL) + return -ENOMEM; + + /* Local 'tmp' is used as the structure copied back to user space */ + memset(tmp, 0, size); + memcpy(tmp, kcfg, sizeof(*kcfg)); + + spin_lock(&(sub->test_lock)); + list_for_each_entry(test, &(sub->test_list), test_list) { + strncpy(tmp->cfg_data.splat_tests.descs[i].name, + test->desc.name, SPLAT_NAME_SIZE); + strncpy(tmp->cfg_data.splat_tests.descs[i].desc, + test->desc.desc, SPLAT_DESC_SIZE); + tmp->cfg_data.splat_tests.descs[i].id = test->desc.id; + + /* Truncate list if we are about to overrun alloc'ed memory */ + if ((i++) == kcfg->cfg_data.splat_tests.size) + break; + } + spin_unlock(&(sub->test_lock)); + tmp->cfg_rc1 = i; + + if (copy_to_user((struct splat_cfg_t __user *)arg, tmp, size)) { + kfree(tmp); + return -EFAULT; + } + + kfree(tmp); + return 0; +} + +static int +splat_validate(struct file *file, splat_subsystem_t *sub, int cmd, void *arg) +{ + splat_test_t *test; + + spin_lock(&(sub->test_lock)); + list_for_each_entry(test, &(sub->test_list), test_list) { + if (test->desc.id == cmd) { + spin_unlock(&(sub->test_lock)); + return test->test(file, arg); + } + } + spin_unlock(&(sub->test_lock)); + + return -EINVAL; +} + +static int +splat_ioctl_cfg(struct file *file, unsigned long arg) +{ + splat_cfg_t kcfg; + int rc = 0; + + if (copy_from_user(&kcfg, (splat_cfg_t *)arg, sizeof(kcfg))) + return -EFAULT; + + if (kcfg.cfg_magic != SPLAT_CFG_MAGIC) { + splat_print(file, "Bad config magic 0x%x != 0x%x\n", + kcfg.cfg_magic, SPLAT_CFG_MAGIC); + return -EINVAL; + } + + switch (kcfg.cfg_cmd) { + case SPLAT_CFG_BUFFER_CLEAR: + /* cfg_arg1 - Unused + * cfg_rc1 - Unused + */ + rc = splat_buffer_clear(file, &kcfg, arg); + break; + case SPLAT_CFG_BUFFER_SIZE: + /* cfg_arg1 - 0 - query size; >0 resize + * cfg_rc1 - Set to current buffer size + */ + rc = splat_buffer_size(file, &kcfg, arg); + break; + case SPLAT_CFG_SUBSYSTEM_COUNT: + /* cfg_arg1 - Unused + * cfg_rc1 - Set to number of subsystems + */ + rc = splat_subsystem_count(&kcfg, arg); + break; + case SPLAT_CFG_SUBSYSTEM_LIST: + /* cfg_arg1 - Unused + * cfg_rc1 - Set to number of subsystems + * cfg_data.splat_subsystems - Populated with subsystems + */ + rc = splat_subsystem_list(&kcfg, arg); + break; + case SPLAT_CFG_TEST_COUNT: + /* cfg_arg1 - Set to a target subsystem + * cfg_rc1 - Set to number of tests + */ + rc = splat_test_count(&kcfg, arg); + break; + case SPLAT_CFG_TEST_LIST: + /* cfg_arg1 - Set to a target subsystem + * cfg_rc1 - Set to number of tests + * cfg_data.splat_subsystems - Populated with tests + */ + rc = splat_test_list(&kcfg, arg); + break; + default: + splat_print(file, "Bad config command %d\n", kcfg.cfg_cmd); + rc = -EINVAL; + break; + } + + return rc; +} + +static int +splat_ioctl_cmd(struct file *file, unsigned long arg) +{ + splat_subsystem_t *sub; + splat_cmd_t kcmd; + int rc = -EINVAL; + void *data = NULL; + + if (copy_from_user(&kcmd, (splat_cfg_t *)arg, sizeof(kcmd))) + return -EFAULT; + + if (kcmd.cmd_magic != SPLAT_CMD_MAGIC) { + splat_print(file, "Bad command magic 0x%x != 0x%x\n", + kcmd.cmd_magic, SPLAT_CFG_MAGIC); + return -EINVAL; + } + + /* Allocate memory for any opaque data the caller needed to pass on */ + if (kcmd.cmd_data_size > 0) { + data = (void *)kmalloc(kcmd.cmd_data_size, GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + 
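	/* Editor's note, not part of this commit: the opaque test payload is
	 * read from the user buffer at offsetof(splat_cmd_t, cmd_data_str),
	 * i.e. immediately after the fixed command header, so the userspace
	 * caller must lay out the header and its data contiguously. */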
if (copy_from_user(data, (void *)(arg + offsetof(splat_cmd_t, + cmd_data_str)), kcmd.cmd_data_size)) { + kfree(data); + return -EFAULT; + } + } + + sub = splat_subsystem_find(kcmd.cmd_subsystem); + if (sub != NULL) + rc = splat_validate(file, sub, kcmd.cmd_test, data); + else + rc = -EINVAL; + + if (data != NULL) + kfree(data); + + return rc; +} + +static int +splat_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + int rc = 0; + + /* Ignore tty ioctls */ + if ((cmd & 0xffffff00) == ((int)'T') << 8) + return -ENOTTY; + + if (minor >= SPLAT_MINORS) + return -ENXIO; + + switch (cmd) { + case SPLAT_CFG: + rc = splat_ioctl_cfg(file, arg); + break; + case SPLAT_CMD: + rc = splat_ioctl_cmd(file, arg); + break; + default: + splat_print(file, "Bad ioctl command %d\n", cmd); + rc = -EINVAL; + break; + } + + return rc; +} + +/* I'm not sure why you would want to write in to this buffer from + * user space since its principle use is to pass test status info + * back to the user space, but I don't see any reason to prevent it. + */ +static ssize_t splat_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + splat_info_t *info = (splat_info_t *)file->private_data; + int rc = 0; + + if (minor >= SPLAT_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + /* Write beyond EOF */ + if (*ppos >= info->info_size) { + rc = -EFBIG; + goto out; + } + + /* Resize count if beyond EOF */ + if (*ppos + count > info->info_size) + count = info->info_size - *ppos; + + if (copy_from_user(info->info_buffer, buf, count)) { + rc = -EFAULT; + goto out; + } + + *ppos += count; + rc = count; +out: + spin_unlock(&info->info_lock); + return rc; +} + +static ssize_t splat_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + splat_info_t *info = (splat_info_t *)file->private_data; + int rc = 0; + + if (minor >= SPLAT_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + /* Read beyond EOF */ + if (*ppos >= info->info_size) + goto out; + + /* Resize count if beyond EOF */ + if (*ppos + count > info->info_size) + count = info->info_size - *ppos; + + if (copy_to_user(buf, info->info_buffer + *ppos, count)) { + rc = -EFAULT; + goto out; + } + + *ppos += count; + rc = count; +out: + spin_unlock(&info->info_lock); + return rc; +} + +static loff_t splat_seek(struct file *file, loff_t offset, int origin) +{ + unsigned int minor = iminor(file->f_dentry->d_inode); + splat_info_t *info = (splat_info_t *)file->private_data; + int rc = -EINVAL; + + if (minor >= SPLAT_MINORS) + return -ENXIO; + + ASSERT(info); + ASSERT(info->info_buffer); + + spin_lock(&info->info_lock); + + switch (origin) { + case 0: /* SEEK_SET - No-op just do it */ + break; + case 1: /* SEEK_CUR - Seek from current */ + offset = file->f_pos + offset; + break; + case 2: /* SEEK_END - Seek from end */ + offset = info->info_size + offset; + break; + } + + if (offset >= 0) { + file->f_pos = offset; + file->f_version = 0; + rc = offset; + } + + spin_unlock(&info->info_lock); + + return rc; +} + +static struct file_operations splat_fops = { + .owner = THIS_MODULE, + .open = splat_open, + .release = splat_release, + .ioctl = splat_ioctl, + .read = splat_read, + .write = splat_write, + .llseek = splat_seek, +}; + 
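For reference, a minimal userspace sketch of the control application mentioned in the design comment above. It assumes the device node is created as /dev/splat and that a userspace-visible splat-ctl.h provides splat_cfg_t, splat_cmd_t and the SPLAT_CFG/SPLAT_CMD ioctl numbers; this is an editor's illustration only, not part of this commit.

/* Query the number of registered subsystems, then run the atomic
 * subsystem's 64-bit test (IDs taken from splat-atomic.c above). */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "splat-ctl.h"            /* assumed userspace copy of the header */

int
main(void)
{
	splat_cfg_t cfg;
	splat_cmd_t cmd;
	int fd, rc;

	fd = open("/dev/splat", O_RDWR);       /* device node name is assumed */
	if (fd < 0)
		return 1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.cfg_magic = SPLAT_CFG_MAGIC;
	cfg.cfg_cmd   = SPLAT_CFG_SUBSYSTEM_COUNT;
	if (ioctl(fd, SPLAT_CFG, &cfg) == 0)
		printf("%d subsystems registered\n", (int)cfg.cfg_rc1);

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd_magic     = SPLAT_CMD_MAGIC;
	cmd.cmd_subsystem = 0x0b00;            /* SPLAT_SUBSYSTEM_ATOMIC */
	cmd.cmd_test      = 0x0b01;            /* SPLAT_ATOMIC_TEST1_ID */
	cmd.cmd_data_size = 0;                 /* no opaque test data */
	rc = ioctl(fd, SPLAT_CMD, &cmd);       /* 0 when the test passes */
	printf("atomic/64-bit: %s\n", rc ? "fail" : "pass");

	/* Per-test output accumulates in the per-open info buffer and can
	 * be retrieved with read(2), see splat_read() above. */
	close(fd);
	return (rc ? 1 : 0);
}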
+static struct cdev splat_cdev = { + .owner = THIS_MODULE, + .kobj = { .name = SPLAT_NAME, }, +}; + +static int __init +splat_init(void) +{ + dev_t dev; + int rc; + + spin_lock_init(&splat_module_lock); + INIT_LIST_HEAD(&splat_module_list); + + SPLAT_SUBSYSTEM_INIT(kmem); + SPLAT_SUBSYSTEM_INIT(taskq); + SPLAT_SUBSYSTEM_INIT(krng); + SPLAT_SUBSYSTEM_INIT(mutex); + SPLAT_SUBSYSTEM_INIT(condvar); + SPLAT_SUBSYSTEM_INIT(thread); + SPLAT_SUBSYSTEM_INIT(rwlock); + SPLAT_SUBSYSTEM_INIT(time); + SPLAT_SUBSYSTEM_INIT(vnode); + SPLAT_SUBSYSTEM_INIT(kobj); + SPLAT_SUBSYSTEM_INIT(atomic); + SPLAT_SUBSYSTEM_INIT(list); + SPLAT_SUBSYSTEM_INIT(generic); + + dev = MKDEV(SPLAT_MAJOR, 0); + if ((rc = register_chrdev_region(dev, SPLAT_MINORS, SPLAT_NAME))) + goto error; + + /* Support for registering a character driver */ + cdev_init(&splat_cdev, &splat_fops); + if ((rc = cdev_add(&splat_cdev, dev, SPLAT_MINORS))) { + printk(KERN_ERR "SPLAT: Error adding cdev, %d\n", rc); + kobject_put(&splat_cdev.kobj); + unregister_chrdev_region(dev, SPLAT_MINORS); + goto error; + } + + /* Support for udev make driver info available in sysfs */ + splat_class = spl_class_create(THIS_MODULE, "splat"); + if (IS_ERR(splat_class)) { + rc = PTR_ERR(splat_class); + printk(KERN_ERR "SPLAT: Error creating splat class, %d\n", rc); + cdev_del(&splat_cdev); + unregister_chrdev_region(dev, SPLAT_MINORS); + goto error; + } + + splat_device = spl_device_create(splat_class, NULL, + MKDEV(SPLAT_MAJOR, 0), + NULL, SPLAT_NAME); + + printk(KERN_INFO "SPLAT: Loaded Solaris Porting LAyer " + "Tests v%s\n", VERSION); + return 0; +error: + printk(KERN_ERR "SPLAT: Error registering splat device, %d\n", rc); + return rc; +} + +static void +splat_fini(void) +{ + dev_t dev = MKDEV(SPLAT_MAJOR, 0); + + spl_device_destroy(splat_class, splat_device, dev); + spl_class_destroy(splat_class); + cdev_del(&splat_cdev); + unregister_chrdev_region(dev, SPLAT_MINORS); + + SPLAT_SUBSYSTEM_FINI(generic); + SPLAT_SUBSYSTEM_FINI(list); + SPLAT_SUBSYSTEM_FINI(atomic); + SPLAT_SUBSYSTEM_FINI(kobj); + SPLAT_SUBSYSTEM_FINI(vnode); + SPLAT_SUBSYSTEM_FINI(time); + SPLAT_SUBSYSTEM_FINI(rwlock); + SPLAT_SUBSYSTEM_FINI(thread); + SPLAT_SUBSYSTEM_FINI(condvar); + SPLAT_SUBSYSTEM_FINI(mutex); + SPLAT_SUBSYSTEM_FINI(krng); + SPLAT_SUBSYSTEM_FINI(taskq); + SPLAT_SUBSYSTEM_FINI(kmem); + + ASSERT(list_empty(&splat_module_list)); + printk(KERN_INFO "SPLAT: Unloaded Solaris Porting LAyer " + "Tests v%s\n", VERSION); +} + +module_init(splat_init); +module_exit(splat_fini); + +MODULE_AUTHOR("Lawrence Livermore National Labs"); +MODULE_DESCRIPTION("Solaris Porting LAyer Tests"); +MODULE_LICENSE("GPL"); diff --git a/module/splat/splat-generic.c b/module/splat/splat-generic.c new file mode 100644 index 000000000..6da7473e0 --- /dev/null +++ b/module/splat/splat-generic.c @@ -0,0 +1,233 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_GENERIC 0x0d00 +#define SPLAT_GENERIC_NAME "generic" +#define SPLAT_GENERIC_DESC "Kernel Generic Tests" + +#define SPLAT_GENERIC_TEST1_ID 0x0d01 +#define SPLAT_GENERIC_TEST1_NAME "ddi_strtoul" +#define SPLAT_GENERIC_TEST1_DESC "ddi_strtoul Test" + +#define SPLAT_GENERIC_TEST2_ID 0x0d02 +#define SPLAT_GENERIC_TEST2_NAME "ddi_strtol" +#define SPLAT_GENERIC_TEST2_DESC "ddi_strtol Test" + +#define SPLAT_GENERIC_TEST3_ID 0x0d03 +#define SPLAT_GENERIC_TEST3_NAME "ddi_strtoull" +#define SPLAT_GENERIC_TEST3_DESC "ddi_strtoull Test" + +#define SPLAT_GENERIC_TEST4_ID 0x0d04 +#define SPLAT_GENERIC_TEST4_NAME "ddi_strtoll" +#define SPLAT_GENERIC_TEST4_DESC "ddi_strtoll Test" + +#define STR_POS "123456789" +#define STR_NEG "-123456789" +#define STR_BASE "0xabcdef" +#define STR_RANGE_MAX "10000000000000000" +#define STR_RANGE_MIN "-10000000000000000" +#define STR_INVAL1 "12345U" +#define STR_INVAL2 "invald" + +#define VAL_POS 123456789 +#define VAL_NEG -123456789 +#define VAL_BASE 0xabcdef +#define VAL_INVAL1 12345U + +#define define_generic_msg_strtox(type, valtype) \ +static void \ +generic_msg_strto##type(struct file *file, char *msg, int rc, int *err, \ + const char *s, valtype d, char *endptr) \ +{ \ + splat_vprint(file, SPLAT_GENERIC_TEST1_NAME, \ + "%s (%d) %s: %s == %lld, 0x%p\n", \ + rc ? 
"Fail" : "Pass", *err, msg, s, \ + (unsigned long long)d, endptr); \ + *err = rc; \ +} + +define_generic_msg_strtox(ul, unsigned long); +define_generic_msg_strtox(l, long); +define_generic_msg_strtox(ull, unsigned long long); +define_generic_msg_strtox(ll, long long); + +#define define_splat_generic_test_strtox(type, valtype) \ +static int \ +splat_generic_test_strto##type(struct file *file, void *arg) \ +{ \ + int rc, rc1, rc2, rc3, rc4, rc5, rc6, rc7; \ + char str[20], *endptr; \ + valtype r; \ + \ + /* Positive value: expect success */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + rc1 = ddi_strto##type(STR_POS, &endptr, 10, &r); \ + if (rc1 == 0 && r == VAL_POS && endptr && *endptr == '\0') \ + rc = 0; \ + \ + generic_msg_strto##type(file, "positive", rc , &rc1, \ + STR_POS, r, endptr); \ + \ + /* Negative value: expect success */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + strcpy(str, STR_NEG); \ + rc2 = ddi_strto##type(str, &endptr, 10, &r); \ + if (#type[0] == 'u') { \ + if (rc2 == 0 && r == 0 && endptr == str) \ + rc = 0; \ + } else { \ + if (rc2 == 0 && r == VAL_NEG && \ + endptr && *endptr == '\0') \ + rc = 0; \ + } \ + \ + generic_msg_strto##type(file, "negative", rc, &rc2, \ + STR_NEG, r, endptr); \ + \ + /* Non decimal base: expect sucess */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + rc3 = ddi_strto##type(STR_BASE, &endptr, 0, &r); \ + if (rc3 == 0 && r == VAL_BASE && endptr && *endptr == '\0') \ + rc = 0; \ + \ + generic_msg_strto##type(file, "base", rc, &rc3, \ + STR_BASE, r, endptr); \ + \ + /* Max out of range: failure expected, r unchanged */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + rc4 = ddi_strto##type(STR_RANGE_MAX, &endptr, 16, &r); \ + if (rc4 == ERANGE && r == 0 && endptr == NULL) \ + rc = 0; \ + \ + generic_msg_strto##type(file, "max", rc, &rc4, \ + STR_RANGE_MAX, r, endptr); \ + \ + /* Min out of range: failure expected, r unchanged */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + strcpy(str, STR_RANGE_MIN); \ + rc5 = ddi_strto##type(str, &endptr, 16, &r); \ + if (#type[0] == 'u') { \ + if (rc5 == 0 && r == 0 && endptr == str) \ + rc = 0; \ + } else { \ + if (rc5 == ERANGE && r == 0 && endptr == NULL) \ + rc = 0; \ + } \ + \ + generic_msg_strto##type(file, "min", rc, &rc5, \ + STR_RANGE_MIN, r, endptr); \ + \ + /* Invalid string: success expected, endptr == 'U' */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + rc6 = ddi_strto##type(STR_INVAL1, &endptr, 10, &r); \ + if (rc6 == 0 && r == VAL_INVAL1 && endptr && *endptr == 'U') \ + rc = 0; \ + \ + generic_msg_strto##type(file, "invalid", rc, &rc6, \ + STR_INVAL1, r, endptr); \ + \ + /* Invalid string: failure expected, endptr == str */ \ + r = 0; \ + rc = 1; \ + endptr = NULL; \ + strcpy(str, STR_INVAL2); \ + rc7 = ddi_strto##type(str, &endptr, 10, &r); \ + if (rc7 == 0 && r == 0 && endptr == str) \ + rc = 0; \ + \ + generic_msg_strto##type(file, "invalid", rc, &rc7, \ + STR_INVAL2, r, endptr); \ + \ + return (rc1 || rc2 || rc3 || rc4 || rc5 || rc6 || rc7) ? 
\ + -EINVAL : 0; \ +} + +define_splat_generic_test_strtox(ul, unsigned long); +define_splat_generic_test_strtox(l, long); +define_splat_generic_test_strtox(ull, unsigned long long); +define_splat_generic_test_strtox(ll, long long); + +splat_subsystem_t * +splat_generic_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_GENERIC_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_GENERIC_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_GENERIC; + + SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST1_NAME, SPLAT_GENERIC_TEST1_DESC, + SPLAT_GENERIC_TEST1_ID, splat_generic_test_strtoul); + SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST2_NAME, SPLAT_GENERIC_TEST2_DESC, + SPLAT_GENERIC_TEST2_ID, splat_generic_test_strtol); + SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST3_NAME, SPLAT_GENERIC_TEST3_DESC, + SPLAT_GENERIC_TEST3_ID, splat_generic_test_strtoull); + SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST4_NAME, SPLAT_GENERIC_TEST4_DESC, + SPLAT_GENERIC_TEST4_ID, splat_generic_test_strtoll); + + return sub; +} + +void +splat_generic_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + + SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST1_ID); + + kfree(sub); +} + +int +splat_generic_id(void) +{ + return SPLAT_SUBSYSTEM_GENERIC; +} diff --git a/module/splat/splat-internal.h b/module/splat/splat-internal.h new file mode 100644 index 000000000..87c47b173 --- /dev/null +++ b/module/splat/splat-internal.h @@ -0,0 +1,239 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef _SPLAT_INTERNAL_H +#define _SPLAT_INTERNAL_H + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/limits.h> +#include <linux/version.h> +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/list.h> + +#include <asm/ioctls.h> +#include <asm/uaccess.h> +#include <stdarg.h> + +#include <sys/callb.h> +#include <sys/condvar.h> +#include <sys/cred.h> +#include <sys/sysmacros.h> +#include <sys/kmem.h> +#include <sys/kstat.h> +#include <sys/mutex.h> +#include <sys/random.h> +#include <sys/rwlock.h> +#include <sys/taskq.h> +#include <sys/thread.h> +#include <sys/time.h> +#include <sys/timer.h> +#include <sys/types.h> +#include <sys/kobj.h> +#include <sys/atomic.h> +#include <sys/list.h> +#include <sys/sunddi.h> +#include <linux/cdev.h> + +#include "spl-device.h" +#include "splat-ctl.h" + +#define SPLAT_SUBSYSTEM_INIT(type) \ +({ splat_subsystem_t *_sub_; \ + \ + _sub_ = (splat_subsystem_t *)splat_##type##_init(); \ + if (_sub_ == NULL) { \ + printk(KERN_ERR "splat: Error initializing: " #type "\n"); \ + } else { \ + spin_lock(&splat_module_lock); \ + list_add_tail(&(_sub_->subsystem_list), \ + &splat_module_list); \ + spin_unlock(&splat_module_lock); \ + } \ +}) + +#define SPLAT_SUBSYSTEM_FINI(type) \ +({ splat_subsystem_t *_sub_, *_tmp_; \ + int _id_, _flag_ = 0; \ + \ + _id_ = splat_##type##_id(); \ + spin_lock(&splat_module_lock); \ + list_for_each_entry_safe(_sub_, _tmp_, &splat_module_list, \ + subsystem_list) { \ + if (_sub_->desc.id == _id_) { \ + list_del_init(&(_sub_->subsystem_list)); \ + spin_unlock(&splat_module_lock); \ + splat_##type##_fini(_sub_); \ + spin_lock(&splat_module_lock); \ + _flag_ = 1; \ + } \ + } \ + spin_unlock(&splat_module_lock); \ + \ + if (!_flag_) \ + printk(KERN_ERR "splat: Error finalizing: " #type "\n"); \ +}) + +#define SPLAT_TEST_INIT(sub, n, d, tid, func) \ +({ splat_test_t *_test_; \ + \ + _test_ = (splat_test_t *)kmalloc(sizeof(*_test_), GFP_KERNEL); \ + if (_test_ == NULL) { \ + printk(KERN_ERR "splat: Error initializing: " n "/" #tid" \n");\ + } else { \ + memset(_test_, 0, sizeof(*_test_)); \ + strncpy(_test_->desc.name, n, SPLAT_NAME_SIZE-1); \ + strncpy(_test_->desc.desc, d, SPLAT_DESC_SIZE-1); \ + _test_->desc.id = tid; \ + _test_->test = func; \ + INIT_LIST_HEAD(&(_test_->test_list)); \ + spin_lock(&((sub)->test_lock)); \ + list_add_tail(&(_test_->test_list),&((sub)->test_list));\ + spin_unlock(&((sub)->test_lock)); \ + } \ +}) + +#define SPLAT_TEST_FINI(sub, tid) \ +({ splat_test_t *_test_, *_tmp_; \ + int _flag_ = 0; \ + \ + spin_lock(&((sub)->test_lock)); \ + list_for_each_entry_safe(_test_, _tmp_, \ + &((sub)->test_list), test_list) { \ + if (_test_->desc.id == tid) { \ + list_del_init(&(_test_->test_list)); \ + _flag_ = 1; \ + } \ + } \ + spin_unlock(&((sub)->test_lock)); \ + \ + if (!_flag_) \ + printk(KERN_ERR "splat: Error finalizing: " #tid "\n"); \ +}) + +typedef int (*splat_test_func_t)(struct file *, void *); + +typedef struct splat_test { + struct list_head test_list; + splat_user_t desc; + splat_test_func_t test; +} splat_test_t; + +typedef struct splat_subsystem { + struct list_head subsystem_list;/* List had to chain entries */ + splat_user_t desc; + spinlock_t test_lock; + struct list_head test_list; +} splat_subsystem_t; + +#define SPLAT_INFO_BUFFER_SIZE 65536 +#define SPLAT_INFO_BUFFER_REDZONE 256 + +typedef struct splat_info 
{ + spinlock_t info_lock; + int info_size; + char *info_buffer; + char *info_head; /* Internal kernel use only */ +} splat_info_t; + +#define sym2str(sym) (char *)(#sym) + +#define splat_print(file, format, args...) \ +({ splat_info_t *_info_ = (splat_info_t *)file->private_data; \ + int _rc_; \ + \ + ASSERT(_info_); \ + ASSERT(_info_->info_buffer); \ + \ + spin_lock(&_info_->info_lock); \ + \ + /* Don't allow the kernel to start a write in the red zone */ \ + if ((int)(_info_->info_head - _info_->info_buffer) > \ + (SPLAT_INFO_BUFFER_SIZE - SPLAT_INFO_BUFFER_REDZONE)) { \ + _rc_ = -EOVERFLOW; \ + } else { \ + _rc_ = sprintf(_info_->info_head, format, args); \ + if (_rc_ >= 0) \ + _info_->info_head += _rc_; \ + } \ + \ + spin_unlock(&_info_->info_lock); \ + _rc_; \ +}) + +#define splat_vprint(file, test, format, args...) \ + splat_print(file, "%*s: " format, SPLAT_NAME_SIZE, test, args) + +splat_subsystem_t *splat_condvar_init(void); +splat_subsystem_t *splat_kmem_init(void); +splat_subsystem_t *splat_mutex_init(void); +splat_subsystem_t *splat_krng_init(void); +splat_subsystem_t *splat_rwlock_init(void); +splat_subsystem_t *splat_taskq_init(void); +splat_subsystem_t *splat_thread_init(void); +splat_subsystem_t *splat_time_init(void); +splat_subsystem_t *splat_vnode_init(void); +splat_subsystem_t *splat_kobj_init(void); +splat_subsystem_t *splat_atomic_init(void); +splat_subsystem_t *splat_list_init(void); +splat_subsystem_t *splat_generic_init(void); + +void splat_condvar_fini(splat_subsystem_t *); +void splat_kmem_fini(splat_subsystem_t *); +void splat_mutex_fini(splat_subsystem_t *); +void splat_krng_fini(splat_subsystem_t *); +void splat_rwlock_fini(splat_subsystem_t *); +void splat_taskq_fini(splat_subsystem_t *); +void splat_thread_fini(splat_subsystem_t *); +void splat_time_fini(splat_subsystem_t *); +void splat_vnode_fini(splat_subsystem_t *); +void splat_kobj_fini(splat_subsystem_t *); +void splat_atomic_fini(splat_subsystem_t *); +void splat_list_fini(splat_subsystem_t *); +void splat_generic_fini(splat_subsystem_t *); + +int splat_condvar_id(void); +int splat_kmem_id(void); +int splat_mutex_id(void); +int splat_krng_id(void); +int splat_rwlock_id(void); +int splat_taskq_id(void); +int splat_thread_id(void); +int splat_time_id(void); +int splat_vnode_id(void); +int splat_kobj_id(void); +int splat_atomic_id(void); +int splat_list_id(void); +int splat_generic_id(void); + +#endif /* _SPLAT_INTERNAL_H */ diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c new file mode 100644 index 000000000..a9792b1a5 --- /dev/null +++ b/module/splat/splat-kmem.c @@ -0,0 +1,733 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_KMEM 0x0100 +#define SPLAT_KMEM_NAME "kmem" +#define SPLAT_KMEM_DESC "Kernel Malloc/Slab Tests" + +#define SPLAT_KMEM_TEST1_ID 0x0101 +#define SPLAT_KMEM_TEST1_NAME "kmem_alloc" +#define SPLAT_KMEM_TEST1_DESC "Memory allocation test (kmem_alloc)" + +#define SPLAT_KMEM_TEST2_ID 0x0102 +#define SPLAT_KMEM_TEST2_NAME "kmem_zalloc" +#define SPLAT_KMEM_TEST2_DESC "Memory allocation test (kmem_zalloc)" + +#define SPLAT_KMEM_TEST3_ID 0x0103 +#define SPLAT_KMEM_TEST3_NAME "vmem_alloc" +#define SPLAT_KMEM_TEST3_DESC "Memory allocation test (vmem_alloc)" + +#define SPLAT_KMEM_TEST4_ID 0x0104 +#define SPLAT_KMEM_TEST4_NAME "vmem_zalloc" +#define SPLAT_KMEM_TEST4_DESC "Memory allocation test (vmem_zalloc)" + +#define SPLAT_KMEM_TEST5_ID 0x0105 +#define SPLAT_KMEM_TEST5_NAME "kmem_cache1" +#define SPLAT_KMEM_TEST5_DESC "Slab ctor/dtor test (small)" + +#define SPLAT_KMEM_TEST6_ID 0x0106 +#define SPLAT_KMEM_TEST6_NAME "kmem_cache2" +#define SPLAT_KMEM_TEST6_DESC "Slab ctor/dtor test (large)" + +#define SPLAT_KMEM_TEST7_ID 0x0107 +#define SPLAT_KMEM_TEST7_NAME "kmem_reap" +#define SPLAT_KMEM_TEST7_DESC "Slab reaping test" + +#define SPLAT_KMEM_TEST8_ID 0x0108 +#define SPLAT_KMEM_TEST8_NAME "kmem_lock" +#define SPLAT_KMEM_TEST8_DESC "Slab locking test" + +#define SPLAT_KMEM_ALLOC_COUNT 10 +#define SPLAT_VMEM_ALLOC_COUNT 10 + + +/* XXX - This test may fail under tight memory conditions */ +static int +splat_kmem_test1(struct file *file, void *arg) +{ + void *ptr[SPLAT_KMEM_ALLOC_COUNT]; + int size = PAGE_SIZE; + int i, count, rc = 0; + + /* We are intentionally going to push kmem_alloc to its max + * allocation size, so suppress the console warnings for now */ + kmem_set_warning(0); + + while ((!rc) && (size <= (PAGE_SIZE * 32))) { + count = 0; + + for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) { + ptr[i] = kmem_alloc(size, KM_SLEEP); + if (ptr[i]) + count++; + } + + for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) + if (ptr[i]) + kmem_free(ptr[i], size); + + splat_vprint(file, SPLAT_KMEM_TEST1_NAME, + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_KMEM_ALLOC_COUNT); + if (count != SPLAT_KMEM_ALLOC_COUNT) + rc = -ENOMEM; + + size *= 2; + } + + kmem_set_warning(1); + + return rc; +} + +static int +splat_kmem_test2(struct file *file, void *arg) +{ + void *ptr[SPLAT_KMEM_ALLOC_COUNT]; + int size = PAGE_SIZE; + int i, j, count, rc = 0; + + /* We are intentionally going to push kmem_alloc to its max + * allocation size, so suppress the console warnings for now */ + kmem_set_warning(0); + + while ((!rc) && (size <= (PAGE_SIZE * 32))) { + count = 0; + + for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) { + ptr[i] = kmem_zalloc(size, KM_SLEEP); + if (ptr[i]) + count++; + } + + /* Ensure buffer has been zero filled */ + for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) { + for (j = 0; j < size; j++) { + if (((char *)ptr[i])[j] != '\0') { + splat_vprint(file, SPLAT_KMEM_TEST2_NAME, + "%d-byte allocation was " + "not zeroed\n", size); + rc = -EFAULT; + } + } + } + + for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) + if (ptr[i]) + kmem_free(ptr[i], size); + + splat_vprint(file, SPLAT_KMEM_TEST2_NAME, + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_KMEM_ALLOC_COUNT); + if (count != SPLAT_KMEM_ALLOC_COUNT) + rc = -ENOMEM; + + 
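+ /* Double the allocation size and try again, stopping after 32 pages */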
size *= 2; + } + + kmem_set_warning(1); + + return rc; +} + +static int +splat_kmem_test3(struct file *file, void *arg) +{ + void *ptr[SPLAT_VMEM_ALLOC_COUNT]; + int size = PAGE_SIZE; + int i, count, rc = 0; + + while ((!rc) && (size <= (PAGE_SIZE * 1024))) { + count = 0; + + for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) { + ptr[i] = vmem_alloc(size, KM_SLEEP); + if (ptr[i]) + count++; + } + + for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) + if (ptr[i]) + vmem_free(ptr[i], size); + + splat_vprint(file, SPLAT_KMEM_TEST3_NAME, + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_VMEM_ALLOC_COUNT); + if (count != SPLAT_VMEM_ALLOC_COUNT) + rc = -ENOMEM; + + size *= 2; + } + + return rc; +} + +static int +splat_kmem_test4(struct file *file, void *arg) +{ + void *ptr[SPLAT_VMEM_ALLOC_COUNT]; + int size = PAGE_SIZE; + int i, j, count, rc = 0; + + while ((!rc) && (size <= (PAGE_SIZE * 1024))) { + count = 0; + + for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) { + ptr[i] = vmem_zalloc(size, KM_SLEEP); + if (ptr[i]) + count++; + } + + /* Ensure buffer has been zero filled */ + for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) { + for (j = 0; j < size; j++) { + if (((char *)ptr[i])[j] != '\0') { + splat_vprint(file, SPLAT_KMEM_TEST4_NAME, + "%d-byte allocation was " + "not zeroed\n", size); + rc = -EFAULT; + } + } + } + + for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) + if (ptr[i]) + vmem_free(ptr[i], size); + + splat_vprint(file, SPLAT_KMEM_TEST4_NAME, + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_VMEM_ALLOC_COUNT); + if (count != SPLAT_VMEM_ALLOC_COUNT) + rc = -ENOMEM; + + size *= 2; + } + + return rc; +} + +#define SPLAT_KMEM_TEST_MAGIC 0x004488CCUL +#define SPLAT_KMEM_CACHE_NAME "kmem_test" +#define SPLAT_KMEM_OBJ_COUNT 128 +#define SPLAT_KMEM_OBJ_RECLAIM 16 + +typedef struct kmem_cache_data { + unsigned long kcd_magic; + int kcd_flag; + char kcd_buf[0]; +} kmem_cache_data_t; + +typedef struct kmem_cache_priv { + unsigned long kcp_magic; + struct file *kcp_file; + kmem_cache_t *kcp_cache; + kmem_cache_data_t *kcp_kcd[SPLAT_KMEM_OBJ_COUNT]; + spinlock_t kcp_lock; + wait_queue_head_t kcp_waitq; + int kcp_size; + int kcp_count; + int kcp_threads; + int kcp_alloc; + int kcp_rc; +} kmem_cache_priv_t; + +static int +splat_kmem_cache_test_constructor(void *ptr, void *priv, int flags) +{ + kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; + kmem_cache_data_t *kcd = (kmem_cache_data_t *)ptr; + + if (kcd && kcp) { + kcd->kcd_magic = kcp->kcp_magic; + kcd->kcd_flag = 1; + memset(kcd->kcd_buf, 0xaa, kcp->kcp_size - (sizeof *kcd)); + kcp->kcp_count++; + } + + return 0; +} + +static void +splat_kmem_cache_test_destructor(void *ptr, void *priv) +{ + kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; + kmem_cache_data_t *kcd = (kmem_cache_data_t *)ptr; + + if (kcd && kcp) { + kcd->kcd_magic = 0; + kcd->kcd_flag = 0; + memset(kcd->kcd_buf, 0xbb, kcp->kcp_size - (sizeof *kcd)); + kcp->kcp_count--; + } + + return; +} + +static int +splat_kmem_cache_size_test(struct file *file, void *arg, + char *name, int size, int flags) +{ + kmem_cache_t *cache = NULL; + kmem_cache_data_t *kcd = NULL; + kmem_cache_priv_t kcp; + int rc = 0, max; + + kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; + kcp.kcp_file = file; + kcp.kcp_size = size; + kcp.kcp_count = 0; + kcp.kcp_rc = 0; + + cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp.kcp_size, 0, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + NULL, &kcp, NULL, flags); + if (!cache) { + splat_vprint(file, name, + "Unable to create '%s'\n", 
SPLAT_KMEM_CACHE_NAME); + return -ENOMEM; + } + + kcd = kmem_cache_alloc(cache, KM_SLEEP); + if (!kcd) { + splat_vprint(file, name, + "Unable to allocate from '%s'\n", + SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } + + if (!kcd->kcd_flag) { + splat_vprint(file, name, + "Failed to run contructor for '%s'\n", + SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } + + if (kcd->kcd_magic != kcp.kcp_magic) { + splat_vprint(file, name, + "Failed to pass private data to constructor " + "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } + + max = kcp.kcp_count; + kmem_cache_free(cache, kcd); + + /* Destroy the entire cache which will force destructors to + * run and we can verify one was called for every object */ + kmem_cache_destroy(cache); + if (kcp.kcp_count) { + splat_vprint(file, name, + "Failed to run destructor on all slab objects " + "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + } + + splat_vprint(file, name, + "Successfully ran ctors/dtors for %d elements in '%s'\n", + max, SPLAT_KMEM_CACHE_NAME); + + return rc; + +out_free: + if (kcd) + kmem_cache_free(cache, kcd); + + kmem_cache_destroy(cache); + return rc; +} + +/* Validate small object cache behavior for dynamic/kmem/vmem caches */ +static int +splat_kmem_test5(struct file *file, void *arg) +{ + char *name = SPLAT_KMEM_TEST5_NAME; + int rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128, 0); + if (rc) + return rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128, KMC_KMEM); + if (rc) + return rc; + + return splat_kmem_cache_size_test(file, arg, name, 128, KMC_VMEM); +} + +/* Validate large object cache behavior for dynamic/kmem/vmem caches */ +static int +splat_kmem_test6(struct file *file, void *arg) +{ + char *name = SPLAT_KMEM_TEST6_NAME; + int rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, 0); + if (rc) + return rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, KMC_KMEM); + if (rc) + return rc; + + return splat_kmem_cache_size_test(file, arg, name, 128 * 1028, KMC_VMEM); +} + +static void +splat_kmem_cache_test_reclaim(void *priv) +{ + kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; + int i, count; + + count = min(SPLAT_KMEM_OBJ_RECLAIM, kcp->kcp_count); + splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST7_NAME, + "Reaping %d objects from '%s'\n", count, + SPLAT_KMEM_CACHE_NAME); + + for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) { + if (kcp->kcp_kcd[i]) { + kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]); + kcp->kcp_kcd[i] = NULL; + + if (--count == 0) + break; + } + } + + return; +} + +static int +splat_kmem_test7(struct file *file, void *arg) +{ + kmem_cache_t *cache; + kmem_cache_priv_t kcp; + int i, rc = 0; + + kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; + kcp.kcp_file = file; + kcp.kcp_size = 256; + kcp.kcp_count = 0; + kcp.kcp_rc = 0; + + cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp.kcp_size, 0, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + splat_kmem_cache_test_reclaim, + &kcp, NULL, 0); + if (!cache) { + splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); + return -ENOMEM; + } + + kcp.kcp_cache = cache; + + for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) { + /* All allocations need not succeed */ + kcp.kcp_kcd[i] = kmem_cache_alloc(cache, KM_SLEEP); + if (!kcp.kcp_kcd[i]) { + splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + "Unable to allocate from '%s'\n", + SPLAT_KMEM_CACHE_NAME); + } + } + + ASSERT(kcp.kcp_count > 0); + + /* Request the slab 
cache free any objects it can. For a few reasons + * this may not immediately result in more free memory even if objects + * are freed. First off, due to fragmentation we may not be able to + * reclaim any slabs. Secondly, even if we do we fully clear some + * slabs we will not want to immedately reclaim all of them because + * we may contend with cache allocs and thrash. What we want to see + * is slab size decrease more gradually as it becomes clear they + * will not be needed. This should be acheivable in less than minute + * if it takes longer than this something has gone wrong. + */ + for (i = 0; i < 60; i++) { + kmem_cache_reap_now(cache); + splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + "%s cache objects %d, slabs %u/%u objs %u/%u\n", + SPLAT_KMEM_CACHE_NAME, kcp.kcp_count, + (unsigned)cache->skc_slab_alloc, + (unsigned)cache->skc_slab_total, + (unsigned)cache->skc_obj_alloc, + (unsigned)cache->skc_obj_total); + + if (cache->skc_obj_total == 0) + break; + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } + + if (cache->skc_obj_total == 0) { + splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + "Successfully created %d objects " + "in cache %s and reclaimed them\n", + SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME); + } else { + splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + "Failed to reclaim %u/%d objects from cache %s\n", + (unsigned)cache->skc_obj_total, SPLAT_KMEM_OBJ_COUNT, + SPLAT_KMEM_CACHE_NAME); + rc = -ENOMEM; + } + + /* Cleanup our mess (for failure case of time expiring) */ + for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) + if (kcp.kcp_kcd[i]) + kmem_cache_free(cache, kcp.kcp_kcd[i]); + + kmem_cache_destroy(cache); + + return rc; +} + +static void +splat_kmem_test8_thread(void *arg) +{ + kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg; + int count = kcp->kcp_alloc, rc = 0, i; + void **objs; + + ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC); + + objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP); + if (!objs) { + splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME, + "Unable to alloc objp array for cache '%s'\n", + kcp->kcp_cache->skc_name); + rc = -ENOMEM; + goto out; + } + + for (i = 0; i < count; i++) { + objs[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); + if (!objs[i]) { + splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME, + "Unable to allocate from cache '%s'\n", + kcp->kcp_cache->skc_name); + rc = -ENOMEM; + break; + } + } + + for (i = 0; i < count; i++) + if (objs[i]) + kmem_cache_free(kcp->kcp_cache, objs[i]); + + vmem_free(objs, count * sizeof(void *)); +out: + spin_lock(&kcp->kcp_lock); + if (!kcp->kcp_rc) + kcp->kcp_rc = rc; + + if (--kcp->kcp_threads == 0) + wake_up(&kcp->kcp_waitq); + + spin_unlock(&kcp->kcp_lock); + + thread_exit(); +} + +static int +splat_kmem_test8_count(kmem_cache_priv_t *kcp, int threads) +{ + int ret; + + spin_lock(&kcp->kcp_lock); + ret = (kcp->kcp_threads == threads); + spin_unlock(&kcp->kcp_lock); + + return ret; +} + +/* This test will always pass and is simply here so I can easily + * eyeball the slab cache locking overhead to ensure it is reasonable. 
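+ * Each pass below creates a dedicated kmem cache, drives it with 32 threads
+ * of allocations and frees, and reports per-pass timing and slab/object totals.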
+ */ +static int +splat_kmem_test8_sc(struct file *file, void *arg, int size, int count) +{ + kmem_cache_priv_t kcp; + kthread_t *thr; + struct timespec start, stop, delta; + char cache_name[32]; + int i, j, rc = 0, threads = 32; + + kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; + kcp.kcp_file = file; + + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "name", + "time (sec)\tslabs \tobjs \thash\n"); + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "", + " \ttot/max/calc\ttot/max/calc\n"); + + for (i = 1; i <= count; i *= 2) { + kcp.kcp_size = size; + kcp.kcp_count = 0; + kcp.kcp_threads = 0; + kcp.kcp_alloc = i; + kcp.kcp_rc = 0; + spin_lock_init(&kcp.kcp_lock); + init_waitqueue_head(&kcp.kcp_waitq); + + (void)snprintf(cache_name, 32, "%s-%d-%d", + SPLAT_KMEM_CACHE_NAME, size, i); + kcp.kcp_cache = kmem_cache_create(cache_name, kcp.kcp_size, 0, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + NULL, &kcp, NULL, 0); + if (!kcp.kcp_cache) { + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, + "Unable to create '%s' cache\n", + SPLAT_KMEM_CACHE_NAME); + rc = -ENOMEM; + break; + } + + start = current_kernel_time(); + + for (j = 0; j < threads; j++) { + thr = thread_create(NULL, 0, splat_kmem_test8_thread, + &kcp, 0, &p0, TS_RUN, minclsyspri); + if (thr == NULL) { + rc = -ESRCH; + break; + } + spin_lock(&kcp.kcp_lock); + kcp.kcp_threads++; + spin_unlock(&kcp.kcp_lock); + } + + /* Sleep until the thread sets kcp.kcp_threads == 0 */ + wait_event(kcp.kcp_waitq, splat_kmem_test8_count(&kcp, 0)); + stop = current_kernel_time(); + delta = timespec_sub(stop, start); + + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t" + "%lu/%lu/%lu\t%lu/%lu/%lu\n", + kcp.kcp_cache->skc_name, + delta.tv_sec, delta.tv_nsec, + (unsigned long)kcp.kcp_cache->skc_slab_total, + (unsigned long)kcp.kcp_cache->skc_slab_max, + (unsigned long)(kcp.kcp_alloc * threads / + SPL_KMEM_CACHE_OBJ_PER_SLAB), + (unsigned long)kcp.kcp_cache->skc_obj_total, + (unsigned long)kcp.kcp_cache->skc_obj_max, + (unsigned long)(kcp.kcp_alloc * threads)); + + kmem_cache_destroy(kcp.kcp_cache); + + if (!rc && kcp.kcp_rc) + rc = kcp.kcp_rc; + + if (rc) + break; + } + + return rc; +} + +static int +splat_kmem_test8(struct file *file, void *arg) +{ + int i, rc = 0; + + /* Run through slab cache with objects size from + * 16-1Mb in 4x multiples with 1024 objects each */ + for (i = 16; i <= 1024*1024; i *= 4) { + rc = splat_kmem_test8_sc(file, arg, i, 256); + if (rc) + break; + } + + return rc; +} + +splat_subsystem_t * +splat_kmem_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_KMEM_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_KMEM; + + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC, + SPLAT_KMEM_TEST1_ID, splat_kmem_test1); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC, + SPLAT_KMEM_TEST2_ID, splat_kmem_test2); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC, + SPLAT_KMEM_TEST3_ID, splat_kmem_test3); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC, + SPLAT_KMEM_TEST4_ID, splat_kmem_test4); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC, + SPLAT_KMEM_TEST5_ID, splat_kmem_test5); + SPLAT_TEST_INIT(sub, 
SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC, + SPLAT_KMEM_TEST6_ID, splat_kmem_test6); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC, + SPLAT_KMEM_TEST7_ID, splat_kmem_test7); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC, + SPLAT_KMEM_TEST8_ID, splat_kmem_test8); + + return sub; +} + +void +splat_kmem_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID); + + kfree(sub); +} + +int +splat_kmem_id(void) { + return SPLAT_SUBSYSTEM_KMEM; +} diff --git a/module/splat/splat-kobj.c b/module/splat/splat-kobj.c new file mode 100644 index 000000000..2137ab02a --- /dev/null +++ b/module/splat/splat-kobj.c @@ -0,0 +1,164 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_KOBJ 0x0a00 +#define SPLAT_KOBJ_NAME "kobj" +#define SPLAT_KOBJ_DESC "Kernel Kobj Tests" + +#define SPLAT_KOBJ_TEST1_ID 0x0a01 +#define SPLAT_KOBJ_TEST1_NAME "open" +#define SPLAT_KOBJ_TEST1_DESC "Kobj Open/Close Test" + +#define SPLAT_KOBJ_TEST2_ID 0x0a02 +#define SPLAT_KOBJ_TEST2_NAME "size/read" +#define SPLAT_KOBJ_TEST2_DESC "Kobj Size/Read Test" + +#define SPLAT_KOBJ_TEST_FILE "/etc/fstab" + +static int +splat_kobj_test1(struct file *file, void *arg) +{ + struct _buf *f; + + f = kobj_open_file(SPLAT_KOBJ_TEST_FILE); + if (f == (struct _buf *)-1) { + splat_vprint(file, SPLAT_KOBJ_TEST1_NAME, "Failed to open " + "test file: %s\n", SPLAT_KOBJ_TEST_FILE); + return -ENOENT; + } + + kobj_close_file(f); + splat_vprint(file, SPLAT_KOBJ_TEST1_NAME, "Successfully opened and " + "closed test file: %s\n", SPLAT_KOBJ_TEST_FILE); + + return 0; +} /* splat_kobj_test1() */ + +static int +splat_kobj_test2(struct file *file, void *arg) +{ + struct _buf *f; + char *buf; + uint64_t size; + int rc; + + f = kobj_open_file(SPLAT_KOBJ_TEST_FILE); + if (f == (struct _buf *)-1) { + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed to open " + "test file: %s\n", SPLAT_KOBJ_TEST_FILE); + return -ENOENT; + } + + rc = kobj_get_filesize(f, &size); + if (rc) { + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed stat of " + "test file: %s (%d)\n", SPLAT_KOBJ_TEST_FILE, rc); + goto out; + } + + buf = kmalloc(size + 1, GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed to alloc " + "%lld bytes for tmp buffer (%d)\n", size, rc); + goto out; + } + + memset(buf, 0, size + 1); + rc = kobj_read_file(f, buf, size, 0); + if (rc < 0) { + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed read of " + "test file: %s (%d)\n", SPLAT_KOBJ_TEST_FILE, rc); + goto out2; + } + + /* Validate we read as many bytes as expected based on the stat. 
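A mismatch here would indicate either a short read or an embedded NUL byte in the file.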
This + * isn't a perfect test since we didn't create the file however it is + * pretty unlikely there are garbage characters in your /etc/fstab */ + if (size != (uint64_t)strlen(buf)) { + rc = -EFBIG; + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Stat'ed size " + "(%lld) does not match number of bytes read " + "(%lld)\n", size, (uint64_t)strlen(buf)); + goto out2; + } + + rc = 0; + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "\n%s\n", buf); + splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Successfully stat'ed " + "and read expected number of bytes (%lld) from test " + "file: %s\n", size, SPLAT_KOBJ_TEST_FILE); +out2: + kfree(buf); +out: + kobj_close_file(f); + + return rc; +} /* splat_kobj_test2() */ + +splat_subsystem_t * +splat_kobj_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_KOBJ_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_KOBJ_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_KOBJ; + + SPLAT_TEST_INIT(sub, SPLAT_KOBJ_TEST1_NAME, SPLAT_KOBJ_TEST1_DESC, + SPLAT_KOBJ_TEST1_ID, splat_kobj_test1); + SPLAT_TEST_INIT(sub, SPLAT_KOBJ_TEST2_NAME, SPLAT_KOBJ_TEST2_DESC, + SPLAT_KOBJ_TEST2_ID, splat_kobj_test2); + + return sub; +} /* splat_kobj_init() */ + +void +splat_kobj_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + + SPLAT_TEST_FINI(sub, SPLAT_KOBJ_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_KOBJ_TEST1_ID); + + kfree(sub); +} /* splat_kobj_fini() */ + +int +splat_kobj_id(void) +{ + return SPLAT_SUBSYSTEM_KOBJ; +} /* splat_kobj_id() */ diff --git a/module/splat/splat-list.c b/module/splat/splat-list.c new file mode 100644 index 000000000..3d435cad0 --- /dev/null +++ b/module/splat/splat-list.c @@ -0,0 +1,473 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_LIST 0x0c00 +#define SPLAT_LIST_NAME "list" +#define SPLAT_LIST_DESC "Kernel List Tests" + +#define SPLAT_LIST_TEST1_ID 0x0c01 +#define SPLAT_LIST_TEST1_NAME "create/destroy" +#define SPLAT_LIST_TEST1_DESC "Create/destroy Test" + +#define SPLAT_LIST_TEST2_ID 0x0c02 +#define SPLAT_LIST_TEST2_NAME "ins/rm head" +#define SPLAT_LIST_TEST2_DESC "Insert/remove head Test" + +#define SPLAT_LIST_TEST3_ID 0x0c03 +#define SPLAT_LIST_TEST3_NAME "ins/rm tail" +#define SPLAT_LIST_TEST3_DESC "Insert/remove tail Test" + +#define SPLAT_LIST_TEST4_ID 0x0c04 +#define SPLAT_LIST_TEST4_NAME "insert_after" +#define SPLAT_LIST_TEST4_DESC "Insert_after Test" + +#define SPLAT_LIST_TEST5_ID 0x0c05 +#define SPLAT_LIST_TEST5_NAME "insert_before" +#define SPLAT_LIST_TEST5_DESC "Insert_before Test" + +#define SPLAT_LIST_TEST6_ID 0x0c06 +#define SPLAT_LIST_TEST6_NAME "remove" +#define SPLAT_LIST_TEST6_DESC "Remove Test" + +#define SPLAT_LIST_TEST7_ID 0x0c7 +#define SPLAT_LIST_TEST7_NAME "active" +#define SPLAT_LIST_TEST7_DESC "Active Test" + +/* It is important that li_node is not the first element, this + * ensures the list_d2l/list_object macros are working correctly. */ +typedef struct list_item { + int li_data; + list_node_t li_node; +} list_item_t; + +#define LIST_ORDER_STACK 0 +#define LIST_ORDER_QUEUE 1 + +static int +splat_list_test1(struct file *file, void *arg) +{ + list_t list; + + splat_vprint(file, SPLAT_LIST_TEST1_NAME, "Creating list\n%s", ""); + list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + if (!list_is_empty(&list)) { + splat_vprint(file, SPLAT_LIST_TEST1_NAME, + "New list NOT empty%s\n", ""); + /* list_destroy() intentionally skipped to avoid assert */ + return -EEXIST; + } + + splat_vprint(file, SPLAT_LIST_TEST1_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + /* Validate the list has been destroyed */ + if (list_link_active(&list.list_head)) { + splat_vprint(file, SPLAT_LIST_TEST1_NAME, + "Destroyed list still active%s", ""); + return -EIO; + } + + return 0; +} + +static int +splat_list_validate(list_t *list, int size, int order, int mult) +{ + list_item_t *li; + int i; + + /* Walk all items in list from head to verify stack or queue + * ordering. We bound the for loop by size+1 to ensure that + * we still terminate if there is list corruption. We also + * intentionally make things a little more complex than they + * need to be by using list_head/list_next for queues, and + * list_tail/list_prev for stacks. This is simply done for + * coverage and to ensure these function are working right. + */ + for (i = 0, li = (order ? list_head(list) : list_tail(list)); + i < size + 1 && li != NULL; + i++, li = (order ? 
list_next(list, li) : list_prev(list, li))) + if (li->li_data != i * mult) + return -EIDRM; + + if (i != size) + return -E2BIG; + + return 0; +} + +static int +splat_list_test2(struct file *file, void *arg) +{ + list_t list; + list_item_t *li; + int i, list_size = 8, rc = 0; + + splat_vprint(file, SPLAT_LIST_TEST2_NAME, "Creating list\n%s", ""); + list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + /* Insert all items at the list head to form a stack */ + splat_vprint(file, SPLAT_LIST_TEST2_NAME, + "Adding %d items to list head\n", list_size); + for (i = 0; i < list_size; i++) { + li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); + if (li == NULL) { + rc = -ENOMEM; + goto out; + } + + list_link_init(&li->li_node); + li->li_data = i; + list_insert_head(&list, li); + } + + splat_vprint(file, SPLAT_LIST_TEST2_NAME, + "Validating %d item list is a stack\n", list_size); + rc = splat_list_validate(&list, list_size, LIST_ORDER_STACK, 1); + if (rc) + splat_vprint(file, SPLAT_LIST_TEST2_NAME, + "List validation failed, %d\n", rc); +out: + /* Remove all items */ + splat_vprint(file, SPLAT_LIST_TEST2_NAME, + "Removing %d items from list head\n", list_size); + while ((li = list_remove_head(&list))) + kmem_free(li, sizeof(list_item_t)); + + splat_vprint(file, SPLAT_LIST_TEST2_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + return rc; +} + +static int +splat_list_test3(struct file *file, void *arg) +{ + list_t list; + list_item_t *li; + int i, list_size = 8, rc = 0; + + splat_vprint(file, SPLAT_LIST_TEST3_NAME, "Creating list\n%s", ""); + list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + /* Insert all items at the list tail to form a queue */ + splat_vprint(file, SPLAT_LIST_TEST3_NAME, + "Adding %d items to list tail\n", list_size); + for (i = 0; i < list_size; i++) { + li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); + if (li == NULL) { + rc = -ENOMEM; + goto out; + } + + list_link_init(&li->li_node); + li->li_data = i; + list_insert_tail(&list, li); + } + + splat_vprint(file, SPLAT_LIST_TEST3_NAME, + "Validating %d item list is a queue\n", list_size); + rc = splat_list_validate(&list, list_size, LIST_ORDER_QUEUE, 1); + if (rc) + splat_vprint(file, SPLAT_LIST_TEST3_NAME, + "List validation failed, %d\n", rc); +out: + /* Remove all items */ + splat_vprint(file, SPLAT_LIST_TEST3_NAME, + "Removing %d items from list tail\n", list_size); + while ((li = list_remove_tail(&list))) + kmem_free(li, sizeof(list_item_t)); + + splat_vprint(file, SPLAT_LIST_TEST3_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + return rc; +} + +static int +splat_list_test4(struct file *file, void *arg) +{ + list_t list; + list_item_t *li_new, *li_last = NULL; + int i, list_size = 8, rc = 0; + + splat_vprint(file, SPLAT_LIST_TEST4_NAME, "Creating list\n%s", ""); + list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + /* Insert all items after the last item to form a queue */ + splat_vprint(file, SPLAT_LIST_TEST4_NAME, + "Adding %d items each after the last item\n", list_size); + for (i = 0; i < list_size; i++) { + li_new = kmem_alloc(sizeof(list_item_t), KM_SLEEP); + if (li_new == NULL) { + rc = -ENOMEM; + goto out; + } + + list_link_init(&li_new->li_node); + li_new->li_data = i; + list_insert_after(&list, li_last, li_new); + li_last = li_new; + } + + splat_vprint(file, SPLAT_LIST_TEST4_NAME, + "Validating %d item list is a queue\n", list_size); + rc = splat_list_validate(&list, list_size, LIST_ORDER_QUEUE, 1); + if (rc) + 
splat_vprint(file, SPLAT_LIST_TEST4_NAME, + "List validation failed, %d\n", rc); +out: + /* Remove all items */ + splat_vprint(file, SPLAT_LIST_TEST4_NAME, + "Removing %d items from list tail\n", list_size); + while ((li_new = list_remove_head(&list))) + kmem_free(li_new, sizeof(list_item_t)); + + splat_vprint(file, SPLAT_LIST_TEST4_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + return rc; +} + +static int +splat_list_test5(struct file *file, void *arg) +{ + list_t list; + list_item_t *li_new, *li_last = NULL; + int i, list_size = 8, rc = 0; + + splat_vprint(file, SPLAT_LIST_TEST5_NAME, "Creating list\n%s", ""); + list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + /* Insert all items before the last item to form a stack */ + splat_vprint(file, SPLAT_LIST_TEST5_NAME, + "Adding %d items each before the last item\n", list_size); + for (i = 0; i < list_size; i++) { + li_new = kmem_alloc(sizeof(list_item_t), KM_SLEEP); + if (li_new == NULL) { + rc = -ENOMEM; + goto out; + } + + list_link_init(&li_new->li_node); + li_new->li_data = i; + list_insert_before(&list, li_last, li_new); + li_last = li_new; + } + + splat_vprint(file, SPLAT_LIST_TEST5_NAME, + "Validating %d item list is a queue\n", list_size); + rc = splat_list_validate(&list, list_size, LIST_ORDER_STACK, 1); + if (rc) + splat_vprint(file, SPLAT_LIST_TEST5_NAME, + "List validation failed, %d\n", rc); +out: + /* Remove all items */ + splat_vprint(file, SPLAT_LIST_TEST5_NAME, + "Removing %d items from list tail\n", list_size); + while ((li_new = list_remove_tail(&list))) + kmem_free(li_new, sizeof(list_item_t)); + + splat_vprint(file, SPLAT_LIST_TEST5_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + return rc; +} + +static int +splat_list_test6(struct file *file, void *arg) +{ + list_t list; + list_item_t *li, *li_prev; + int i, list_size = 8, rc = 0; + + splat_vprint(file, SPLAT_LIST_TEST6_NAME, "Creating list\n%s", ""); + list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + /* Insert all items at the list tail to form a queue */ + splat_vprint(file, SPLAT_LIST_TEST6_NAME, + "Adding %d items to list tail\n", list_size); + for (i = 0; i < list_size; i++) { + li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); + if (li == NULL) { + rc = -ENOMEM; + goto out; + } + + list_link_init(&li->li_node); + li->li_data = i; + list_insert_tail(&list, li); + } + + /* Remove all odd items from the queue */ + splat_vprint(file, SPLAT_LIST_TEST6_NAME, + "Removing %d odd items from the list\n", list_size / 2); + for (li = list_head(&list); li != NULL; li = list_next(&list, li)) { + if (li->li_data % 2 == 1) { + li_prev = list_prev(&list, li); + list_remove(&list, li); + li = li_prev; + } + } + + splat_vprint(file, SPLAT_LIST_TEST6_NAME, "Validating %d item " + "list is a queue of only even elements\n", list_size / 2); + rc = splat_list_validate(&list, list_size / 2, LIST_ORDER_QUEUE, 2); + if (rc) + splat_vprint(file, SPLAT_LIST_TEST6_NAME, + "List validation failed, %d\n", rc); +out: + /* Remove all items */ + splat_vprint(file, SPLAT_LIST_TEST6_NAME, + "Removing %d items from list tail\n", list_size / 2); + while ((li = list_remove_tail(&list))) + kmem_free(li, sizeof(list_item_t)); + + splat_vprint(file, SPLAT_LIST_TEST6_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + return rc; +} + +static int +splat_list_test7(struct file *file, void *arg) +{ + list_t list; + list_item_t *li; + int rc = 0; + + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Creating list\n%s", ""); + 
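+ /* list_create() needs the object size and the byte offset of the embedded
+ * list_node_t so the list code can map nodes back to their containing objects */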
list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); + + li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); + if (li == NULL) { + rc = -ENOMEM; + goto out; + } + + /* Validate newly initialized node is inactive */ + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Init list node\n%s", ""); + list_link_init(&li->li_node); + if (list_link_active(&li->li_node)) { + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Newly initialized " + "list node should inactive %p/%p\n", + li->li_node.prev, li->li_node.next); + rc = -EINVAL; + goto out; + } + + /* Validate node is active when linked in to a list */ + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Insert list node\n%s", ""); + list_insert_head(&list, li); + if (!list_link_active(&li->li_node)) { + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "List node " + "inserted in list should be active %p/%p\n", + li->li_node.prev, li->li_node.next); + rc = -EINVAL; + goto out; + } + + /* Validate node is inactive when removed from list */ + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Remove list node\n%s", ""); + list_remove(&list, li); + if (list_link_active(&li->li_node)) { + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "List node " + "removed from list should be inactive %p/%p\n", + li->li_node.prev, li->li_node.next); + rc = -EINVAL; + } + + kmem_free(li, sizeof(list_item_t)); +out: + /* Remove all items */ + while ((li = list_remove_head(&list))) + kmem_free(li, sizeof(list_item_t)); + + splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Destroying list\n%s", ""); + list_destroy(&list); + + return rc; +} + +splat_subsystem_t * +splat_list_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_LIST_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_LIST_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_LIST; + + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST1_NAME, SPLAT_LIST_TEST1_DESC, + SPLAT_LIST_TEST1_ID, splat_list_test1); + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST2_NAME, SPLAT_LIST_TEST2_DESC, + SPLAT_LIST_TEST2_ID, splat_list_test2); + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST3_NAME, SPLAT_LIST_TEST3_DESC, + SPLAT_LIST_TEST3_ID, splat_list_test3); + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST4_NAME, SPLAT_LIST_TEST4_DESC, + SPLAT_LIST_TEST4_ID, splat_list_test4); + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST5_NAME, SPLAT_LIST_TEST5_DESC, + SPLAT_LIST_TEST5_ID, splat_list_test5); + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST6_NAME, SPLAT_LIST_TEST6_DESC, + SPLAT_LIST_TEST6_ID, splat_list_test6); + SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST7_NAME, SPLAT_LIST_TEST7_DESC, + SPLAT_LIST_TEST7_ID, splat_list_test7); + + return sub; +} + +void +splat_list_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST7_ID); + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST6_ID); + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST5_ID); + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST1_ID); + + kfree(sub); +} + +int +splat_list_id(void) +{ + return SPLAT_SUBSYSTEM_LIST; +} diff --git a/module/splat/splat-mutex.c b/module/splat/splat-mutex.c new file mode 100644 index 000000000..640f8f407 --- /dev/null +++ b/module/splat/splat-mutex.c @@ -0,0 +1,355 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. 
+ * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_MUTEX 0x0400 +#define SPLAT_MUTEX_NAME "mutex" +#define SPLAT_MUTEX_DESC "Kernel Mutex Tests" + +#define SPLAT_MUTEX_TEST1_ID 0x0401 +#define SPLAT_MUTEX_TEST1_NAME "tryenter" +#define SPLAT_MUTEX_TEST1_DESC "Validate mutex_tryenter() correctness" + +#define SPLAT_MUTEX_TEST2_ID 0x0402 +#define SPLAT_MUTEX_TEST2_NAME "race" +#define SPLAT_MUTEX_TEST2_DESC "Many threads entering/exiting the mutex" + +#define SPLAT_MUTEX_TEST3_ID 0x0403 +#define SPLAT_MUTEX_TEST3_NAME "owned" +#define SPLAT_MUTEX_TEST3_DESC "Validate mutex_owned() correctness" + +#define SPLAT_MUTEX_TEST4_ID 0x0404 +#define SPLAT_MUTEX_TEST4_NAME "owner" +#define SPLAT_MUTEX_TEST4_DESC "Validate mutex_owner() correctness" + +#define SPLAT_MUTEX_TEST_MAGIC 0x115599DDUL +#define SPLAT_MUTEX_TEST_NAME "mutex_test" +#define SPLAT_MUTEX_TEST_WORKQ "mutex_wq" +#define SPLAT_MUTEX_TEST_COUNT 128 + +typedef struct mutex_priv { + unsigned long mp_magic; + struct file *mp_file; + struct work_struct mp_work[SPLAT_MUTEX_TEST_COUNT]; + kmutex_t mp_mtx; + int mp_rc; +} mutex_priv_t; + +#ifdef HAVE_3ARGS_INIT_WORK +static void +splat_mutex_test1_work(void *priv) +{ + mutex_priv_t *mp = (mutex_priv_t *)priv; + + ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); + mp->mp_rc = 0; + + if (!mutex_tryenter(&mp->mp_mtx)) + mp->mp_rc = -EBUSY; +} +#endif + +static int +splat_mutex_test1(struct file *file, void *arg) +{ + int rc = 0; +#ifdef HAVE_3ARGS_INIT_WORK + struct workqueue_struct *wq; + struct work_struct work; + mutex_priv_t *mp; + + mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); + if (mp == NULL) + return -ENOMEM; + + wq = create_singlethread_workqueue(SPLAT_MUTEX_TEST_WORKQ); + if (wq == NULL) { + rc = -ENOMEM; + goto out2; + } + + mutex_init(&(mp->mp_mtx), SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + mutex_enter(&(mp->mp_mtx)); + + mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; + mp->mp_file = file; + INIT_WORK(&work, splat_mutex_test1_work, mp); + + /* Schedule a work item which will try and aquire the mutex via + * mutex_tryenter() while its held. This should fail and the work + * item will indicte this status in the passed private data. */ + if (!queue_work(wq, &work)) { + mutex_exit(&(mp->mp_mtx)); + rc = -EINVAL; + goto out; + } + + flush_workqueue(wq); + mutex_exit(&(mp->mp_mtx)); + + /* Work item successfully aquired mutex, very bad! 
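The work item should have seen mutex_tryenter() fail with -EBUSY while the lock was held.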
*/ + if (mp->mp_rc != -EBUSY) { + rc = -EINVAL; + goto out; + } + + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", + "mutex_trylock() correctly failed when mutex held\n"); + + /* Schedule a work item which will try and aquire the mutex via + * mutex_tryenter() while it is not held. This should work and + * the item will indicte this status in the passed private data. */ + if (!queue_work(wq, &work)) { + rc = -EINVAL; + goto out; + } + + flush_workqueue(wq); + + /* Work item failed to aquire mutex, very bad! */ + if (mp->mp_rc != 0) { + rc = -EINVAL; + goto out; + } + + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", + "mutex_trylock() correctly succeeded when mutex unheld\n"); +out: + mutex_destroy(&(mp->mp_mtx)); + destroy_workqueue(wq); +out2: + kfree(mp); +#endif + return rc; +} + +#ifdef HAVE_3ARGS_INIT_WORK +static void +splat_mutex_test2_work(void *priv) +{ + mutex_priv_t *mp = (mutex_priv_t *)priv; + int rc; + + ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); + + /* Read the value before sleeping and write it after we wake up to + * maximize the chance of a race if mutexs are not working properly */ + mutex_enter(&mp->mp_mtx); + rc = mp->mp_rc; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 100); /* 1/100 of a second */ + mp->mp_rc = rc + 1; + mutex_exit(&mp->mp_mtx); +} +#endif + +static int +splat_mutex_test2(struct file *file, void *arg) +{ + int rc = 0; +#ifdef HAVE_3ARGS_INIT_WORK + struct workqueue_struct *wq; + mutex_priv_t *mp; + int i; + + mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); + if (mp == NULL) + return -ENOMEM; + + /* Create a thread per CPU items on queue will race */ + wq = create_workqueue(SPLAT_MUTEX_TEST_WORKQ); + if (wq == NULL) { + rc = -ENOMEM; + goto out; + } + + mutex_init(&(mp->mp_mtx), SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + + mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; + mp->mp_file = file; + mp->mp_rc = 0; + + /* Schedule N work items to the work queue each of which enters the + * mutex, sleeps briefly, then exits the mutex. On a multiprocessor + * box these work items will be handled by all available CPUs. The + * mutex is instrumented such that if any two processors are in the + * critical region at the same time the system will panic. If the + * mutex is implemented right this will never happy, that's a pass. */ + for (i = 0; i < SPLAT_MUTEX_TEST_COUNT; i++) { + INIT_WORK(&(mp->mp_work[i]), splat_mutex_test2_work, mp); + + if (!queue_work(wq, &(mp->mp_work[i]))) { + splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, + "Failed to queue work id %d\n", i); + rc = -EINVAL; + } + } + + flush_workqueue(wq); + + if (mp->mp_rc == SPLAT_MUTEX_TEST_COUNT) { + splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " + "correctly entered/exited the mutex %d times\n", + num_online_cpus(), mp->mp_rc); + } else { + splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " + "only processed %d/%d mutex work items\n", + num_online_cpus(), mp->mp_rc, SPLAT_MUTEX_TEST_COUNT); + rc = -EINVAL; + } + + mutex_destroy(&(mp->mp_mtx)); + destroy_workqueue(wq); +out: + kfree(mp); +#endif + return rc; +} + +static int +splat_mutex_test3(struct file *file, void *arg) +{ + kmutex_t mtx; + int rc = 0; + + mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + + mutex_enter(&mtx); + + /* Mutex should be owned by current */ + if (!mutex_owned(&mtx)) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " + "be owned by pid %d but is owned by pid %d\n", + current->pid, mtx.km_owner ? 
mtx.km_owner->pid : -1); + rc = -EINVAL; + goto out; + } + + mutex_exit(&mtx); + + /* Mutex should not be owned by any task */ + if (mutex_owned(&mtx)) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " + "not be owned but is owned by pid %d\n", + mtx.km_owner ? mtx.km_owner->pid : -1); + rc = -EINVAL; + goto out; + } + + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s", + "Correct mutex_owned() behavior\n"); +out: + mutex_destroy(&mtx); + + return rc; +} + +static int +splat_mutex_test4(struct file *file, void *arg) +{ + kmutex_t mtx; + kthread_t *owner; + int rc = 0; + + mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + + mutex_enter(&mtx); + + /* Mutex should be owned by current */ + owner = mutex_owner(&mtx); + if (current != owner) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " + "be owned by pid %d but is owned by pid %d\n", + current->pid, owner ? owner->pid : -1); + rc = -EINVAL; + goto out; + } + + mutex_exit(&mtx); + + /* Mutex should not be owned by any task */ + owner = mutex_owner(&mtx); + if (owner) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should not " + "be owned but is owned by pid %d\n", owner->pid); + rc = -EINVAL; + goto out; + } + + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s", + "Correct mutex_owner() behavior\n"); +out: + mutex_destroy(&mtx); + + return rc; +} + +splat_subsystem_t * +splat_mutex_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_MUTEX_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_MUTEX_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_MUTEX; + + SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST1_NAME, SPLAT_MUTEX_TEST1_DESC, + SPLAT_MUTEX_TEST1_ID, splat_mutex_test1); + SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST2_NAME, SPLAT_MUTEX_TEST2_DESC, + SPLAT_MUTEX_TEST2_ID, splat_mutex_test2); + SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST3_NAME, SPLAT_MUTEX_TEST3_DESC, + SPLAT_MUTEX_TEST3_ID, splat_mutex_test3); + SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST4_NAME, SPLAT_MUTEX_TEST4_DESC, + SPLAT_MUTEX_TEST4_ID, splat_mutex_test4); + + return sub; +} + +void +splat_mutex_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST1_ID); + + kfree(sub); +} + +int +splat_mutex_id(void) { + return SPLAT_SUBSYSTEM_MUTEX; +} diff --git a/module/splat/splat-random.c b/module/splat/splat-random.c new file mode 100644 index 000000000..c96dd480c --- /dev/null +++ b/module/splat/splat-random.c @@ -0,0 +1,129 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_KRNG 0x0300 +#define SPLAT_KRNG_NAME "krng" +#define SPLAT_KRNG_DESC "Kernel Random Number Generator Tests" + +#define SPLAT_KRNG_TEST1_ID 0x0301 +#define SPLAT_KRNG_TEST1_NAME "freq" +#define SPLAT_KRNG_TEST1_DESC "Frequency Test" + +#define KRNG_NUM_BITS 1048576 +#define KRNG_NUM_BYTES (KRNG_NUM_BITS >> 3) +#define KRNG_NUM_BITS_DIV2 (KRNG_NUM_BITS >> 1) +#define KRNG_ERROR_RANGE 2097 + +/* Random Number Generator Tests + There can be meny more tests on quality of the + random number generator. For now we are only + testing the frequency of particular bits. + We could also test consecutive sequences, + randomness within a particular block, etc. + but is probably not necessary for our purposes */ + +static int +splat_krng_test1(struct file *file, void *arg) +{ + uint8_t *buf; + int i, j, diff, num = 0, rc = 0; + + buf = kmalloc(sizeof(*buf) * KRNG_NUM_BYTES, GFP_KERNEL); + if (buf == NULL) { + rc = -ENOMEM; + goto out; + } + + memset(buf, 0, sizeof(*buf) * KRNG_NUM_BYTES); + + /* Always succeeds */ + random_get_pseudo_bytes(buf, sizeof(uint8_t) * KRNG_NUM_BYTES); + + for (i = 0; i < KRNG_NUM_BYTES; i++) { + uint8_t tmp = buf[i]; + for (j = 0; j < 8; j++) { + uint8_t tmp2 = ((tmp >> j) & 0x01); + if (tmp2 == 1) { + num++; + } + } + } + + kfree(buf); + + diff = KRNG_NUM_BITS_DIV2 - num; + if (diff < 0) + diff *= -1; + + splat_print(file, "Test 1 Number of ones: %d\n", num); + splat_print(file, "Test 1 Difference from expected: %d Allowed: %d\n", + diff, KRNG_ERROR_RANGE); + + if (diff > KRNG_ERROR_RANGE) + rc = -ERANGE; +out: + return rc; +} + +splat_subsystem_t * +splat_krng_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_KRNG_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_KRNG_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_KRNG; + + SPLAT_TEST_INIT(sub, SPLAT_KRNG_TEST1_NAME, SPLAT_KRNG_TEST1_DESC, + SPLAT_KRNG_TEST1_ID, splat_krng_test1); + + return sub; +} + +void +splat_krng_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + + SPLAT_TEST_FINI(sub, SPLAT_KRNG_TEST1_ID); + + kfree(sub); +} + +int +splat_krng_id(void) { + return SPLAT_SUBSYSTEM_KRNG; +} diff --git a/module/splat/splat-rwlock.c b/module/splat/splat-rwlock.c new file mode 100644 index 000000000..b1db12ea8 --- /dev/null +++ b/module/splat/splat-rwlock.c @@ -0,0 +1,786 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_RWLOCK 0x0700 +#define SPLAT_RWLOCK_NAME "rwlock" +#define SPLAT_RWLOCK_DESC "Kernel RW Lock Tests" + +#define SPLAT_RWLOCK_TEST1_ID 0x0701 +#define SPLAT_RWLOCK_TEST1_NAME "rwtest1" +#define SPLAT_RWLOCK_TEST1_DESC "Multiple Readers One Writer" + +#define SPLAT_RWLOCK_TEST2_ID 0x0702 +#define SPLAT_RWLOCK_TEST2_NAME "rwtest2" +#define SPLAT_RWLOCK_TEST2_DESC "Multiple Writers" + +#define SPLAT_RWLOCK_TEST3_ID 0x0703 +#define SPLAT_RWLOCK_TEST3_NAME "rwtest3" +#define SPLAT_RWLOCK_TEST3_DESC "Owner Verification" + +#define SPLAT_RWLOCK_TEST4_ID 0x0704 +#define SPLAT_RWLOCK_TEST4_NAME "rwtest4" +#define SPLAT_RWLOCK_TEST4_DESC "Trylock Test" + +#define SPLAT_RWLOCK_TEST5_ID 0x0705 +#define SPLAT_RWLOCK_TEST5_NAME "rwtest5" +#define SPLAT_RWLOCK_TEST5_DESC "Write Downgrade Test" + +#define SPLAT_RWLOCK_TEST6_ID 0x0706 +#define SPLAT_RWLOCK_TEST6_NAME "rwtest6" +#define SPLAT_RWLOCK_TEST6_DESC "Read Upgrade Test" + +#define SPLAT_RWLOCK_TEST_MAGIC 0x115599DDUL +#define SPLAT_RWLOCK_TEST_NAME "rwlock_test" +#define SPLAT_RWLOCK_TEST_COUNT 8 + +#define SPLAT_RWLOCK_RELEASE_INIT 0 +#define SPLAT_RWLOCK_RELEASE_WRITERS 1 +#define SPLAT_RWLOCK_RELEASE_READERS 2 + +typedef struct rw_priv { + unsigned long rw_magic; + struct file *rw_file; + krwlock_t rwl; + spinlock_t rw_priv_lock; + wait_queue_head_t rw_waitq; + atomic_t rw_completed; + atomic_t rw_acquired; + atomic_t rw_waiters; + atomic_t rw_release; +} rw_priv_t; + +typedef struct rw_thr { + int rwt_id; + const char *rwt_name; + rw_priv_t *rwt_rwp; + int rwt_rc; +} rw_thr_t; + +static inline void +splat_rwlock_sleep(signed long delay) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); +} + +#define splat_rwlock_lock_and_test(lock,test) \ +({ \ + int ret = 0; \ + \ + spin_lock(lock); \ + ret = (test) ? 
1 : 0; \ + spin_unlock(lock); \ + ret; \ +}) + +void splat_init_rw_priv(rw_priv_t *rwv, struct file *file) +{ + rwv->rw_magic = SPLAT_RWLOCK_TEST_MAGIC; + rwv->rw_file = file; + spin_lock_init(&rwv->rw_priv_lock); + init_waitqueue_head(&rwv->rw_waitq); + atomic_set(&rwv->rw_completed, 0); + atomic_set(&rwv->rw_acquired, 0); + atomic_set(&rwv->rw_waiters, 0); + atomic_set(&rwv->rw_release, SPLAT_RWLOCK_RELEASE_INIT); + + /* Initialize the read/write lock */ + rw_init(&rwv->rwl, SPLAT_RWLOCK_TEST_NAME, RW_DEFAULT, NULL); +} + +int +splat_rwlock_test1_writer_thread(void *arg) +{ + rw_thr_t *rwt = (rw_thr_t *)arg; + rw_priv_t *rwv = rwt->rwt_rwp; + uint8_t rnd = 0; + char name[16]; + + ASSERT(rwv->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); + snprintf(name, sizeof(name), "%s%d", + SPLAT_RWLOCK_TEST_NAME, rwt->rwt_id); + daemonize(name); + get_random_bytes((void *)&rnd, 1); + splat_rwlock_sleep(rnd * HZ / 1000); + + spin_lock(&rwv->rw_priv_lock); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s writer thread trying to acquire rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + atomic_inc(&rwv->rw_waiters); + spin_unlock(&rwv->rw_priv_lock); + + /* Take the semaphore for writing + * release it when we are told to */ + rw_enter(&rwv->rwl, RW_WRITER); + + spin_lock(&rwv->rw_priv_lock); + atomic_dec(&rwv->rw_waiters); + atomic_inc(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s writer thread acquired rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Wait here until the control thread + * says we can release the write lock */ + wait_event_interruptible(rwv->rw_waitq, + splat_rwlock_lock_and_test(&rwv->rw_priv_lock, + atomic_read(&rwv->rw_release) == + SPLAT_RWLOCK_RELEASE_WRITERS)); + spin_lock(&rwv->rw_priv_lock); + atomic_inc(&rwv->rw_completed); + atomic_dec(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s writer thread dropped rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Release the semaphore */ + rw_exit(&rwv->rwl); + return 0; +} + +int +splat_rwlock_test1_reader_thread(void *arg) +{ + rw_thr_t *rwt = (rw_thr_t *)arg; + rw_priv_t *rwv = rwt->rwt_rwp; + uint8_t rnd = 0; + char name[16]; + + ASSERT(rwv->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); + snprintf(name, sizeof(name), "%s%d", + SPLAT_RWLOCK_TEST_NAME, rwt->rwt_id); + daemonize(name); + get_random_bytes((void *)&rnd, 1); + splat_rwlock_sleep(rnd * HZ / 1000); + + /* Don't try and and take the semaphore until + * someone else has already acquired it */ + wait_event_interruptible(rwv->rw_waitq, + splat_rwlock_lock_and_test(&rwv->rw_priv_lock, + atomic_read(&rwv->rw_acquired) > 0)); + + spin_lock(&rwv->rw_priv_lock); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread trying to acquire rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + atomic_inc(&rwv->rw_waiters); + spin_unlock(&rwv->rw_priv_lock); + + /* Take the semaphore for reading + * release it when we are told to */ + rw_enter(&rwv->rwl, RW_READER); + + spin_lock(&rwv->rw_priv_lock); + atomic_dec(&rwv->rw_waiters); + atomic_inc(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread acquired rwlock with " + "%d holding lock 
and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Wait here until the control thread + * says we can release the read lock */ + wait_event_interruptible(rwv->rw_waitq, + splat_rwlock_lock_and_test(&rwv->rw_priv_lock, + atomic_read(&rwv->rw_release) == + SPLAT_RWLOCK_RELEASE_READERS)); + + spin_lock(&rwv->rw_priv_lock); + atomic_inc(&rwv->rw_completed); + atomic_dec(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread dropped rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Release the semaphore */ + rw_exit(&rwv->rwl); + return 0; +} + +static int +splat_rwlock_test1(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_RWLOCK_TEST_COUNT]; + rw_thr_t rwt[SPLAT_RWLOCK_TEST_COUNT]; + rw_priv_t rwv; + + /* Initialize private data including the rwlock */ + splat_init_rw_priv(&rwv, file); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. */ + for (i = 0; i < SPLAT_RWLOCK_TEST_COUNT; i++) { + rwt[i].rwt_rwp = &rwv; + rwt[i].rwt_id = i; + rwt[i].rwt_name = SPLAT_RWLOCK_TEST1_NAME; + rwt[i].rwt_rc = 0; + + /* The first thread will be a writer */ + if (i == 0) { + pids[i] = kernel_thread(splat_rwlock_test1_writer_thread, + &rwt[i], 0); + } else { + pids[i] = kernel_thread(splat_rwlock_test1_reader_thread, + &rwt[i], 0); + } + + if (pids[i] >= 0) { + count++; + } + } + + /* Once the writer has the lock, release the readers */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, atomic_read(&rwv.rw_acquired) <= 0)) { + splat_rwlock_sleep(1 * HZ); + } + wake_up_interruptible(&rwv.rw_waitq); + + /* Ensure that there is only 1 writer and all readers are waiting */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_acquired) != 1 || + atomic_read(&rwv.rw_waiters) != + SPLAT_RWLOCK_TEST_COUNT - 1)) { + + splat_rwlock_sleep(1 * HZ); + } + /* Relase the writer */ + spin_lock(&rwv.rw_priv_lock); + atomic_set(&rwv.rw_release, SPLAT_RWLOCK_RELEASE_WRITERS); + spin_unlock(&rwv.rw_priv_lock); + wake_up_interruptible(&rwv.rw_waitq); + + /* Now ensure that there are multiple reader threads holding the lock */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_acquired) <= 1)) { + splat_rwlock_sleep(1 * HZ); + } + /* Release the readers */ + spin_lock(&rwv.rw_priv_lock); + atomic_set(&rwv.rw_release, SPLAT_RWLOCK_RELEASE_READERS); + spin_unlock(&rwv.rw_priv_lock); + wake_up_interruptible(&rwv.rw_waitq); + + /* Wait for the test to complete */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_acquired) != 0 || + atomic_read(&rwv.rw_waiters) != 0)) { + splat_rwlock_sleep(1 * HZ); + + } + + rw_destroy(&rwv.rwl); + return rc; +} + +int +splat_rwlock_test2_writer_thread(void *arg) +{ + rw_thr_t *rwt = (rw_thr_t *)arg; + rw_priv_t *rwv = rwt->rwt_rwp; + uint8_t rnd = 0; + char name[16]; + + ASSERT(rwv->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); + snprintf(name, sizeof(name), "%s%d", + SPLAT_RWLOCK_TEST_NAME, rwt->rwt_id); + daemonize(name); + get_random_bytes((void *)&rnd, 1); + splat_rwlock_sleep(rnd * HZ / 1000); + + /* Here just increment the waiters count even if we are not + * exactly about to call rw_enter(). 
Not really a big deal + * since more than likely will be true when we simulate work + * later on */ + spin_lock(&rwv->rw_priv_lock); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s writer thread trying to acquire rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + atomic_inc(&rwv->rw_waiters); + spin_unlock(&rwv->rw_priv_lock); + + /* Wait here until the control thread + * says we can acquire the write lock */ + wait_event_interruptible(rwv->rw_waitq, + splat_rwlock_lock_and_test(&rwv->rw_priv_lock, + atomic_read(&rwv->rw_release) == + SPLAT_RWLOCK_RELEASE_WRITERS)); + + /* Take the semaphore for writing */ + rw_enter(&rwv->rwl, RW_WRITER); + + spin_lock(&rwv->rw_priv_lock); + atomic_dec(&rwv->rw_waiters); + atomic_inc(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s writer thread acquired rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Give up the processor for a bit to simulate + * doing some work while taking the write lock */ + splat_rwlock_sleep(rnd * HZ / 1000); + + /* Ensure that we are the only one writing */ + if (atomic_read(&rwv->rw_acquired) > 1) { + rwt->rwt_rc = 1; + } else { + rwt->rwt_rc = 0; + } + + spin_lock(&rwv->rw_priv_lock); + atomic_inc(&rwv->rw_completed); + atomic_dec(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s writer thread dropped rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + rw_exit(&rwv->rwl); + + return 0; +} + +static int +splat_rwlock_test2(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_RWLOCK_TEST_COUNT]; + rw_thr_t rwt[SPLAT_RWLOCK_TEST_COUNT]; + rw_priv_t rwv; + + /* Initialize private data including the rwlock */ + splat_init_rw_priv(&rwv, file); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. 
*/ + for (i = 0; i < SPLAT_RWLOCK_TEST_COUNT; i++) { + rwt[i].rwt_rwp = &rwv; + rwt[i].rwt_id = i; + rwt[i].rwt_name = SPLAT_RWLOCK_TEST2_NAME; + rwt[i].rwt_rc = 0; + + /* The first thread will be a writer */ + pids[i] = kernel_thread(splat_rwlock_test2_writer_thread, + &rwt[i], 0); + + if (pids[i] >= 0) { + count++; + } + } + + /* Wait for writers to get queued up */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_waiters) < SPLAT_RWLOCK_TEST_COUNT)) { + splat_rwlock_sleep(1 * HZ); + } + /* Relase the writers */ + spin_lock(&rwv.rw_priv_lock); + atomic_set(&rwv.rw_release, SPLAT_RWLOCK_RELEASE_WRITERS); + spin_unlock(&rwv.rw_priv_lock); + wake_up_interruptible(&rwv.rw_waitq); + + /* Wait for the test to complete */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_acquired) != 0 || + atomic_read(&rwv.rw_waiters) != 0)) { + splat_rwlock_sleep(1 * HZ); + } + + /* If any of the write threads ever acquired the lock + * while another thread had it, make sure we return + * an error */ + for (i = 0; i < SPLAT_RWLOCK_TEST_COUNT; i++) { + if (rwt[i].rwt_rc) { + rc++; + } + } + + rw_destroy(&rwv.rwl); + return rc; +} + +static int +splat_rwlock_test3(struct file *file, void *arg) +{ + kthread_t *owner; + rw_priv_t rwv; + int rc = 0; + + /* Initialize private data + * including the rwlock */ + splat_init_rw_priv(&rwv, file); + + /* Take the rwlock for writing */ + rw_enter(&rwv.rwl, RW_WRITER); + owner = rw_owner(&rwv.rwl); + if (current != owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "rwlock should " + "be owned by pid %d but is owned by pid %d\n", + current->pid, owner ? owner->pid : -1); + rc = -EINVAL; + goto out; + } + + /* Release the rwlock */ + rw_exit(&rwv.rwl); + owner = rw_owner(&rwv.rwl); + if (owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "rwlock should not " + "be owned but is owned by pid %d\n", owner->pid); + rc = -EINVAL; + goto out; + } + + /* Take the rwlock for reading. 
+ * Should not have an owner */ + rw_enter(&rwv.rwl, RW_READER); + owner = rw_owner(&rwv.rwl); + if (owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "rwlock should not " + "be owned but is owned by pid %d\n", owner->pid); + /* Release the rwlock */ + rw_exit(&rwv.rwl); + rc = -EINVAL; + goto out; + } + + /* Release the rwlock */ + rw_exit(&rwv.rwl); + +out: + rw_destroy(&rwv.rwl); + return rc; +} + +int +splat_rwlock_test4_reader_thread(void *arg) +{ + rw_thr_t *rwt = (rw_thr_t *)arg; + rw_priv_t *rwv = rwt->rwt_rwp; + uint8_t rnd = 0; + char name[16]; + + ASSERT(rwv->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); + snprintf(name, sizeof(name), "%s%d", + SPLAT_RWLOCK_TEST_NAME, rwt->rwt_id); + daemonize(name); + get_random_bytes((void *)&rnd, 1); + splat_rwlock_sleep(rnd * HZ / 1000); + + /* Don't try and and take the semaphore until + * someone else has already acquired it */ + wait_event_interruptible(rwv->rw_waitq, + splat_rwlock_lock_and_test(&rwv->rw_priv_lock, + atomic_read(&rwv->rw_acquired) > 0)); + + spin_lock(&rwv->rw_priv_lock); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread trying to acquire rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Take the semaphore for reading + * release it when we are told to */ + rwt->rwt_rc = rw_tryenter(&rwv->rwl, RW_READER); + + /* Here we acquired the lock this is a + * failure since the writer should be + * holding the lock */ + if (rwt->rwt_rc == 1) { + spin_lock(&rwv->rw_priv_lock); + atomic_inc(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread acquired rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + spin_lock(&rwv->rw_priv_lock); + atomic_dec(&rwv->rw_acquired); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread dropped rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + + /* Release the semaphore */ + rw_exit(&rwv->rwl); + } + /* Here we know we didn't block and didn't + * acquire the rwlock for reading */ + else { + spin_lock(&rwv->rw_priv_lock); + atomic_inc(&rwv->rw_completed); + splat_vprint(rwv->rw_file, rwt->rwt_name, + "%s reader thread could not acquire rwlock with " + "%d holding lock and %d waiting\n", + name, atomic_read(&rwv->rw_acquired), + atomic_read(&rwv->rw_waiters)); + spin_unlock(&rwv->rw_priv_lock); + } + + return 0; +} + +static int +splat_rwlock_test4(struct file *file, void *arg) +{ + int i, count = 0, rc = 0; + long pids[SPLAT_RWLOCK_TEST_COUNT]; + rw_thr_t rwt[SPLAT_RWLOCK_TEST_COUNT]; + rw_priv_t rwv; + + /* Initialize private data + * including the rwlock */ + splat_init_rw_priv(&rwv, file); + + /* Create some threads, the exact number isn't important just as + * long as we know how many we managed to create and should expect. 
*/ + for (i = 0; i < SPLAT_RWLOCK_TEST_COUNT; i++) { + rwt[i].rwt_rwp = &rwv; + rwt[i].rwt_id = i; + rwt[i].rwt_name = SPLAT_RWLOCK_TEST4_NAME; + rwt[i].rwt_rc = 0; + + /* The first thread will be a writer */ + if (i == 0) { + /* We can reuse the test1 writer thread here */ + pids[i] = kernel_thread(splat_rwlock_test1_writer_thread, + &rwt[i], 0); + } else { + pids[i] = kernel_thread(splat_rwlock_test4_reader_thread, + &rwt[i], 0); + } + + if (pids[i] >= 0) { + count++; + } + } + + /* Once the writer has the lock, release the readers */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_acquired) <= 0)) { + splat_rwlock_sleep(1 * HZ); + } + wake_up_interruptible(&rwv.rw_waitq); + + /* Make sure that the reader threads complete */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_completed) != SPLAT_RWLOCK_TEST_COUNT - 1)) { + splat_rwlock_sleep(1 * HZ); + } + /* Release the writer */ + spin_lock(&rwv.rw_priv_lock); + atomic_set(&rwv.rw_release, SPLAT_RWLOCK_RELEASE_WRITERS); + spin_unlock(&rwv.rw_priv_lock); + wake_up_interruptible(&rwv.rw_waitq); + + /* Wait for the test to complete */ + while (splat_rwlock_lock_and_test(&rwv.rw_priv_lock, + atomic_read(&rwv.rw_acquired) != 0 || + atomic_read(&rwv.rw_waiters) != 0)) { + splat_rwlock_sleep(1 * HZ); + } + + /* If any of the reader threads ever acquired the lock + * while another thread had it, make sure we return + * an error since the rw_tryenter() should have failed */ + for (i = 0; i < SPLAT_RWLOCK_TEST_COUNT; i++) { + if (rwt[i].rwt_rc) { + rc++; + } + } + + rw_destroy(&rwv.rwl); + return rc; +} + +static int +splat_rwlock_test5(struct file *file, void *arg) +{ + kthread_t *owner; + rw_priv_t rwv; + int rc = 0; + + /* Initialize private data + * including the rwlock */ + splat_init_rw_priv(&rwv, file); + + /* Take the rwlock for writing */ + rw_enter(&rwv.rwl, RW_WRITER); + owner = rw_owner(&rwv.rwl); + if (current != owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST5_NAME, "rwlock should " + "be owned by pid %d but is owned by pid %d\n", + current->pid, owner ? owner->pid : -1); + rc = -EINVAL; + goto out; + } + + /* Make sure that the downgrade + * worked properly */ + rw_downgrade(&rwv.rwl); + + owner = rw_owner(&rwv.rwl); + if (owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST5_NAME, "rwlock should not " + "be owned but is owned by pid %d\n", owner->pid); + /* Release the rwlock */ + rw_exit(&rwv.rwl); + rc = -EINVAL; + goto out; + } + + /* Release the rwlock */ + rw_exit(&rwv.rwl); + +out: + rw_destroy(&rwv.rwl); + return rc; +} + +static int +splat_rwlock_test6(struct file *file, void *arg) +{ + kthread_t *owner; + rw_priv_t rwv; + int rc = 0; + + /* Initialize private data + * including the rwlock */ + splat_init_rw_priv(&rwv, file); + + /* Take the rwlock for reading */ + rw_enter(&rwv.rwl, RW_READER); + owner = rw_owner(&rwv.rwl); + if (owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should not " + "be owned but is owned by pid %d\n", owner->pid); + rc = -EINVAL; + goto out; + } + + /* Make sure that the upgrade + * worked properly */ + rc = !rw_tryupgrade(&rwv.rwl); + + owner = rw_owner(&rwv.rwl); + if (rc || current != owner) { + splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should " + "be owned by pid %d but is owned by pid %d " + "trylock rc %d\n", + current->pid, owner ? 
owner->pid : -1, rc); + rc = -EINVAL; + goto out; + } + + /* Release the rwlock */ + rw_exit(&rwv.rwl); + +out: + rw_destroy(&rwv.rwl); + return rc; +} + +splat_subsystem_t * +splat_rwlock_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_RWLOCK_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_RWLOCK_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_RWLOCK; + + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST1_NAME, SPLAT_RWLOCK_TEST1_DESC, + SPLAT_RWLOCK_TEST1_ID, splat_rwlock_test1); + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST2_NAME, SPLAT_RWLOCK_TEST2_DESC, + SPLAT_RWLOCK_TEST2_ID, splat_rwlock_test2); + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST3_NAME, SPLAT_RWLOCK_TEST3_DESC, + SPLAT_RWLOCK_TEST3_ID, splat_rwlock_test3); + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST4_NAME, SPLAT_RWLOCK_TEST4_DESC, + SPLAT_RWLOCK_TEST4_ID, splat_rwlock_test4); + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST5_NAME, SPLAT_RWLOCK_TEST5_DESC, + SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5); + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC, + SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6); + + return sub; +} + +void +splat_rwlock_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST1_ID); + kfree(sub); +} + +int +splat_rwlock_id(void) { + return SPLAT_SUBSYSTEM_RWLOCK; +} diff --git a/module/splat/splat-taskq.c b/module/splat/splat-taskq.c new file mode 100644 index 000000000..3cc09bcb9 --- /dev/null +++ b/module/splat/splat-taskq.c @@ -0,0 +1,310 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_TASKQ 0x0200 +#define SPLAT_TASKQ_NAME "taskq" +#define SPLAT_TASKQ_DESC "Kernel Task Queue Tests" + +#define SPLAT_TASKQ_TEST1_ID 0x0201 +#define SPLAT_TASKQ_TEST1_NAME "single" +#define SPLAT_TASKQ_TEST1_DESC "Single task queue, single task" + +#define SPLAT_TASKQ_TEST2_ID 0x0202 +#define SPLAT_TASKQ_TEST2_NAME "multiple" +#define SPLAT_TASKQ_TEST2_DESC "Multiple task queues, multiple tasks" + +#define SPLAT_TASKQ_TEST3_ID 0x0203 +#define SPLAT_TASKQ_TEST3_NAME "system" +#define SPLAT_TASKQ_TEST3_DESC "System task queue, multiple tasks" + +typedef struct splat_taskq_arg { + int flag; + int id; + struct file *file; + const char *name; +} splat_taskq_arg_t; + +/* Validation Test 1 - Create a taskq, queue a task, wait until + * task completes, ensure task ran properly, cleanup taskq, + */ +static void +splat_taskq_test13_func(void *arg) +{ + splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; + + ASSERT(tq_arg); + splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST1_NAME, + "Taskq '%s' function '%s' setting flag\n", + tq_arg->name, sym2str(splat_taskq_test13_func)); + tq_arg->flag = 1; +} + +static int +splat_taskq_test1(struct file *file, void *arg) +{ + taskq_t *tq; + taskqid_t id; + splat_taskq_arg_t tq_arg; + + splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, "Taskq '%s' creating\n", + SPLAT_TASKQ_TEST1_NAME); + if ((tq = taskq_create(SPLAT_TASKQ_TEST1_NAME, 1, maxclsyspri, + 50, INT_MAX, TASKQ_PREPOPULATE)) == NULL) { + splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, + "Taskq '%s' create failed\n", + SPLAT_TASKQ_TEST1_NAME); + return -EINVAL; + } + + tq_arg.flag = 0; + tq_arg.id = 0; + tq_arg.file = file; + tq_arg.name = SPLAT_TASKQ_TEST1_NAME; + + splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, + "Taskq '%s' function '%s' dispatching\n", + tq_arg.name, sym2str(splat_taskq_test13_func)); + if ((id = taskq_dispatch(tq, splat_taskq_test13_func, + &tq_arg, TQ_SLEEP)) == 0) { + splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, + "Taskq '%s' function '%s' dispatch failed\n", + tq_arg.name, sym2str(splat_taskq_test13_func)); + taskq_destroy(tq); + return -EINVAL; + } + + splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, "Taskq '%s' waiting\n", + tq_arg.name); + taskq_wait(tq); + splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, "Taskq '%s' destroying\n", + tq_arg.name); + taskq_destroy(tq); + + return (tq_arg.flag) ? 
0 : -EINVAL; +} + +/* Validation Test 2 - Create multiple taskq's, each with multiple tasks, + * wait until all tasks complete, ensure all tasks ran properly and in the + * the correct order, cleanup taskq's + */ +static void +splat_taskq_test2_func1(void *arg) +{ + splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; + + ASSERT(tq_arg); + splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' function '%s' flag = %d = %d * 2\n", + tq_arg->name, tq_arg->id, + sym2str(splat_taskq_test2_func1), + tq_arg->flag * 2, tq_arg->flag); + tq_arg->flag *= 2; +} + +static void +splat_taskq_test2_func2(void *arg) +{ + splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; + + ASSERT(tq_arg); + splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' function '%s' flag = %d = %d + 1\n", + tq_arg->name, tq_arg->id, + sym2str(splat_taskq_test2_func2), + tq_arg->flag + 1, tq_arg->flag); + tq_arg->flag += 1; +} + +#define TEST2_TASKQS 8 +#define TEST2_THREADS_PER_TASKQ 4 + +static int +splat_taskq_test2(struct file *file, void *arg) { + taskq_t *tq[TEST2_TASKQS] = { NULL }; + taskqid_t id; + splat_taskq_arg_t tq_args[TEST2_TASKQS]; + int i, rc = 0; + + for (i = 0; i < TEST2_TASKQS; i++) { + + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, "Taskq '%s/%d' " + "creating\n", SPLAT_TASKQ_TEST2_NAME, i); + if ((tq[i] = taskq_create(SPLAT_TASKQ_TEST2_NAME, + TEST2_THREADS_PER_TASKQ, + maxclsyspri, 50, INT_MAX, + TASKQ_PREPOPULATE)) == NULL) { + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' create failed\n", + SPLAT_TASKQ_TEST2_NAME, i); + rc = -EINVAL; + break; + } + + tq_args[i].flag = i; + tq_args[i].id = i; + tq_args[i].file = file; + tq_args[i].name = SPLAT_TASKQ_TEST2_NAME; + + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' function '%s' dispatching\n", + tq_args[i].name, tq_args[i].id, + sym2str(splat_taskq_test2_func1)); + if ((id = taskq_dispatch( + tq[i], splat_taskq_test2_func1, + &tq_args[i], TQ_SLEEP)) == 0) { + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' function '%s' dispatch " + "failed\n", tq_args[i].name, tq_args[i].id, + sym2str(splat_taskq_test2_func1)); + rc = -EINVAL; + break; + } + + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' function '%s' dispatching\n", + tq_args[i].name, tq_args[i].id, + sym2str(splat_taskq_test2_func2)); + if ((id = taskq_dispatch( + tq[i], splat_taskq_test2_func2, + &tq_args[i], TQ_SLEEP)) == 0) { + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' function '%s' dispatch failed\n", + tq_args[i].name, tq_args[i].id, + sym2str(splat_taskq_test2_func2)); + rc = -EINVAL; + break; + } + } + + /* When rc is set we're effectively just doing cleanup here, so + * ignore new errors in that case. They just cause noise. 
*/ + for (i = 0; i < TEST2_TASKQS; i++) { + if (tq[i] != NULL) { + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' waiting\n", + tq_args[i].name, tq_args[i].id); + taskq_wait(tq[i]); + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d; destroying\n", + tq_args[i].name, tq_args[i].id); + taskq_destroy(tq[i]); + + if (!rc && tq_args[i].flag != ((i * 2) + 1)) { + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' processed tasks " + "out of order; %d != %d\n", + tq_args[i].name, tq_args[i].id, + tq_args[i].flag, i * 2 + 1); + rc = -EINVAL; + } else { + splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, + "Taskq '%s/%d' processed tasks " + "in the correct order; %d == %d\n", + tq_args[i].name, tq_args[i].id, + tq_args[i].flag, i * 2 + 1); + } + } + } + + return rc; +} + +/* Validation Test 3 - Use the global system task queue with a single + * task, * wait until task completes, ensure task ran properly. + */ +static int +splat_taskq_test3(struct file *file, void *arg) +{ + taskqid_t id; + splat_taskq_arg_t tq_arg; + + tq_arg.flag = 0; + tq_arg.id = 0; + tq_arg.file = file; + tq_arg.name = SPLAT_TASKQ_TEST3_NAME; + + splat_vprint(file, SPLAT_TASKQ_TEST3_NAME, + "Taskq '%s' function '%s' dispatching\n", + tq_arg.name, sym2str(splat_taskq_test13_func)); + if ((id = taskq_dispatch(system_taskq, splat_taskq_test13_func, + &tq_arg, TQ_SLEEP)) == 0) { + splat_vprint(file, SPLAT_TASKQ_TEST3_NAME, + "Taskq '%s' function '%s' dispatch failed\n", + tq_arg.name, sym2str(splat_taskq_test13_func)); + return -EINVAL; + } + + splat_vprint(file, SPLAT_TASKQ_TEST3_NAME, "Taskq '%s' waiting\n", + tq_arg.name); + taskq_wait(system_taskq); + + return (tq_arg.flag) ? 0 : -EINVAL; +} + +splat_subsystem_t * +splat_taskq_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_TASKQ_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_TASKQ_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_TASKQ; + + SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST1_NAME, SPLAT_TASKQ_TEST1_DESC, + SPLAT_TASKQ_TEST1_ID, splat_taskq_test1); + SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST2_NAME, SPLAT_TASKQ_TEST2_DESC, + SPLAT_TASKQ_TEST2_ID, splat_taskq_test2); + SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST3_NAME, SPLAT_TASKQ_TEST3_DESC, + SPLAT_TASKQ_TEST3_ID, splat_taskq_test3); + + return sub; +} + +void +splat_taskq_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST1_ID); + + kfree(sub); +} + +int +splat_taskq_id(void) { + return SPLAT_SUBSYSTEM_TASKQ; +} diff --git a/module/splat/splat-thread.c b/module/splat/splat-thread.c new file mode 100644 index 000000000..ca6c46ac3 --- /dev/null +++ b/module/splat/splat-thread.c @@ -0,0 +1,203 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_THREAD 0x0600 +#define SPLAT_THREAD_NAME "thread" +#define SPLAT_THREAD_DESC "Kernel Thread Tests" + +#define SPLAT_THREAD_TEST1_ID 0x0601 +#define SPLAT_THREAD_TEST1_NAME "create" +#define SPLAT_THREAD_TEST1_DESC "Validate thread creation" + +#define SPLAT_THREAD_TEST2_ID 0x0602 +#define SPLAT_THREAD_TEST2_NAME "exit" +#define SPLAT_THREAD_TEST2_DESC "Validate thread exit" + +#define SPLAT_THREAD_TEST_MAGIC 0x4488CC00UL + +typedef struct thread_priv { + unsigned long tp_magic; + struct file *tp_file; + spinlock_t tp_lock; + wait_queue_head_t tp_waitq; + int tp_rc; +} thread_priv_t; + +static int +splat_thread_rc(thread_priv_t *tp, int rc) +{ + int ret; + + spin_lock(&tp->tp_lock); + ret = (tp->tp_rc == rc); + spin_unlock(&tp->tp_lock); + + return ret; +} + +static void +splat_thread_work1(void *priv) +{ + thread_priv_t *tp = (thread_priv_t *)priv; + + spin_lock(&tp->tp_lock); + ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); + tp->tp_rc = 1; + spin_unlock(&tp->tp_lock); + + wake_up(&tp->tp_waitq); + thread_exit(); +} + +static int +splat_thread_test1(struct file *file, void *arg) +{ + thread_priv_t tp; + kthread_t *thr; + + tp.tp_magic = SPLAT_THREAD_TEST_MAGIC; + tp.tp_file = file; + spin_lock_init(&tp.tp_lock); + init_waitqueue_head(&tp.tp_waitq); + tp.tp_rc = 0; + + thr = (kthread_t *)thread_create(NULL, 0, splat_thread_work1, &tp, 0, + &p0, TS_RUN, minclsyspri); + /* Must never fail under Solaris, but we check anyway since this + * can happen in the linux SPL, we may want to change this behavior */ + if (thr == NULL) + return -ESRCH; + + /* Sleep until the thread sets tp.tp_rc == 1 */ + wait_event(tp.tp_waitq, splat_thread_rc(&tp, 1)); + + splat_vprint(file, SPLAT_THREAD_TEST1_NAME, "%s", + "Thread successfully started properly\n"); + return 0; +} + +static void +splat_thread_work2(void *priv) +{ + thread_priv_t *tp = (thread_priv_t *)priv; + + spin_lock(&tp->tp_lock); + ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); + tp->tp_rc = 1; + spin_unlock(&tp->tp_lock); + + wake_up(&tp->tp_waitq); + thread_exit(); + + /* The following code is unreachable when thread_exit() is + * working properly, which is exactly what we're testing */ + spin_lock(&tp->tp_lock); + tp->tp_rc = 2; + spin_unlock(&tp->tp_lock); + + wake_up(&tp->tp_waitq); +} + +static int +splat_thread_test2(struct file *file, void *arg) +{ + thread_priv_t tp; + kthread_t *thr; + int rc = 0; + + tp.tp_magic = SPLAT_THREAD_TEST_MAGIC; + tp.tp_file = file; + spin_lock_init(&tp.tp_lock); + init_waitqueue_head(&tp.tp_waitq); + tp.tp_rc = 0; + + thr = (kthread_t 
*)thread_create(NULL, 0, splat_thread_work2, &tp, 0, + &p0, TS_RUN, minclsyspri); + /* Must never fail under Solaris, but we check anyway since this + * can happen in the linux SPL, we may want to change this behavior */ + if (thr == NULL) + return -ESRCH; + + /* Sleep until the thread sets tp.tp_rc == 1 */ + wait_event(tp.tp_waitq, splat_thread_rc(&tp, 1)); + + /* Sleep until the thread sets tp.tp_rc == 2, or until we hit + * the timeout. If thread exit is working properly we should + * hit the timeout and never see to.tp_rc == 2. */ + rc = wait_event_timeout(tp.tp_waitq, splat_thread_rc(&tp, 2), HZ / 10); + if (rc > 0) { + rc = -EINVAL; + splat_vprint(file, SPLAT_THREAD_TEST2_NAME, "%s", + "Thread did not exit properly at thread_exit()\n"); + } else { + splat_vprint(file, SPLAT_THREAD_TEST2_NAME, "%s", + "Thread successfully exited at thread_exit()\n"); + } + + return rc; +} + +splat_subsystem_t * +splat_thread_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_THREAD_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_THREAD_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_THREAD; + + SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST1_NAME, SPLAT_THREAD_TEST1_DESC, + SPLAT_THREAD_TEST1_ID, splat_thread_test1); + SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST2_NAME, SPLAT_THREAD_TEST2_DESC, + SPLAT_THREAD_TEST2_ID, splat_thread_test2); + + return sub; +} + +void +splat_thread_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST1_ID); + + kfree(sub); +} + +int +splat_thread_id(void) { + return SPLAT_SUBSYSTEM_THREAD; +} diff --git a/module/splat/splat-time.c b/module/splat/splat-time.c new file mode 100644 index 000000000..1aa13e520 --- /dev/null +++ b/module/splat/splat-time.c @@ -0,0 +1,117 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" + +#define SPLAT_SUBSYSTEM_TIME 0x0800 +#define SPLAT_TIME_NAME "time" +#define SPLAT_TIME_DESC "Kernel Time Tests" + +#define SPLAT_TIME_TEST1_ID 0x0801 +#define SPLAT_TIME_TEST1_NAME "time1" +#define SPLAT_TIME_TEST1_DESC "HZ Test" + +#define SPLAT_TIME_TEST2_ID 0x0802 +#define SPLAT_TIME_TEST2_NAME "time2" +#define SPLAT_TIME_TEST2_DESC "Monotonic Test" + +static int +splat_time_test1(struct file *file, void *arg) +{ + int myhz = hz; + splat_vprint(file, SPLAT_TIME_TEST1_NAME, "hz is %d\n", myhz); + return 0; +} + +static int +splat_time_test2(struct file *file, void *arg) +{ + hrtime_t tm1, tm2; + int i; + + tm1 = gethrtime(); + splat_vprint(file, SPLAT_TIME_TEST2_NAME, "time is %lld\n", tm1); + + for(i = 0; i < 100; i++) { + tm2 = gethrtime(); + splat_vprint(file, SPLAT_TIME_TEST2_NAME, "time is %lld\n", tm2); + + if(tm1 > tm2) { + splat_print(file, "%s: gethrtime() is not giving " + "monotonically increasing values\n", + SPLAT_TIME_TEST2_NAME); + return 1; + } + tm1 = tm2; + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(10); + } + + return 0; +} + +splat_subsystem_t * +splat_time_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_TIME_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_TIME_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_TIME; + + SPLAT_TEST_INIT(sub, SPLAT_TIME_TEST1_NAME, SPLAT_TIME_TEST1_DESC, + SPLAT_TIME_TEST1_ID, splat_time_test1); + SPLAT_TEST_INIT(sub, SPLAT_TIME_TEST2_NAME, SPLAT_TIME_TEST2_DESC, + SPLAT_TIME_TEST2_ID, splat_time_test2); + + return sub; +} + +void +splat_time_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + + SPLAT_TEST_FINI(sub, SPLAT_TIME_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_TIME_TEST1_ID); + + kfree(sub); +} + +int +splat_time_id(void) +{ + return SPLAT_SUBSYSTEM_TIME; +} diff --git a/module/splat/splat-vnode.c b/module/splat/splat-vnode.c new file mode 100644 index 000000000..413651dac --- /dev/null +++ b/module/splat/splat-vnode.c @@ -0,0 +1,532 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "splat-internal.h" +#include <linux/rcupdate.h> + +#define SPLAT_SUBSYSTEM_VNODE 0x0900 +#define SPLAT_VNODE_NAME "vnode" +#define SPLAT_VNODE_DESC "Kernel Vnode Tests" + +#define SPLAT_VNODE_TEST1_ID 0x0901 +#define SPLAT_VNODE_TEST1_NAME "vn_open" +#define SPLAT_VNODE_TEST1_DESC "Vn_open Test" + +#define SPLAT_VNODE_TEST2_ID 0x0902 +#define SPLAT_VNODE_TEST2_NAME "vn_openat" +#define SPLAT_VNODE_TEST2_DESC "Vn_openat Test" + +#define SPLAT_VNODE_TEST3_ID 0x0903 +#define SPLAT_VNODE_TEST3_NAME "vn_rdwr" +#define SPLAT_VNODE_TEST3_DESC "Vn_rdwrt Test" + +#define SPLAT_VNODE_TEST4_ID 0x0904 +#define SPLAT_VNODE_TEST4_NAME "vn_rename" +#define SPLAT_VNODE_TEST4_DESC "Vn_rename Test" + +#define SPLAT_VNODE_TEST5_ID 0x0905 +#define SPLAT_VNODE_TEST5_NAME "vn_getattr" +#define SPLAT_VNODE_TEST5_DESC "Vn_getattr Test" + +#define SPLAT_VNODE_TEST6_ID 0x0906 +#define SPLAT_VNODE_TEST6_NAME "vn_sync" +#define SPLAT_VNODE_TEST6_DESC "Vn_sync Test" + +#define SPLAT_VNODE_TEST7_ID 0x0907 +#define SPLAT_VNODE_TEST7_NAME "vn_getf" +#define SPLAT_VNODE_TEST7_DESC "vn_getf/vn_releasef Test" + +#define SPLAT_VNODE_TEST_FILE "/etc/fstab" +#define SPLAT_VNODE_TEST_FILE_AT "etc/fstab" +#define SPLAT_VNODE_TEST_FILE_RW "/tmp/spl.vnode.tmp" +#define SPLAT_VNODE_TEST_FILE_RW1 "/tmp/spl.vnode.tmp.1" +#define SPLAT_VNODE_TEST_FILE_RW2 "/tmp/spl.vnode.tmp.2" + +static int +splat_vnode_test1(struct file *file, void *arg) +{ + vnode_t *vp; + int rc; + + if ((rc = vn_open(SPLAT_VNODE_TEST_FILE, UIO_SYSSPACE, + FREAD, 0644, &vp, 0, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST1_NAME, + "Failed to vn_open test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE, rc); + return rc; + } + + rc = VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); + + if (rc) { + splat_vprint(file, SPLAT_VNODE_TEST1_NAME, + "Failed to vn_close test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE, rc); + return rc; + } + + splat_vprint(file, SPLAT_VNODE_TEST1_NAME, "Successfully vn_open'ed " + "and vn_closed test file: %s\n", SPLAT_VNODE_TEST_FILE); + + return rc; +} /* splat_vnode_test1() */ + +static int +splat_vnode_test2(struct file *file, void *arg) +{ + vnode_t *vp; + int rc; + + if ((rc = vn_openat(SPLAT_VNODE_TEST_FILE_AT, UIO_SYSSPACE, + FREAD, 0644, &vp, 0, 0, rootdir, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST2_NAME, + "Failed to vn_openat test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE, rc); + return rc; + } + + rc = VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); + + if (rc) { + splat_vprint(file, SPLAT_VNODE_TEST2_NAME, + "Failed to vn_close test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE, rc); + return rc; + } + + splat_vprint(file, SPLAT_VNODE_TEST2_NAME, "Successfully vn_openat'ed " + "and vn_closed test file: %s\n", SPLAT_VNODE_TEST_FILE); + + return rc; +} /* splat_vnode_test2() */ + +static int +splat_vnode_test3(struct file *file, void *arg) +{ + vnode_t *vp; + char buf1[32] = "SPL VNode Interface Test File\n"; + char buf2[32] = ""; + int rc; + + if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, + FWRITE | FREAD | FCREAT | FEXCL, + 0644, &vp, 0, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, + "Failed to vn_open test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + return rc; + } + + rc = vn_rdwr(UIO_WRITE, vp, buf1, strlen(buf1), 0, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, + "Failed vn_rdwr write of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + goto out; + } + + rc = vn_rdwr(UIO_READ, vp, buf2, strlen(buf1), 0, + UIO_SYSSPACE, 0, 
RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, + "Failed vn_rdwr read of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + goto out; + } + + if (strncmp(buf1, buf2, strlen(buf1))) { + rc = -EINVAL; + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, + "Failed strncmp data written does not match " + "data read\nWrote: %sRead: %s\n", buf1, buf2); + goto out; + } + + rc = 0; + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Wrote: %s", buf1); + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Read: %s", buf2); + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Successfully wrote and " + "read expected data pattern to test file: %s\n", + SPLAT_VNODE_TEST_FILE_RW); + +out: + VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); + vn_remove(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, RMFILE); + + return rc; +} /* splat_vnode_test3() */ + +static int +splat_vnode_test4(struct file *file, void *arg) +{ + vnode_t *vp; + char buf1[32] = "SPL VNode Interface Test File\n"; + char buf2[32] = ""; + int rc; + + if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW1, UIO_SYSSPACE, + FWRITE | FREAD | FCREAT | FEXCL, 0644, &vp, 0, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, + "Failed to vn_open test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW1, rc); + goto out; + } + + rc = vn_rdwr(UIO_WRITE, vp, buf1, strlen(buf1), 0, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, + "Failed vn_rdwr write of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW1, rc); + goto out2; + } + + VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); + + rc = vn_rename(SPLAT_VNODE_TEST_FILE_RW1,SPLAT_VNODE_TEST_FILE_RW2,0); + if (rc) { + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Failed vn_rename " + "%s -> %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW1, + SPLAT_VNODE_TEST_FILE_RW2, rc); + goto out; + } + + if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW2, UIO_SYSSPACE, + FREAD | FEXCL, 0644, &vp, 0, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, + "Failed to vn_open test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW2, rc); + goto out; + } + + rc = vn_rdwr(UIO_READ, vp, buf2, strlen(buf1), 0, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, + "Failed vn_rdwr read of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW2, rc); + goto out2; + } + + if (strncmp(buf1, buf2, strlen(buf1))) { + rc = EINVAL; + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, + "Failed strncmp data written does not match " + "data read\nWrote: %sRead: %s\n", buf1, buf2); + goto out2; + } + + rc = 0; + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Wrote to %s: %s", + SPLAT_VNODE_TEST_FILE_RW1, buf1); + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Read from %s: %s", + SPLAT_VNODE_TEST_FILE_RW2, buf2); + splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Successfully renamed " + "test file %s -> %s and verified data pattern\n", + SPLAT_VNODE_TEST_FILE_RW1, SPLAT_VNODE_TEST_FILE_RW2); +out2: + VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); +out: + vn_remove(SPLAT_VNODE_TEST_FILE_RW1, UIO_SYSSPACE, RMFILE); + vn_remove(SPLAT_VNODE_TEST_FILE_RW2, UIO_SYSSPACE, RMFILE); + + return rc; +} /* splat_vnode_test4() */ + +static int +splat_vnode_test5(struct file *file, void *arg) +{ + vnode_t *vp; + vattr_t vap; + int rc; + + if ((rc = vn_open(SPLAT_VNODE_TEST_FILE, UIO_SYSSPACE, + FREAD, 0644, &vp, 0, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST5_NAME, + "Failed to vn_open test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE, rc); + return rc; + } + + rc = VOP_GETATTR(vp, &vap, 0, 0, 
NULL); + if (rc) { + splat_vprint(file, SPLAT_VNODE_TEST5_NAME, + "Failed to vn_getattr test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE, rc); + goto out; + } + + if (vap.va_type != VREG) { + rc = -EINVAL; + splat_vprint(file, SPLAT_VNODE_TEST5_NAME, + "Failed expected regular file type " + "(%d != VREG): %s (%d)\n", vap.va_type, + SPLAT_VNODE_TEST_FILE, rc); + goto out; + } + + splat_vprint(file, SPLAT_VNODE_TEST1_NAME, "Successfully " + "vn_getattr'ed test file: %s\n", SPLAT_VNODE_TEST_FILE); + +out: + VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); + + return rc; +} /* splat_vnode_test5() */ + +static int +splat_vnode_test6(struct file *file, void *arg) +{ + vnode_t *vp; + char buf[32] = "SPL VNode Interface Test File\n"; + int rc; + + if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, + FWRITE | FCREAT | FEXCL, 0644, &vp, 0, 0))) { + splat_vprint(file, SPLAT_VNODE_TEST6_NAME, + "Failed to vn_open test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + return rc; + } + + rc = vn_rdwr(UIO_WRITE, vp, buf, strlen(buf), 0, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST6_NAME, + "Failed vn_rdwr write of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + goto out; + } + + rc = vn_fsync(vp, 0, 0, 0); + if (rc) { + splat_vprint(file, SPLAT_VNODE_TEST6_NAME, + "Failed vn_fsync of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + goto out; + } + + rc = 0; + splat_vprint(file, SPLAT_VNODE_TEST6_NAME, "Successfully " + "fsync'ed test file %s\n", SPLAT_VNODE_TEST_FILE_RW); +out: + VOP_CLOSE(vp, 0, 0, 0, 0, 0); + VN_RELE(vp); + vn_remove(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, RMFILE); + + return rc; +} /* splat_vnode_test6() */ + +/* Basically a slightly modified version of sys_close() */ +static int +fd_uninstall(int fd) +{ + struct file *fp; + struct files_struct *files = current->files; +#ifdef HAVE_FILES_FDTABLE + struct fdtable *fdt; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + + if (fd >= fdt->max_fds) + goto out_unlock; + + fp = fdt->fd[fd]; + if (!fp) + goto out_unlock; + + rcu_assign_pointer(fdt->fd[fd], NULL); + FD_CLR(fd, fdt->close_on_exec); +#else + spin_lock(&files->file_lock); + if (fd >= files->max_fds) + goto out_unlock; + + fp = files->fd[fd]; + if (!fp) + goto out_unlock; + + files->fd[fd] = NULL; + FD_CLR(fd, files->close_on_exec); +#endif + /* Dropping the lock here exposes a minor race but it allows me + * to use the existing kernel interfaces for this, and for a test + * case I think that's reasonable. 
*/ + spin_unlock(&files->file_lock); + put_unused_fd(fd); + return 0; + +out_unlock: + spin_unlock(&files->file_lock); + return -EBADF; +} /* fd_uninstall() */ + +static int +splat_vnode_test7(struct file *file, void *arg) +{ + char buf1[32] = "SPL VNode Interface Test File\n"; + char buf2[32] = ""; + struct file *lfp; + file_t *fp; + int rc, fd; + + /* Prep work needed to test getf/releasef */ + fd = get_unused_fd(); + if (fd < 0) { + splat_vprint(file, SPLAT_VNODE_TEST7_NAME, + "Failed to get unused fd (%d)\n", fd); + return fd; + } + + lfp = filp_open(SPLAT_VNODE_TEST_FILE_RW, O_RDWR|O_CREAT|O_EXCL, 0644); + if (IS_ERR(lfp)) { + put_unused_fd(fd); + rc = PTR_ERR(lfp); + splat_vprint(file, SPLAT_VNODE_TEST7_NAME, + "Failed to filp_open: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + return rc; + } + + /* Pair up the new fd and lfp in the current context, this allows + * getf to lookup the file struct simply by the known open fd */ + fd_install(fd, lfp); + + /* Actual getf()/releasef() test */ + fp = vn_getf(fd); + if (fp == NULL) { + rc = -EINVAL; + splat_vprint(file, SPLAT_VNODE_TEST7_NAME, + "Failed to getf fd %d: (%d)\n", fd, rc); + goto out; + } + + rc = vn_rdwr(UIO_WRITE, fp->f_vnode, buf1, strlen(buf1), 0, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST7_NAME, + "Failed vn_rdwr write of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + goto out; + } + + rc = vn_rdwr(UIO_READ, fp->f_vnode, buf2, strlen(buf1), 0, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); + if (rc < 0) { + splat_vprint(file, SPLAT_VNODE_TEST7_NAME, + "Failed vn_rdwr read of test file: %s (%d)\n", + SPLAT_VNODE_TEST_FILE_RW, rc); + goto out; + } + + if (strncmp(buf1, buf2, strlen(buf1))) { + rc = -EINVAL; + splat_vprint(file, SPLAT_VNODE_TEST7_NAME, + "Failed strncmp data written does not match " + "data read\nWrote: %sRead: %s\n", buf1, buf2); + goto out; + } + + rc = 0; + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Wrote: %s", buf1); + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Read: %s", buf2); + splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Successfully wrote and " + "read expected data pattern to test file: %s\n", + SPLAT_VNODE_TEST_FILE_RW); +out: + vn_releasef(fd); + fd_uninstall(fd); + filp_close(lfp, 0); + vn_remove(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, RMFILE); + + return rc; +} /* splat_vnode_test7() */ + +splat_subsystem_t * +splat_vnode_init(void) +{ + splat_subsystem_t *sub; + + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; + + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_VNODE_NAME, SPLAT_NAME_SIZE); + strncpy(sub->desc.desc, SPLAT_VNODE_DESC, SPLAT_DESC_SIZE); + INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->test_list); + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_VNODE; + + SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST1_NAME, SPLAT_VNODE_TEST1_DESC, + SPLAT_VNODE_TEST1_ID, splat_vnode_test1); + SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST2_NAME, SPLAT_VNODE_TEST2_DESC, + SPLAT_VNODE_TEST2_ID, splat_vnode_test2); + SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST3_NAME, SPLAT_VNODE_TEST3_DESC, + SPLAT_VNODE_TEST3_ID, splat_vnode_test3); + SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST4_NAME, SPLAT_VNODE_TEST4_DESC, + SPLAT_VNODE_TEST4_ID, splat_vnode_test4); + SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST5_NAME, SPLAT_VNODE_TEST5_DESC, + SPLAT_VNODE_TEST5_ID, splat_vnode_test5); + SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST6_NAME, SPLAT_VNODE_TEST6_DESC, + SPLAT_VNODE_TEST6_ID, splat_vnode_test6); + 
SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST7_NAME, SPLAT_VNODE_TEST7_DESC, + SPLAT_VNODE_TEST7_ID, splat_vnode_test7); + + return sub; +} /* splat_vnode_init() */ + +void +splat_vnode_fini(splat_subsystem_t *sub) +{ + ASSERT(sub); + + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST7_ID); + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST6_ID); + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST5_ID); + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST1_ID); + + kfree(sub); +} /* splat_vnode_fini() */ + +int +splat_vnode_id(void) +{ + return SPLAT_SUBSYSTEM_VNODE; +} /* splat_vnode_id() */
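
For reference, the krng frequency test near the top of this diff is the classic monobit check: fill a buffer with pseudo-random bytes, count the set bits, and fail if the count strays too far from half of the total number of bits. A minimal user-space sketch of the same check is given below; it reads from /dev/urandom instead of calling random_get_pseudo_bytes(), and the program, its names, and its error handling are illustrative only, not part of the SPLAT module.

/*
 * Hypothetical user-space sketch of the monobit (frequency) check performed
 * by splat_krng_test1(): fill a buffer with random bytes, count the set
 * bits, and fail when the count is more than ERROR_RANGE away from half of
 * the total number of bits.  The constants mirror the kernel test, but the
 * program itself is only an illustration and not part of the SPLAT module.
 */
#include <stdio.h>
#include <stdlib.h>

#define NUM_BITS	1048576
#define NUM_BYTES	(NUM_BITS >> 3)
#define NUM_BITS_DIV2	(NUM_BITS >> 1)
#define ERROR_RANGE	2097

int
main(void)
{
	unsigned char *buf;
	FILE *urandom;
	long i, j, ones = 0, diff;

	buf = malloc(NUM_BYTES);
	if (buf == NULL) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}

	urandom = fopen("/dev/urandom", "rb");
	if (urandom == NULL) {
		fprintf(stderr, "unable to open /dev/urandom\n");
		free(buf);
		return 1;
	}

	if (fread(buf, 1, NUM_BYTES, urandom) != NUM_BYTES) {
		fprintf(stderr, "short read from /dev/urandom\n");
		fclose(urandom);
		free(buf);
		return 1;
	}
	fclose(urandom);

	/* Count every set bit in the buffer, one bit position at a time. */
	for (i = 0; i < NUM_BYTES; i++)
		for (j = 0; j < 8; j++)
			ones += (buf[i] >> j) & 0x01;

	free(buf);

	/* Distance of the observed ones count from the expected N/2. */
	diff = NUM_BITS_DIV2 - ones;
	if (diff < 0)
		diff = -diff;

	printf("ones: %ld  difference: %ld  allowed: %d\n",
	    ones, diff, ERROR_RANGE);

	return (diff > ERROR_RANGE) ? 1 : 0;
}

With the constants used by the kernel test, the allowed difference of 2097 bits is a little over four standard deviations (sigma = sqrt(N/4) = 512 for N = 2^20 fair bits), so a correctly working generator should essentially never trip this check.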