author     Brian Behlendorf <[email protected]>   2009-01-15 10:44:54 -0800
committer  Brian Behlendorf <[email protected]>   2009-01-15 10:44:54 -0800
commit     617d5a673cd16aa91fa9668b94cc385094fae852 (patch)
tree       37c7e043f3599d458a3aa0e763363853c298fba3 /module/spl
parent     f6a19c0d37992755ed6b1b50344047537a1efe5c (diff)
Rename modules to module and update references
Diffstat (limited to 'module/spl')
-rw-r--r-- | module/spl/Makefile.in   |   51 |
-rw-r--r-- | module/spl/spl-atomic.c  |   40 |
-rw-r--r-- | module/spl/spl-condvar.c |  201 |
-rw-r--r-- | module/spl/spl-debug.c   | 1276 |
-rw-r--r-- | module/spl/spl-err.c     |   78 |
-rw-r--r-- | module/spl/spl-generic.c |  328 |
-rw-r--r-- | module/spl/spl-kmem.c    | 1452 |
-rw-r--r-- | module/spl/spl-kobj.c    |   93 |
-rw-r--r-- | module/spl/spl-kstat.c   |  496 |
-rw-r--r-- | module/spl/spl-module.c  |  331 |
-rw-r--r-- | module/spl/spl-mutex.c   |  309 |
-rw-r--r-- | module/spl/spl-proc.c    | 1049 |
-rw-r--r-- | module/spl/spl-rwlock.c  |  361 |
-rw-r--r-- | module/spl/spl-taskq.c   |  491 |
-rw-r--r-- | module/spl/spl-thread.c  |  135 |
-rw-r--r-- | module/spl/spl-time.c    |   92 |
-rw-r--r-- | module/spl/spl-vnode.c   |  678 |
17 files changed, 7461 insertions, 0 deletions
diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in new file mode 100644 index 000000000..f92945b00 --- /dev/null +++ b/module/spl/Makefile.in @@ -0,0 +1,51 @@ +# Makefile.in for spl kernel module + +MODULES := spl +DISTFILES = Makefile.in \ + spl-kmem.c spl-rwlock.c spl-taskq.c \ + spl-thread.c spl-generic.c +EXTRA_CFLAGS = @KERNELCPPFLAGS@ + +# Solaris porting layer module +obj-m := spl.o + +spl-objs += spl-debug.o +spl-objs += spl-proc.o +spl-objs += spl-kmem.o +spl-objs += spl-thread.o +spl-objs += spl-taskq.o +spl-objs += spl-rwlock.o +spl-objs += spl-vnode.o +spl-objs += spl-err.o +spl-objs += spl-time.o +spl-objs += spl-kobj.o +spl-objs += spl-module.o +spl-objs += spl-generic.o +spl-objs += spl-atomic.o +spl-objs += spl-mutex.o +spl-objs += spl-kstat.o +spl-objs += spl-condvar.o + +splmodule := spl.ko +splmoduledir := @kmoduledir@/kernel/lib/ + +install: + mkdir -p $(DESTDIR)$(splmoduledir) + $(INSTALL) -m 644 $(splmodule) $(DESTDIR)$(splmoduledir)/$(splmodule) + -/sbin/depmod -a + +uninstall: + rm -f $(DESTDIR)$(splmoduledir)/$(splmodule) + -/sbin/depmod -a + +clean: + -rm -f $(splmodule) *.o .*.cmd *.mod.c *.ko *.s */*.o + +distclean: clean + rm -f Makefile + rm -rf .tmp_versions + +maintainer-clean: distclean + +distdir: $(DISTFILES) + cp -p $(DISTFILES) $(distdir) diff --git a/module/spl/spl-atomic.c b/module/spl/spl-atomic.c new file mode 100644 index 000000000..40cdb06cc --- /dev/null +++ b/module/spl/spl-atomic.c @@ -0,0 +1,40 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/atomic.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_ATOMIC + +/* Global atomic lock declarations */ +spinlock_t atomic64_lock = SPIN_LOCK_UNLOCKED; +spinlock_t atomic32_lock = SPIN_LOCK_UNLOCKED; + +EXPORT_SYMBOL(atomic64_lock); +EXPORT_SYMBOL(atomic32_lock); diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c new file mode 100644 index 000000000..163f2a1de --- /dev/null +++ b/module/spl/spl-condvar.c @@ -0,0 +1,201 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
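spl-atomic.c above only exports the two global spinlocks; the emulation they serialize presumably lives in the SPL's atomic compatibility headers. A minimal sketch (not part of this commit; the type and function names are illustrative) of how a 64-bit atomic add can be built on `atomic64_lock` for kernels lacking a native `atomic64_t`:

```c
#include <linux/spinlock.h>
#include <linux/types.h>

extern spinlock_t atomic64_lock;

typedef struct {
	volatile u64 counter;
} atomic64_emu_t;

/* Serialize the read-modify-write through the single global lock
 * declared in spl-atomic.c and return the new value. */
static inline u64
atomic64_emu_add(u64 n, atomic64_emu_t *v)
{
	unsigned long flags;
	u64 rc;

	spin_lock_irqsave(&atomic64_lock, flags);
	rc = (v->counter += n);
	spin_unlock_irqrestore(&atomic64_lock, flags);

	return rc;
}
```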
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/condvar.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_CONDVAR + +void +__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) +{ + int flags = KM_SLEEP; + + ENTRY; + ASSERT(cvp); + ASSERT(name); + ASSERT(type == CV_DEFAULT); + ASSERT(arg == NULL); + + cvp->cv_magic = CV_MAGIC; + init_waitqueue_head(&cvp->cv_event); + spin_lock_init(&cvp->cv_lock); + atomic_set(&cvp->cv_waiters, 0); + cvp->cv_mutex = NULL; + cvp->cv_name = NULL; + cvp->cv_name_size = strlen(name) + 1; + + /* We may be called when there is a non-zero preempt_count or + * interrupts are disabled is which case we must not sleep. + */ + if (current_thread_info()->preempt_count || irqs_disabled()) + flags = KM_NOSLEEP; + + cvp->cv_name = kmem_alloc(cvp->cv_name_size, flags); + if (cvp->cv_name) + strcpy(cvp->cv_name, name); + + EXIT; +} +EXPORT_SYMBOL(__cv_init); + +void +__cv_destroy(kcondvar_t *cvp) +{ + ENTRY; + ASSERT(cvp); + ASSERT(cvp->cv_magic == CV_MAGIC); + spin_lock(&cvp->cv_lock); + ASSERT(atomic_read(&cvp->cv_waiters) == 0); + ASSERT(!waitqueue_active(&cvp->cv_event)); + + if (cvp->cv_name) + kmem_free(cvp->cv_name, cvp->cv_name_size); + + spin_unlock(&cvp->cv_lock); + memset(cvp, CV_POISON, sizeof(*cvp)); + EXIT; +} +EXPORT_SYMBOL(__cv_destroy); + +void +__cv_wait(kcondvar_t *cvp, kmutex_t *mp) +{ + DEFINE_WAIT(wait); + ENTRY; + + ASSERT(cvp); + ASSERT(mp); + ASSERT(cvp->cv_magic == CV_MAGIC); + spin_lock(&cvp->cv_lock); + ASSERT(mutex_owned(mp)); + + if (cvp->cv_mutex == NULL) + cvp->cv_mutex = mp; + + /* Ensure the same mutex is used by all callers */ + ASSERT(cvp->cv_mutex == mp); + spin_unlock(&cvp->cv_lock); + + prepare_to_wait_exclusive(&cvp->cv_event, &wait, + TASK_UNINTERRUPTIBLE); + atomic_inc(&cvp->cv_waiters); + + /* Mutex should be dropped after prepare_to_wait() this + * ensures we're linked in to the waiters list and avoids the + * race where 'cvp->cv_waiters > 0' but the list is empty. */ + mutex_exit(mp); + schedule(); + mutex_enter(mp); + + atomic_dec(&cvp->cv_waiters); + finish_wait(&cvp->cv_event, &wait); + EXIT; +} +EXPORT_SYMBOL(__cv_wait); + +/* 'expire_time' argument is an absolute wall clock time in jiffies. + * Return value is time left (expire_time - now) or -1 if timeout occurred. 
+ */ +clock_t +__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time) +{ + DEFINE_WAIT(wait); + clock_t time_left; + ENTRY; + + ASSERT(cvp); + ASSERT(mp); + ASSERT(cvp->cv_magic == CV_MAGIC); + spin_lock(&cvp->cv_lock); + ASSERT(mutex_owned(mp)); + + if (cvp->cv_mutex == NULL) + cvp->cv_mutex = mp; + + /* Ensure the same mutex is used by all callers */ + ASSERT(cvp->cv_mutex == mp); + spin_unlock(&cvp->cv_lock); + + /* XXX - Does not handle jiffie wrap properly */ + time_left = expire_time - jiffies; + if (time_left <= 0) + RETURN(-1); + + prepare_to_wait_exclusive(&cvp->cv_event, &wait, + TASK_UNINTERRUPTIBLE); + atomic_inc(&cvp->cv_waiters); + + /* Mutex should be dropped after prepare_to_wait() this + * ensures we're linked in to the waiters list and avoids the + * race where 'cvp->cv_waiters > 0' but the list is empty. */ + mutex_exit(mp); + time_left = schedule_timeout(time_left); + mutex_enter(mp); + + atomic_dec(&cvp->cv_waiters); + finish_wait(&cvp->cv_event, &wait); + + RETURN(time_left > 0 ? time_left : -1); +} +EXPORT_SYMBOL(__cv_timedwait); + +void +__cv_signal(kcondvar_t *cvp) +{ + ENTRY; + ASSERT(cvp); + ASSERT(cvp->cv_magic == CV_MAGIC); + + /* All waiters are added with WQ_FLAG_EXCLUSIVE so only one + * waiter will be set runable with each call to wake_up(). + * Additionally wake_up() holds a spin_lock assoicated with + * the wait queue to ensure we don't race waking up processes. */ + if (atomic_read(&cvp->cv_waiters) > 0) + wake_up(&cvp->cv_event); + + EXIT; +} +EXPORT_SYMBOL(__cv_signal); + +void +__cv_broadcast(kcondvar_t *cvp) +{ + ASSERT(cvp); + ASSERT(cvp->cv_magic == CV_MAGIC); + ENTRY; + + /* Wake_up_all() will wake up all waiters even those which + * have the WQ_FLAG_EXCLUSIVE flag set. */ + if (atomic_read(&cvp->cv_waiters) > 0) + wake_up_all(&cvp->cv_event); + + EXIT; +} +EXPORT_SYMBOL(__cv_broadcast); diff --git a/module/spl/spl-debug.c b/module/spl/spl-debug.c new file mode 100644 index 000000000..df22cbc48 --- /dev/null +++ b/module/spl/spl-debug.c @@ -0,0 +1,1276 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * This file was originally part of Lustre, http://www.lustre.org. + * but has subsequently been adapted for use in the SPL in + * accordance with the GPL. + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Zach Brown <[email protected]> + * Phil Schwan <[email protected]> + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
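With the condvar shim complete above, a minimal usage sketch (not part of this commit), assuming the usual Solaris-style wrappers from sys/condvar.h and sys/mutex.h (cv_wait(), cv_signal(), mutex_enter(), ...) and that setup code already ran cv_init(&cv, "example", CV_DEFAULT, NULL) plus the matching mutex_init():

```c
static kmutex_t lock;
static kcondvar_t cv;
static int ready;

static void
consumer(void)
{
	mutex_enter(&lock);
	while (!ready)			/* always re-test the predicate */
		cv_wait(&cv, &lock);	/* drops and re-takes 'lock' */
	mutex_exit(&lock);
}

static void
producer(void)
{
	mutex_enter(&lock);
	ready = 1;
	cv_signal(&cv);	/* wakes one waiter; all are WQ_FLAG_EXCLUSIVE */
	mutex_exit(&lock);
}
```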
+ */ + +#include <linux/kmod.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/kthread.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <sys/sysmacros.h> +#include <sys/proc.h> +#include <sys/debug.h> +#include <spl-ctl.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_DEBUG + +unsigned long spl_debug_subsys = ~0; +EXPORT_SYMBOL(spl_debug_subsys); +module_param(spl_debug_subsys, long, 0644); +MODULE_PARM_DESC(spl_debug_subsys, "Subsystem debugging level mask."); + +unsigned long spl_debug_mask = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE); +EXPORT_SYMBOL(spl_debug_mask); +module_param(spl_debug_mask, long, 0644); +MODULE_PARM_DESC(spl_debug_mask, "Debugging level mask."); + +unsigned long spl_debug_printk = D_CANTMASK; +EXPORT_SYMBOL(spl_debug_printk); +module_param(spl_debug_printk, long, 0644); +MODULE_PARM_DESC(spl_debug_printk, "Console printk level mask."); + +int spl_debug_mb = -1; +EXPORT_SYMBOL(spl_debug_mb); +module_param(spl_debug_mb, int, 0644); +MODULE_PARM_DESC(spl_debug_mb, "Total debug buffer size."); + +unsigned int spl_debug_binary = 1; +EXPORT_SYMBOL(spl_debug_binary); + +unsigned int spl_debug_catastrophe; +EXPORT_SYMBOL(spl_debug_catastrophe); + +unsigned int spl_debug_panic_on_bug = 1; +EXPORT_SYMBOL(spl_debug_panic_on_bug); +module_param(spl_debug_panic_on_bug, int, 0644); +MODULE_PARM_DESC(spl_debug_panic_on_bug, "Panic on BUG"); + +static char spl_debug_file_name[PATH_MAX]; +char spl_debug_file_path[PATH_MAX] = "/var/dumps/spl-log"; + +unsigned int spl_console_ratelimit = 1; +EXPORT_SYMBOL(spl_console_ratelimit); + +long spl_console_max_delay; +EXPORT_SYMBOL(spl_console_max_delay); + +long spl_console_min_delay; +EXPORT_SYMBOL(spl_console_min_delay); + +unsigned int spl_console_backoff = SPL_DEFAULT_BACKOFF; +EXPORT_SYMBOL(spl_console_backoff); + +unsigned int spl_debug_stack; +EXPORT_SYMBOL(spl_debug_stack); + +static int spl_panic_in_progress; + +union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS] __cacheline_aligned; +char *trace_console_buffers[NR_CPUS][3]; +struct rw_semaphore trace_sem; +atomic_t trace_tage_allocated = ATOMIC_INIT(0); + +static int spl_debug_dump_all_pages(dumplog_priv_t *dp, char *); +static void trace_fini(void); + + +/* Memory percentage breakdown by type */ +static unsigned int pages_factor[TCD_TYPE_MAX] = { + 80, /* 80% pages for TCD_TYPE_PROC */ + 10, /* 10% pages for TCD_TYPE_SOFTIRQ */ + 10 /* 10% pages for TCD_TYPE_IRQ */ +}; + +const char * +spl_debug_subsys2str(int subsys) +{ + switch (subsys) { + default: + return NULL; + case S_UNDEFINED: + return "undefined"; + case S_ATOMIC: + return "atomic"; + case S_KOBJ: + return "kobj"; + case S_VNODE: + return "vnode"; + case S_TIME: + return "time"; + case S_RWLOCK: + return "rwlock"; + case S_THREAD: + return "thread"; + case S_CONDVAR: + return "condvar"; + case S_MUTEX: + return "mutex"; + case S_RNG: + return "rng"; + case S_TASKQ: + return "taskq"; + case S_KMEM: + return "kmem"; + case S_DEBUG: + return "debug"; + case S_GENERIC: + return "generic"; + case S_PROC: + return "proc"; + case S_MODULE: + return "module"; + } +} + +const char * +spl_debug_dbg2str(int debug) +{ + switch (debug) { + default: + return NULL; + case D_TRACE: + return "trace"; + case D_INFO: + return "info"; + case D_WARNING: + return "warning"; + case D_ERROR: + return "error"; + case D_EMERG: + return "emerg"; + 
case D_CONSOLE: + return "console"; + case D_IOCTL: + return "ioctl"; + case D_DPRINTF: + return "dprintf"; + case D_OTHER: + return "other"; + } +} + +int +spl_debug_mask2str(char *str, int size, unsigned long mask, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : + spl_debug_dbg2str; + const char *token; + int i, bit, len = 0; + + if (mask == 0) { /* "0" */ + if (size > 0) + str[0] = '0'; + len = 1; + } else { /* space-separated tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + if ((mask & bit) == 0) + continue; + + token = fn(bit); + if (token == NULL) /* unused bit */ + continue; + + if (len > 0) { /* separator? */ + if (len < size) + str[len] = ' '; + len++; + } + + while (*token != 0) { + if (len < size) + str[len] = *token; + token++; + len++; + } + } + } + + /* terminate 'str' */ + if (len < size) + str[len] = 0; + else + str[size - 1] = 0; + + return len; +} + +static int +spl_debug_token2mask(int *mask, const char *str, int len, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : + spl_debug_dbg2str; + const char *token; + int i, j, bit; + + /* match against known tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + token = fn(bit); + if (token == NULL) /* unused? */ + continue; + + /* strcasecmp */ + for (j = 0; ; j++) { + if (j == len) { /* end of token */ + if (token[j] == 0) { + *mask = bit; + return 0; + } + break; + } + + if (token[j] == 0) + break; + + if (str[j] == token[j]) + continue; + + if (str[j] < 'A' || 'Z' < str[j]) + break; + + if (str[j] - 'A' + 'a' != token[j]) + break; + } + } + + return -EINVAL; /* no match */ +} + +int +spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys) +{ + char op = 0; + int m = 0, matched, n, t; + + /* Allow a number for backwards compatibility */ + for (n = strlen(str); n > 0; n--) + if (!isspace(str[n-1])) + break; + matched = n; + + if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && matched == n) { + *mask = m; + return 0; + } + + /* <str> must be a list of debug tokens or numbers separated by + * whitespace and optionally an operator ('+' or '-'). If an operator + * appears first in <str>, '*mask' is used as the starting point + * (relative), otherwise 0 is used (absolute). An operator applies to + * all following tokens up to the next operator. 
*/ + matched = 0; + while (*str != 0) { + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) + break; + + if (*str == '+' || *str == '-') { + op = *str++; + + /* op on first token == relative */ + if (!matched) + m = *mask; + + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) /* trailing op */ + return -EINVAL; + } + + /* find token length */ + for (n = 0; str[n] != 0 && !isspace(str[n]); n++); + + /* match token */ + if (spl_debug_token2mask(&t, str, n, is_subsys) != 0) + return -EINVAL; + + matched = 1; + if (op == '-') + m &= ~t; + else + m |= t; + + str += n; + } + + if (!matched) + return -EINVAL; + + *mask = m; + return 0; +} + +static void +spl_debug_dumplog_internal(dumplog_priv_t *dp) +{ + void *journal_info; + + journal_info = current->journal_info; + current->journal_info = NULL; + + snprintf(spl_debug_file_name, sizeof(spl_debug_file_path) - 1, + "%s.%ld.%ld", spl_debug_file_path, + get_seconds(), (long)dp->dp_pid); + printk(KERN_ALERT "SPL: dumping log to %s\n", spl_debug_file_name); + spl_debug_dump_all_pages(dp, spl_debug_file_name); + + current->journal_info = journal_info; +} + +static int +spl_debug_dumplog_thread(void *arg) +{ + dumplog_priv_t *dp = (dumplog_priv_t *)arg; + + spl_debug_dumplog_internal(dp); + atomic_set(&dp->dp_done, 1); + wake_up(&dp->dp_waitq); + complete_and_exit(NULL, 0); + + return 0; /* Unreachable */ +} + +/* When flag is set do not use a new thread for the debug dump */ +int +spl_debug_dumplog(int flags) +{ + struct task_struct *tsk; + dumplog_priv_t dp; + + init_waitqueue_head(&dp.dp_waitq); + dp.dp_pid = current->pid; + dp.dp_flags = flags; + atomic_set(&dp.dp_done, 0); + + if (dp.dp_flags & DL_NOTHREAD) { + spl_debug_dumplog_internal(&dp); + } else { + + tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug"); + if (tsk == NULL) + return -ENOMEM; + + wake_up_process(tsk); + wait_event(dp.dp_waitq, atomic_read(&dp.dp_done)); + } + + return 0; +} +EXPORT_SYMBOL(spl_debug_dumplog); + +static char * +trace_get_console_buffer(void) +{ + int cpu = get_cpu(); + int idx; + + if (in_irq()) { + idx = 0; + } else if (in_softirq()) { + idx = 1; + } else { + idx = 2; + } + + return trace_console_buffers[cpu][idx]; +} + +static void +trace_put_console_buffer(char *buffer) +{ + put_cpu(); +} + +static int +trace_lock_tcd(struct trace_cpu_data *tcd) +{ + __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); + + spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); + + return 1; +} + +static void +trace_unlock_tcd(struct trace_cpu_data *tcd) +{ + __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); + + spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); +} + +static struct trace_cpu_data * +trace_get_tcd(void) +{ + int cpu; + struct trace_cpu_data *tcd; + + cpu = get_cpu(); + if (in_irq()) + tcd = &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd; + else if (in_softirq()) + tcd = &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd; + else + tcd = &(*trace_data[TCD_TYPE_PROC])[cpu].tcd; + + trace_lock_tcd(tcd); + + return tcd; +} + +static void +trace_put_tcd (struct trace_cpu_data *tcd) +{ + trace_unlock_tcd(tcd); + + put_cpu(); +} + +static void +trace_set_debug_header(struct spl_debug_header *header, int subsys, + int mask, const int line, unsigned long stack) +{ + struct timeval tv; + + do_gettimeofday(&tv); + + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = current->pid; + 
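A sketch of how a caller drives the parser above (illustrative; spl_debug_get_mask() and spl_debug_set_mask() are defined later in this file). A leading '+' or '-' makes the edit relative to the mask passed in, while a bare token list replaces it outright:

```c
unsigned long mask = spl_debug_get_mask();

/* enable 'info', disable 'trace', keep everything else */
if (spl_debug_str2mask(&mask, "+info -trace", 0) == 0)
	spl_debug_set_mask(mask);
```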
header->ph_line_num = line; + + return; +} + +static void +trace_print_to_console(struct spl_debug_header *hdr, int mask, const char *buf, + int len, const char *file, const char *fn) +{ + char *prefix = "SPL", *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = "SPLError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "SPLError"; + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = "SPL"; + ptype = KERN_WARNING; + } else if ((mask & (D_CONSOLE | spl_debug_printk)) != 0) { + prefix = "SPL"; + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, + hdr->ph_pid, hdr->ph_stack, file, + hdr->ph_line_num, fn, len, buf); + } + + return; +} + +static int +trace_max_debug_mb(void) +{ + return MAX(512, ((num_physpages >> (20 - PAGE_SHIFT)) * 80) / 100); +} + +static struct trace_page * +tage_alloc(int gfp) +{ + struct page *page; + struct trace_page *tage; + + page = alloc_pages(gfp | __GFP_NOWARN, 0); + if (page == NULL) + return NULL; + + tage = kmalloc(sizeof(*tage), gfp); + if (tage == NULL) { + __free_pages(page, 0); + return NULL; + } + + tage->page = page; + atomic_inc(&trace_tage_allocated); + + return tage; +} + +static void +tage_free(struct trace_page *tage) +{ + __ASSERT(tage != NULL); + __ASSERT(tage->page != NULL); + + __free_pages(tage->page, 0); + kfree(tage); + atomic_dec(&trace_tage_allocated); +} + +static struct trace_page * +tage_from_list(struct list_head *list) +{ + return list_entry(list, struct trace_page, linkage); +} + +static void +tage_to_tail(struct trace_page *tage, struct list_head *queue) +{ + __ASSERT(tage != NULL); + __ASSERT(queue != NULL); + + list_move_tail(&tage->linkage, queue); +} + +/* try to return a page that has 'len' bytes left at the end */ +static struct trace_page * +trace_get_tage_try(struct trace_cpu_data *tcd, unsigned long len) +{ + struct trace_page *tage; + + if (tcd->tcd_cur_pages > 0) { + __ASSERT(!list_empty(&tcd->tcd_pages)); + tage = tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= PAGE_SIZE) + return tage; + } + + if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { + if (tcd->tcd_cur_stock_pages > 0) { + tage = tage_from_list(tcd->tcd_stock_pages.prev); + tcd->tcd_cur_stock_pages--; + list_del_init(&tage->linkage); + } else { + tage = tage_alloc(GFP_ATOMIC); + if (tage == NULL) { + printk(KERN_WARNING + "failure to allocate a tage (%ld)\n", + tcd->tcd_cur_pages); + return NULL; + } + } + + tage->used = 0; + tage->cpu = smp_processor_id(); + tage->type = tcd->tcd_type; + list_add_tail(&tage->linkage, &tcd->tcd_pages); + tcd->tcd_cur_pages++; + + return tage; + } + + return NULL; +} + +/* return a page that has 'len' bytes left at the end */ +static struct trace_page * +trace_get_tage(struct trace_cpu_data *tcd, unsigned long len) +{ + struct trace_page *tage; + + __ASSERT(len <= PAGE_SIZE); + + tage = trace_get_tage_try(tcd, len); + if (tage) + return tage; + + if (tcd->tcd_cur_pages > 0) { + tage = tage_from_list(tcd->tcd_pages.next); + tage->used = 0; + tage_to_tail(tage, &tcd->tcd_pages); + } + + return tage; +} + +int +spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, const char *format2, ...) 
+{ + struct trace_cpu_data *tcd = NULL; + struct spl_debug_header header = { 0, }; + struct trace_page *tage; + /* string_buf is used only if tcd != NULL, and is always set then */ + char *string_buf = NULL; + char *debug_buf; + int known_size; + int needed = 85; /* average message length */ + int max_nob; + va_list ap; + int i; + int remain; + + if (strchr(file, '/')) + file = strrchr(file, '/') + 1; + + trace_set_debug_header(&header, subsys, mask, line, CDEBUG_STACK()); + + tcd = trace_get_tcd(); + if (tcd == NULL) + goto console; + + if (tcd->tcd_shutting_down) { + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + known_size = strlen(file) + 1; + if (fn) + known_size += strlen(fn) + 1; + + if (spl_debug_binary) + known_size += sizeof(header); + + /* '2' used because vsnprintf returns real size required for output + * _without_ terminating NULL. */ + for (i = 0; i < 2; i++) { + tage = trace_get_tage(tcd, needed + known_size + 1); + if (tage == NULL) { + if (needed + known_size > PAGE_SIZE) + mask |= D_ERROR; + + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + string_buf = (char *)page_address(tage->page) + + tage->used + known_size; + + max_nob = PAGE_SIZE - tage->used - known_size; + if (max_nob <= 0) { + printk(KERN_EMERG "negative max_nob: %i\n", max_nob); + mask |= D_ERROR; + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, max_nob, format1, ap); + va_end(ap); + } + + if (format2) { + remain = max_nob - needed; + if (remain < 0) + remain = 0; + + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + + if (needed < max_nob) + break; + } + + if (unlikely(*(string_buf + needed - 1) != '\n')) + printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", + file, line, fn); + + header.ph_len = known_size + needed; + debug_buf = (char *)page_address(tage->page) + tage->used; + + if (spl_debug_binary) { + memcpy(debug_buf, &header, sizeof(header)); + tage->used += sizeof(header); + debug_buf += sizeof(header); + } + + strcpy(debug_buf, file); + tage->used += strlen(file) + 1; + debug_buf += strlen(file) + 1; + + if (fn) { + strcpy(debug_buf, fn); + tage->used += strlen(fn) + 1; + debug_buf += strlen(fn) + 1; + } + + __ASSERT(debug_buf == string_buf); + + tage->used += needed; + __ASSERT (tage->used <= PAGE_SIZE); + +console: + if ((mask & spl_debug_printk) == 0) { + /* no console output requested */ + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (cdls != NULL) { + if (spl_console_ratelimit && cdls->cdls_next != 0 && + !time_before(cdls->cdls_next, jiffies)) { + /* skipping a console message */ + cdls->cdls_count++; + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (time_before(cdls->cdls_next + spl_console_max_delay + + (10 * HZ), jiffies)) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= spl_console_backoff * 4; + } else { + cdls->cdls_delay *= spl_console_backoff; + + if (cdls->cdls_delay < spl_console_min_delay) + cdls->cdls_delay = spl_console_min_delay; + else if (cdls->cdls_delay > spl_console_max_delay) + cdls->cdls_delay = spl_console_max_delay; + } + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1; + } + + if (tcd != NULL) { + trace_print_to_console(&header, mask, string_buf, needed, file, fn); + trace_put_tcd(tcd); + } else { + string_buf = trace_get_console_buffer(); + + needed = 0; + if (format1 != NULL) { 
+ va_copy(ap, args); + needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap); + va_end(ap); + } + if (format2 != NULL) { + remain = TRACE_CONSOLE_BUFFER_SIZE - needed; + if (remain > 0) { + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + } + trace_print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + } + + if (cdls != NULL && cdls->cdls_count != 0) { + string_buf = trace_get_console_buffer(); + + needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, + "Skipped %d previous similar message%s\n", + cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); + + trace_print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + cdls->cdls_count = 0; + } + + return 0; +} +EXPORT_SYMBOL(spl_debug_vmsg); + +/* Do the collect_pages job on a single CPU: assumes that all other + * CPUs have been stopped during a panic. If this isn't true for + * some arch, this will have to be implemented separately in each arch. + */ +static void +collect_pages_from_single_cpu(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + int i, j; + + tcd_for_each(tcd, i, j) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + } +} + +static void +collect_pages_on_all_cpus(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + int i, cpu; + + spin_lock(&pc->pc_lock); + for_each_possible_cpu(cpu) { + tcd_for_each_type_lock(tcd, i, cpu) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + } + } + spin_unlock(&pc->pc_lock); +} + +static void +collect_pages(dumplog_priv_t *dp, struct page_collection *pc) +{ + INIT_LIST_HEAD(&pc->pc_pages); + + if (spl_panic_in_progress || dp->dp_flags & DL_SINGLE_CPU) + collect_pages_from_single_cpu(pc); + else + collect_pages_on_all_cpus(pc); +} + +static void +put_pages_back_on_all_cpus(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + struct list_head *cur_head; + struct trace_page *tage; + struct trace_page *tmp; + int i, cpu; + + spin_lock(&pc->pc_lock); + + for_each_possible_cpu(cpu) { + tcd_for_each_type_lock(tcd, i, cpu) { + cur_head = tcd->tcd_pages.next; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, + linkage) { + + __ASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != cpu || tage->type != i) + continue; + + tage_to_tail(tage, cur_head); + tcd->tcd_cur_pages++; + } + } + } + + spin_unlock(&pc->pc_lock); +} + +static void +put_pages_back(struct page_collection *pc) +{ + if (!spl_panic_in_progress) + put_pages_back_on_all_cpus(pc); +} + +static struct file * +trace_filp_open (const char *name, int flags, int mode, int *err) +{ + struct file *filp = NULL; + int rc; + + filp = filp_open(name, flags, mode); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + printk(KERN_ERR "SPL: Can't open %s file: %d\n", name, rc); + if (err) + *err = rc; + filp = NULL; + } + return filp; +} + +#define trace_filp_write(fp, b, s, p) (fp)->f_op->write((fp), (b), (s), p) +#define trace_filp_fsync(fp) (fp)->f_op->fsync((fp),(fp)->f_dentry,1) +#define trace_filp_close(f) filp_close(f, NULL) +#define trace_filp_poff(f) (&(f)->f_pos) + +static int +spl_debug_dump_all_pages(dumplog_priv_t *dp, char *filename) +{ + struct page_collection pc; + struct file *filp; + struct trace_page *tage; + struct trace_page *tmp; + mm_segment_t oldfs; + int rc = 0; + + down_write(&trace_sem); + + filp = trace_filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, + 
0600, &rc); + if (filp == NULL) { + if (rc != -EEXIST) + printk(KERN_ERR "SPL: Can't open %s for dump: %d\n", + filename, rc); + goto out; + } + + spin_lock_init(&pc.pc_lock); + collect_pages(dp, &pc); + if (list_empty(&pc.pc_pages)) { + rc = 0; + goto close; + } + + oldfs = get_fs(); + set_fs(get_ds()); + + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + + rc = trace_filp_write(filp, page_address(tage->page), + tage->used, trace_filp_poff(filp)); + if (rc != (int)tage->used) { + printk(KERN_WARNING "SPL: Wanted to write %u " + "but wrote %d\n", tage->used, rc); + put_pages_back(&pc); + __ASSERT(list_empty(&pc.pc_pages)); + break; + } + list_del(&tage->linkage); + tage_free(tage); + } + + set_fs(oldfs); + + rc = trace_filp_fsync(filp); + if (rc) + printk(KERN_ERR "SPL: Unable to sync: %d\n", rc); + close: + trace_filp_close(filp); + out: + up_write(&trace_sem); + + return rc; +} + +static void +spl_debug_flush_pages(void) +{ + dumplog_priv_t dp; + struct page_collection pc; + struct trace_page *tage; + struct trace_page *tmp; + + spin_lock_init(&pc.pc_lock); + init_waitqueue_head(&dp.dp_waitq); + dp.dp_pid = current->pid; + dp.dp_flags = 0; + atomic_set(&dp.dp_done, 0); + + collect_pages(&dp, &pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + list_del(&tage->linkage); + tage_free(tage); + } +} + +unsigned long +spl_debug_set_mask(unsigned long mask) { + spl_debug_mask = mask; + return 0; +} +EXPORT_SYMBOL(spl_debug_set_mask); + +unsigned long +spl_debug_get_mask(void) { + return spl_debug_mask; +} +EXPORT_SYMBOL(spl_debug_get_mask); + +unsigned long +spl_debug_set_subsys(unsigned long subsys) { + spl_debug_subsys = subsys; + return 0; +} +EXPORT_SYMBOL(spl_debug_set_subsys); + +unsigned long +spl_debug_get_subsys(void) { + return spl_debug_subsys; +} +EXPORT_SYMBOL(spl_debug_get_subsys); + +int +spl_debug_set_mb(int mb) +{ + int i, j, pages; + int limit = trace_max_debug_mb(); + struct trace_cpu_data *tcd; + + if (mb < num_possible_cpus()) { + printk(KERN_ERR "SPL: Refusing to set debug buffer size to " + "%dMB - lower limit is %d\n", mb, num_possible_cpus()); + return -EINVAL; + } + + if (mb > limit) { + printk(KERN_ERR "SPL: Refusing to set debug buffer size to " + "%dMB - upper limit is %d\n", mb, limit); + return -EINVAL; + } + + mb /= num_possible_cpus(); + pages = mb << (20 - PAGE_SHIFT); + + down_write(&trace_sem); + + tcd_for_each(tcd, i, j) + tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; + + up_write(&trace_sem); + + return 0; +} +EXPORT_SYMBOL(spl_debug_set_mb); + +int +spl_debug_get_mb(void) +{ + int i, j; + struct trace_cpu_data *tcd; + int total_pages = 0; + + down_read(&trace_sem); + + tcd_for_each(tcd, i, j) + total_pages += tcd->tcd_max_pages; + + up_read(&trace_sem); + + return (total_pages >> (20 - PAGE_SHIFT)) + 1; +} +EXPORT_SYMBOL(spl_debug_get_mb); + +void spl_debug_dumpstack(struct task_struct *tsk) +{ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + + printk(KERN_ERR "SPL: Showing stack for process %d\n", tsk->pid); + dump_stack(); +} +EXPORT_SYMBOL(spl_debug_dumpstack); + +void spl_debug_bug(char *file, const char *func, const int line, int flags) +{ + spl_debug_catastrophe = 1; + spl_debug_msg(NULL, 0, D_EMERG, file, func, line, "SBUG\n"); + + if (in_interrupt()) { + panic("SBUG in interrupt.\n"); + /* not reached */ + } + + /* Ensure all debug pages and dumped by current cpu */ + if (spl_debug_panic_on_bug) + 
spl_panic_in_progress = 1; + +#ifdef DEBUG + spl_debug_dumpstack(NULL); + spl_debug_dumplog(flags); +#endif + + if (spl_debug_panic_on_bug) + panic("SBUG"); + + set_task_state(current, TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +EXPORT_SYMBOL(spl_debug_bug); + +int +spl_debug_clear_buffer(void) +{ + spl_debug_flush_pages(); + return 0; +} +EXPORT_SYMBOL(spl_debug_clear_buffer); + +int +spl_debug_mark_buffer(char *text) +{ + CDEBUG(D_WARNING, "*************************************\n"); + CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); + CDEBUG(D_WARNING, "*************************************\n"); + + return 0; +} +EXPORT_SYMBOL(spl_debug_mark_buffer); + +static int +trace_init(int max_pages) +{ + struct trace_cpu_data *tcd; + int i, j; + + init_rwsem(&trace_sem); + + /* initialize trace_data */ + memset(trace_data, 0, sizeof(trace_data)); + for (i = 0; i < TCD_TYPE_MAX; i++) { + trace_data[i] = kmalloc(sizeof(union trace_data_union) * + NR_CPUS, GFP_KERNEL); + if (trace_data[i] == NULL) + goto out; + } + + tcd_for_each(tcd, i, j) { + spin_lock_init(&tcd->tcd_lock); + tcd->tcd_pages_factor = pages_factor[i]; + tcd->tcd_type = i; + tcd->tcd_cpu = j; + INIT_LIST_HEAD(&tcd->tcd_pages); + INIT_LIST_HEAD(&tcd->tcd_stock_pages); + tcd->tcd_cur_pages = 0; + tcd->tcd_cur_stock_pages = 0; + tcd->tcd_max_pages = (max_pages * pages_factor[i]) / 100; + tcd->tcd_shutting_down = 0; + } + + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < 3; j++) { + trace_console_buffers[i][j] = + kmalloc(TRACE_CONSOLE_BUFFER_SIZE, + GFP_KERNEL); + + if (trace_console_buffers[i][j] == NULL) + goto out; + } + } + + return 0; +out: + trace_fini(); + printk(KERN_ERR "SPL: Insufficient memory for debug logs\n"); + return -ENOMEM; +} + +int +debug_init(void) +{ + int rc, max = spl_debug_mb; + + spl_console_max_delay = SPL_DEFAULT_MAX_DELAY; + spl_console_min_delay = SPL_DEFAULT_MIN_DELAY; + + /* If spl_debug_mb is set to an invalid value or uninitialized + * then just make the total buffers smp_num_cpus TCD_MAX_PAGES */ + if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || + max >= 512 || max < 0) { + max = TCD_MAX_PAGES; + } else { + max = (max / num_online_cpus()) << (20 - PAGE_SHIFT); + } + + rc = trace_init(max); + if (rc) + return rc; + + return rc; +} + +static void +trace_cleanup_on_all_cpus(void) +{ + struct trace_cpu_data *tcd; + struct trace_page *tage; + struct trace_page *tmp; + int i, cpu; + + for_each_possible_cpu(cpu) { + tcd_for_each_type_lock(tcd, i, cpu) { + tcd->tcd_shutting_down = 1; + + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, + linkage) { + __ASSERT_TAGE_INVARIANT(tage); + + list_del(&tage->linkage); + tage_free(tage); + } + tcd->tcd_cur_pages = 0; + } + } +} + +static void +trace_fini(void) +{ + int i, j; + + trace_cleanup_on_all_cpus(); + + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < 3; j++) { + if (trace_console_buffers[i][j] != NULL) { + kfree(trace_console_buffers[i][j]); + trace_console_buffers[i][j] = NULL; + } + } + } + + for (i = 0; trace_data[i] != NULL; i++) { + kfree(trace_data[i]); + trace_data[i] = NULL; + } +} + +void +debug_fini(void) +{ + trace_fini(); +} diff --git a/module/spl/spl-err.c b/module/spl/spl-err.c new file mode 100644 index 000000000..c4508dfa2 --- /dev/null +++ b/module/spl/spl-err.c @@ -0,0 +1,78 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
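A worked example of the buffer-sizing arithmetic in spl_debug_set_mb() above, assuming 4 possible CPUs and 4 KiB pages (PAGE_SHIFT = 12):

```c
/*
 * spl_debug_set_mb(40):
 *   per-cpu MB:  40 / 4          = 10
 *   pages:       10 << (20 - 12) = 2560 per cpu
 *   TCD_TYPE_PROC    (80%)       = 2048 pages
 *   TCD_TYPE_SOFTIRQ (10%)       =  256 pages
 *   TCD_TYPE_IRQ     (10%)       =  256 pages
 */
```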
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/sysmacros.h> +#include <sys/cmn_err.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_GENERIC + +#ifndef NDEBUG +static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; +static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; +#endif + +void +vpanic(const char *fmt, va_list ap) +{ + char msg[MAXMSGLEN]; + + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + panic(msg); +} /* vpanic() */ +EXPORT_SYMBOL(vpanic); + +void +cmn_err(int ce, const char *fmt, ...) +{ + char msg[MAXMSGLEN]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + va_end(ap); + + CERROR("%s", msg); +} /* cmn_err() */ +EXPORT_SYMBOL(cmn_err); + +void +vcmn_err(int ce, const char *fmt, va_list ap) +{ + char msg[MAXMSGLEN]; + + if (ce == CE_PANIC) + vpanic(fmt, ap); + + if (ce != CE_NOTE) { /* suppress noise in stress testing */ + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + CERROR("%s%s%s", ce_prefix[ce], msg, ce_suffix[ce]); + } +} /* vcmn_err() */ +EXPORT_SYMBOL(vcmn_err); diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c new file mode 100644 index 000000000..c09d9d4e3 --- /dev/null +++ b/module/spl/spl-generic.c @@ -0,0 +1,328 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
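An illustrative caller of the cmn_err() shims above (the message and variables are hypothetical). Note that only vcmn_err() special-cases CE_PANIC by routing to vpanic(); cmn_err() as written logs every level through CERROR():

```c
cmn_err(CE_WARN, "device %s: short read (%d of %d bytes)",
    dev_name, bytes_read, bytes_wanted);
```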
+ */ + +#include <sys/sysmacros.h> +#include <sys/vmsystm.h> +#include <sys/vnode.h> +#include <sys/kmem.h> +#include <sys/mutex.h> +#include <sys/taskq.h> +#include <sys/debug.h> +#include <sys/proc.h> +#include <sys/kstat.h> +#include <sys/utsname.h> +#include <linux/kmod.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_GENERIC + +char spl_version[16] = "SPL v" VERSION; + +long spl_hostid = 0; +EXPORT_SYMBOL(spl_hostid); + +char hw_serial[11] = "<none>"; +EXPORT_SYMBOL(hw_serial); + +int p0 = 0; +EXPORT_SYMBOL(p0); + +vmem_t *zio_alloc_arena = NULL; +EXPORT_SYMBOL(zio_alloc_arena); + +int +highbit(unsigned long i) +{ + register int h = 1; + ENTRY; + + if (i == 0) + RETURN(0); +#if BITS_PER_LONG == 64 + if (i & 0xffffffff00000000ul) { + h += 32; i >>= 32; + } +#endif + if (i & 0xffff0000) { + h += 16; i >>= 16; + } + if (i & 0xff00) { + h += 8; i >>= 8; + } + if (i & 0xf0) { + h += 4; i >>= 4; + } + if (i & 0xc) { + h += 2; i >>= 2; + } + if (i & 0x2) { + h += 1; + } + RETURN(h); +} +EXPORT_SYMBOL(highbit); + +/* + * Implementation of 64 bit division for 32-bit machines. + */ +#if BITS_PER_LONG == 32 +uint64_t __udivdi3(uint64_t dividend, uint64_t divisor) +{ +#ifdef HAVE_DIV64_64 + return div64_64(dividend, divisor); +#else + /* Taken from a 2.6.24 kernel. */ + uint32_t high, d; + + high = divisor >> 32; + if (high) { + unsigned int shift = fls(high); + + d = divisor >> shift; + dividend >>= shift; + } else + d = divisor; + + do_div(dividend, d); + + return dividend; +#endif +} +EXPORT_SYMBOL(__udivdi3); + +/* + * Implementation of 64 bit modulo for 32-bit machines. + */ +uint64_t __umoddi3(uint64_t dividend, uint64_t divisor) +{ + return dividend - divisor * (dividend / divisor); +} +EXPORT_SYMBOL(__umoddi3); +#endif + +/* NOTE: The strtoxx behavior is solely based on my reading of the Solaris + * ddi_strtol(9F) man page. I have not verified the behavior of these + * functions against their Solaris counterparts. It is possible that I + * may have misinterpretted the man page or the man page is incorrect. + */ +int ddi_strtoul(const char *, char **, int, unsigned long *); +int ddi_strtol(const char *, char **, int, long *); +int ddi_strtoull(const char *, char **, int, unsigned long long *); +int ddi_strtoll(const char *, char **, int, long long *); + +#define define_ddi_strtoux(type, valtype) \ +int ddi_strtou##type(const char *str, char **endptr, \ + int base, valtype *result) \ +{ \ + valtype last_value, value = 0; \ + char *ptr = (char *)str; \ + int flag = 1, digit; \ + \ + if (strlen(ptr) == 0) \ + return EINVAL; \ + \ + /* Auto-detect base based on prefix */ \ + if (!base) { \ + if (str[0] == '0') { \ + if (tolower(str[1])=='x' && isxdigit(str[2])) { \ + base = 16; /* hex */ \ + ptr += 2; \ + } else if (str[1] >= '0' && str[1] < 8) { \ + base = 8; /* octal */ \ + ptr += 1; \ + } else { \ + return EINVAL; \ + } \ + } else { \ + base = 10; /* decimal */ \ + } \ + } \ + \ + while (1) { \ + if (isdigit(*ptr)) \ + digit = *ptr - '0'; \ + else if (isalpha(*ptr)) \ + digit = tolower(*ptr) - 'a' + 10; \ + else \ + break; \ + \ + if (digit >= base) \ + break; \ + \ + last_value = value; \ + value = value * base + digit; \ + if (last_value > value) /* Overflow */ \ + return ERANGE; \ + \ + flag = 1; \ + ptr++; \ + } \ + \ + if (flag) \ + *result = value; \ + \ + if (endptr) \ + *endptr = (char *)(flag ? 
ptr : str); \ + \ + return 0; \ +} \ + +#define define_ddi_strtox(type, valtype) \ +int ddi_strto##type(const char *str, char **endptr, \ + int base, valtype *result) \ +{ \ + int rc; \ + \ + if (*str == '-') { \ + rc = ddi_strtou##type(str + 1, endptr, base, result); \ + if (!rc) { \ + if (*endptr == str + 1) \ + *endptr = (char *)str; \ + else \ + *result = -*result; \ + } \ + } else { \ + rc = ddi_strtou##type(str, endptr, base, result); \ + } \ + \ + return rc; \ +} + +define_ddi_strtoux(l, unsigned long) +define_ddi_strtox(l, long) +define_ddi_strtoux(ll, unsigned long long) +define_ddi_strtox(ll, long long) + +EXPORT_SYMBOL(ddi_strtoul); +EXPORT_SYMBOL(ddi_strtol); +EXPORT_SYMBOL(ddi_strtoll); +EXPORT_SYMBOL(ddi_strtoull); + +struct new_utsname *__utsname(void) +{ +#ifdef HAVE_INIT_UTSNAME + return init_utsname(); +#else + return &system_utsname; +#endif +} +EXPORT_SYMBOL(__utsname); + +static int +set_hostid(void) +{ + char sh_path[] = "/bin/sh"; + char *argv[] = { sh_path, + "-c", + "/usr/bin/hostid >/proc/sys/kernel/spl/hostid", + NULL }; + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + /* Doing address resolution in the kernel is tricky and just + * not a good idea in general. So to set the proper 'hw_serial' + * use the usermodehelper support to ask '/bin/sh' to run + * '/usr/bin/hostid' and redirect the result to /proc/sys/spl/hostid + * for us to use. It's a horific solution but it will do for now. + */ + return call_usermodehelper(sh_path, argv, envp, 1); +} + +static int __init spl_init(void) +{ + int rc = 0; + + if ((rc = debug_init())) + return rc; + + if ((rc = spl_kmem_init())) + GOTO(out , rc); + + if ((rc = spl_mutex_init())) + GOTO(out2 , rc); + + if ((rc = spl_taskq_init())) + GOTO(out3, rc); + + if ((rc = vn_init())) + GOTO(out4, rc); + + if ((rc = proc_init())) + GOTO(out5, rc); + + if ((rc = kstat_init())) + GOTO(out6, rc); + + if ((rc = set_hostid())) + GOTO(out7, rc = -EADDRNOTAVAIL); + + printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION); + RETURN(rc); +out7: + kstat_fini(); +out6: + proc_fini(); +out5: + vn_fini(); +out4: + spl_taskq_fini(); +out3: + spl_mutex_fini(); +out2: + spl_kmem_fini(); +out: + debug_fini(); + + printk("SPL: Failed to Load Solaris Porting Layer v%s, " + "rc = %d\n", VERSION, rc); + return rc; +} + +static void spl_fini(void) +{ + ENTRY; + + printk("SPL: Unloaded Solaris Porting Layer v%s\n", VERSION); + kstat_fini(); + proc_fini(); + vn_fini(); + spl_taskq_fini(); + spl_mutex_fini(); + spl_kmem_fini(); + debug_fini(); +} + +module_init(spl_init); +module_exit(spl_fini); + +MODULE_AUTHOR("Lawrence Livermore National Labs"); +MODULE_DESCRIPTION("Solaris Porting Layer"); +MODULE_LICENSE("GPL"); diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c new file mode 100644 index 000000000..e580fbbdd --- /dev/null +++ b/module/spl/spl-kmem.c @@ -0,0 +1,1452 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
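Tracing a call through the ddi_strtoul() generated above: with base 0 the prefix selects the radix ('0x' means hex, a leading '0' octal, otherwise decimal). A short sketch:

```c
unsigned long val;
char *end;
int rc;

rc = ddi_strtoul("0x1f", &end, 0, &val);
/* rc == 0, val == 31, end points at the terminating NUL */
```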
+ * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/kmem.h> + +#ifdef DEBUG_SUBSYSTEM +# undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_KMEM + +/* + * Memory allocation interfaces and debugging for basic kmem_* + * and vmem_* style memory allocation. When DEBUG_KMEM is enable + * all allocations will be tracked when they are allocated and + * freed. When the SPL module is unload a list of all leaked + * addresses and where they were allocated will be dumped to the + * console. Enabling this feature has a significant impant on + * performance but it makes finding memory leaks staight forward. + */ +#ifdef DEBUG_KMEM +/* Shim layer memory accounting */ +atomic64_t kmem_alloc_used = ATOMIC64_INIT(0); +unsigned long long kmem_alloc_max = 0; +atomic64_t vmem_alloc_used = ATOMIC64_INIT(0); +unsigned long long vmem_alloc_max = 0; +int kmem_warning_flag = 1; + +EXPORT_SYMBOL(kmem_alloc_used); +EXPORT_SYMBOL(kmem_alloc_max); +EXPORT_SYMBOL(vmem_alloc_used); +EXPORT_SYMBOL(vmem_alloc_max); +EXPORT_SYMBOL(kmem_warning_flag); + +# ifdef DEBUG_KMEM_TRACKING + +/* XXX - Not to surprisingly with debugging enabled the xmem_locks are very + * highly contended particularly on xfree(). If we want to run with this + * detailed debugging enabled for anything other than debugging we need to + * minimize the contention by moving to a lock per xmem_table entry model. + */ + +# define KMEM_HASH_BITS 10 +# define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS) + +# define VMEM_HASH_BITS 10 +# define VMEM_TABLE_SIZE (1 << VMEM_HASH_BITS) + +typedef struct kmem_debug { + struct hlist_node kd_hlist; /* Hash node linkage */ + struct list_head kd_list; /* List of all allocations */ + void *kd_addr; /* Allocation pointer */ + size_t kd_size; /* Allocation size */ + const char *kd_func; /* Allocation function */ + int kd_line; /* Allocation line */ +} kmem_debug_t; + +spinlock_t kmem_lock; +struct hlist_head kmem_table[KMEM_TABLE_SIZE]; +struct list_head kmem_list; + +spinlock_t vmem_lock; +struct hlist_head vmem_table[VMEM_TABLE_SIZE]; +struct list_head vmem_list; + +EXPORT_SYMBOL(kmem_lock); +EXPORT_SYMBOL(kmem_table); +EXPORT_SYMBOL(kmem_list); + +EXPORT_SYMBOL(vmem_lock); +EXPORT_SYMBOL(vmem_table); +EXPORT_SYMBOL(vmem_list); +# endif + +int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); } +#else +int kmem_set_warning(int flag) { return 0; } +#endif +EXPORT_SYMBOL(kmem_set_warning); + +/* + * Slab allocation interfaces + * + * While the Linux slab implementation was inspired by the Solaris + * implemenation I cannot use it to emulate the Solaris APIs. I + * require two features which are not provided by the Linux slab. + * + * 1) Constructors AND destructors. Recent versions of the Linux + * kernel have removed support for destructors. This is a deal + * breaker for the SPL which contains particularly expensive + * initializers for mutex's, condition variables, etc. We also + * require a minimal level of cleanup for these data types unlike + * many Linux data type which do need to be explicitly destroyed. + * + * 2) Virtual address space backed slab. 
Callers of the Solaris slab + * expect it to work well for both small are very large allocations. + * Because of memory fragmentation the Linux slab which is backed + * by kmalloc'ed memory performs very badly when confronted with + * large numbers of large allocations. Basing the slab on the + * virtual address space removes the need for contigeous pages + * and greatly improve performance for large allocations. + * + * For these reasons, the SPL has its own slab implementation with + * the needed features. It is not as highly optimized as either the + * Solaris or Linux slabs, but it should get me most of what is + * needed until it can be optimized or obsoleted by another approach. + * + * One serious concern I do have about this method is the relatively + * small virtual address space on 32bit arches. This will seriously + * constrain the size of the slab caches and their performance. + * + * XXX: Implement work requests to keep an eye on each cache and + * shrink them via spl_slab_reclaim() when they are wasting lots + * of space. Currently this process is driven by the reapers. + * + * XXX: Improve the partial slab list by carefully maintaining a + * strict ordering of fullest to emptiest slabs based on + * the slab reference count. This gaurentees the when freeing + * slabs back to the system we need only linearly traverse the + * last N slabs in the list to discover all the freeable slabs. + * + * XXX: NUMA awareness for optionally allocating memory close to a + * particular core. This can be adventageous if you know the slab + * object will be short lived and primarily accessed from one core. + * + * XXX: Slab coloring may also yield performance improvements and would + * be desirable to implement. + * + * XXX: Proper hardware cache alignment would be good too. + */ + +struct list_head spl_kmem_cache_list; /* List of caches */ +struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ + +static int spl_cache_flush(spl_kmem_cache_t *skc, + spl_kmem_magazine_t *skm, int flush); + +#ifdef HAVE_SET_SHRINKER +static struct shrinker *spl_kmem_cache_shrinker; +#else +static int spl_kmem_cache_generic_shrinker(int nr_to_scan, + unsigned int gfp_mask); +static struct shrinker spl_kmem_cache_shrinker = { + .shrink = spl_kmem_cache_generic_shrinker, + .seeks = KMC_DEFAULT_SEEKS, +}; +#endif + +#ifdef DEBUG_KMEM +# ifdef DEBUG_KMEM_TRACKING + +static kmem_debug_t * +kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, + void *addr) +{ + struct hlist_head *head; + struct hlist_node *node; + struct kmem_debug *p; + unsigned long flags; + ENTRY; + + spin_lock_irqsave(lock, flags); + + head = &table[hash_ptr(addr, bits)]; + hlist_for_each_entry_rcu(p, node, head, kd_hlist) { + if (p->kd_addr == addr) { + hlist_del_init(&p->kd_hlist); + list_del_init(&p->kd_list); + spin_unlock_irqrestore(lock, flags); + return p; + } + } + + spin_unlock_irqrestore(lock, flags); + + RETURN(NULL); +} + +void * +kmem_alloc_track(size_t size, int flags, const char *func, int line, + int node_alloc, int node) +{ + void *ptr = NULL; + kmem_debug_t *dptr; + unsigned long irq_flags; + ENTRY; + + dptr = (kmem_debug_t *) kmalloc(sizeof(kmem_debug_t), + flags & ~__GFP_ZERO); + + if (dptr == NULL) { + CWARN("kmem_alloc(%ld, 0x%x) debug failed\n", + sizeof(kmem_debug_t), flags); + } else { + /* Marked unlikely because we should never be doing this, + * we tolerate to up 2 pages but a single page is best. 
*/ + if (unlikely((size) > (PAGE_SIZE * 2)) && kmem_warning_flag) + CWARN("Large kmem_alloc(%llu, 0x%x) (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + + /* We use kstrdup() below because the string pointed to by + * __FUNCTION__ might not be available by the time we want + * to print it since the module might have been unloaded. */ + dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO); + if (unlikely(dptr->kd_func == NULL)) { + kfree(dptr); + CWARN("kstrdup() failed in kmem_alloc(%llu, 0x%x) " + "(%lld/%llu)\n", (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + goto out; + } + + /* Use the correct allocator */ + if (node_alloc) { + ASSERT(!(flags & __GFP_ZERO)); + ptr = kmalloc_node(size, flags, node); + } else if (flags & __GFP_ZERO) { + ptr = kzalloc(size, flags & ~__GFP_ZERO); + } else { + ptr = kmalloc(size, flags); + } + + if (unlikely(ptr == NULL)) { + kfree(dptr->kd_func); + kfree(dptr); + CWARN("kmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + goto out; + } + + atomic64_add(size, &kmem_alloc_used); + if (unlikely(atomic64_read(&kmem_alloc_used) > + kmem_alloc_max)) + kmem_alloc_max = + atomic64_read(&kmem_alloc_used); + + INIT_HLIST_NODE(&dptr->kd_hlist); + INIT_LIST_HEAD(&dptr->kd_list); + + dptr->kd_addr = ptr; + dptr->kd_size = size; + dptr->kd_line = line; + + spin_lock_irqsave(&kmem_lock, irq_flags); + hlist_add_head_rcu(&dptr->kd_hlist, + &kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]); + list_add_tail(&dptr->kd_list, &kmem_list); + spin_unlock_irqrestore(&kmem_lock, irq_flags); + + CDEBUG_LIMIT(D_INFO, "kmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, + ptr, atomic64_read(&kmem_alloc_used), + kmem_alloc_max); + } +out: + RETURN(ptr); +} +EXPORT_SYMBOL(kmem_alloc_track); + +void +kmem_free_track(void *ptr, size_t size) +{ + kmem_debug_t *dptr; + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr); + + ASSERT(dptr); /* Must exist in hash due to kmem_alloc() */ + + /* Size must match */ + ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " + "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size, + (unsigned long long) size, dptr->kd_func, dptr->kd_line); + + atomic64_sub(size, &kmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&kmem_alloc_used), + kmem_alloc_max); + + kfree(dptr->kd_func); + + memset(dptr, 0x5a, sizeof(kmem_debug_t)); + kfree(dptr); + + memset(ptr, 0x5a, size); + kfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(kmem_free_track); + +void * +vmem_alloc_track(size_t size, int flags, const char *func, int line) +{ + void *ptr = NULL; + kmem_debug_t *dptr; + unsigned long irq_flags; + ENTRY; + + ASSERT(flags & KM_SLEEP); + + dptr = (kmem_debug_t *) kmalloc(sizeof(kmem_debug_t), flags); + if (dptr == NULL) { + CWARN("vmem_alloc(%ld, 0x%x) debug failed\n", + sizeof(kmem_debug_t), flags); + } else { + /* We use kstrdup() below because the string pointed to by + * __FUNCTION__ might not be available by the time we want + * to print it, since the module might have been unloaded. 
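A hypothetical sketch of how the kd_hlist/kd_list bookkeeping above supports the unload-time leak dump promised in the file header; the real dump code is not part of this excerpt, and this helper name is invented for illustration:

```c
static void
kmem_dump_leaks(void)
{
	kmem_debug_t *kd;
	unsigned long flags;

	spin_lock_irqsave(&kmem_lock, flags);
	list_for_each_entry(kd, &kmem_list, kd_list)
		printk(KERN_WARNING "SPL: leaked %zu bytes at %p (%s:%d)\n",
		    kd->kd_size, kd->kd_addr, kd->kd_func, kd->kd_line);
	spin_unlock_irqrestore(&kmem_lock, flags);
}
```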
*/ + dptr->kd_func = kstrdup(func, flags & ~__GFP_ZERO); + if (unlikely(dptr->kd_func == NULL)) { + kfree(dptr); + CWARN("kstrdup() failed in vmem_alloc(%llu, 0x%x) " + "(%lld/%llu)\n", (unsigned long long) size, flags, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + goto out; + } + + ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO, + PAGE_KERNEL); + + if (unlikely(ptr == NULL)) { + kfree(dptr->kd_func); + kfree(dptr); + CWARN("vmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + goto out; + } + + if (flags & __GFP_ZERO) + memset(ptr, 0, size); + + atomic64_add(size, &vmem_alloc_used); + if (unlikely(atomic64_read(&vmem_alloc_used) > + vmem_alloc_max)) + vmem_alloc_max = + atomic64_read(&vmem_alloc_used); + + INIT_HLIST_NODE(&dptr->kd_hlist); + INIT_LIST_HEAD(&dptr->kd_list); + + dptr->kd_addr = ptr; + dptr->kd_size = size; + dptr->kd_line = line; + + spin_lock_irqsave(&vmem_lock, irq_flags); + hlist_add_head_rcu(&dptr->kd_hlist, + &vmem_table[hash_ptr(ptr, VMEM_HASH_BITS)]); + list_add_tail(&dptr->kd_list, &vmem_list); + spin_unlock_irqrestore(&vmem_lock, irq_flags); + + CDEBUG_LIMIT(D_INFO, "vmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, + ptr, atomic64_read(&vmem_alloc_used), + vmem_alloc_max); + } +out: + RETURN(ptr); +} +EXPORT_SYMBOL(vmem_alloc_track); + +void +vmem_free_track(void *ptr, size_t size) +{ + kmem_debug_t *dptr; + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + dptr = kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr); + ASSERT(dptr); /* Must exist in hash due to vmem_alloc() */ + + /* Size must match */ + ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " + "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size, + (unsigned long long) size, dptr->kd_func, dptr->kd_line); + + atomic64_sub(size, &vmem_alloc_used); + CDEBUG_LIMIT(D_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&vmem_alloc_used), + vmem_alloc_max); + + kfree(dptr->kd_func); + + memset(dptr, 0x5a, sizeof(kmem_debug_t)); + kfree(dptr); + + memset(ptr, 0x5a, size); + vfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(vmem_free_track); + +# else /* DEBUG_KMEM_TRACKING */ + +void * +kmem_alloc_debug(size_t size, int flags, const char *func, int line, + int node_alloc, int node) +{ + void *ptr; + ENTRY; + + /* Marked unlikely because we should never be doing this, + * we tolerate to up 2 pages but a single page is best. 
*/ + if (unlikely(size > (PAGE_SIZE * 2)) && kmem_warning_flag) + CWARN("Large kmem_alloc(%llu, 0x%x) (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + + /* Use the correct allocator */ + if (node_alloc) { + ASSERT(!(flags & __GFP_ZERO)); + ptr = kmalloc_node(size, flags, node); + } else if (flags & __GFP_ZERO) { + ptr = kzalloc(size, flags & (~__GFP_ZERO)); + } else { + ptr = kmalloc(size, flags); + } + + if (ptr == NULL) { + CWARN("kmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + } else { + atomic64_add(size, &kmem_alloc_used); + if (unlikely(atomic64_read(&kmem_alloc_used) > kmem_alloc_max)) + kmem_alloc_max = atomic64_read(&kmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "kmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, ptr, + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + } + RETURN(ptr); +} +EXPORT_SYMBOL(kmem_alloc_debug); + +void +kmem_free_debug(void *ptr, size_t size) +{ + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + atomic64_sub(size, &kmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&kmem_alloc_used), + kmem_alloc_max); + + memset(ptr, 0x5a, size); + kfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(kmem_free_debug); + +void * +vmem_alloc_debug(size_t size, int flags, const char *func, int line) +{ + void *ptr; + ENTRY; + + ASSERT(flags & KM_SLEEP); + + ptr = __vmalloc(size, (flags | __GFP_HIGHMEM) & ~__GFP_ZERO, + PAGE_KERNEL); + if (ptr == NULL) { + CWARN("vmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n", + (unsigned long long) size, flags, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + } else { + if (flags & __GFP_ZERO) + memset(ptr, 0, size); + + atomic64_add(size, &vmem_alloc_used); + + if (unlikely(atomic64_read(&vmem_alloc_used) > vmem_alloc_max)) + vmem_alloc_max = atomic64_read(&vmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "vmem_alloc(%llu, 0x%x) = %p " + "(%lld/%llu)\n", (unsigned long long) size, flags, ptr, + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + } + + RETURN(ptr); +} +EXPORT_SYMBOL(vmem_alloc_debug); + +void +vmem_free_debug(void *ptr, size_t size) +{ + ENTRY; + + ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, + (unsigned long long) size); + + atomic64_sub(size, &vmem_alloc_used); + + CDEBUG_LIMIT(D_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, + (unsigned long long) size, atomic64_read(&vmem_alloc_used), + vmem_alloc_max); + + memset(ptr, 0x5a, size); + vfree(ptr); + + EXIT; +} +EXPORT_SYMBOL(vmem_free_debug); + +# endif /* DEBUG_KMEM_TRACKING */ +#endif /* DEBUG_KMEM */ + +static void * +kv_alloc(spl_kmem_cache_t *skc, int size, int flags) +{ + void *ptr; + + if (skc->skc_flags & KMC_KMEM) { + if (size > (2 * PAGE_SIZE)) { + ptr = (void *)__get_free_pages(flags, get_order(size)); + } else + ptr = kmem_alloc(size, flags); + } else { + ptr = vmem_alloc(size, flags); + } + + return ptr; +} + +static void +kv_free(spl_kmem_cache_t *skc, void *ptr, int size) +{ + if (skc->skc_flags & KMC_KMEM) { + if (size > (2 * PAGE_SIZE)) + free_pages((unsigned long)ptr, get_order(size)); + else + kmem_free(ptr, size); + } else { + vmem_free(ptr, size); + } +} + +static spl_kmem_slab_t * +spl_slab_alloc(spl_kmem_cache_t *skc, int flags) +{ + spl_kmem_slab_t *sks; + spl_kmem_obj_t *sko, *n; + void *base, *obj; + int i, size, rc = 0; + + /* It's important that we pack 
the spl_kmem_obj_t structure
+	 * and the actual objects into one large address space
+	 * to minimize the number of calls to the allocator.  It
+	 * is far better to do a few large allocations and then
+	 * subdivide it ourselves.  Now which allocator we use
+	 * requires balancing a few trade-offs.
+	 *
+	 * For small objects we use kmem_alloc() because as long
+	 * as you are only requesting a small number of pages
+	 * (ideally just one) it's cheap.  However, when you start
+	 * requesting multiple pages kmem_alloc() gets increasingly
+	 * expensive since it requires contiguous pages.  For this
+	 * reason we shift to vmem_alloc() for slabs of large
+	 * objects, which removes the need for contiguous pages.
+	 * We do not use vmem_alloc() in all cases because there
+	 * is significant locking overhead in __get_vm_area_node().
+	 * This function takes a single global lock when acquiring
+	 * an available virtual address range, which serializes all
+	 * vmem_alloc()'s for all slab caches.  Using slightly
+	 * different allocation functions for small and large
+	 * objects should give us the best of both worlds.
+	 *
+	 * sks struct:  sizeof(spl_kmem_slab_t)
+	 * obj data:    skc->skc_obj_size
+	 * obj struct:  sizeof(spl_kmem_obj_t)
+	 * <N obj data + obj structs>
+	 *
+	 * XXX: It would probably be a good idea to more carefully
+	 *      align these data structures in memory.
+	 */
+	base = kv_alloc(skc, skc->skc_slab_size, flags);
+	if (base == NULL)
+		RETURN(NULL);
+
+	sks = (spl_kmem_slab_t *)base;
+	sks->sks_magic = SKS_MAGIC;
+	sks->sks_objs = skc->skc_slab_objs;
+	sks->sks_age = jiffies;
+	sks->sks_cache = skc;
+	INIT_LIST_HEAD(&sks->sks_list);
+	INIT_LIST_HEAD(&sks->sks_free_list);
+	sks->sks_ref = 0;
+	size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
+
+	for (i = 0; i < sks->sks_objs; i++) {
+		if (skc->skc_flags & KMC_OFFSLAB) {
+			obj = kv_alloc(skc, size, flags);
+			if (!obj)
+				GOTO(out, rc = -ENOMEM);
+		} else {
+			obj = base + sizeof(spl_kmem_slab_t) + i * size;
+		}
+
+		sko = obj + skc->skc_obj_size;
+		sko->sko_addr = obj;
+		sko->sko_magic = SKO_MAGIC;
+		sko->sko_slab = sks;
+		INIT_LIST_HEAD(&sko->sko_list);
+		list_add_tail(&sko->sko_list, &sks->sks_free_list);
+	}
+
+	list_for_each_entry(sko, &sks->sks_free_list, sko_list)
+		if (skc->skc_ctor)
+			skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
+out:
+	if (rc) {
+		if (skc->skc_flags & KMC_OFFSLAB)
+			list_for_each_entry_safe(sko, n, &sks->sks_free_list,
+						 sko_list)
+				kv_free(skc, sko->sko_addr, size);
+
+		kv_free(skc, base, skc->skc_slab_size);
+		sks = NULL;
+	}
+
+	RETURN(sks);
+}
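
For illustration only (this sketch is not part of the SPL sources), the packed layout computed above can be reproduced standalone; SLAB_HDR, OBJ_SIZE and OBJ_STRUCT are hypothetical stand-ins for sizeof(spl_kmem_slab_t), skc->skc_obj_size and sizeof(spl_kmem_obj_t):

    #include <stdio.h>
    #include <stddef.h>

    #define SLAB_HDR   128   /* stand-in for sizeof(spl_kmem_slab_t) */
    #define OBJ_SIZE   192   /* stand-in for skc->skc_obj_size */
    #define OBJ_STRUCT  64   /* stand-in for sizeof(spl_kmem_obj_t) */

    int main(void)
    {
            size_t size = OBJ_STRUCT + OBJ_SIZE;  /* per-object stride */
            int i;

            /* Offsets match the on-slab case in spl_slab_alloc(): the
             * spl_kmem_obj_t sits immediately after its object data. */
            for (i = 0; i < 4; i++) {
                    size_t obj = SLAB_HDR + i * size;
                    printf("obj %d: data at +%zu, obj struct at +%zu\n",
                           i, obj, obj + OBJ_SIZE);
            }
            return 0;
    }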
+
+/* Removes a slab from the complete or partial list, so it must
+ * be called with the 'skc->skc_lock' held.
+ */
+static void
+spl_slab_free(spl_kmem_slab_t *sks)
+{
+	spl_kmem_cache_t *skc;
+	spl_kmem_obj_t *sko, *n;
+	int size;
+	ENTRY;
+
+	ASSERT(sks->sks_magic == SKS_MAGIC);
+	ASSERT(sks->sks_ref == 0);
+
+	skc = sks->sks_cache;
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(spin_is_locked(&skc->skc_lock));
+
+	skc->skc_obj_total -= sks->sks_objs;
+	skc->skc_slab_total--;
+	list_del(&sks->sks_list);
+	size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
+
+	/* Run the destructors as the slab is being released */
+	list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
+		ASSERT(sko->sko_magic == SKO_MAGIC);
+
+		if (skc->skc_dtor)
+			skc->skc_dtor(sko->sko_addr, skc->skc_private);
+
+		if (skc->skc_flags & KMC_OFFSLAB)
+			kv_free(skc, sko->sko_addr, size);
+	}
+
+	kv_free(skc, sks, skc->skc_slab_size);
+	EXIT;
+}
+
+static int
+__spl_slab_reclaim(spl_kmem_cache_t *skc)
+{
+	spl_kmem_slab_t *sks, *m;
+	int rc = 0;
+	ENTRY;
+
+	ASSERT(spin_is_locked(&skc->skc_lock));
+	/*
+	 * Free empty slabs which have not been touched in skc_delay
+	 * seconds.  This delay time is important to avoid thrashing.
+	 * Empty slabs will be at the end of the skc_partial_list.
+	 */
+	list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list,
+					 sks_list) {
+		if (sks->sks_ref > 0)
+			break;
+
+		if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) {
+			spl_slab_free(sks);
+			rc++;
+		}
+	}
+
+	/* Returns the number of slabs reclaimed */
+	RETURN(rc);
+}
+
+static int
+spl_slab_reclaim(spl_kmem_cache_t *skc)
+{
+	int rc;
+	ENTRY;
+
+	spin_lock(&skc->skc_lock);
+	rc = __spl_slab_reclaim(skc);
+	spin_unlock(&skc->skc_lock);
+
+	RETURN(rc);
+}
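
The reclaim pass above frees a slab only after it has sat empty for skc_delay seconds. A standalone sketch of that aging test, using a mock tick rate and mock timestamps rather than real kernel jiffies, behaves as follows:

    #include <stdio.h>

    #define HZ 250  /* mock tick rate, not the kernel's */
    /* same semantics as the kernel's time_after(a, b) */
    #define time_after(a, b) ((long)(b) - (long)(a) < 0)

    int main(void)
    {
            unsigned long jiffies = 100000;   /* "now" */
            unsigned long sks_age = 98000;    /* slab last touched */
            unsigned long skc_delay = 5;      /* seconds of grace */

            if (time_after(jiffies, sks_age + skc_delay * HZ))
                    printf("slab idle > %lus, eligible for reclaim\n",
                           skc_delay);
            else
                    printf("slab touched recently, keep it\n");
            return 0;
    }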
+
+static int
+spl_magazine_size(spl_kmem_cache_t *skc)
+{
+	int size;
+	ENTRY;
+
+	/* Guesses for reasonable magazine sizes; they
+	 * should really adapt based on observed usage.
+	 */
+	if (skc->skc_obj_size > (PAGE_SIZE * 256))
+		size = 4;
+	else if (skc->skc_obj_size > (PAGE_SIZE * 32))
+		size = 16;
+	else if (skc->skc_obj_size > (PAGE_SIZE))
+		size = 64;
+	else if (skc->skc_obj_size > (PAGE_SIZE / 4))
+		size = 128;
+	else
+		size = 512;
+
+	RETURN(size);
+}
+
+static spl_kmem_magazine_t *
+spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
+{
+	spl_kmem_magazine_t *skm;
+	int size = sizeof(spl_kmem_magazine_t) +
+	           sizeof(void *) * skc->skc_mag_size;
+	ENTRY;
+
+	skm = kmem_alloc_node(size, GFP_KERNEL, node);
+	if (skm) {
+		skm->skm_magic = SKM_MAGIC;
+		skm->skm_avail = 0;
+		skm->skm_size = skc->skc_mag_size;
+		skm->skm_refill = skc->skc_mag_refill;
+		if (!(skc->skc_flags & KMC_NOTOUCH))
+			skm->skm_age = jiffies;
+	}
+
+	RETURN(skm);
+}
+
+static void
+spl_magazine_free(spl_kmem_magazine_t *skm)
+{
+	int size = sizeof(spl_kmem_magazine_t) +
+	           sizeof(void *) * skm->skm_size;
+
+	ENTRY;
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+	ASSERT(skm->skm_avail == 0);
+
+	kmem_free(skm, size);
+	EXIT;
+}
+
+static int
+spl_magazine_create(spl_kmem_cache_t *skc)
+{
+	int i;
+	ENTRY;
+
+	skc->skc_mag_size = spl_magazine_size(skc);
+	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
+
+	for_each_online_cpu(i) {
+		skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i));
+		if (!skc->skc_mag[i]) {
+			for (i--; i >= 0; i--)
+				spl_magazine_free(skc->skc_mag[i]);
+
+			RETURN(-ENOMEM);
+		}
+	}
+
+	RETURN(0);
+}
+
+static void
+spl_magazine_destroy(spl_kmem_cache_t *skc)
+{
+	spl_kmem_magazine_t *skm;
+	int i;
+	ENTRY;
+
+	for_each_online_cpu(i) {
+		skm = skc->skc_mag[i];
+		(void)spl_cache_flush(skc, skm, skm->skm_avail);
+		spl_magazine_free(skm);
+	}
+
+	EXIT;
+}
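
The size ladder in spl_magazine_size() can be tabulated with a small standalone sketch; the assumed PAGE_SIZE of 4096 and the sample object sizes are illustrative only:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096  /* assumed page size */

    /* same ladder as spl_magazine_size() above */
    static int magazine_size(size_t obj_size)
    {
            if (obj_size > (PAGE_SIZE * 256)) return 4;
            if (obj_size > (PAGE_SIZE * 32))  return 16;
            if (obj_size > (PAGE_SIZE))       return 64;
            if (obj_size > (PAGE_SIZE / 4))   return 128;
            return 512;
    }

    int main(void)
    {
            size_t sizes[] = { 64, 2048, 8192, 262144, 2097152 };
            int i;

            for (i = 0; i < 5; i++)
                    printf("obj %7zu bytes -> %3d objs per cpu magazine\n",
                           sizes[i], magazine_size(sizes[i]));
            return 0;
    }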
+
+spl_kmem_cache_t *
+spl_kmem_cache_create(char *name, size_t size, size_t align,
+                      spl_kmem_ctor_t ctor,
+                      spl_kmem_dtor_t dtor,
+                      spl_kmem_reclaim_t reclaim,
+                      void *priv, void *vmp, int flags)
+{
+	spl_kmem_cache_t *skc;
+	uint32_t slab_max, slab_size, slab_objs;
+	int rc, kmem_flags = KM_SLEEP;
+	ENTRY;
+
+	ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags);
+	ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags);
+	ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags);
+
+	/* We may be called when there is a non-zero preempt_count or
+	 * interrupts are disabled, in which case we must not sleep.
+	 */
+	if (current_thread_info()->preempt_count || irqs_disabled())
+		kmem_flags = KM_NOSLEEP;
+
+	/* Allocate new cache memory and initialize. */
+	skc = (spl_kmem_cache_t *)kmem_zalloc(sizeof(*skc), kmem_flags);
+	if (skc == NULL)
+		RETURN(NULL);
+
+	skc->skc_magic = SKC_MAGIC;
+	skc->skc_name_size = strlen(name) + 1;
+	skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags);
+	if (skc->skc_name == NULL) {
+		kmem_free(skc, sizeof(*skc));
+		RETURN(NULL);
+	}
+	strncpy(skc->skc_name, name, skc->skc_name_size);
+
+	skc->skc_ctor = ctor;
+	skc->skc_dtor = dtor;
+	skc->skc_reclaim = reclaim;
+	skc->skc_private = priv;
+	skc->skc_vmp = vmp;
+	skc->skc_flags = flags;
+	skc->skc_obj_size = size;
+	skc->skc_delay = SPL_KMEM_CACHE_DELAY;
+
+	INIT_LIST_HEAD(&skc->skc_list);
+	INIT_LIST_HEAD(&skc->skc_complete_list);
+	INIT_LIST_HEAD(&skc->skc_partial_list);
+	spin_lock_init(&skc->skc_lock);
+	skc->skc_slab_fail = 0;
+	skc->skc_slab_create = 0;
+	skc->skc_slab_destroy = 0;
+	skc->skc_slab_total = 0;
+	skc->skc_slab_alloc = 0;
+	skc->skc_slab_max = 0;
+	skc->skc_obj_total = 0;
+	skc->skc_obj_alloc = 0;
+	skc->skc_obj_max = 0;
+
+	/* If none passed, select a cache type based on the object size */
+	if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
+		if (skc->skc_obj_size < (PAGE_SIZE / 8)) {
+			skc->skc_flags |= KMC_KMEM;
+		} else {
+			skc->skc_flags |= KMC_VMEM;
+		}
+	}
+
+	/* Size slabs properly to ensure they are not too large */
+	slab_max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE;
+	if (skc->skc_flags & KMC_OFFSLAB) {
+		skc->skc_slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
+		skc->skc_slab_size = sizeof(spl_kmem_slab_t);
+		ASSERT(skc->skc_obj_size < slab_max);
+	} else {
+		slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB + 1;
+
+		do {
+			slab_objs--;
+			slab_size = sizeof(spl_kmem_slab_t) + slab_objs *
+			            (skc->skc_obj_size + sizeof(spl_kmem_obj_t));
+		} while (slab_size > slab_max);
+
+		skc->skc_slab_objs = slab_objs;
+		skc->skc_slab_size = slab_size;
+	}
+
+	rc = spl_magazine_create(skc);
+	if (rc) {
+		kmem_free(skc->skc_name, skc->skc_name_size);
+		kmem_free(skc, sizeof(*skc));
+		RETURN(NULL);
+	}
+
+	down_write(&spl_kmem_cache_sem);
+	list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
+	up_write(&spl_kmem_cache_sem);
+
+	RETURN(skc);
+}
+EXPORT_SYMBOL(spl_kmem_cache_create);
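
A minimal consumer of this cache API might look like the following sketch; the cache name, object type and constructor are hypothetical and not part of this commit. KM_SLEEP is assumed since spl_kmem_cache_alloc() does not yet support KM_NOSLEEP:

    /* hypothetical object type managed by the cache */
    typedef struct my_obj {
            int mo_state;
    } my_obj_t;

    /* runs once per object when its slab is constructed */
    static void
    my_obj_ctor(void *addr, void *priv, int flags)
    {
            ((my_obj_t *)addr)->mo_state = 0;
    }

    static int
    my_cache_example(void)
    {
            spl_kmem_cache_t *skc;
            my_obj_t *obj;

            skc = spl_kmem_cache_create("my_obj_cache", sizeof(my_obj_t), 0,
                                        my_obj_ctor, NULL, NULL, NULL,
                                        NULL, 0);
            if (skc == NULL)
                    return -ENOMEM;

            obj = spl_kmem_cache_alloc(skc, KM_SLEEP); /* magazine or refill */
            /* ... use obj ... */
            spl_kmem_cache_free(skc, obj);  /* back to this cpu's magazine */

            spl_kmem_cache_destroy(skc);    /* all objects must be freed */
            return 0;
    }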
+
+void
+spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
+{
+	spl_kmem_slab_t *sks, *m;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+
+	down_write(&spl_kmem_cache_sem);
+	list_del_init(&skc->skc_list);
+	up_write(&spl_kmem_cache_sem);
+
+	spl_magazine_destroy(skc);
+	spin_lock(&skc->skc_lock);
+
+	/* Validate there are no objects in use and free all the
+	 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
+	 */
+	ASSERT(list_empty(&skc->skc_complete_list));
+	ASSERT(skc->skc_slab_alloc == 0);
+	ASSERT(skc->skc_obj_alloc == 0);
+
+	list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
+		spl_slab_free(sks);
+
+	ASSERT(skc->skc_slab_total == 0);
+	ASSERT(skc->skc_obj_total == 0);
+
+	kmem_free(skc->skc_name, skc->skc_name_size);
+	spin_unlock(&skc->skc_lock);
+
+	kmem_free(skc, sizeof(*skc));
+
+	EXIT;
+}
+EXPORT_SYMBOL(spl_kmem_cache_destroy);
+
+static void *
+spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
+{
+	spl_kmem_obj_t *sko;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(sks->sks_magic == SKS_MAGIC);
+	ASSERT(spin_is_locked(&skc->skc_lock));
+
+	sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
+	ASSERT(sko->sko_magic == SKO_MAGIC);
+	ASSERT(sko->sko_addr != NULL);
+
+	/* Remove from sks_free_list */
+	list_del_init(&sko->sko_list);
+
+	sks->sks_age = jiffies;
+	sks->sks_ref++;
+	skc->skc_obj_alloc++;
+
+	/* Track max obj usage statistics */
+	if (skc->skc_obj_alloc > skc->skc_obj_max)
+		skc->skc_obj_max = skc->skc_obj_alloc;
+
+	/* Track max slab usage statistics */
+	if (sks->sks_ref == 1) {
+		skc->skc_slab_alloc++;
+
+		if (skc->skc_slab_alloc > skc->skc_slab_max)
+			skc->skc_slab_max = skc->skc_slab_alloc;
+	}
+
+	return sko->sko_addr;
+}
+
+/* When no objects are available we must create a new slab.  Since this
+ * is an expensive operation we do it without holding the spinlock and
+ * only briefly acquire it when we link in the fully allocated and
+ * constructed slab.
+ */
+static spl_kmem_slab_t *
+spl_cache_grow(spl_kmem_cache_t *skc, int flags)
+{
+	spl_kmem_slab_t *sks;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+
+	if (flags & __GFP_WAIT) {
+		flags |= __GFP_NOFAIL;
+		local_irq_enable();
+		might_sleep();
+	}
+
+	sks = spl_slab_alloc(skc, flags);
+	if (sks == NULL) {
+		if (flags & __GFP_WAIT)
+			local_irq_disable();
+
+		RETURN(NULL);
+	}
+
+	if (flags & __GFP_WAIT)
+		local_irq_disable();
+
+	/* Link the new empty slab into the end of skc_partial_list */
+	spin_lock(&skc->skc_lock);
+	skc->skc_slab_total++;
+	skc->skc_obj_total += sks->sks_objs;
+	list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+	spin_unlock(&skc->skc_lock);
+
+	RETURN(sks);
+}
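
The refill batch size used by spl_cache_refill() below is clamped so the magazine is never overfilled; a standalone sketch of that arithmetic, with example numbers, is:

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            int skm_size = 128;    /* magazine capacity */
            int skm_refill = 64;   /* preferred batch: (size + 1) / 2 */
            int skm_avail = 100;   /* objects already cached */

            /* never fetch more than the remaining head-room */
            int refill = MIN(skm_refill, skm_size - skm_avail);

            printf("refill %d objects (head-room %d)\n",
                   refill, skm_size - skm_avail);
            return 0;
    }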
+static int
+spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
+{
+	spl_kmem_slab_t *sks;
+	int rc = 0, refill;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+
+	/* XXX: Check for refill bouncing by age perhaps */
+	refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail);
+
+	spin_lock(&skc->skc_lock);
+
+	while (refill > 0) {
+		/* No slabs available, we must grow the cache */
+		if (list_empty(&skc->skc_partial_list)) {
+			spin_unlock(&skc->skc_lock);
+
+			sks = spl_cache_grow(skc, flags);
+			if (!sks)
+				GOTO(out, rc);
+
+			/* Rescheduled to a different CPU, skm is not local */
+			if (skm != skc->skc_mag[smp_processor_id()])
+				GOTO(out, rc);
+
+			/* Potentially rescheduled to the same CPU, but
+			 * allocations may have occurred from this CPU while
+			 * we were sleeping, so recalculate the max refill. */
+			refill = MIN(refill, skm->skm_size - skm->skm_avail);
+
+			spin_lock(&skc->skc_lock);
+			continue;
+		}
+
+		/* Grab the next available slab */
+		sks = list_entry((&skc->skc_partial_list)->next,
+		                 spl_kmem_slab_t, sks_list);
+		ASSERT(sks->sks_magic == SKS_MAGIC);
+		ASSERT(sks->sks_ref < sks->sks_objs);
+		ASSERT(!list_empty(&sks->sks_free_list));
+
+		/* Consume as many objects as needed to refill the requested
+		 * cache.  We must also be careful not to overfill it. */
+		while (sks->sks_ref < sks->sks_objs && refill-- > 0 && ++rc) {
+			ASSERT(skm->skm_avail < skm->skm_size);
+			ASSERT(rc < skm->skm_size);
+			skm->skm_objs[skm->skm_avail++] = spl_cache_obj(skc, sks);
+		}
+
+		/* Move the slab to skc_complete_list when full */
+		if (sks->sks_ref == sks->sks_objs) {
+			list_del(&sks->sks_list);
+			list_add(&sks->sks_list, &skc->skc_complete_list);
+		}
+	}
+
+	spin_unlock(&skc->skc_lock);
+out:
+	/* Returns the number of entries added to the cache */
+	RETURN(rc);
+}
+
+static void
+spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
+{
+	spl_kmem_slab_t *sks = NULL;
+	spl_kmem_obj_t *sko = NULL;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(spin_is_locked(&skc->skc_lock));
+
+	sko = obj + skc->skc_obj_size;
+	ASSERT(sko->sko_magic == SKO_MAGIC);
+
+	sks = sko->sko_slab;
+	ASSERT(sks->sks_magic == SKS_MAGIC);
+	ASSERT(sks->sks_cache == skc);
+	list_add(&sko->sko_list, &sks->sks_free_list);
+
+	sks->sks_age = jiffies;
+	sks->sks_ref--;
+	skc->skc_obj_alloc--;
+
+	/* Move the slab to skc_partial_list when no longer full.  Slabs
+	 * are added to the head to keep the partial list in quasi-full
+	 * sorted order.  Fuller at the head, emptier at the tail. */
+	if (sks->sks_ref == (sks->sks_objs - 1)) {
+		list_del(&sks->sks_list);
+		list_add(&sks->sks_list, &skc->skc_partial_list);
+	}
+
+	/* Move empty slabs to the end of the partial list so
+	 * they can be easily found and freed during reclamation. */
+	if (sks->sks_ref == 0) {
+		list_del(&sks->sks_list);
+		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+		skc->skc_slab_alloc--;
+	}
+
+	EXIT;
+}
+
+static int
+spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
+{
+	int i, count = MIN(flush, skm->skm_avail);
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+
+	spin_lock(&skc->skc_lock);
+
+	for (i = 0; i < count; i++)
+		spl_cache_shrink(skc, skm->skm_objs[i]);
+
+//	__spl_slab_reclaim(skc);
+	skm->skm_avail -= count;
+	memmove(skm->skm_objs, &(skm->skm_objs[count]),
+	        sizeof(void *) * skm->skm_avail);
+
+	spin_unlock(&skc->skc_lock);
+
+	RETURN(count);
+}
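
The flush path above returns the oldest 'count' pointers to their slabs and slides the survivors down with memmove(); a mock standalone sketch of that array motion (the pointer values are placeholders):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            void *objs[6] = { (void *)1, (void *)2, (void *)3,
                              (void *)4, (void *)5, (void *)6 };
            int avail = 6, count = 4, i;

            /* objs[0..count-1] would go back via spl_cache_shrink() */
            avail -= count;
            memmove(objs, &objs[count], sizeof(void *) * avail);

            for (i = 0; i < avail; i++)
                    printf("slot %d -> obj %p\n", i, objs[i]);
            return 0;
    }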
+
+void *
+spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
+{
+	spl_kmem_magazine_t *skm;
+	unsigned long irq_flags;
+	void *obj = NULL;
+	int id;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(flags & KM_SLEEP); /* XXX: KM_NOSLEEP not yet supported */
+	local_irq_save(irq_flags);
+
+restart:
+	/* Safe to update the per-cpu structure without a lock, but
+	 * in the restart case we must be careful to reacquire
+	 * the local magazine since this may have changed
+	 * when we needed to grow the cache. */
+	id = smp_processor_id();
+	ASSERTF(id < 4, "cache=%p smp_processor_id=%d\n", skc, id);
+	skm = skc->skc_mag[smp_processor_id()];
+	ASSERTF(skm->skm_magic == SKM_MAGIC, "%x != %x: %s/%p/%p %x/%x/%x\n",
+	        skm->skm_magic, SKM_MAGIC, skc->skc_name, skc, skm,
+	        skm->skm_size, skm->skm_refill, skm->skm_avail);
+
+	if (likely(skm->skm_avail)) {
+		/* Object available in the CPU cache, use it */
+		obj = skm->skm_objs[--skm->skm_avail];
+		if (!(skc->skc_flags & KMC_NOTOUCH))
+			skm->skm_age = jiffies;
+	} else {
+		/* Per-CPU cache empty, directly allocate from
+		 * the slab and refill the per-CPU cache. */
+		(void)spl_cache_refill(skc, skm, flags);
+		GOTO(restart, obj = NULL);
+	}
+
+	local_irq_restore(irq_flags);
+	ASSERT(obj);
+
+	/* Pre-emptively migrate the object to the CPU L1 cache */
+	prefetchw(obj);
+
+	RETURN(obj);
+}
+EXPORT_SYMBOL(spl_kmem_cache_alloc);
+
+void
+spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
+{
+	spl_kmem_magazine_t *skm;
+	unsigned long flags;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	local_irq_save(flags);
+
+	/* Safe to update the per-cpu structure without a lock, but
+	 * since no remote memory allocation tracking is performed
+	 * it is entirely possible to allocate an object from one
+	 * CPU cache and return it to another. */
+	skm = skc->skc_mag[smp_processor_id()];
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+
+	/* Per-CPU cache full, flush it to make space */
+	if (unlikely(skm->skm_avail >= skm->skm_size))
+		(void)spl_cache_flush(skc, skm, skm->skm_refill);
+
+	/* Available space in the cache, use it */
+	skm->skm_objs[skm->skm_avail++] = obj;
+
+	local_irq_restore(flags);
+
+	EXIT;
+}
+EXPORT_SYMBOL(spl_kmem_cache_free);
+
+static int
+spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
+{
+	spl_kmem_cache_t *skc;
+
+	/* Under Linux a shrinker is not tightly coupled with a slab
+	 * cache.  In fact Linux always systematically tries calling all
+	 * registered shrinker callbacks until its target reclamation level
+	 * is reached.  Because of this we only register one shrinker
+	 * function in the shim layer for all slab caches.  And we always
+	 * attempt to shrink all caches when this generic shrinker is called.
+	 */
+	down_read(&spl_kmem_cache_sem);
+
+	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list)
+		spl_kmem_cache_reap_now(skc);
+
+	up_read(&spl_kmem_cache_sem);
+
+	/* XXX: Under Linux the shrinker should return the number of
+	 * entries remaining in the cache; we should do the same.
+	 */
+	return 1;
+}
+
+void
+spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
+{
+	spl_kmem_magazine_t *skm;
+	int i;
+	ENTRY;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+
+	if (skc->skc_reclaim)
+		skc->skc_reclaim(skc->skc_private);
+
+	/* Ensure per-CPU caches which are idle gradually flush */
+	for_each_online_cpu(i) {
+		skm = skc->skc_mag[i];
+
+		if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
+			(void)spl_cache_flush(skc, skm, skm->skm_refill);
+	}
+
+	spl_slab_reclaim(skc);
+
+	EXIT;
+}
+EXPORT_SYMBOL(spl_kmem_cache_reap_now);
+
+void
+spl_kmem_reap(void)
+{
+	spl_kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
+}
+EXPORT_SYMBOL(spl_kmem_reap);
+
+#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
+static char *
+spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
+{
+	int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
+	int i, flag = 1;
+
+	ASSERT(str != NULL && len >= 17);
+	memset(str, 0, len);
+
+	/* Check for a fully printable string, and while we are at
+	 * it place the printable characters in the passed buffer. */
+	for (i = 0; i < size; i++) {
+		str[i] = ((char *)(kd->kd_addr))[i];
+		if (isprint(str[i])) {
+			continue;
+		} else {
+			/* Minimum number of printable characters found
+			 * to make it worthwhile to print this as ASCII.
*/ + if (i > min) + break; + + flag = 0; + break; + } + } + + if (!flag) { + sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x", + *((uint8_t *)kd->kd_addr), + *((uint8_t *)kd->kd_addr + 2), + *((uint8_t *)kd->kd_addr + 4), + *((uint8_t *)kd->kd_addr + 6), + *((uint8_t *)kd->kd_addr + 8), + *((uint8_t *)kd->kd_addr + 10), + *((uint8_t *)kd->kd_addr + 12), + *((uint8_t *)kd->kd_addr + 14)); + } + + return str; +} + +static int +spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size) +{ + int i; + ENTRY; + + spin_lock_init(lock); + INIT_LIST_HEAD(list); + + for (i = 0; i < size; i++) + INIT_HLIST_HEAD(&kmem_table[i]); + + RETURN(0); +} + +static void +spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) +{ + unsigned long flags; + kmem_debug_t *kd; + char str[17]; + ENTRY; + + spin_lock_irqsave(lock, flags); + if (!list_empty(list)) + printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address", + "size", "data", "func", "line"); + + list_for_each_entry(kd, list, kd_list) + printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", kd->kd_addr, + kd->kd_size, spl_sprintf_addr(kd, str, 17, 8), + kd->kd_func, kd->kd_line); + + spin_unlock_irqrestore(lock, flags); + EXIT; +} +#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ +#define spl_kmem_init_tracking(list, lock, size) +#define spl_kmem_fini_tracking(list, lock) +#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ + +int +spl_kmem_init(void) +{ + int rc = 0; + ENTRY; + + init_rwsem(&spl_kmem_cache_sem); + INIT_LIST_HEAD(&spl_kmem_cache_list); + +#ifdef HAVE_SET_SHRINKER + spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS, + spl_kmem_cache_generic_shrinker); + if (spl_kmem_cache_shrinker == NULL) + RETURN(rc = -ENOMEM); +#else + register_shrinker(&spl_kmem_cache_shrinker); +#endif + +#ifdef DEBUG_KMEM + atomic64_set(&kmem_alloc_used, 0); + atomic64_set(&vmem_alloc_used, 0); + + spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE); + spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE); +#endif + RETURN(rc); +} + +void +spl_kmem_fini(void) +{ +#ifdef DEBUG_KMEM + /* Display all unreclaimed memory addresses, including the + * allocation size and the first few bytes of what's located + * at that address to aid in debugging. Performance is not + * a serious concern here since it is module unload time. */ + if (atomic64_read(&kmem_alloc_used) != 0) + CWARN("kmem leaked %ld/%ld bytes\n", + atomic64_read(&kmem_alloc_used), kmem_alloc_max); + + + if (atomic64_read(&vmem_alloc_used) != 0) + CWARN("vmem leaked %ld/%ld bytes\n", + atomic64_read(&vmem_alloc_used), vmem_alloc_max); + + spl_kmem_fini_tracking(&kmem_list, &kmem_lock); + spl_kmem_fini_tracking(&vmem_list, &vmem_lock); +#endif /* DEBUG_KMEM */ + ENTRY; + +#ifdef HAVE_SET_SHRINKER + remove_shrinker(spl_kmem_cache_shrinker); +#else + unregister_shrinker(&spl_kmem_cache_shrinker); +#endif + + EXIT; +} diff --git a/module/spl/spl-kobj.c b/module/spl/spl-kobj.c new file mode 100644 index 000000000..e78cd9244 --- /dev/null +++ b/module/spl/spl-kobj.c @@ -0,0 +1,93 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/kobj.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_KOBJ + +struct _buf * +kobj_open_file(const char *name) +{ + struct _buf *file; + vnode_t *vp; + int rc; + ENTRY; + + file = kmalloc(sizeof(_buf_t), GFP_KERNEL); + if (file == NULL) + RETURN((_buf_t *)-1UL); + + if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) { + kfree(file); + RETURN((_buf_t *)-1UL); + } + + file->vp = vp; + + RETURN(file); +} /* kobj_open_file() */ +EXPORT_SYMBOL(kobj_open_file); + +void +kobj_close_file(struct _buf *file) +{ + ENTRY; + VOP_CLOSE(file->vp, 0, 0, 0, 0, 0); + VN_RELE(file->vp); + kfree(file); + EXIT; +} /* kobj_close_file() */ +EXPORT_SYMBOL(kobj_close_file); + +int +kobj_read_file(struct _buf *file, char *buf, ssize_t size, offset_t off) +{ + ENTRY; + RETURN(vn_rdwr(UIO_READ, file->vp, buf, size, off, + UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL)); +} /* kobj_read_file() */ +EXPORT_SYMBOL(kobj_read_file); + +int +kobj_get_filesize(struct _buf *file, uint64_t *size) +{ + vattr_t vap; + int rc; + ENTRY; + + rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL); + if (rc) + RETURN(rc); + + *size = vap.va_size; + + RETURN(rc); +} /* kobj_get_filesize() */ +EXPORT_SYMBOL(kobj_get_filesize); diff --git a/module/spl/spl-kstat.c b/module/spl/spl-kstat.c new file mode 100644 index 000000000..bb6e9a504 --- /dev/null +++ b/module/spl/spl-kstat.c @@ -0,0 +1,496 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/kstat.h> + +#ifdef DEBUG_KSTAT + +static spinlock_t kstat_lock; +static struct list_head kstat_list; +static kid_t kstat_id; + +static void +kstat_seq_show_headers(struct seq_file *f) +{ + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + + seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n", + ksp->ks_kid, ksp->ks_type, ksp->ks_flags, + ksp->ks_ndata, (int)ksp->ks_data_size, + ksp->ks_crtime, ksp->ks_snaptime); + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + seq_printf(f, "raw data"); + break; + case KSTAT_TYPE_NAMED: + seq_printf(f, "%-31s %-4s %s\n", + "name", "type", "data"); + break; + case KSTAT_TYPE_INTR: + seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n", + "hard", "soft", "watchdog", + "spurious", "multsvc"); + break; + case KSTAT_TYPE_IO: + seq_printf(f, + "%-8s %-8s %-8s %-8s %-8s %-8s " + "%-8s %-8s %-8s %-8s %-8s %-8s\n", + "nread", "nwritten", "reads", "writes", + "wtime", "wlentime", "wupdate", + "rtime", "rlentime", "rupdate", + "wcnt", "rcnt"); + break; + case KSTAT_TYPE_TIMER: + seq_printf(f, + "%-31s %-8s " + "%-8s %-8s %-8s %-8s %-8s\n", + "name", "events", "elapsed", + "min", "max", "start", "stop"); + break; + default: + SBUG(); /* Unreachable */ + } +} + +static int +kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l) +{ + int i, j; + + for (i = 0; ; i++) { + seq_printf(f, "%03x:", i); + + for (j = 0; j < 16; j++) { + if (i * 16 + j >= l) { + seq_printf(f, "\n"); + goto out; + } + + seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]); + } + seq_printf(f, "\n"); + } +out: + return 0; +} + +static int +kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp) +{ + seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type); + + switch (knp->data_type) { + case KSTAT_DATA_CHAR: + knp->value.c[15] = '\0'; /* NULL terminate */ + seq_printf(f, "%-16s", knp->value.c); + break; + /* XXX - We need to be more careful able what tokens are + * used for each arch, for now this is correct for x86_64. 
+ */ + case KSTAT_DATA_INT32: + seq_printf(f, "%d", knp->value.i32); + break; + case KSTAT_DATA_UINT32: + seq_printf(f, "%u", knp->value.ui32); + break; + case KSTAT_DATA_INT64: + seq_printf(f, "%lld", (signed long long)knp->value.i64); + break; + case KSTAT_DATA_UINT64: + seq_printf(f, "%llu", (unsigned long long)knp->value.ui64); + break; + case KSTAT_DATA_LONG: + seq_printf(f, "%ld", knp->value.l); + break; + case KSTAT_DATA_ULONG: + seq_printf(f, "%lu", knp->value.ul); + break; + case KSTAT_DATA_STRING: + KSTAT_NAMED_STR_PTR(knp) + [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0'; + seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp)); + break; + default: + SBUG(); /* Unreachable */ + } + + seq_printf(f, "\n"); + + return 0; +} + +static int +kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip) +{ + seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n", + kip->intrs[KSTAT_INTR_HARD], + kip->intrs[KSTAT_INTR_SOFT], + kip->intrs[KSTAT_INTR_WATCHDOG], + kip->intrs[KSTAT_INTR_SPURIOUS], + kip->intrs[KSTAT_INTR_MULTSVC]); + + return 0; +} + +static int +kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip) +{ + seq_printf(f, + "%-8llu %-8llu %-8u %-8u %-8lld %-8lld " + "%-8lld %-8lld %-8lld %-8lld %-8u %-8u\n", + kip->nread, kip->nwritten, + kip->reads, kip->writes, + kip->wtime, kip->wlentime, kip->wlastupdate, + kip->rtime, kip->wlentime, kip->rlastupdate, + kip->wcnt, kip->rcnt); + + return 0; +} + +static int +kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp) +{ + seq_printf(f, + "%-31s %-8llu %-8lld %-8lld %-8lld %-8lld %-8lld\n", + ktp->name, ktp->num_events, ktp->elapsed_time, + ktp->min_time, ktp->max_time, + ktp->start_time, ktp->stop_time); + + return 0; +} + +static int +kstat_seq_show(struct seq_file *f, void *p) +{ + kstat_t *ksp = (kstat_t *)f->private; + int rc = 0; + + ASSERT(ksp->ks_magic == KS_MAGIC); + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + ASSERT(ksp->ks_ndata == 1); + rc = kstat_seq_show_raw(f, ksp->ks_data, + ksp->ks_data_size); + break; + case KSTAT_TYPE_NAMED: + rc = kstat_seq_show_named(f, (kstat_named_t *)p); + break; + case KSTAT_TYPE_INTR: + rc = kstat_seq_show_intr(f, (kstat_intr_t *)p); + break; + case KSTAT_TYPE_IO: + rc = kstat_seq_show_io(f, (kstat_io_t *)p); + break; + case KSTAT_TYPE_TIMER: + rc = kstat_seq_show_timer(f, (kstat_timer_t *)p); + break; + default: + SBUG(); /* Unreachable */ + } + + return rc; +} + +static void * +kstat_seq_data_addr(kstat_t *ksp, loff_t n) +{ + void *rc = NULL; + ENTRY; + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + rc = ksp->ks_data; + break; + case KSTAT_TYPE_NAMED: + rc = ksp->ks_data + n * sizeof(kstat_named_t); + break; + case KSTAT_TYPE_INTR: + rc = ksp->ks_data + n * sizeof(kstat_intr_t); + break; + case KSTAT_TYPE_IO: + rc = ksp->ks_data + n * sizeof(kstat_io_t); + break; + case KSTAT_TYPE_TIMER: + rc = ksp->ks_data + n * sizeof(kstat_timer_t); + break; + default: + SBUG(); /* Unreachable */ + } + + RETURN(rc); +} + +static void * +kstat_seq_start(struct seq_file *f, loff_t *pos) +{ + loff_t n = *pos; + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + ENTRY; + + spin_lock(&ksp->ks_lock); + ksp->ks_snaptime = gethrtime(); + + if (!n) + kstat_seq_show_headers(f); + + if (n >= ksp->ks_ndata) + RETURN(NULL); + + RETURN(kstat_seq_data_addr(ksp, n)); +} + +static void * +kstat_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + ENTRY; + + ++*pos; + if (*pos >= ksp->ks_ndata) + RETURN(NULL); + + 
RETURN(kstat_seq_data_addr(ksp, *pos)); +} + +static void +kstat_seq_stop(struct seq_file *f, void *v) +{ + kstat_t *ksp = (kstat_t *)f->private; + ASSERT(ksp->ks_magic == KS_MAGIC); + + spin_unlock(&ksp->ks_lock); +} + +static struct seq_operations kstat_seq_ops = { + .show = kstat_seq_show, + .start = kstat_seq_start, + .next = kstat_seq_next, + .stop = kstat_seq_stop, +}; + +static int +proc_kstat_open(struct inode *inode, struct file *filp) +{ + struct seq_file *f; + int rc; + + rc = seq_open(filp, &kstat_seq_ops); + if (rc) + return rc; + + f = filp->private_data; + f->private = PDE(inode)->data; + + return rc; +} + +static struct file_operations proc_kstat_operations = { + .open = proc_kstat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +kstat_t * +__kstat_create(const char *ks_module, int ks_instance, const char *ks_name, + const char *ks_class, uchar_t ks_type, uint_t ks_ndata, + uchar_t ks_flags) +{ + kstat_t *ksp; + + ASSERT(ks_module); + ASSERT(ks_instance == 0); + ASSERT(ks_name); + ASSERT(!(ks_flags & KSTAT_FLAG_UNSUPPORTED)); + + if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) + ASSERT(ks_ndata == 1); + + ksp = kmem_zalloc(sizeof(*ksp), KM_SLEEP); + if (ksp == NULL) + return ksp; + + spin_lock(&kstat_lock); + ksp->ks_kid = kstat_id; + kstat_id++; + spin_unlock(&kstat_lock); + + ksp->ks_magic = KS_MAGIC; + spin_lock_init(&ksp->ks_lock); + INIT_LIST_HEAD(&ksp->ks_list); + + ksp->ks_crtime = gethrtime(); + ksp->ks_snaptime = ksp->ks_crtime; + strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN); + ksp->ks_instance = ks_instance; + strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN); + strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN); + ksp->ks_type = ks_type; + ksp->ks_flags = ks_flags; + + switch (ksp->ks_type) { + case KSTAT_TYPE_RAW: + ksp->ks_ndata = 1; + ksp->ks_data_size = ks_ndata; + break; + case KSTAT_TYPE_NAMED: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_named_t); + break; + case KSTAT_TYPE_INTR: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_intr_t); + break; + case KSTAT_TYPE_IO: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_io_t); + break; + case KSTAT_TYPE_TIMER: + ksp->ks_ndata = ks_ndata; + ksp->ks_data_size = ks_ndata * sizeof(kstat_timer_t); + break; + default: + SBUG(); /* Unreachable */ + } + + if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) { + ksp->ks_data = NULL; + } else { + ksp->ks_data = kmem_alloc(ksp->ks_data_size, KM_SLEEP); + if (ksp->ks_data == NULL) { + kmem_free(ksp, sizeof(*ksp)); + ksp = NULL; + } + } + + return ksp; +} +EXPORT_SYMBOL(__kstat_create); + +void +__kstat_install(kstat_t *ksp) +{ + struct proc_dir_entry *de_module, *de_name; + kstat_t *tmp; + int rc = 0; + ENTRY; + + spin_lock(&kstat_lock); + + /* Item may only be added to the list once */ + list_for_each_entry(tmp, &kstat_list, ks_list) { + if (tmp == ksp) { + spin_unlock(&kstat_lock); + GOTO(out, rc = -EEXIST); + } + } + + list_add_tail(&ksp->ks_list, &kstat_list); + spin_unlock(&kstat_lock); + + de_module = proc_dir_entry_find(proc_spl_kstat, ksp->ks_module); + if (de_module == NULL) { + de_module = proc_mkdir(ksp->ks_module, proc_spl_kstat); + if (de_module == NULL) + GOTO(out, rc = -EUNATCH); + } + + de_name = create_proc_entry(ksp->ks_name, 0444, de_module); + if (de_name == NULL) + GOTO(out, rc = -EUNATCH); + + spin_lock(&ksp->ks_lock); + ksp->ks_proc = de_name; + de_name->proc_fops = &proc_kstat_operations; + de_name->data = (void *)ksp; + 
spin_unlock(&ksp->ks_lock); +out: + if (rc) { + spin_lock(&kstat_lock); + list_del_init(&ksp->ks_list); + spin_unlock(&kstat_lock); + } + + EXIT; +} +EXPORT_SYMBOL(__kstat_install); + +void +__kstat_delete(kstat_t *ksp) +{ + struct proc_dir_entry *de_module; + + spin_lock(&kstat_lock); + list_del_init(&ksp->ks_list); + spin_unlock(&kstat_lock); + + if (ksp->ks_proc) { + de_module = ksp->ks_proc->parent; + remove_proc_entry(ksp->ks_name, de_module); + + /* Remove top level module directory if it's empty */ + if (proc_dir_entries(de_module) == 0) + remove_proc_entry(de_module->name, de_module->parent); + } + + if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) + kmem_free(ksp->ks_data, ksp->ks_data_size); + + kmem_free(ksp, sizeof(*ksp)); + + return; +} +EXPORT_SYMBOL(__kstat_delete); + +#endif /* DEBUG_KSTAT */ + +int +kstat_init(void) +{ + ENTRY; +#ifdef DEBUG_KSTAT + spin_lock_init(&kstat_lock); + INIT_LIST_HEAD(&kstat_list); + kstat_id = 0; +#endif /* DEBUG_KSTAT */ + RETURN(0); +} + +void +kstat_fini(void) +{ + ENTRY; +#ifdef DEBUG_KSTAT + ASSERT(list_empty(&kstat_list)); +#endif /* DEBUG_KSTAT */ + EXIT; +} + diff --git a/module/spl/spl-module.c b/module/spl/spl-module.c new file mode 100644 index 000000000..c1d030f24 --- /dev/null +++ b/module/spl/spl-module.c @@ -0,0 +1,331 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <sys/sysmacros.h> +#include <sys/sunddi.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_MODULE + +static spinlock_t dev_info_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(dev_info_list); + +static struct dev_info * +get_dev_info(dev_t dev) +{ + struct dev_info *di; + + spin_lock(&dev_info_lock); + + list_for_each_entry(di, &dev_info_list, di_list) + if (di->di_dev == dev) + goto out; + + di = NULL; +out: + spin_unlock(&dev_info_lock); + return di; +} + +static int +mod_generic_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct dev_info *di; + int rc, flags = 0, rvalp = 0; + cred_t *cr = NULL; + + di = get_dev_info(MKDEV(imajor(ino), iminor(ino))); + if (di == NULL) + return EINVAL; + + rc = di->di_ops->devo_cb_ops->cb_ioctl(di->di_dev, + (int)cmd,(intptr_t)arg, + flags, cr, &rvalp); + return rc; +} + +int +__ddi_create_minor_node(dev_info_t *di, char *name, int spec_type, + minor_t minor_num, char *node_type, + int flags, struct module *mod) +{ + struct cdev *cdev; + struct dev_ops *dev_ops; + struct cb_ops *cb_ops; + struct file_operations *fops; + int rc; + ENTRY; + + ASSERT(spec_type == S_IFCHR); + ASSERT(minor_num < di->di_minors); + ASSERT(!strcmp(node_type, DDI_PSEUDO)); + + fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL); + if (fops == NULL) + RETURN(DDI_FAILURE); + + cdev = cdev_alloc(); + if (cdev == NULL) { + kfree(fops); + RETURN(DDI_FAILURE); + } + + cdev->ops = fops; + + mutex_enter(&di->di_lock); + dev_ops = di->di_ops; + ASSERT(dev_ops); + cb_ops = di->di_ops->devo_cb_ops; + ASSERT(cb_ops); + + /* Setup the fops to cb_ops mapping */ + fops->owner = mod; + if (cb_ops->cb_ioctl) + fops->ioctl = mod_generic_ioctl; + +#if 0 + if (cb_ops->cb_open) + fops->open = mod_generic_open; + + if (cb_ops->cb_close) + fops->release = mod_generic_close; + + if (cb_ops->cb_read) + fops->read = mod_generic_read; + + if (cb_ops->cb_write) + fops->write = mod_generic_write; +#endif + /* XXX: Currently unsupported operations */ + ASSERT(cb_ops->cb_open == NULL); + ASSERT(cb_ops->cb_close == NULL); + ASSERT(cb_ops->cb_read == NULL); + ASSERT(cb_ops->cb_write == NULL); + ASSERT(cb_ops->cb_strategy == NULL); + ASSERT(cb_ops->cb_print == NULL); + ASSERT(cb_ops->cb_dump == NULL); + ASSERT(cb_ops->cb_devmap == NULL); + ASSERT(cb_ops->cb_mmap == NULL); + ASSERT(cb_ops->cb_segmap == NULL); + ASSERT(cb_ops->cb_chpoll == NULL); + ASSERT(cb_ops->cb_prop_op == NULL); + ASSERT(cb_ops->cb_str == NULL); + ASSERT(cb_ops->cb_aread == NULL); + ASSERT(cb_ops->cb_awrite == NULL); + + di->di_cdev = cdev; + di->di_flags = flags; + di->di_minor = minor_num; + di->di_dev = MKDEV(di->di_major, di->di_minor); + + rc = cdev_add(cdev, di->di_dev, 1); + if (rc) { + CERROR("Error adding cdev, %d\n", rc); + kfree(fops); + cdev_del(cdev); + mutex_exit(&di->di_lock); + RETURN(DDI_FAILURE); + } + + spin_lock(&dev_info_lock); + list_add(&di->di_list, &dev_info_list); + spin_unlock(&dev_info_lock); + + mutex_exit(&di->di_lock); + + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(__ddi_create_minor_node); + +static void +__ddi_remove_minor_node_locked(dev_info_t *di, char *name) +{ + if (di->di_cdev) { + cdev_del(di->di_cdev); + di->di_cdev = NULL; + } + + spin_lock(&dev_info_lock); + list_del_init(&di->di_list); + spin_unlock(&dev_info_lock); +} + +void +__ddi_remove_minor_node(dev_info_t *di, char *name) +{ + ENTRY; + mutex_enter(&di->di_lock); + __ddi_remove_minor_node_locked(di, name); + mutex_exit(&di->di_lock); + EXIT; +} 
+EXPORT_SYMBOL(__ddi_remove_minor_node); + +int +ddi_quiesce_not_needed(dev_info_t *dip) +{ + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(ddi_quiesce_not_needed); + +#if 0 +static int +mod_generic_open(struct inode *, struct file *) +{ + open(dev_t *devp, int flags, int otyp, cred_t *credp); +} + +static int +mod_generic_close(struct inode *, struct file *) +{ + close(dev_t dev, int flags, int otyp, cred_t *credp); +} + +static ssize_t +mod_generic_read(struct file *, char __user *, size_t, loff_t *) +{ + read(dev_t dev, struct uio *uiop, cred_t *credp); +} + +static ssize_t +mod_generic_write(struct file *, const char __user *, size_t, loff_t *) +{ + write(dev_t dev, struct uio *uiop, cred_t *credp); +} +#endif + +static struct dev_info * +dev_info_alloc(major_t major, minor_t minors, struct dev_ops *ops) { + struct dev_info *di; + + di = kmalloc(sizeof(struct dev_info), GFP_KERNEL); + if (di == NULL) + return NULL; + + mutex_init(&di->di_lock, NULL, MUTEX_DEFAULT, NULL); + INIT_LIST_HEAD(&di->di_list); + di->di_ops = ops; + di->di_class = NULL; + di->di_cdev = NULL; + di->di_major = major; + di->di_minor = 0; + di->di_minors = minors; + di->di_dev = 0; + + return di; +} + +static void +dev_info_free(struct dev_info *di) +{ + mutex_enter(&di->di_lock); + __ddi_remove_minor_node_locked(di, NULL); + mutex_exit(&di->di_lock); + mutex_destroy(&di->di_lock); + kfree(di); +} + +int +__mod_install(struct modlinkage *modlp) +{ + struct modldrv *drv = modlp->ml_modldrv; + struct dev_info *di; + int rc; + ENTRY; + + di = dev_info_alloc(modlp->ml_major, modlp->ml_minors, + drv->drv_dev_ops); + if (di == NULL) + RETURN(ENOMEM); + + /* XXX: Really we need to be calling devo_probe if it's available + * and then calling devo_attach for each device discovered. However + * for now we just call it once and let the app sort it out. + */ + rc = drv->drv_dev_ops->devo_attach(di, DDI_ATTACH); + if (rc != DDI_SUCCESS) { + dev_info_free(di); + RETURN(rc); + } + + drv->drv_dev_info = di; + + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(__mod_install); + +int +__mod_remove(struct modlinkage *modlp) +{ + struct modldrv *drv = modlp->ml_modldrv; + struct dev_info *di = drv->drv_dev_info; + int rc; + ENTRY; + + rc = drv->drv_dev_ops->devo_detach(di, DDI_DETACH); + if (rc != DDI_SUCCESS) + RETURN(rc); + + dev_info_free(di); + drv->drv_dev_info = NULL; + + RETURN(DDI_SUCCESS); +} +EXPORT_SYMBOL(__mod_remove); + +int +ldi_ident_from_mod(struct modlinkage *modlp, ldi_ident_t *lip) +{ + ldi_ident_t li; + ENTRY; + + ASSERT(modlp); + ASSERT(lip); + + li = kmalloc(sizeof(struct ldi_ident), GFP_KERNEL); + if (li == NULL) + RETURN(ENOMEM); + + li->li_dev = MKDEV(modlp->ml_major, 0); + *lip = li; + + RETURN(0); +} +EXPORT_SYMBOL(ldi_ident_from_mod); + +void +ldi_ident_release(ldi_ident_t lip) +{ + ENTRY; + ASSERT(lip); + kfree(lip); + EXIT; +} +EXPORT_SYMBOL(ldi_ident_release); diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c new file mode 100644 index 000000000..e7ec41cf4 --- /dev/null +++ b/module/spl/spl-mutex.c @@ -0,0 +1,309 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ *         Brian Behlendorf <[email protected]>,
+ *         Herb Wartens <[email protected]>,
+ *         Jim Garlick <[email protected]>
+ * UCRL-CODE-235197
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <sys/mutex.h>
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_MUTEX
+
+/* Mutex implementation based on those found in Solaris.  This means
+ * the MUTEX_DEFAULT type is an adaptive mutex.  When calling
+ * mutex_enter() your process will spin waiting for the lock if it's
+ * likely the lock will be freed shortly.  If it looks like the
+ * lock will be held for a longer time we schedule and sleep waiting
+ * for it.  This determination is made by checking if the holder of
+ * the lock is currently running on cpu or sleeping waiting to be
+ * scheduled.  If the holder is currently running it's likely the
+ * lock will be shortly dropped.
+ *
+ * XXX: This is basically a rough implementation to see if this
+ * helps our performance.  If it does a more careful implementation
+ * should be done, perhaps in assembly.
+ */
+
+/*  0: Never spin when trying to acquire the lock
+ * -1: Spin until acquired or the holder yields without dropping the lock
+ * 1-MAX_INT: Spin for N attempts before sleeping for the lock
+ */
+int mutex_spin_max = 0;
+
+#ifdef DEBUG_MUTEX
+int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
+spinlock_t mutex_stats_lock;
+struct list_head mutex_stats_list;
+#endif
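
As a usage illustration, a caller would normally reach this implementation through the Solaris-style wrappers; it is assumed here (not shown in this diff) that sys/mutex.h maps mutex_init(), mutex_enter(), mutex_exit(), mutex_owned() and mutex_tryenter() onto the __spl_mutex_init(), __mutex_enter(), __mutex_exit(), __mutex_owned() and __mutex_tryenter() functions below; the lock and counter are hypothetical:

    static kmutex_t my_lock;   /* hypothetical lock */
    static int my_counter;     /* hypothetical shared state */

    static void
    my_mutex_example(void)
    {
            mutex_init(&my_lock, "my_lock", MUTEX_DEFAULT, NULL);

            mutex_enter(&my_lock);          /* adaptive: may spin, then sleep */
            my_counter++;
            ASSERT(mutex_owned(&my_lock));  /* current task is the owner */
            mutex_exit(&my_lock);

            if (mutex_tryenter(&my_lock)) { /* non-blocking attempt */
                    my_counter++;
                    mutex_exit(&my_lock);
            }

            mutex_destroy(&my_lock);
    }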
+
+int
+__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
+{
+	int flags = KM_SLEEP;
+
+	ASSERT(mp);
+	ASSERT(name);
+	ASSERT(ibc == NULL);
+	ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */
+
+	mp->km_name = NULL;
+	mp->km_name_size = strlen(name) + 1;
+
+	switch (type) {
+	case MUTEX_DEFAULT:
+		mp->km_type = MUTEX_ADAPTIVE;
+		break;
+	case MUTEX_SPIN:
+	case MUTEX_ADAPTIVE:
+		mp->km_type = type;
+		break;
+	default:
+		SBUG();
+	}
+
+	/* We may be called when there is a non-zero preempt_count or
+	 * interrupts are disabled, in which case we must not sleep.
+	 */
+	if (current_thread_info()->preempt_count || irqs_disabled())
+		flags = KM_NOSLEEP;
+
+	/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
+	mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags);
+	if (mp->km_sem == NULL)
+		return -ENOMEM;
+
+	mp->km_name = kmem_alloc(mp->km_name_size, flags);
+	if (mp->km_name == NULL) {
+		kmem_free(mp->km_sem, sizeof(struct semaphore));
+		return -ENOMEM;
+	}
+
+	sema_init(mp->km_sem, 1);
+	strncpy(mp->km_name, name, mp->km_name_size);
+
+#ifdef DEBUG_MUTEX
+	mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags);
+	if (mp->km_stats == NULL) {
+		kmem_free(mp->km_name, mp->km_name_size);
+		kmem_free(mp->km_sem, sizeof(struct semaphore));
+		return -ENOMEM;
+	}
+
+	/* XXX - This appears to be a much more contended lock than I
+	 * would have expected.  To run with this debugging enabled and
+	 * get reasonable performance we may need to be more clever and
+	 * do something like hash the mutex ptr on to one of several
+	 * lists to ease this single point of contention.
+	 */
+	spin_lock(&mutex_stats_lock);
+	list_add_tail(&mp->km_list, &mutex_stats_list);
+	spin_unlock(&mutex_stats_lock);
+#endif
+	mp->km_magic = KM_MAGIC;
+	mp->km_owner = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(__spl_mutex_init);
+
+void
+__spl_mutex_destroy(kmutex_t *mp)
+{
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+
+#ifdef DEBUG_MUTEX
+	spin_lock(&mutex_stats_lock);
+	list_del_init(&mp->km_list);
+	spin_unlock(&mutex_stats_lock);
+
+	kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
+#endif
+	kmem_free(mp->km_name, mp->km_name_size);
+	kmem_free(mp->km_sem, sizeof(struct semaphore));
+
+	memset(mp, KM_POISON, sizeof(*mp));
+}
+EXPORT_SYMBOL(__spl_mutex_destroy);
+
+/* Return 1 if we acquired the mutex, else zero. */
+int
+__mutex_tryenter(kmutex_t *mp)
+{
+	int rc;
+	ENTRY;
+
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
+	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);
+
+	rc = down_trylock(mp->km_sem);
+	if (rc == 0) {
+		ASSERT(mp->km_owner == NULL);
+		mp->km_owner = current;
+		MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
+	}
+
+	RETURN(!rc);
+}
+EXPORT_SYMBOL(__mutex_tryenter);
+
+#ifndef HAVE_TASK_CURR
+#define task_curr(owner)	0
+#endif
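
mutex_enter_adaptive() below spins only while the holder is on CPU and the spin budget lasts, otherwise it sleeps on the semaphore. A standalone mock of that decision loop, with hypothetical values standing in for task_curr() and down_trylock():

    #include <stdio.h>

    static int holder_on_cpu = 1;      /* stands in for task_curr(owner) */
    static int lock_available_at = 3;  /* trylock succeeds on this attempt */
    static int mutex_spin_max = 100;   /* spin budget, as tuned via sysfs */

    int main(void)
    {
            int count;

            for (count = 0; holder_on_cpu && count <= mutex_spin_max;
                 count++) {
                    if (count == lock_available_at) {  /* down_trylock() == 0 */
                            printf("acquired by spinning after %d tries\n",
                                   count);
                            return 0;
                    }
            }

            printf("holder off-cpu or budget spent: sleep on semaphore\n");
            return 0;
    }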
+
+static void
+mutex_enter_adaptive(kmutex_t *mp)
+{
+	struct task_struct *owner;
+	int count = 0;
+
+	/* The lock is not held, so we expect to acquire it */
+	if ((owner = mp->km_owner) == NULL) {
+		down(mp->km_sem);
+		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
+	} else {
+		/* The lock is held by a currently running task which
+		 * we expect will drop the lock before leaving the
+		 * head of the runqueue.  So the ideal thing to do
+		 * is spin until we acquire the lock and avoid a
+		 * context switch.  However it is also possible the
+		 * task holding the lock yields the processor without
+		 * dropping the lock.  In that case, we know it's
+		 * going to be a while so we stop spinning and go
+		 * to sleep waiting for the lock to be available.
+		 * This should strike the optimum balance between
+		 * spinning and sleeping waiting for a lock.
+		 */
+		while (task_curr(owner) && (count <= mutex_spin_max)) {
+			if (down_trylock(mp->km_sem) == 0) {
+				MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+				MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+				GOTO(out, count);
+			}
+			count++;
+		}
+
+		/* The lock is held by a sleeping task, so it's going to
+		 * cost us minimally one context switch.  We might as
+		 * well sleep and yield the processor to other tasks.
+		 */
+		down(mp->km_sem);
+		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
+	}
+out:
+	MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
+	MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
+}
+
+void
+__mutex_enter(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+
+	switch (mp->km_type) {
+	case MUTEX_SPIN:
+		while (down_trylock(mp->km_sem));
+		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+		break;
+	case MUTEX_ADAPTIVE:
+		mutex_enter_adaptive(mp);
+		break;
+	}
+
+	ASSERT(mp->km_owner == NULL);
+	mp->km_owner = current;
+
+	EXIT;
+}
+EXPORT_SYMBOL(__mutex_enter);
+
+void
+__mutex_exit(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	ASSERT(mp->km_owner == current);
+	mp->km_owner = NULL;
+	up(mp->km_sem);
+	EXIT;
+}
+EXPORT_SYMBOL(__mutex_exit);
+
+/* Return 1 if the mutex is held by the current process, else zero. */
+int
+__mutex_owned(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	RETURN(mp->km_owner == current);
+}
+EXPORT_SYMBOL(__mutex_owned);
+
+/* Return the owner if the mutex is owned, else NULL. */
+kthread_t *
+__spl_mutex_owner(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	RETURN(mp->km_owner);
+}
+EXPORT_SYMBOL(__spl_mutex_owner);
+
+int
+spl_mutex_init(void)
+{
+	ENTRY;
+#ifdef DEBUG_MUTEX
+	spin_lock_init(&mutex_stats_lock);
+	INIT_LIST_HEAD(&mutex_stats_list);
+#endif
+	RETURN(0);
+}
+
+void
+spl_mutex_fini(void)
+{
+	ENTRY;
+#ifdef DEBUG_MUTEX
+	ASSERT(list_empty(&mutex_stats_list));
+#endif
+	EXIT;
+}
+
+module_param(mutex_spin_max, int, 0644);
+MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to acquire lock");
diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c
new file mode 100644
index 000000000..bf185c60e
--- /dev/null
+++ b/module/spl/spl-proc.c
@@ -0,0 +1,1049 @@
+/*
+ * This file is part of the SPL: Solaris Porting Layer.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ *         Brian Behlendorf <[email protected]>,
+ *         Herb Wartens <[email protected]>,
+ *         Jim Garlick <[email protected]>
+ * UCRL-CODE-235197
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */ + +#include <sys/proc.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_PROC + +#ifdef DEBUG_KMEM +static unsigned long table_min = 0; +static unsigned long table_max = ~0; +#endif + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *spl_header = NULL; +#endif /* CONFIG_SYSCTL */ + +#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) +static struct proc_dir_entry *proc_spl = NULL; +#ifdef DEBUG_MUTEX +static struct proc_dir_entry *proc_spl_mutex = NULL; +static struct proc_dir_entry *proc_spl_mutex_stats = NULL; +#endif /* DEBUG_MUTEX */ +#ifdef DEBUG_KMEM +static struct proc_dir_entry *proc_spl_kmem = NULL; +static struct proc_dir_entry *proc_spl_kmem_slab = NULL; +#endif /* DEBUG_KMEM */ +#ifdef DEBUG_KSTAT +struct proc_dir_entry *proc_spl_kstat = NULL; +#endif /* DEBUG_KSTAT */ +#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ + +#ifdef HAVE_CTL_UNNUMBERED + +#define CTL_SPL CTL_UNNUMBERED +#define CTL_SPL_DEBUG CTL_UNNUMBERED +#define CTL_SPL_MUTEX CTL_UNNUMBERED +#define CTL_SPL_KMEM CTL_UNNUMBERED +#define CTL_SPL_KSTAT CTL_UNNUMBERED + +#define CTL_VERSION CTL_UNNUMBERED /* Version */ +#define CTL_HOSTID CTL_UNNUMBERED /* Host id by /usr/bin/hostid */ +#define CTL_HW_SERIAL CTL_UNNUMBERED /* HW serial number by hostid */ + +#define CTL_DEBUG_SUBSYS CTL_UNNUMBERED /* Debug subsystem */ +#define CTL_DEBUG_MASK CTL_UNNUMBERED /* Debug mask */ +#define CTL_DEBUG_PRINTK CTL_UNNUMBERED /* All messages to console */ +#define CTL_DEBUG_MB CTL_UNNUMBERED /* Debug buffer size */ +#define CTL_DEBUG_BINARY CTL_UNNUMBERED /* Binary data in buffer */ +#define CTL_DEBUG_CATASTROPHE CTL_UNNUMBERED /* Set if BUG'd or panic'd */ +#define CTL_DEBUG_PANIC_ON_BUG CTL_UNNUMBERED /* Should panic on BUG */ +#define CTL_DEBUG_PATH CTL_UNNUMBERED /* Dump log location */ +#define CTL_DEBUG_DUMP CTL_UNNUMBERED /* Dump debug buffer to file */ +#define CTL_DEBUG_FORCE_BUG CTL_UNNUMBERED /* Hook to force a BUG */ +#define CTL_DEBUG_STACK_SIZE CTL_UNNUMBERED /* Max observed stack size */ + +#define CTL_CONSOLE_RATELIMIT CTL_UNNUMBERED /* Ratelimit console messages */ +#define CTL_CONSOLE_MAX_DELAY_CS CTL_UNNUMBERED /* Max delay skip messages */ +#define CTL_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED /* Init delay skip messages */ +#define CTL_CONSOLE_BACKOFF CTL_UNNUMBERED /* Delay increase factor */ + +#ifdef DEBUG_KMEM +#define CTL_KMEM_KMEMUSED CTL_UNNUMBERED /* Alloc'd kmem bytes */ +#define CTL_KMEM_KMEMMAX CTL_UNNUMBERED /* Max alloc'd by kmem bytes */ +#define CTL_KMEM_VMEMUSED CTL_UNNUMBERED /* Alloc'd vmem bytes */ +#define CTL_KMEM_VMEMMAX CTL_UNNUMBERED /* Max alloc'd by vmem bytes */ +#define CTL_KMEM_ALLOC_FAILED CTL_UNNUMBERED /* Cache allocations failed */ +#endif + +#define CTL_MUTEX_STATS CTL_UNNUMBERED /* Global mutex statistics */ +#define CTL_MUTEX_STATS_PER CTL_UNNUMBERED /* Per mutex statistics */ +#define CTL_MUTEX_SPIN_MAX CTL_UNNUMBERED /* Max mutex spin iterations */ + +#else /* HAVE_CTL_UNNUMBERED */ + +#define CTL_SPL 0x87 +#define CTL_SPL_DEBUG 0x88 +#define CTL_SPL_MUTEX 0x89 +#define CTL_SPL_KMEM 0x90 +#define CTL_SPL_KSTAT 0x91 + +enum { + CTL_VERSION = 1, /* Version */ + CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ + CTL_HW_SERIAL, /* Hardware serial number from hostid */ + + CTL_DEBUG_SUBSYS, /* Debug subsystem */ + CTL_DEBUG_MASK, /* Debug mask */ + CTL_DEBUG_PRINTK, /* Force all messages to console */ + CTL_DEBUG_MB, /* Debug buffer size */ + CTL_DEBUG_BINARY, /* Include binary data in buffer */ + 
CTL_DEBUG_CATASTROPHE,    /* Set if we have BUG'd or panic'd */
+	CTL_DEBUG_PANIC_ON_BUG,   /* Set if we should panic on BUG */
+	CTL_DEBUG_PATH,           /* Dump log location */
+	CTL_DEBUG_DUMP,           /* Dump debug buffer to file */
+	CTL_DEBUG_FORCE_BUG,      /* Hook to force a BUG */
+	CTL_DEBUG_STACK_SIZE,     /* Max observed stack size */
+
+	CTL_CONSOLE_RATELIMIT,    /* Ratelimit console messages */
+	CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */
+	CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */
+	CTL_CONSOLE_BACKOFF,      /* Delay increase factor */
+
+#ifdef DEBUG_KMEM
+	CTL_KMEM_KMEMUSED,        /* Alloc'd kmem bytes */
+	CTL_KMEM_KMEMMAX,         /* Max alloc'd by kmem bytes */
+	CTL_KMEM_VMEMUSED,        /* Alloc'd vmem bytes */
+	CTL_KMEM_VMEMMAX,         /* Max alloc'd by vmem bytes */
+#endif
+
+	CTL_MUTEX_STATS,          /* Global mutex statistics */
+	CTL_MUTEX_STATS_PER,      /* Per mutex statistics */
+	CTL_MUTEX_SPIN_MAX,       /* Maximum mutex spin iterations */
+};
+#endif /* HAVE_CTL_UNNUMBERED */
+
+static int
+proc_copyin_string(char *kbuffer, int kbuffer_size,
+                   const char *ubuffer, int ubuffer_size)
+{
+	int size;
+
+	if (ubuffer_size > kbuffer_size)
+		return -EOVERFLOW;
+
+	if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
+		return -EFAULT;
+
+	/* strip trailing whitespace; pre-decrement so we never read
+	 * kbuffer[-1] when the buffer is entirely whitespace */
+	size = strnlen(kbuffer, ubuffer_size);
+	while (--size >= 0)
+		if (!isspace(kbuffer[size]))
+			break;
+
+	/* empty string */
+	if (size < 0)
+		return -EINVAL;
+
+	/* no space to terminate */
+	if (size == kbuffer_size)
+		return -EOVERFLOW;
+
+	kbuffer[size + 1] = 0;
+	return 0;
+}
+
+static int
+proc_copyout_string(char *ubuffer, int ubuffer_size,
+                    const char *kbuffer, char *append)
+{
+	/* NB if 'append' != NULL, it's a single character to append to
+	 * the copied out string - usually "\n" for /proc entries and
+	 * "" (i.e. a terminating zero byte) for sysctl entries
+	 */
+	int size = MIN(strlen(kbuffer), ubuffer_size);
+
+	if (copy_to_user(ubuffer, kbuffer, size))
+		return -EFAULT;
+
+	if (append != NULL && size < ubuffer_size) {
+		if (copy_to_user(ubuffer + size, append, 1))
+			return -EFAULT;
+
+		size++;
+	}
+
+	return size;
+}
+
+static int
+proc_dobitmasks(struct ctl_table *table, int write, struct file *filp,
+                void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned long *mask = table->data;
+	int is_subsys = (mask == &spl_debug_subsys) ? 1 : 0;
+	int is_printk = (mask == &spl_debug_printk) ?
1 : 0; + int size = 512, rc; + char *str; + ENTRY; + + str = kmem_alloc(size, KM_SLEEP); + if (str == NULL) + RETURN(-ENOMEM); + + if (write) { + rc = proc_copyin_string(str, size, buffer, *lenp); + if (rc < 0) + RETURN(rc); + + rc = spl_debug_str2mask(mask, str, is_subsys); + /* Always print BUG/ASSERT to console, so keep this mask */ + if (is_printk) + *mask |= D_EMERG; + + *ppos += *lenp; + } else { + rc = spl_debug_mask2str(str, size, *mask, is_subsys); + if (*ppos >= rc) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, + str + *ppos, "\n"); + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + kmem_free(str, size); + RETURN(rc); +} + +static int +proc_debug_mb(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char str[32]; + int rc, len; + ENTRY; + + if (write) { + rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); + if (rc < 0) + RETURN(rc); + + rc = spl_debug_set_mb(simple_strtoul(str, NULL, 0)); + *ppos += *lenp; + } else { + len = snprintf(str, sizeof(str), "%d", spl_debug_get_mb()); + if (*ppos >= len) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n"); + + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + RETURN(rc); +} + +static int +proc_dump_kernel(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + ENTRY; + + if (write) { + spl_debug_dumplog(0); + *ppos += *lenp; + } else { + *lenp = 0; + } + + RETURN(0); +} + +static int +proc_force_bug(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + ENTRY; + + if (write) { + CERROR("Crashing due to forced SBUG\n"); + SBUG(); + /* Unreachable */ + } else { + *lenp = 0; + } + + RETURN(0); +} + +static int +proc_console_max_delay_cs(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, max_delay_cs; + struct ctl_table dummy = *table; + long d; + ENTRY; + + dummy.data = &max_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (write) { + max_delay_cs = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (max_delay_cs <= 0) + RETURN(-EINVAL); + + d = (max_delay_cs * HZ) / 100; + if (d == 0 || d < spl_console_min_delay) + RETURN(-EINVAL); + + spl_console_max_delay = d; + } else { + max_delay_cs = (spl_console_max_delay * 100) / HZ; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +static int +proc_console_min_delay_cs(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, min_delay_cs; + struct ctl_table dummy = *table; + long d; + ENTRY; + + dummy.data = &min_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (write) { + min_delay_cs = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (min_delay_cs <= 0) + RETURN(-EINVAL); + + d = (min_delay_cs * HZ) / 100; + if (d == 0 || d > spl_console_max_delay) + RETURN(-EINVAL); + + spl_console_min_delay = d; + } else { + min_delay_cs = (spl_console_min_delay * 100) / HZ; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +static int +proc_console_backoff(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, backoff; + struct ctl_table dummy = *table; + ENTRY; + + dummy.data = &backoff; + 
dummy.proc_handler = &proc_dointvec;
+
+	if (write) {
+		backoff = 0;
+		rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+		if (rc < 0)
+			RETURN(rc);
+
+		if (backoff <= 0)
+			RETURN(-EINVAL);
+
+		spl_console_backoff = backoff;
+	} else {
+		backoff = spl_console_backoff;
+		rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+	}
+
+	RETURN(rc);
+}
+
+#ifdef DEBUG_KMEM
+static int
+proc_doatomic64(struct ctl_table *table, int write, struct file *filp,
+                void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int rc = 0;
+	unsigned long min = 0, max = ~0, val;
+	struct ctl_table dummy = *table;
+	ENTRY;
+
+	dummy.data = &val;
+	dummy.proc_handler = &proc_dointvec;
+	dummy.extra1 = &min;
+	dummy.extra2 = &max;
+
+	if (write) {
+		*ppos += *lenp;
+	} else {
+		val = atomic64_read((atomic64_t *)table->data);
+		rc = proc_doulongvec_minmax(&dummy, write, filp,
+		                            buffer, lenp, ppos);
+	}
+
+	RETURN(rc);
+}
+#endif /* DEBUG_KMEM */
+
+static int
+proc_dohostid(struct ctl_table *table, int write, struct file *filp,
+              void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int len, rc = 0;
+	int32_t val;
+	char *end, str[32];
+	ENTRY;
+
+	if (write) {
+		/* We can't use proc_doulongvec_minmax() in the write
+		 * case here because hostid, while a hex value, has no
+		 * leading 0x, which confuses the helper function. */
+		rc = proc_copyin_string(str, sizeof(str), buffer, *lenp);
+		if (rc < 0)
+			RETURN(rc);
+
+		val = simple_strtol(str, &end, 16);
+		if (str == end)
+			RETURN(-EINVAL);
+
+		spl_hostid = (long)val;
+		(void)snprintf(hw_serial, 11, "%u", (val >= 0) ? val : -val);
+		*ppos += *lenp;
+	} else {
+		len = snprintf(str, sizeof(str), "%lx", spl_hostid);
+		if (*ppos >= len)
+			rc = 0;
+		else
+			rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n");
+
+		if (rc >= 0) {
+			*lenp = rc;
+			*ppos += rc;
+		}
+	}
+
+	RETURN(rc);
+}
+
+#ifdef DEBUG_MUTEX
+static void
+mutex_seq_show_headers(struct seq_file *f)
+{
+	seq_printf(f, "%-36s %-4s %-16s\t"
+	           "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n",
+	           "name", "type", "owner");
+}
+
+static int
+mutex_seq_show(struct seq_file *f, void *p)
+{
+	kmutex_t *mp = p;
+	char t = 'X';
+	int i;
+
+	ASSERT(mp->km_magic == KM_MAGIC);
+
+	switch (mp->km_type) {
+	case MUTEX_DEFAULT:  t = 'D'; break;
+	case MUTEX_SPIN:     t = 'S'; break;
+	case MUTEX_ADAPTIVE: t = 'A'; break;
+	default:
+		SBUG();
+	}
+	seq_printf(f, "%-36s %c ", mp->km_name, t);
+	if (mp->km_owner)
+		seq_printf(f, "%p\t", mp->km_owner);
+	else
+		seq_printf(f, "%-16s\t", "<not held>");
+
+	for (i = 0; i < MUTEX_STATS_SIZE; i++)
+		seq_printf(f, "%d%c", mp->km_stats[i],
+		           (i + 1 == MUTEX_STATS_SIZE) ? '\n' : '\t');
+
+	return 0;
+}
+
+static void *
+mutex_seq_start(struct seq_file *f, loff_t *pos)
+{
+	struct list_head *p;
+	loff_t n = *pos;
+	ENTRY;
+
+	spin_lock(&mutex_stats_lock);
+	if (!n)
+		mutex_seq_show_headers(f);
+
+	p = mutex_stats_list.next;
+	while (n--) {
+		p = p->next;
+		if (p == &mutex_stats_list)
+			RETURN(NULL);
+	}
+
+	RETURN(list_entry(p, kmutex_t, km_list));
+}
+
+static void *
+mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	kmutex_t *mp = p;
+	ENTRY;
+
+	++*pos;
+	RETURN((mp->km_list.next == &mutex_stats_list) ?
+ NULL : list_entry(mp->km_list.next, kmutex_t, km_list)); +} + +static void +mutex_seq_stop(struct seq_file *f, void *v) +{ + spin_unlock(&mutex_stats_lock); +} + +static struct seq_operations mutex_seq_ops = { + .show = mutex_seq_show, + .start = mutex_seq_start, + .next = mutex_seq_next, + .stop = mutex_seq_stop, +}; + +static int +proc_mutex_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &mutex_seq_ops); +} + +static struct file_operations proc_mutex_operations = { + .open = proc_mutex_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* DEBUG_MUTEX */ + +#ifdef DEBUG_KMEM +static void +slab_seq_show_headers(struct seq_file *f) +{ + seq_printf(f, "%-36s\n", "name"); +} + +static int +slab_seq_show(struct seq_file *f, void *p) +{ + spl_kmem_cache_t *skc = p; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + spin_lock(&skc->skc_lock); + seq_printf(f, "%-36s ", skc->skc_name); + seq_printf(f, "%u %u %u - %lu %lu %lu - %lu %lu %lu - %lu %lu %lu\n", + (unsigned)skc->skc_obj_size, + (unsigned)skc->skc_slab_objs, + (unsigned)skc->skc_slab_size, + (long unsigned)skc->skc_slab_fail, + (long unsigned)skc->skc_slab_create, + (long unsigned)skc->skc_slab_destroy, + (long unsigned)skc->skc_slab_total, + (long unsigned)skc->skc_slab_alloc, + (long unsigned)skc->skc_slab_max, + (long unsigned)skc->skc_obj_total, + (long unsigned)skc->skc_obj_alloc, + (long unsigned)skc->skc_obj_max); + + spin_unlock(&skc->skc_lock); + + return 0; +} + +static void * +slab_seq_start(struct seq_file *f, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + ENTRY; + + down_read(&spl_kmem_cache_sem); + if (!n) + slab_seq_show_headers(f); + + p = spl_kmem_cache_list.next; + while (n--) { + p = p->next; + if (p == &spl_kmem_cache_list) + RETURN(NULL); + } + + RETURN(list_entry(p, spl_kmem_cache_t, skc_list)); +} + +static void * +slab_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + spl_kmem_cache_t *skc = p; + ENTRY; + + ++*pos; + RETURN((skc->skc_list.next == &spl_kmem_cache_list) ? 
+ NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list)); +} + +static void +slab_seq_stop(struct seq_file *f, void *v) +{ + up_read(&spl_kmem_cache_sem); +} + +static struct seq_operations slab_seq_ops = { + .show = slab_seq_show, + .start = slab_seq_start, + .next = slab_seq_next, + .stop = slab_seq_stop, +}; + +static int +proc_slab_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &slab_seq_ops); +} + +static struct file_operations proc_slab_operations = { + .open = proc_slab_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* DEBUG_KMEM */ + +static struct ctl_table spl_debug_table[] = { + { + .ctl_name = CTL_DEBUG_SUBSYS, + .procname = "subsystem", + .data = &spl_debug_subsys, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_MASK, + .procname = "mask", + .data = &spl_debug_mask, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_PRINTK, + .procname = "printk", + .data = &spl_debug_printk, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_MB, + .procname = "mb", + .mode = 0644, + .proc_handler = &proc_debug_mb, + }, + { + .ctl_name = CTL_DEBUG_BINARY, + .procname = "binary", + .data = &spl_debug_binary, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_DEBUG_CATASTROPHE, + .procname = "catastrophe", + .data = &spl_debug_catastrophe, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_DEBUG_PANIC_ON_BUG, + .procname = "panic_on_bug", + .data = &spl_debug_panic_on_bug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_DEBUG_PATH, + .procname = "path", + .data = spl_debug_file_path, + .maxlen = sizeof(spl_debug_file_path), + .mode = 0644, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_DEBUG_DUMP, + .procname = "dump", + .mode = 0200, + .proc_handler = &proc_dump_kernel, + }, + { .ctl_name = CTL_DEBUG_FORCE_BUG, + .procname = "force_bug", + .mode = 0200, + .proc_handler = &proc_force_bug, + }, + { + .ctl_name = CTL_CONSOLE_RATELIMIT, + .procname = "console_ratelimit", + .data = &spl_console_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_CONSOLE_MAX_DELAY_CS, + .procname = "console_max_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_max_delay_cs, + }, + { + .ctl_name = CTL_CONSOLE_MIN_DELAY_CS, + .procname = "console_min_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_min_delay_cs, + }, + { + .ctl_name = CTL_CONSOLE_BACKOFF, + .procname = "console_backoff", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_backoff, + }, + { + .ctl_name = CTL_DEBUG_STACK_SIZE, + .procname = "stack_max", + .data = &spl_debug_stack, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + {0}, +}; + +#ifdef DEBUG_MUTEX +static struct ctl_table spl_mutex_table[] = { + { + .ctl_name = CTL_MUTEX_STATS, + .procname = "stats", + .data = &mutex_stats, + .maxlen = sizeof(int) * MUTEX_STATS_SIZE, + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_MUTEX_SPIN_MAX, + .procname = "spin_max", + .data = &mutex_spin_max, + .maxlen = sizeof(int), + .mode = 0644, + 
.proc_handler = &proc_dointvec, + }, + {0}, +}; +#endif /* DEBUG_MUTEX */ + +#ifdef DEBUG_KMEM +static struct ctl_table spl_kmem_table[] = { + { + .ctl_name = CTL_KMEM_KMEMUSED, + .procname = "kmem_used", + .data = &kmem_alloc_used, + .maxlen = sizeof(atomic64_t), + .mode = 0444, + .proc_handler = &proc_doatomic64, + }, + { + .ctl_name = CTL_KMEM_KMEMMAX, + .procname = "kmem_max", + .data = &kmem_alloc_max, + .maxlen = sizeof(unsigned long), + .extra1 = &table_min, + .extra2 = &table_max, + .mode = 0444, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_KMEM_VMEMUSED, + .procname = "vmem_used", + .data = &vmem_alloc_used, + .maxlen = sizeof(atomic64_t), + .mode = 0444, + .proc_handler = &proc_doatomic64, + }, + { + .ctl_name = CTL_KMEM_VMEMMAX, + .procname = "vmem_max", + .data = &vmem_alloc_max, + .maxlen = sizeof(unsigned long), + .extra1 = &table_min, + .extra2 = &table_max, + .mode = 0444, + .proc_handler = &proc_doulongvec_minmax, + }, + {0}, +}; +#endif /* DEBUG_KMEM */ + +#ifdef DEBUG_KSTAT +static struct ctl_table spl_kstat_table[] = { + {0}, +}; +#endif /* DEBUG_KSTAT */ + +static struct ctl_table spl_table[] = { + /* NB No .strategy entries have been provided since + * sysctl(8) prefers to go via /proc for portability. + */ + { + .ctl_name = CTL_VERSION, + .procname = "version", + .data = spl_version, + .maxlen = sizeof(spl_version), + .mode = 0444, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_HOSTID, + .procname = "hostid", + .data = &spl_hostid, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dohostid, + }, + { + .ctl_name = CTL_HW_SERIAL, + .procname = "hw_serial", + .data = hw_serial, + .maxlen = sizeof(hw_serial), + .mode = 0444, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_SPL_DEBUG, + .procname = "debug", + .mode = 0555, + .child = spl_debug_table, + }, +#ifdef DEBUG_MUTEX + { + .ctl_name = CTL_SPL_MUTEX, + .procname = "mutex", + .mode = 0555, + .child = spl_mutex_table, + }, +#endif +#ifdef DEBUG_KMEM + { + .ctl_name = CTL_SPL_KMEM, + .procname = "kmem", + .mode = 0555, + .child = spl_kmem_table, + }, +#endif +#ifdef DEBUG_KSTAT + { + .ctl_name = CTL_SPL_KSTAT, + .procname = "kstat", + .mode = 0555, + .child = spl_kstat_table, + }, +#endif + { 0 }, +}; + +static struct ctl_table spl_dir[] = { + { + .ctl_name = CTL_SPL, + .procname = "spl", + .mode = 0555, + .child = spl_table, + }, + { 0 } +}; + +static struct ctl_table spl_root[] = { + { + .ctl_name = CTL_KERN, + .procname = "kernel", + .mode = 0555, + .child = spl_dir, + }, + { 0 } +}; + +static int +proc_dir_entry_match(int len, const char *name, struct proc_dir_entry *de) +{ + if (de->namelen != len) + return 0; + + return !memcmp(name, de->name, len); +} + +struct proc_dir_entry * +proc_dir_entry_find(struct proc_dir_entry *root, const char *str) +{ + struct proc_dir_entry *de; + + for (de = root->subdir; de; de = de->next) + if (proc_dir_entry_match(strlen(str), str, de)) + return de; + + return NULL; +} + +int +proc_dir_entries(struct proc_dir_entry *root) +{ + struct proc_dir_entry *de; + int i = 0; + + for (de = root->subdir; de; de = de->next) + i++; + + return i; +} + +int +proc_init(void) +{ + int rc = 0; + ENTRY; + +#ifdef CONFIG_SYSCTL + spl_header = spl_register_sysctl_table(spl_root, 0); + if (spl_header == NULL) + RETURN(-EUNATCH); +#endif /* CONFIG_SYSCTL */ + +#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) + proc_spl = proc_mkdir("spl", NULL); + if (proc_spl == NULL) + GOTO(out, rc = -EUNATCH); + 
+#ifdef DEBUG_MUTEX + proc_spl_mutex = proc_mkdir("mutex", proc_spl); + if (proc_spl_mutex == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_mutex_stats = create_proc_entry("stats_per", 0444, + proc_spl_mutex); + if (proc_spl_mutex_stats == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_mutex_stats->proc_fops = &proc_mutex_operations; +#endif /* DEBUG_MUTEX */ + +#ifdef DEBUG_KMEM + proc_spl_kmem = proc_mkdir("kmem", proc_spl); + if (proc_spl_kmem == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_kmem_slab = create_proc_entry("slab", 0444, proc_spl_kmem); + if (proc_spl_kmem_slab == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_kmem_slab->proc_fops = &proc_slab_operations; +#endif /* DEBUG_KMEM */ + +#ifdef DEBUG_KSTAT + proc_spl_kstat = proc_mkdir("kstat", proc_spl); + if (proc_spl_kstat == NULL) + GOTO(out, rc = -EUNATCH); +#endif /* DEBUG_KSTAT */ + +out: + if (rc) { + remove_proc_entry("kstat", proc_spl); +#ifdef DEBUG_KMEM + remove_proc_entry("slab", proc_spl_kmem); +#endif + remove_proc_entry("kmem", proc_spl); +#ifdef DEBUG_MUTEX + remove_proc_entry("stats_per", proc_spl_mutex); +#endif + remove_proc_entry("mutex", proc_spl); + remove_proc_entry("spl", NULL); +#ifdef CONFIG_SYSCTL + spl_unregister_sysctl_table(spl_header); +#endif /* CONFIG_SYSCTL */ + } +#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ + + RETURN(rc); +} + +void +proc_fini(void) +{ + ENTRY; + +#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) + remove_proc_entry("kstat", proc_spl); +#ifdef DEBUG_KMEM + remove_proc_entry("slab", proc_spl_kmem); +#endif + remove_proc_entry("kmem", proc_spl); +#ifdef DEBUG_MUTEX + remove_proc_entry("stats_per", proc_spl_mutex); +#endif + remove_proc_entry("mutex", proc_spl); + remove_proc_entry("spl", NULL); +#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ + +#ifdef CONFIG_SYSCTL + ASSERT(spl_header != NULL); + spl_unregister_sysctl_table(spl_header); +#endif /* CONFIG_SYSCTL */ + + EXIT; +} diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c new file mode 100644 index 000000000..07fc2aae4 --- /dev/null +++ b/module/spl/spl-rwlock.c @@ -0,0 +1,361 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */
+
+#include <sys/rwlock.h>
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_RWLOCK
+
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+struct rwsem_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	unsigned int flags;
+#define RWSEM_WAITING_FOR_READ  0x00000001
+#define RWSEM_WAITING_FOR_WRITE 0x00000002
+};
+/* wake a single writer */
+static struct rw_semaphore *
+__rwsem_wake_one_writer_locked(struct rw_semaphore *sem)
+{
+	struct rwsem_waiter *waiter;
+	struct task_struct *tsk;
+
+	sem->activity = -1;
+
+	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+	list_del(&waiter->list);
+
+	tsk = waiter->task;
+	smp_mb();
+	waiter->task = NULL;
+	wake_up_process(tsk);
+	put_task_struct(tsk);
+	return sem;
+}
+
+/* release a read lock on the semaphore */
+static void
+__up_read_locked(struct rw_semaphore *sem)
+{
+	if (--sem->activity == 0 && !list_empty(&sem->wait_list))
+		(void)__rwsem_wake_one_writer_locked(sem);
+}
+
+/* trylock for writing -- returns 1 if successful, 0 if contention */
+static int
+__down_write_trylock_locked(struct rw_semaphore *sem)
+{
+	int ret = 0;
+
+	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
+		/* granted */
+		sem->activity = -1;
+		ret = 1;
+	}
+
+	return ret;
+}
+#endif
+
+void
+__rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg)
+{
+	int flags = KM_SLEEP;
+
+	ASSERT(rwlp);
+	ASSERT(name);
+	ASSERT(type == RW_DEFAULT);	/* XXX no irq handler use */
+	ASSERT(arg == NULL);		/* XXX no irq handler use */
+
+	rwlp->rw_magic = RW_MAGIC;
+	rwlp->rw_owner = NULL;
+	rwlp->rw_name = NULL;
+	rwlp->rw_name_size = strlen(name) + 1;
+
+	/* We may be called when there is a non-zero preempt_count or
+	 * interrupts are disabled, in which case we must not sleep.
+	 */
+	if (current_thread_info()->preempt_count || irqs_disabled())
+		flags = KM_NOSLEEP;
+
+	rwlp->rw_name = kmem_alloc(rwlp->rw_name_size, flags);
+	if (rwlp->rw_name == NULL)
+		return;
+
+	init_rwsem(&rwlp->rw_sem);
+	strcpy(rwlp->rw_name, name);
+}
+EXPORT_SYMBOL(__rw_init);
+
+void
+__rw_destroy(krwlock_t *rwlp)
+{
+	ASSERT(rwlp);
+	ASSERT(rwlp->rw_magic == RW_MAGIC);
+	ASSERT(rwlp->rw_owner == NULL);
+	spin_lock(&rwlp->rw_sem.wait_lock);
+	ASSERT(list_empty(&rwlp->rw_sem.wait_list));
+	spin_unlock(&rwlp->rw_sem.wait_lock);
+
+	kmem_free(rwlp->rw_name, rwlp->rw_name_size);
+
+	memset(rwlp, RW_POISON, sizeof(krwlock_t));
+}
+EXPORT_SYMBOL(__rw_destroy);
+
+/* Return 0 if the lock could not be obtained without blocking. */
+int
+__rw_tryenter(krwlock_t *rwlp, krw_t rw)
+{
+	int rc = 0;
+	ENTRY;
+
+	ASSERT(rwlp);
+	ASSERT(rwlp->rw_magic == RW_MAGIC);
+
+	switch (rw) {
+	/* these functions return 1 if success, 0 if contention */
+	case RW_READER:
+		/* Here the Solaris code would return 0
+		 * if there were any write waiters.  Specifically
+		 * thinking about the case where readers may have
+		 * the lock and we would also allow this thread
+		 * to grab the read lock with a writer waiting in the
+		 * queue.  This doesn't seem like a correctness
+		 * issue, so just call down_read_trylock()
+		 * for the test.
We may have to revisit this if + * it becomes an issue */ + rc = down_read_trylock(&rwlp->rw_sem); + break; + case RW_WRITER: + rc = down_write_trylock(&rwlp->rw_sem); + if (rc) { + /* there better not be anyone else + * holding the write lock here */ + ASSERT(rwlp->rw_owner == NULL); + rwlp->rw_owner = current; + } + break; + default: + SBUG(); + } + + RETURN(rc); +} +EXPORT_SYMBOL(__rw_tryenter); + +void +__rw_enter(krwlock_t *rwlp, krw_t rw) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + switch (rw) { + case RW_READER: + /* Here the Solaris code would block + * if there were any write waiters. Specifically + * thinking about the case where readers may have + * the lock and we would also allow this thread + * to grab the read lock with a writer waiting in the + * queue. This doesn't seem like a correctness + * issue, so just call down_read() + * for the test. We may have to revisit this if + * it becomes an issue */ + down_read(&rwlp->rw_sem); + break; + case RW_WRITER: + down_write(&rwlp->rw_sem); + + /* there better not be anyone else + * holding the write lock here */ + ASSERT(rwlp->rw_owner == NULL); + rwlp->rw_owner = current; + break; + default: + SBUG(); + } + EXIT; +} +EXPORT_SYMBOL(__rw_enter); + +void +__rw_exit(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + /* rw_owner is held by current + * thread iff it is a writer */ + if (rwlp->rw_owner == current) { + rwlp->rw_owner = NULL; + up_write(&rwlp->rw_sem); + } else { + up_read(&rwlp->rw_sem); + } + EXIT; +} +EXPORT_SYMBOL(__rw_exit); + +void +__rw_downgrade(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + ASSERT(rwlp->rw_owner == current); + + rwlp->rw_owner = NULL; + downgrade_write(&rwlp->rw_sem); + EXIT; +} +EXPORT_SYMBOL(__rw_downgrade); + +/* Return 0 if unable to perform the upgrade. + * Might be wise to fix the caller + * to acquire the write lock first? + */ +int +__rw_tryupgrade(krwlock_t *rwlp) +{ + int rc = 0; + ENTRY; + + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + spin_lock(&rwlp->rw_sem.wait_lock); + + /* Check if there is anyone waiting for the + * lock. If there is, then we know we should + * not try to upgrade the lock */ + if (!list_empty(&rwlp->rw_sem.wait_list)) { + spin_unlock(&rwlp->rw_sem.wait_lock); + RETURN(0); + } +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + /* Note that activity is protected by + * the wait_lock. Don't try to upgrade + * if there are multiple readers currently + * holding the lock */ + if (rwlp->rw_sem.activity > 1) { +#else + /* Don't try to upgrade + * if there are multiple readers currently + * holding the lock */ + if ((rwlp->rw_sem.count & RWSEM_ACTIVE_MASK) > 1) { +#endif + spin_unlock(&rwlp->rw_sem.wait_lock); + RETURN(0); + } + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + /* Here it should be safe to drop the + * read lock and reacquire it for writing since + * we know there are no waiters */ + __up_read_locked(&rwlp->rw_sem); + + /* returns 1 if success, 0 if contention */ + rc = __down_write_trylock_locked(&rwlp->rw_sem); +#else + /* Here it should be safe to drop the + * read lock and reacquire it for writing since + * we know there are no waiters */ + up_read(&rwlp->rw_sem); + + /* returns 1 if success, 0 if contention */ + rc = down_write_trylock(&rwlp->rw_sem); +#endif + + /* Check if upgrade failed. 
Should not ever happen + * if we got to this point */ + ASSERT(rc); + ASSERT(rwlp->rw_owner == NULL); + rwlp->rw_owner = current; + spin_unlock(&rwlp->rw_sem.wait_lock); + + RETURN(1); +} +EXPORT_SYMBOL(__rw_tryupgrade); + +kthread_t * +__rw_owner(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + RETURN(rwlp->rw_owner); +} +EXPORT_SYMBOL(__rw_owner); + +int +__rw_read_held(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + RETURN(__rw_lock_held(rwlp) && rwlp->rw_owner == NULL); +} +EXPORT_SYMBOL(__rw_read_held); + +int +__rw_write_held(krwlock_t *rwlp) +{ + ENTRY; + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + RETURN(rwlp->rw_owner == current); +} +EXPORT_SYMBOL(__rw_write_held); + +int +__rw_lock_held(krwlock_t *rwlp) +{ + int rc = 0; + ENTRY; + + ASSERT(rwlp); + ASSERT(rwlp->rw_magic == RW_MAGIC); + + spin_lock_irq(&(rwlp->rw_sem.wait_lock)); +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + if (rwlp->rw_sem.activity != 0) { +#else + if (rwlp->rw_sem.count != 0) { +#endif + rc = 1; + } + + spin_unlock_irq(&(rwlp->rw_sem.wait_lock)); + + RETURN(rc); +} +EXPORT_SYMBOL(__rw_lock_held); diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c new file mode 100644 index 000000000..799b54839 --- /dev/null +++ b/module/spl/spl-taskq.c @@ -0,0 +1,491 @@ +/* + * This file is part of the SPL: Solaris Porting Layer. + * + * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory + * Written by: + * Brian Behlendorf <[email protected]>, + * Herb Wartens <[email protected]>, + * Jim Garlick <[email protected]> + * UCRL-CODE-235197 + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <sys/taskq.h> +#include <sys/kmem.h> + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_TASKQ + +/* Global system-wide dynamic task queue available for all consumers */ +taskq_t *system_taskq; +EXPORT_SYMBOL(system_taskq); + +typedef struct spl_task { + spinlock_t t_lock; + struct list_head t_list; + taskqid_t t_id; + task_func_t *t_func; + void *t_arg; +} spl_task_t; + +/* NOTE: Must be called with tq->tq_lock held, returns a list_t which + * is not attached to the free, work, or pending taskq lists. 
+ */
+static spl_task_t *
+task_alloc(taskq_t *tq, uint_t flags)
+{
+	spl_task_t *t;
+	int count = 0;
+	ENTRY;
+
+	ASSERT(tq);
+	ASSERT(flags & (TQ_SLEEP | TQ_NOSLEEP));               /* One set */
+	ASSERT(!((flags & TQ_SLEEP) && (flags & TQ_NOSLEEP))); /* Not both */
+	ASSERT(spin_is_locked(&tq->tq_lock));
+retry:
+	/* Acquire spl_task_t's from the free list if available */
+	if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
+		t = list_entry(tq->tq_free_list.next, spl_task_t, t_list);
+		list_del_init(&t->t_list);
+		RETURN(t);
+	}
+
+	/* Free list is empty and memory allocs are prohibited */
+	if (flags & TQ_NOALLOC)
+		RETURN(NULL);
+
+	/* Hit maximum spl_task_t pool size */
+	if (tq->tq_nalloc >= tq->tq_maxalloc) {
+		if (flags & TQ_NOSLEEP)
+			RETURN(NULL);
+
+		/* Sleep periodically, polling the free list for an available
+		 * spl_task_t.  If a full second passes and we have not found
+		 * one, give up and return NULL to the caller. */
+		if (flags & TQ_SLEEP) {
+			spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+			schedule_timeout(HZ / 100);
+			spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+			if (count < 100)
+				GOTO(retry, count++);
+
+			RETURN(NULL);
+		}
+
+		/* Unreachable, TQ_SLEEP xor TQ_NOSLEEP */
+		SBUG();
+	}
+
+	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+	t = kmem_alloc(sizeof(spl_task_t), flags & (TQ_SLEEP | TQ_NOSLEEP));
+	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
+	if (t) {
+		spin_lock_init(&t->t_lock);
+		INIT_LIST_HEAD(&t->t_list);
+		t->t_id = 0;
+		t->t_func = NULL;
+		t->t_arg = NULL;
+		tq->tq_nalloc++;
+	}
+
+	RETURN(t);
+}
+
+/* NOTE: Must be called with tq->tq_lock held, expects the spl_task_t
+ * to already be removed from the free, work, or pending taskq lists.
+ */
+static void
+task_free(taskq_t *tq, spl_task_t *t)
+{
+	ENTRY;
+
+	ASSERT(tq);
+	ASSERT(t);
+	ASSERT(spin_is_locked(&tq->tq_lock));
+	ASSERT(list_empty(&t->t_list));
+
+	kmem_free(t, sizeof(spl_task_t));
+	tq->tq_nalloc--;
+
+	EXIT;
+}
+
+/* NOTE: Must be called with tq->tq_lock held, either destroys the
+ * spl_task_t if too many exist or moves it to the free list for later use.
+ */
+static void
+task_done(taskq_t *tq, spl_task_t *t)
+{
+	ENTRY;
+	ASSERT(tq);
+	ASSERT(t);
+	ASSERT(spin_is_locked(&tq->tq_lock));
+
+	list_del_init(&t->t_list);
+
+	if (tq->tq_nalloc <= tq->tq_minalloc) {
+		t->t_id = 0;
+		t->t_func = NULL;
+		t->t_arg = NULL;
+		list_add_tail(&t->t_list, &tq->tq_free_list);
+	} else {
+		task_free(tq, t);
+	}
+
+	EXIT;
+}
+
+/* Taskqid's are handed out in a monotonically increasing fashion per
+ * taskq_t.  We don't handle taskqid wrapping yet, but fortunately it is
+ * a 64-bit value so this is probably never going to happen.  The lowest
+ * pending taskqid is stored in the taskq_t to make it easy for any
+ * taskq_wait()'ers to know if the tasks they're waiting for have
+ * completed.  Unfortunately, tq_task_lowest is kept up to date in
+ * a pretty brain dead way; something more clever should be done.
+ */
+static int
+taskq_wait_check(taskq_t *tq, taskqid_t id)
+{
+	RETURN(tq->tq_lowest_id >= id);
+}
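+
+/* Usage sketch (illustrative only): a typical dispatch/wait cycle,
+ * assuming the usual Solaris-style taskq_dispatch()/taskq_wait_id()
+ * wrappers map onto the __taskq_*() functions below:
+ *
+ *	static void my_func(void *arg) { ... }
+ *
+ *	taskqid_t id = taskq_dispatch(tq, my_func, arg, TQ_SLEEP);
+ *	if (id != 0)
+ *		taskq_wait_id(tq, id);  // blocks via taskq_wait_check()
+ */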
+
+/* Expected to wait for all previously scheduled tasks to complete.  We do
+ * not need to wait for tasks scheduled after this call to complete.  In
+ * other words, we do not need to drain the entire taskq.
+ */
+void
+__taskq_wait_id(taskq_t *tq, taskqid_t id)
+{
+	ENTRY;
+	ASSERT(tq);
+
+	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id));
+
+	EXIT;
+}
+EXPORT_SYMBOL(__taskq_wait_id);
+
+void
+__taskq_wait(taskq_t *tq)
+{
+	taskqid_t id;
+	ENTRY;
+	ASSERT(tq);
+
+	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+	id = tq->tq_next_id;
+	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+	__taskq_wait_id(tq, id);
+
+	EXIT;
+
+}
+EXPORT_SYMBOL(__taskq_wait);
+
+int
+__taskq_member(taskq_t *tq, void *t)
+{
+	int i;
+	ENTRY;
+
+	ASSERT(tq);
+	ASSERT(t);
+
+	for (i = 0; i < tq->tq_nthreads; i++)
+		if (tq->tq_threads[i] == (struct task_struct *)t)
+			RETURN(1);
+
+	RETURN(0);
+}
+EXPORT_SYMBOL(__taskq_member);
+
+taskqid_t
+__taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
+{
+	spl_task_t *t;
+	taskqid_t rc = 0;
+	ENTRY;
+
+	ASSERT(tq);
+	ASSERT(func);
+	if (unlikely(in_atomic() && (flags & TQ_SLEEP))) {
+		CERROR("May schedule while atomic: %s/0x%08x/%d\n",
+		       current->comm, preempt_count(), current->pid);
+		SBUG();
+	}
+
+	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
+	/* Taskq being destroyed and all tasks drained */
+	if (!(tq->tq_flags & TQ_ACTIVE))
+		GOTO(out, rc = 0);
+
+	/* Do not queue the task unless there is an idle thread for it */
+	ASSERT(tq->tq_nactive <= tq->tq_nthreads);
+	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads))
+		GOTO(out, rc = 0);
+
+	if ((t = task_alloc(tq, flags)) == NULL)
+		GOTO(out, rc = 0);
+
+	spin_lock(&t->t_lock);
+	list_add_tail(&t->t_list, &tq->tq_pend_list);
+	t->t_id = rc = tq->tq_next_id;
+	tq->tq_next_id++;
+	t->t_func = func;
+	t->t_arg = arg;
+	spin_unlock(&t->t_lock);
+
+	wake_up(&tq->tq_work_waitq);
+out:
+	spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+	RETURN(rc);
+}
+EXPORT_SYMBOL(__taskq_dispatch);
+
+/* NOTE: Must be called with tq->tq_lock held */
+static taskqid_t
+taskq_lowest_id(taskq_t *tq)
+{
+	taskqid_t lowest_id = ~0;
+	spl_task_t *t;
+	ENTRY;
+
+	ASSERT(tq);
+	ASSERT(spin_is_locked(&tq->tq_lock));
+
+	list_for_each_entry(t, &tq->tq_pend_list, t_list)
+		if (t->t_id < lowest_id)
+			lowest_id = t->t_id;
+
+	list_for_each_entry(t, &tq->tq_work_list, t_list)
+		if (t->t_id < lowest_id)
+			lowest_id = t->t_id;
+
+	RETURN(lowest_id);
+}
+
+static int
+taskq_thread(void *args)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	sigset_t blocked;
+	taskqid_t id;
+	taskq_t *tq = args;
+	spl_task_t *t;
+	ENTRY;
+
+	ASSERT(tq);
+	current->flags |= PF_NOFREEZE;
+
+	sigfillset(&blocked);
+	sigprocmask(SIG_BLOCK, &blocked, NULL);
+	flush_signals(current);
+
+	spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+	tq->tq_nthreads++;
+	wake_up(&tq->tq_wait_waitq);
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+
+		add_wait_queue(&tq->tq_work_waitq, &wait);
+		if (list_empty(&tq->tq_pend_list)) {
+			spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+			schedule();
+			spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+		} else {
+			__set_current_state(TASK_RUNNING);
+		}
+
+		remove_wait_queue(&tq->tq_work_waitq, &wait);
+		if (!list_empty(&tq->tq_pend_list)) {
+			t = list_entry(tq->tq_pend_list.next, spl_task_t, t_list);
+			list_del_init(&t->t_list);
+			list_add_tail(&t->t_list, &tq->tq_work_list);
+			tq->tq_nactive++;
+			spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+			/* Perform the requested task */
+			t->t_func(t->t_arg);
+
+			spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+			tq->tq_nactive--;
+			id = t->t_id;
+			task_done(tq, t);
+
+			/* Update the lowest
remaining taskqid yet to run */ + if (tq->tq_lowest_id == id) { + tq->tq_lowest_id = taskq_lowest_id(tq); + ASSERT(tq->tq_lowest_id > id); + } + + wake_up_all(&tq->tq_wait_waitq); + } + + set_current_state(TASK_INTERRUPTIBLE); + + } + + __set_current_state(TASK_RUNNING); + tq->tq_nthreads--; + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + RETURN(0); +} + +taskq_t * +__taskq_create(const char *name, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + taskq_t *tq; + struct task_struct *t; + int rc = 0, i, j = 0; + ENTRY; + + ASSERT(name != NULL); + ASSERT(pri <= maxclsyspri); + ASSERT(minalloc >= 0); + ASSERT(maxalloc <= INT_MAX); + ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */ + + tq = kmem_alloc(sizeof(*tq), KM_SLEEP); + if (tq == NULL) + RETURN(NULL); + + tq->tq_threads = kmem_alloc(nthreads * sizeof(t), KM_SLEEP); + if (tq->tq_threads == NULL) { + kmem_free(tq, sizeof(*tq)); + RETURN(NULL); + } + + spin_lock_init(&tq->tq_lock); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_name = name; + tq->tq_nactive = 0; + tq->tq_nthreads = 0; + tq->tq_pri = pri; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_nalloc = 0; + tq->tq_flags = (flags | TQ_ACTIVE); + tq->tq_next_id = 1; + tq->tq_lowest_id = 1; + INIT_LIST_HEAD(&tq->tq_free_list); + INIT_LIST_HEAD(&tq->tq_work_list); + INIT_LIST_HEAD(&tq->tq_pend_list); + init_waitqueue_head(&tq->tq_work_waitq); + init_waitqueue_head(&tq->tq_wait_waitq); + + if (flags & TASKQ_PREPOPULATE) + for (i = 0; i < minalloc; i++) + task_done(tq, task_alloc(tq, TQ_SLEEP | TQ_NEW)); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + for (i = 0; i < nthreads; i++) { + t = kthread_create(taskq_thread, tq, "%s/%d", name, i); + if (t) { + tq->tq_threads[i] = t; + kthread_bind(t, i % num_online_cpus()); + set_user_nice(t, PRIO_TO_NICE(pri)); + wake_up_process(t); + j++; + } else { + tq->tq_threads[i] = NULL; + rc = 1; + } + } + + /* Wait for all threads to be started before potential destroy */ + wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j); + + if (rc) { + __taskq_destroy(tq); + tq = NULL; + } + + RETURN(tq); +} +EXPORT_SYMBOL(__taskq_create); + +void +__taskq_destroy(taskq_t *tq) +{ + spl_task_t *t; + int i, nthreads; + ENTRY; + + ASSERT(tq); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_flags &= ~TQ_ACTIVE; + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + /* TQ_ACTIVE cleared prevents new tasks being added to pending */ + __taskq_wait(tq); + + nthreads = tq->tq_nthreads; + for (i = 0; i < nthreads; i++) + if (tq->tq_threads[i]) + kthread_stop(tq->tq_threads[i]); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + while (!list_empty(&tq->tq_free_list)) { + t = list_entry(tq->tq_free_list.next, spl_task_t, t_list); + list_del_init(&t->t_list); + task_free(tq, t); + } + + ASSERT(tq->tq_nthreads == 0); + ASSERT(tq->tq_nalloc == 0); + ASSERT(list_empty(&tq->tq_free_list)); + ASSERT(list_empty(&tq->tq_work_list)); + ASSERT(list_empty(&tq->tq_pend_list)); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + kmem_free(tq->tq_threads, nthreads * sizeof(spl_task_t *)); + kmem_free(tq, sizeof(taskq_t)); + + EXIT; +} +EXPORT_SYMBOL(__taskq_destroy); + +int +spl_taskq_init(void) +{ + ENTRY; + + system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, + TASKQ_PREPOPULATE); + if (system_taskq == NULL) + RETURN(1); + + RETURN(0); +} + +void +spl_taskq_fini(void) +{ + ENTRY; + taskq_destroy(system_taskq); + 
EXIT;
+}
diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c
new file mode 100644
index 000000000..953c5ce7f
--- /dev/null
+++ b/module/spl/spl-thread.c
@@ -0,0 +1,135 @@
+/*
+ * This file is part of the SPL: Solaris Porting Layer.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ *         Brian Behlendorf <[email protected]>,
+ *         Herb Wartens <[email protected]>,
+ *         Jim Garlick <[email protected]>
+ * UCRL-CODE-235197
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <sys/thread.h>
+#include <sys/kmem.h>
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_THREAD
+
+/*
+ * Thread interfaces
+ */
+typedef struct thread_priv_s {
+	unsigned long tp_magic;		/* Magic */
+	int tp_name_size;		/* Name size */
+	char *tp_name;			/* Name (without _thread suffix) */
+	void (*tp_func)(void *);	/* Registered function */
+	void *tp_args;			/* Args to be passed to function */
+	size_t tp_len;			/* Len to be passed to function */
+	int tp_state;			/* State to start thread at */
+	pri_t tp_pri;			/* Priority to start thread at */
+} thread_priv_t;
+
+static int
+thread_generic_wrapper(void *arg)
+{
+	thread_priv_t *tp = (thread_priv_t *)arg;
+	void (*func)(void *);
+	void *args;
+
+	ASSERT(tp->tp_magic == TP_MAGIC);
+	func = tp->tp_func;
+	args = tp->tp_args;
+	set_current_state(tp->tp_state);
+	set_user_nice((kthread_t *)get_current(), PRIO_TO_NICE(tp->tp_pri));
+	kmem_free(tp->tp_name, tp->tp_name_size);
+	kmem_free(tp, sizeof(thread_priv_t));
+
+	if (func)
+		func(args);
+
+	return 0;
+}
+
+void
+__thread_exit(void)
+{
+	ENTRY;
+	EXIT;
+	complete_and_exit(NULL, 0);
+	/* Unreachable */
+}
+EXPORT_SYMBOL(__thread_exit);
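+
+/* Usage sketch (illustrative only): assuming the Solaris-style
+ * thread_create() wrapper supplies the name argument (e.g. from the
+ * function name) when mapping onto __thread_create() below:
+ *
+ *	static void my_worker_thread(void *arg) { ...; thread_exit(); }
+ *
+ *	kthread_t *t = thread_create(NULL, 0, my_worker_thread, arg,
+ *	    0, NULL, TS_RUN, minclsyspri);
+ */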
+
+/* thread_create() may block forever if it cannot create a thread or
+ * allocate memory.  This is preferable to returning a NULL which Solaris
+ * style callers likely never check for... since it can't fail. */
+kthread_t *
+__thread_create(caddr_t stk, size_t stksize, thread_func_t func,
+                const char *name, void *args, size_t len, int *pp,
+                int state, pri_t pri)
+{
+	thread_priv_t *tp;
+	struct task_struct *tsk;
+	char *p;
+	ENTRY;
+
+	/* Option pp is simply ignored */
+	/* Variable stack size unsupported */
+	ASSERT(stk == NULL);
+
+	tp = kmem_alloc(sizeof(thread_priv_t), KM_SLEEP);
+	if (tp == NULL)
+		RETURN(NULL);
+
+	tp->tp_magic = TP_MAGIC;
+	tp->tp_name_size = strlen(name) + 1;
+
+	tp->tp_name = kmem_alloc(tp->tp_name_size, KM_SLEEP);
+	if (tp->tp_name == NULL) {
+		kmem_free(tp, sizeof(thread_priv_t));
+		RETURN(NULL);
+	}
+
+	strncpy(tp->tp_name, name, tp->tp_name_size);
+
+	/* Strip the trailing "_thread" from the passed name, which will
+	 * be the function name, since the exposed API has no parameter
+	 * for passing a name.
+	 */
+	p = strstr(tp->tp_name, "_thread");
+	if (p)
+		p[0] = '\0';
+
+	tp->tp_func = func;
+	tp->tp_args = args;
+	tp->tp_len = len;
+	tp->tp_state = state;
+	tp->tp_pri = pri;
+
+	tsk = kthread_create(thread_generic_wrapper, (void *)tp, tp->tp_name);
+	if (IS_ERR(tsk)) {
+		CERROR("Failed to create thread: %ld\n", PTR_ERR(tsk));
+		RETURN(NULL);
+	}
+
+	wake_up_process(tsk);
+	RETURN((kthread_t *)tsk);
+}
+EXPORT_SYMBOL(__thread_create);
diff --git a/module/spl/spl-time.c b/module/spl/spl-time.c
new file mode 100644
index 000000000..88722afe1
--- /dev/null
+++ b/module/spl/spl-time.c
@@ -0,0 +1,92 @@
+/*
+ * This file is part of the SPL: Solaris Porting Layer.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ *         Brian Behlendorf <[email protected]>,
+ *         Herb Wartens <[email protected]>,
+ *         Jim Garlick <[email protected]>
+ * UCRL-CODE-235197
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+
+#ifdef HAVE_MONOTONIC_CLOCK
+extern unsigned long long monotonic_clock(void);
+#endif
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_TIME
+
+void
+__gethrestime(timestruc_t *ts)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	ts->tv_sec = tv.tv_sec;
+	ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
+}
+EXPORT_SYMBOL(__gethrestime);
+
+/* Use monotonic_clock() by default.  It's faster and is available on older
+ * kernels, but few architectures provide it, so elsewhere we must fall
+ * back to a coarser jiffies-based time.
+ */
+hrtime_t
+__gethrtime(void) {
+#ifdef HAVE_MONOTONIC_CLOCK
+	unsigned long long res = monotonic_clock();
+
+	/* Deal with signed/unsigned mismatch */
+	return (hrtime_t)(res & ~(1ULL << 63));
+#else
+	int64_t j = get_jiffies_64();
+
+	return j * NSEC_PER_SEC / HZ;
+#endif
+}
+EXPORT_SYMBOL(__gethrtime);
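+
+/* Worked example of the jiffies fallback above (illustrative): with
+ * HZ = 250, j = 2500 jiffies yields 2500 * 1000000000 / 250 =
+ * 10000000000 ns, i.e. 10 seconds of uptime at ~4 ms resolution.
+ */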
+
+/* set_normalized_timespec() API changes
+ * 2.6.0  - 2.6.15: Inline function provided by linux/time.h
+ * 2.6.16 - 2.6.25: Function prototype defined but not exported
+ * 2.6.26 - 2.6.x:  Function defined and exported
+ */
+#if !defined(HAVE_SET_NORMALIZED_TIMESPEC_INLINE) && \
+    !defined(HAVE_SET_NORMALIZED_TIMESPEC_EXPORT)
+void
+set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
+{
+	while (nsec >= NSEC_PER_SEC) {
+		nsec -= NSEC_PER_SEC;
+		++sec;
+	}
+	while (nsec < 0) {
+		nsec += NSEC_PER_SEC;
+		--sec;
+	}
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+EXPORT_SYMBOL(set_normalized_timespec);
+#endif
diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c
new file mode 100644
index 000000000..b19d9f1de
--- /dev/null
+++ b/module/spl/spl-vnode.c
@@ -0,0 +1,678 @@
+/*
+ * This file is part of the SPL: Solaris Porting Layer.
+ *
+ * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory
+ * Written by:
+ *         Brian Behlendorf <[email protected]>,
+ *         Herb Wartens <[email protected]>,
+ *         Jim Garlick <[email protected]>
+ * UCRL-CODE-235197
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/vnode.h>
+
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_VNODE
+
+void *rootdir = NULL;
+EXPORT_SYMBOL(rootdir);
+
+static spl_kmem_cache_t *vn_cache;
+static spl_kmem_cache_t *vn_file_cache;
+
+static spinlock_t vn_file_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(vn_file_list);
+
+static vtype_t
+vn_get_sol_type(umode_t mode)
+{
+	if (S_ISREG(mode))
+		return VREG;
+
+	if (S_ISDIR(mode))
+		return VDIR;
+
+	if (S_ISCHR(mode))
+		return VCHR;
+
+	if (S_ISBLK(mode))
+		return VBLK;
+
+	if (S_ISFIFO(mode))
+		return VFIFO;
+
+	if (S_ISLNK(mode))
+		return VLNK;
+
+	if (S_ISSOCK(mode))
+		return VSOCK;
+
+	return VNON;
+} /* vn_get_sol_type() */
+
+vnode_t *
+vn_alloc(int flag)
+{
+	vnode_t *vp;
+	ENTRY;
+
+	vp = kmem_cache_alloc(vn_cache, flag);
+	if (vp != NULL) {
+		vp->v_file = NULL;
+		vp->v_type = 0;
+	}
+
+	RETURN(vp);
+} /* vn_alloc() */
+EXPORT_SYMBOL(vn_alloc);
+
+void
+vn_free(vnode_t *vp)
+{
+	ENTRY;
+	kmem_cache_free(vn_cache, vp);
+	EXIT;
+} /* vn_free() */
+EXPORT_SYMBOL(vn_free);
+
+int
+vn_open(const char *path, uio_seg_t seg, int flags, int mode,
+        vnode_t **vpp, int x1, void *x2)
+{
+	struct file *fp;
+	struct kstat stat;
+	int rc, saved_umask = 0;
+	vnode_t *vp;
+	ENTRY;
+
+	ASSERT(flags & (FWRITE | FREAD));
+	ASSERT(seg == UIO_SYSSPACE);
+	ASSERT(vpp);
+	*vpp = NULL;
+
+	if (!(flags & FCREAT) && (flags & FWRITE))
+		flags |= FEXCL;
+
+	/* Note for filp_open() the two low bits must be remapped to mean:
+	 * 01 - read-only  -> 00 read-only
+	 * 10 - write-only -> 01 write-only
+	 * 11 - read-write -> 10 read-write
+	 */
+	flags--;
+
+	if (flags & FCREAT)
+		saved_umask = xchg(&current->fs->umask, 0);
+
+	fp = filp_open(path, flags, mode);
+
+	if (flags & FCREAT)
+		(void)xchg(&current->fs->umask, saved_umask);
+
+	if (IS_ERR(fp))
+		RETURN(-PTR_ERR(fp));
+
+	rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat);
+	if (rc) {
+		filp_close(fp, 0);
+		RETURN(-rc);
+	}
+
+	vp = vn_alloc(KM_SLEEP);
+	if (!vp) {
+		filp_close(fp, 0);
+		RETURN(ENOMEM);
+	}
+
+	mutex_enter(&vp->v_lock);
+	vp->v_type = vn_get_sol_type(stat.mode);
+	vp->v_file = fp;
+	*vpp = vp;
+	mutex_exit(&vp->v_lock);
+
+	RETURN(0);
+} /* vn_open() */
+EXPORT_SYMBOL(vn_open);
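+
+/* Worked example of the flag remapping above (illustrative): with the
+ * Solaris FREAD/FWRITE bits, FREAD|FWRITE == 0x3 becomes 0x2 after the
+ * decrement, matching filp_open()'s O_RDWR; FREAD == 0x1 becomes 0x0
+ * (O_RDONLY) and FWRITE == 0x2 becomes 0x1 (O_WRONLY).
+ */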
+
+int
+vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
+          vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
+{
+	char *realpath;
+	int len, rc;
+	ENTRY;
+
+	ASSERT(vp == rootdir);
+
+	len = strlen(path) + 2;
+	realpath = kmalloc(len, GFP_KERNEL);
+	if (!realpath)
+		RETURN(ENOMEM);
+
+	(void)snprintf(realpath, len, "/%s", path);
+	rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
+	kfree(realpath);
+
+	RETURN(rc);
+} /* vn_openat() */
+EXPORT_SYMBOL(vn_openat);
+
+int
+vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
+        uio_seg_t seg, int x1, rlim64_t x2, void *x3, ssize_t *residp)
+{
+	loff_t offset;
+	mm_segment_t saved_fs;
+	struct file *fp;
+	int rc;
+	ENTRY;
+
+	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
+	ASSERT(vp);
+	ASSERT(vp->v_file);
+	ASSERT(seg == UIO_SYSSPACE);
+	ASSERT(x1 == 0);
+	ASSERT(x2 == RLIM64_INFINITY);
+
+	offset = off;
+	fp = vp->v_file;
+
+	/* The writable user data segment must be briefly widened for this
+	 * process so we can use the user space read call paths to write
+	 * into memory allocated by the kernel. */
+	saved_fs = get_fs();
+	set_fs(get_ds());
+
+	if (uio & UIO_WRITE)
+		rc = vfs_write(fp, addr, len, &offset);
+	else
+		rc = vfs_read(fp, addr, len, &offset);
+
+	set_fs(saved_fs);
+
+	if (rc < 0)
+		RETURN(-rc);
+
+	if (residp) {
+		*residp = len - rc;
+	} else {
+		if (rc != len)
+			RETURN(EIO);
+	}
+
+	RETURN(0);
+} /* vn_rdwr() */
+EXPORT_SYMBOL(vn_rdwr);
+
+int
+vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
+{
+	int rc;
+	ENTRY;
+
+	ASSERT(vp);
+	ASSERT(vp->v_file);
+
+	rc = filp_close(vp->v_file, 0);
+	vn_free(vp);
+
+	RETURN(-rc);
+} /* vn_close() */
+EXPORT_SYMBOL(vn_close);
+
+/* vn_seek() does not actually seek; it only performs bounds checking on
+ * the proposed seek.  We perform minimal checking and allow vn_rdwr() to
+ * catch anything more serious. */
+int
+vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
+{
+	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
+}
+EXPORT_SYMBOL(vn_seek);
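+
+/* Usage sketch (illustrative only; the path is arbitrary): a read
+ * round-trip through the wrappers above:
+ *
+ *	vnode_t *vp;
+ *	char buf[64];
+ *	ssize_t resid;
+ *
+ *	if (vn_open("/etc/hostid", UIO_SYSSPACE, FREAD, 0644,
+ *	    &vp, 0, NULL) == 0) {
+ *		(void)vn_rdwr(UIO_READ, vp, buf, sizeof(buf), 0,
+ *		    UIO_SYSSPACE, 0, RLIM64_INFINITY, NULL, &resid);
+ *		(void)vn_close(vp, 0, 0, 0, NULL, NULL);
+ *	}
+ */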
+/* Modified do_rename() from linux/fs/namei.c, only uses exported symbols */
+int
+vn_rename(const char *oldname, const char *newname, int x1)
+{
+	struct dentry *old_dir, *new_dir;
+	struct dentry *old_dentry, *new_dentry;
+	struct dentry *trap;
+	struct nameidata oldnd, newnd;
+	int rc = 0;
+	ENTRY;
+
+	rc = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
+	if (rc)
+		GOTO(exit, rc);
+
+	rc = path_lookup(newname, LOOKUP_PARENT, &newnd);
+	if (rc)
+		GOTO(exit1, rc);
+
+	rc = -EXDEV;
+	if (oldnd.nd_mnt != newnd.nd_mnt)
+		GOTO(exit2, rc);
+
+	old_dir = oldnd.nd_dentry;
+	rc = -EBUSY;
+	if (oldnd.last_type != LAST_NORM)
+		GOTO(exit2, rc);
+
+	new_dir = newnd.nd_dentry;
+	if (newnd.last_type != LAST_NORM)
+		GOTO(exit2, rc);
+
+	trap = lock_rename(new_dir, old_dir);
+
+	old_dentry = vn_lookup_hash(&oldnd);
+
+	rc = PTR_ERR(old_dentry);
+	if (IS_ERR(old_dentry))
+		GOTO(exit3, rc);
+
+	/* source must exist */
+	rc = -ENOENT;
+	if (!old_dentry->d_inode)
+		GOTO(exit4, rc);
+
+	/* unless the source is a directory, trailing slashes give -ENOTDIR */
+	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+		rc = -ENOTDIR;
+		if (oldnd.last.name[oldnd.last.len])
+			GOTO(exit4, rc);
+		if (newnd.last.name[newnd.last.len])
+			GOTO(exit4, rc);
+	}
+
+	/* source must not be an ancestor of the target */
+	rc = -EINVAL;
+	if (old_dentry == trap)
+		GOTO(exit4, rc);
+
+	new_dentry = vn_lookup_hash(&newnd);
+	rc = PTR_ERR(new_dentry);
+	if (IS_ERR(new_dentry))
+		GOTO(exit4, rc);
+
+	/* target must not be an ancestor of the source */
+	rc = -ENOTEMPTY;
+	if (new_dentry == trap)
+		GOTO(exit5, rc);
+
+	rc = vfs_rename(old_dir->d_inode, old_dentry,
+			new_dir->d_inode, new_dentry);
+exit5:
+	dput(new_dentry);
+exit4:
+	dput(old_dentry);
+exit3:
+	unlock_rename(new_dir, old_dir);
+exit2:
+	vn_path_release(&newnd);
+exit1:
+	vn_path_release(&oldnd);
+exit:
+	RETURN(-rc);
+} /* vn_rename() */
+EXPORT_SYMBOL(vn_rename);
+
+int
+vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
+{
+	struct file *fp;
+	struct kstat stat;
+	int rc;
+	ENTRY;
+
+	ASSERT(vp);
+	ASSERT(vp->v_file);
+	ASSERT(vap);
+
+	fp = vp->v_file;
+
+	rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat);
+	if (rc)
+		RETURN(-rc);
+
+	vap->va_type = vn_get_sol_type(stat.mode);
+	vap->va_mode = stat.mode;
+	vap->va_uid = stat.uid;
+	vap->va_gid = stat.gid;
+	vap->va_fsid = 0;
+	vap->va_nodeid = stat.ino;
+	vap->va_nlink = stat.nlink;
+	vap->va_size = stat.size;
+	vap->va_blocksize = stat.blksize;
+	vap->va_atime.tv_sec = stat.atime.tv_sec;
+	vap->va_atime.tv_usec = stat.atime.tv_nsec / NSEC_PER_USEC;
+	vap->va_mtime.tv_sec = stat.mtime.tv_sec;
+	vap->va_mtime.tv_usec = stat.mtime.tv_nsec / NSEC_PER_USEC;
+	vap->va_ctime.tv_sec = stat.ctime.tv_sec;
+	vap->va_ctime.tv_usec = stat.ctime.tv_nsec / NSEC_PER_USEC;
+	vap->va_rdev = stat.rdev;
+	vap->va_blocks = stat.blocks;
+
+	RETURN(0);
+} /* vn_getattr() */
+EXPORT_SYMBOL(vn_getattr);
+
+int
+vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
+{
+	int datasync = 0;
+	ENTRY;
+
+	ASSERT(vp);
+	ASSERT(vp->v_file);
+
+	if (flags & FDSYNC)
+		datasync = 1;
+
+	RETURN(-file_fsync(vp->v_file, vp->v_file->f_dentry, datasync));
+} /* vn_fsync() */
+EXPORT_SYMBOL(vn_fsync);
+
+/* Function must be called while holding the vn_file_lock */
+static file_t *
+file_find(int fd)
+{
+	file_t *fp;
+
+	ASSERT(spin_is_locked(&vn_file_lock));
+
+	list_for_each_entry(fp, &vn_file_list, f_list) {
+		if (fd == fp->f_fd) {
+			ASSERT(atomic_read(&fp->f_ref) != 0);
+			return fp;
+		}
+	}
+
+	return NULL;
+} /* file_find() */
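+
+/* Editorial sketch (this assumes the getf()/releasef() aliases in
+ * sys/vnode.h resolve to vn_getf()/vn_releasef(), as the EXPORT_SYMBOL()
+ * lines below suggest): every successful lookup takes an f_ref reference
+ * which the caller must eventually drop, so uses pair up as:
+ *
+ *	file_t *fp = vn_getf(fd);
+ *	if (fp == NULL)
+ *		return (EBADF);
+ *	... use fp->f_vnode and fp->f_offset ...
+ *	vn_releasef(fd);
+ */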
+file_t *
+vn_getf(int fd)
+{
+	struct kstat stat;
+	struct file *lfp;
+	file_t *fp;
+	vnode_t *vp;
+	int rc = 0;
+	ENTRY;
+
+	/* Already open, just take an extra reference */
+	spin_lock(&vn_file_lock);
+
+	fp = file_find(fd);
+	if (fp) {
+		atomic_inc(&fp->f_ref);
+		spin_unlock(&vn_file_lock);
+		RETURN(fp);
+	}
+
+	spin_unlock(&vn_file_lock);
+
+	/* File was not yet opened, create the object and set it up */
+	fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
+	if (fp == NULL)
+		GOTO(out, rc);
+
+	mutex_enter(&fp->f_lock);
+
+	fp->f_fd = fd;
+	fp->f_offset = 0;
+	atomic_inc(&fp->f_ref);
+
+	lfp = fget(fd);
+	if (lfp == NULL)
+		GOTO(out_mutex, rc);
+
+	vp = vn_alloc(KM_SLEEP);
+	if (vp == NULL)
+		GOTO(out_fget, rc);
+
+	if (vfs_getattr(lfp->f_vfsmnt, lfp->f_dentry, &stat))
+		GOTO(out_vnode, rc);
+
+	mutex_enter(&vp->v_lock);
+	vp->v_type = vn_get_sol_type(stat.mode);
+	vp->v_file = lfp;
+	mutex_exit(&vp->v_lock);
+
+	fp->f_vnode = vp;
+	fp->f_file = lfp;
+
+	/* Put it on the tracking list */
+	spin_lock(&vn_file_lock);
+	list_add(&fp->f_list, &vn_file_list);
+	spin_unlock(&vn_file_lock);
+
+	mutex_exit(&fp->f_lock);
+	RETURN(fp);
+
+out_vnode:
+	vn_free(vp);
+out_fget:
+	fput(lfp);
+out_mutex:
+	mutex_exit(&fp->f_lock);
+	kmem_cache_free(vn_file_cache, fp);
+out:
+	RETURN(NULL);
+} /* vn_getf() */
+EXPORT_SYMBOL(getf);
+
+static void
+releasef_locked(file_t *fp)
+{
+	ASSERT(fp->f_file);
+	ASSERT(fp->f_vnode);
+
+	/* Unlinked from the list with no references, safe to free */
+	fput(fp->f_file);
+	vn_free(fp->f_vnode);
+
+	kmem_cache_free(vn_file_cache, fp);
+}
+
+void
+vn_releasef(int fd)
+{
+	file_t *fp;
+	ENTRY;
+
+	spin_lock(&vn_file_lock);
+	fp = file_find(fd);
+	if (fp) {
+		atomic_dec(&fp->f_ref);
+		if (atomic_read(&fp->f_ref) > 0) {
+			spin_unlock(&vn_file_lock);
+			EXIT;
+			return;
+		}
+
+		list_del(&fp->f_list);
+		releasef_locked(fp);
+	}
+	spin_unlock(&vn_file_lock);
+
+	EXIT;
+	return;
+} /* vn_releasef() */
+EXPORT_SYMBOL(releasef);
+
+static int
+vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+	struct vnode *vp = buf;
+
+	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	return (0);
+} /* vn_cache_constructor() */
+
+static void
+vn_cache_destructor(void *buf, void *cdrarg)
+{
+	struct vnode *vp = buf;
+
+	mutex_destroy(&vp->v_lock);
+} /* vn_cache_destructor() */
+
+static int
+vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+	file_t *fp = buf;
+
+	atomic_set(&fp->f_ref, 0);
+	mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
+	INIT_LIST_HEAD(&fp->f_list);
+
+	return (0);
+} /* vn_file_cache_constructor() */
+
+static void
+vn_file_cache_destructor(void *buf, void *cdrarg)
+{
+	file_t *fp = buf;
+
+	mutex_destroy(&fp->f_lock);
+} /* vn_file_cache_destructor() */
+
+int
+vn_init(void)
+{
+	ENTRY;
+	vn_cache = kmem_cache_create("spl_vn_cache",
+				     sizeof(struct vnode), 64,
+				     vn_cache_constructor,
+				     vn_cache_destructor,
+				     NULL, NULL, NULL, 0);
+
+	vn_file_cache = kmem_cache_create("spl_vn_file_cache",
+					  sizeof(file_t), 64,
+					  vn_file_cache_constructor,
+					  vn_file_cache_destructor,
+					  NULL, NULL, NULL, 0);
+	RETURN(0);
+} /* vn_init() */
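+
+/* Editorial design note: because the slab constructors above run once per
+ * cached object, v_lock and f_lock are initialized when a slab is grown,
+ * not on every allocation, and they remain valid until the matching
+ * destructor runs at cache teardown.  A sketch of the resulting lifetime:
+ *
+ *	vnode_t *vp = vn_alloc(KM_SLEEP);	ctor may have run long ago
+ *	mutex_enter(&vp->v_lock);
+ *	...
+ *	mutex_exit(&vp->v_lock);
+ *	vn_free(vp);				dtor deferred to vn_fini()
+ */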
+void
+vn_fini(void)
+{
+	file_t *fp, *next_fp;
+	int leaked = 0;
+	ENTRY;
+
+	spin_lock(&vn_file_lock);
+
+	list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
+		list_del(&fp->f_list);
+		releasef_locked(fp);
+		leaked++;
+	}
+
+	spin_unlock(&vn_file_lock);
+
+	/* kmem_cache_destroy() may sleep, so it must not be called while
+	 * holding the vn_file_lock spin lock. */
+	kmem_cache_destroy(vn_file_cache);
+	vn_file_cache = NULL;
+
+	if (leaked > 0)
+		CWARN("Warning: %d files leaked\n", leaked);
+
+	kmem_cache_destroy(vn_cache);
+
+	EXIT;
+	return;
+} /* vn_fini() */