-rw-r--r--  ChangeLog                    7
-rw-r--r--  include/sys/debug.h         11
-rw-r--r--  include/sys/kmem.h          16
-rw-r--r--  include/sys/mutex.h        215
-rw-r--r--  modules/spl/Makefile.in      1
-rw-r--r--  modules/spl/spl-generic.c   16
-rw-r--r--  modules/spl/spl-mutex.c    256
-rw-r--r--  modules/spl/spl-proc.c     234
-rw-r--r--  modules/spl/spl-taskq.c      6

9 files changed, 561 insertions, 201 deletions
diff --git a/ChangeLog b/ChangeLog
index 0106bbd5e..a65d6b15d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-04-26 Brian Behlendorf <[email protected]>
+
+ * include/sys/mutex.h : Implemented a close approximation
+ of adaptive mutexes. These changes, however, required exporting
+ a new symbol from the kernel proper, 'task_curr()', which means
+ we are now dependent on a patched kernel.
+
2008-04-24 Brian Behlendorf <[email protected]>
* : Tag spl-0.2.1
diff --git a/include/sys/debug.h b/include/sys/debug.h
index 64aa1808f..39585ba19 100644
--- a/include/sys/debug.h
+++ b/include/sys/debug.h
@@ -310,16 +310,19 @@ do { \
return RETURN__ret; \
} while (0)
-#define ENTRY \
+#define __ENTRY(subsys) \
do { \
- CDEBUG(D_TRACE, "Process entered\n"); \
+ __CDEBUG(NULL, subsys, D_TRACE, "Process entered\n"); \
} while (0)
-#define EXIT \
+#define __EXIT(subsys) \
do { \
- CDEBUG(D_TRACE, "Process leaving\n"); \
+ __CDEBUG(NULL, subsys, D_TRACE, "Process leaving\n"); \
} while(0)
+#define ENTRY __ENTRY(DEBUG_SUBSYSTEM)
+#define EXIT __EXIT(DEBUG_SUBSYSTEM)
+
extern int spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask,
const char *file, const char *fn, const int line,
const char *format1, va_list args, const char *format2, ...);
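
The ENTRY/EXIT rework lets a header trace under its own debug subsystem instead of whatever the including .c file happens to define. A minimal sketch of the intended usage (not part of the patch; my_helper() is a hypothetical function, and S_KMEM/S_MUTEX are existing SPL subsystem flags):

#define DEBUG_SUBSYSTEM S_KMEM
#include <sys/debug.h>

static void
my_helper(void)
{
	ENTRY;              /* expands to __ENTRY(S_KMEM) via DEBUG_SUBSYSTEM */
	__ENTRY(S_MUTEX);   /* explicit subsystem, as the mutex.h macros now do */
	/* ... work ... */
	__EXIT(S_MUTEX);
	EXIT;               /* expands to __EXIT(S_KMEM) */
}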
diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index 73965c58b..cc56ddd36 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -54,9 +54,9 @@ extern int kmem_warning_flag;
if (unlikely(atomic64_read(&kmem_alloc_used)>kmem_alloc_max)) \
kmem_alloc_max = atomic64_read(&kmem_alloc_used); \
\
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x)'d " \
+ __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x) = %p " \
"(%ld/%ld)\n", (int)(size), (int)(flags), \
- atomic64_read(&kmem_alloc_used), \
+ _ptr_, atomic64_read(&kmem_alloc_used), \
kmem_alloc_max); \
} \
\
@@ -70,8 +70,8 @@ extern int kmem_warning_flag;
({ \
ASSERT((ptr) || (size > 0)); \
atomic64_sub((size), &kmem_alloc_used); \
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%d)'d (%ld/%ld)\n", \
- (int)(size), atomic64_read(&kmem_alloc_used), \
+ __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%p, %d) (%ld/%ld)\n", \
+ (ptr), (int)(size), atomic64_read(&kmem_alloc_used), \
kmem_alloc_max); \
memset(ptr, 0x5a, (size)); /* Poison */ \
kfree(ptr); \
@@ -99,9 +99,9 @@ extern int kmem_warning_flag;
if (unlikely(atomic64_read(&vmem_alloc_used)>vmem_alloc_max)) \
vmem_alloc_max = atomic64_read(&vmem_alloc_used); \
\
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(%d, 0x%x)'d " \
+ __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(%d, 0x%x) = %p " \
"(%ld/%ld)\n", (int)(size), (int)(flags), \
- atomic64_read(&vmem_alloc_used), \
+ _ptr_, atomic64_read(&vmem_alloc_used), \
vmem_alloc_max); \
} \
\
@@ -116,8 +116,8 @@ extern int kmem_warning_flag;
({ \
ASSERT((ptr) || (size > 0)); \
atomic64_sub((size), &vmem_alloc_used); \
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%d)'d (%ld/%ld)\n", \
- (int)(size), atomic64_read(&vmem_alloc_used), \
+ __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%p, %d) (%ld/%ld)\n", \
+ (ptr), (int)(size), atomic64_read(&vmem_alloc_used), \
vmem_alloc_max); \
memset(ptr, 0x5a, (size)); /* Poison */ \
vfree(ptr); \
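
With the returned pointer now printed by both the alloc and free messages, an allocation can be matched to its free in the debug log. Illustrative only; the size and variable name are arbitrary:

	void *buf;

	buf = kmem_alloc(512, KM_SLEEP);
	/* log, roughly: kmem_alloc(512, 0x...) = <buf> (used/max) */
	kmem_free(buf, 512);
	/* log, roughly: kmem_free(<buf>, 512) (used/max) */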
diff --git a/include/sys/mutex.h b/include/sys/mutex.h
index 045842d72..fd787bb9d 100644
--- a/include/sys/mutex.h
+++ b/include/sys/mutex.h
@@ -8,175 +8,86 @@ extern "C" {
#include <linux/module.h>
#include <linux/hardirq.h>
#include <sys/types.h>
+#include <sys/kmem.h>
-/* See the "Big Theory Statement" in solaris mutex.c.
- *
- * Spin mutexes apparently aren't needed by zfs so we assert
- * if ibc is non-zero.
- *
- * Our impementation of adaptive mutexes aren't really adaptive.
- * They go to sleep every time.
- */
+//#define DEBUG_MUTEX
+#undef DEBUG_MUTEX
#define MUTEX_DEFAULT 0
-#define MUTEX_HELD(x) (mutex_owned(x))
+#define MUTEX_SPIN 1
+#define MUTEX_ADAPTIVE 2
+
+#define MUTEX_ENTER_TOTAL 0
+#define MUTEX_ENTER_NOT_HELD 1
+#define MUTEX_ENTER_SPIN 2
+#define MUTEX_ENTER_SLEEP 3
+#define MUTEX_TRYENTER_TOTAL 4
+#define MUTEX_TRYENTER_NOT_HELD 5
+#define MUTEX_STATS_SIZE 6
#define KM_MAGIC 0x42424242
#define KM_POISON 0x84
typedef struct {
- int km_magic;
+ int32_t km_magic;
+ int16_t km_type;
+ int16_t km_name_size;
char *km_name;
struct task_struct *km_owner;
- struct semaphore km_sem;
- spinlock_t km_lock;
+ struct semaphore *km_sem;
+#ifdef DEBUG_MUTEX
+ int *km_stats;
+ struct list_head km_list;
+#endif
} kmutex_t;
-#undef mutex_init
-static __inline__ void
-mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(ibc == NULL); /* XXX - Spin mutexes not needed */
- ASSERT(type == MUTEX_DEFAULT); /* XXX - Only default type supported */
-
- mp->km_magic = KM_MAGIC;
- spin_lock_init(&mp->km_lock);
- sema_init(&mp->km_sem, 1);
- mp->km_owner = NULL;
- mp->km_name = NULL;
-
- if (name) {
- mp->km_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
- if (mp->km_name)
- strcpy(mp->km_name, name);
- }
- EXIT;
-}
-
-#undef mutex_destroy
-static __inline__ void
-mutex_destroy(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- spin_lock(&mp->km_lock);
-
- if (mp->km_name)
- kfree(mp->km_name);
-
- memset(mp, KM_POISON, sizeof(*mp));
- spin_unlock(&mp->km_lock);
- EXIT;
-}
+extern int mutex_spin_max;
-static __inline__ void
-mutex_enter(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- spin_lock(&mp->km_lock);
-
- if (unlikely(in_atomic() && !current->exit_state)) {
- spin_unlock(&mp->km_lock);
- __CDEBUG_LIMIT(S_MUTEX, D_ERROR,
- "May schedule while atomic: %s/0x%08x/%d\n",
- current->comm, preempt_count(), current->pid);
- SBUG();
- }
-
- spin_unlock(&mp->km_lock);
-
- down(&mp->km_sem);
-
- spin_lock(&mp->km_lock);
- ASSERT(mp->km_owner == NULL);
- mp->km_owner = current;
- spin_unlock(&mp->km_lock);
- EXIT;
-}
-
-/* Return 1 if we acquired the mutex, else zero. */
-static __inline__ int
-mutex_tryenter(kmutex_t *mp)
-{
- int rc;
- ENTRY;
-
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- spin_lock(&mp->km_lock);
-
- if (unlikely(in_atomic() && !current->exit_state)) {
- spin_unlock(&mp->km_lock);
- __CDEBUG_LIMIT(S_MUTEX, D_ERROR,
- "May schedule while atomic: %s/0x%08x/%d\n",
- current->comm, preempt_count(), current->pid);
- SBUG();
- }
-
- spin_unlock(&mp->km_lock);
- rc = down_trylock(&mp->km_sem); /* returns 0 if acquired */
- if (rc == 0) {
- spin_lock(&mp->km_lock);
- ASSERT(mp->km_owner == NULL);
- mp->km_owner = current;
- spin_unlock(&mp->km_lock);
- RETURN(1);
- }
-
- RETURN(0);
-}
-
-static __inline__ void
-mutex_exit(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- spin_lock(&mp->km_lock);
-
- ASSERT(mp->km_owner == current);
- mp->km_owner = NULL;
- spin_unlock(&mp->km_lock);
- up(&mp->km_sem);
- EXIT;
-}
-
-/* Return 1 if mutex is held by current process, else zero. */
-static __inline__ int
-mutex_owned(kmutex_t *mp)
-{
- int rc;
- ENTRY;
+#ifdef DEBUG_MUTEX
+extern int mutex_stats[MUTEX_STATS_SIZE];
+extern struct mutex mutex_stats_lock;
+extern struct list_head mutex_stats_list;
+#define MUTEX_STAT_INC(stats, stat) ((stats)[stat]++)
+#else
+#define MUTEX_STAT_INC(stats, stat)
+#endif
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- spin_lock(&mp->km_lock);
- rc = (mp->km_owner == current);
- spin_unlock(&mp->km_lock);
+int spl_mutex_init(void);
+void spl_mutex_fini(void);
- RETURN(rc);
-}
+extern void __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc);
+extern void __spl_mutex_destroy(kmutex_t *mp);
+extern int __mutex_tryenter(kmutex_t *mp);
+extern void __mutex_enter(kmutex_t *mp);
+extern void __mutex_exit(kmutex_t *mp);
+extern int __mutex_owned(kmutex_t *mp);
+extern kthread_t *__spl_mutex_owner(kmutex_t *mp);
-/* Return owner if mutex is owned, else NULL. */
-static __inline__ kthread_t *
-mutex_owner(kmutex_t *mp)
-{
- kthread_t *thr;
- ENTRY;
-
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- spin_lock(&mp->km_lock);
- thr = mp->km_owner;
- spin_unlock(&mp->km_lock);
+#undef mutex_init
+#undef mutex_destroy
- RETURN(thr);
-}
+#define mutex_init(mp, name, type, ibc) \
+({ \
+ __ENTRY(S_MUTEX); \
+ if ((name) == NULL) \
+ __spl_mutex_init(mp, #mp, type, ibc); \
+ else \
+ __spl_mutex_init(mp, name, type, ibc); \
+ __EXIT(S_MUTEX); \
+})
+#define mutex_destroy(mp) \
+({ \
+ __ENTRY(S_MUTEX); \
+ __spl_mutex_destroy(mp); \
+ __EXIT(S_MUTEX); \
+})
+
+#define mutex_tryenter(mp) __mutex_tryenter(mp)
+#define mutex_enter(mp) __mutex_enter(mp)
+#define mutex_exit(mp) __mutex_exit(mp)
+#define mutex_owned(mp) __mutex_owned(mp)
+#define mutex_owner(mp) __spl_mutex_owner(mp)
+#define MUTEX_HELD(mp) mutex_owned(mp)
#ifdef __cplusplus
}
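
A hedged usage sketch of the reworked kmutex_t API (my_data_t and its fields are hypothetical; the calls are the macros defined above). MUTEX_DEFAULT now maps to an adaptive mutex whose semaphore is kmem_alloc'd by __spl_mutex_init():

#include <sys/mutex.h>

typedef struct my_data {
	kmutex_t md_lock;
	int md_value;
} my_data_t;

static void
my_data_setup(my_data_t *md)
{
	/* Passing NULL for the name would make mutex_init() stringify
	 * its first argument ("&md->md_lock") and use that instead. */
	mutex_init(&md->md_lock, "md_lock", MUTEX_DEFAULT, NULL);
}

static void
my_data_update(my_data_t *md, int v)
{
	mutex_enter(&md->md_lock);
	ASSERT(MUTEX_HELD(&md->md_lock));
	md->md_value = v;
	mutex_exit(&md->md_lock);
}

static void
my_data_teardown(my_data_t *md)
{
	mutex_destroy(&md->md_lock);
}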
diff --git a/modules/spl/Makefile.in b/modules/spl/Makefile.in
index ff283dfd6..bd2a5f9f2 100644
--- a/modules/spl/Makefile.in
+++ b/modules/spl/Makefile.in
@@ -22,6 +22,7 @@ spl-objs += spl-kobj.o
spl-objs += spl-module.o
spl-objs += spl-generic.o
spl-objs += spl-atomic.o
+spl-objs += spl-mutex.o
splmodule := spl.ko
splmoduledir := @kmoduledir@/kernel/lib/
diff --git a/modules/spl/spl-generic.c b/modules/spl/spl-generic.c
index 1aadb990e..99497dd51 100644
--- a/modules/spl/spl-generic.c
+++ b/modules/spl/spl-generic.c
@@ -2,6 +2,7 @@
#include <sys/vmsystm.h>
#include <sys/vnode.h>
#include <sys/kmem.h>
+#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/proc.h>
#include <linux/kmod.h>
@@ -99,21 +100,26 @@ static int __init spl_init(void)
if ((rc = kmem_init()))
GOTO(out , rc);
+ if ((rc = spl_mutex_init()))
+ GOTO(out2 , rc);
+
if ((rc = vn_init()))
- GOTO(out2, rc);
+ GOTO(out3, rc);
if ((rc = proc_init()))
- GOTO(out3, rc);
+ GOTO(out4, rc);
if ((rc = set_hostid()))
- GOTO(out4, rc = -EADDRNOTAVAIL);
+ GOTO(out5, rc = -EADDRNOTAVAIL);
printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION);
RETURN(rc);
-out4:
+out5:
proc_fini();
-out3:
+out4:
vn_fini();
+out3:
+ spl_mutex_fini();
out2:
kmem_fini();
out:
diff --git a/modules/spl/spl-mutex.c b/modules/spl/spl-mutex.c
new file mode 100644
index 000000000..06a8f316b
--- /dev/null
+++ b/modules/spl/spl-mutex.c
@@ -0,0 +1,256 @@
+#include <sys/mutex.h>
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_MUTEX
+
+/* Mutex implementation based on those found in Solaris. This means
+ * the MUTEX_DEFAULT type is an adaptive mutex. When calling
+ * mutex_enter() your process will spin waiting for the lock if it's
+ * likely the lock will be freed shortly. If it looks like the
+ * lock will be held for a longer time we schedule and sleep waiting
+ * for it. This determination is made by checking if the holder of
+ * the lock is currently running on a CPU or sleeping waiting to be
+ * scheduled. If the holder is currently running it's likely the
+ * lock will be dropped shortly.
+ *
+ * XXX: This is basically a rough implementation to see if this
+ * helps our performance. If it does, a more careful implementation
+ * should be done, perhaps in assembly.
+ */
+
+/* 0: Never spin when trying to acquire lock
+ * -1: Spin until acquired or holder yields without dropping lock
+ * 1-MAX_INT: Spin for N attempts before sleeping for lock
+ */
+int mutex_spin_max = 100;
+
+#ifdef DEBUG_MUTEX
+int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
+DEFINE_MUTEX(mutex_stats_lock);
+LIST_HEAD(mutex_stats_list);
+#endif
+
+void
+__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
+{
+ ASSERT(mp);
+ ASSERT(name);
+ ASSERT(ibc == NULL);
+ ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */
+
+ mp->km_magic = KM_MAGIC;
+ mp->km_owner = NULL;
+ mp->km_name = NULL;
+ mp->km_name_size = strlen(name) + 1;
+
+ switch (type) {
+ case MUTEX_DEFAULT:
+ mp->km_type = MUTEX_ADAPTIVE;
+ break;
+ case MUTEX_SPIN:
+ case MUTEX_ADAPTIVE:
+ mp->km_type = type;
+ break;
+ default:
+ SBUG();
+ }
+
+ /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
+ mp->km_sem = kmem_alloc(sizeof(struct semaphore), KM_SLEEP);
+ if (mp->km_sem == NULL)
+ return;
+
+ mp->km_name = kmem_alloc(mp->km_name_size, KM_SLEEP);
+ if (mp->km_name == NULL) {
+ kmem_free(mp->km_sem, sizeof(struct semaphore));
+ return;
+ }
+
+ sema_init(mp->km_sem, 1);
+ strcpy(mp->km_name, name);
+
+#ifdef DEBUG_MUTEX
+ mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, KM_SLEEP);
+ if (mp->km_stats == NULL) {
+ kmem_free(mp->km_name, mp->km_name_size);
+ kmem_free(mp->km_sem, sizeof(struct semaphore));
+ return;
+ }
+
+ mutex_lock(&mutex_stats_lock);
+ list_add_tail(&mp->km_list, &mutex_stats_list);
+ mutex_unlock(&mutex_stats_lock);
+#endif
+}
+EXPORT_SYMBOL(__spl_mutex_init);
+
+void
+__spl_mutex_destroy(kmutex_t *mp)
+{
+ ASSERT(mp);
+ ASSERT(mp->km_magic == KM_MAGIC);
+
+#ifdef DEBUG_MUTEX
+ mutex_lock(&mutex_stats_lock);
+ list_del_init(&mp->km_list);
+ mutex_unlock(&mutex_stats_lock);
+
+ kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
+#endif
+ kmem_free(mp->km_name, mp->km_name_size);
+ kmem_free(mp->km_sem, sizeof(struct semaphore));
+
+ memset(mp, KM_POISON, sizeof(*mp));
+}
+EXPORT_SYMBOL(__spl_mutex_destroy);
+
+/* Return 1 if we acquired the mutex, else zero. */
+int
+__mutex_tryenter(kmutex_t *mp)
+{
+ int rc;
+ ENTRY;
+
+ ASSERT(mp);
+ ASSERT(mp->km_magic == KM_MAGIC);
+ MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);
+
+ rc = down_trylock(mp->km_sem);
+ if (rc == 0) {
+ ASSERT(mp->km_owner == NULL);
+ mp->km_owner = current;
+ MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
+ }
+
+ RETURN(!rc);
+}
+EXPORT_SYMBOL(__mutex_tryenter);
+
+static void
+mutex_enter_adaptive(kmutex_t *mp)
+{
+ struct task_struct *owner;
+ int count = 0;
+
+ /* Lock is not held so we expect to acquire the lock */
+ if ((owner = mp->km_owner) == NULL) {
+ down(mp->km_sem);
+ MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
+ } else {
+ /* The lock is held by a currently running task which
+ * we expect will drop the lock before leaving the
+ * head of the runqueue. So the ideal thing to do
+ * is spin until we acquire the lock and avoid a
+ * context switch. However, it is also possible the
+ * task holding the lock yields the processor
+ * without dropping the lock. In that case we know
+ * it's going to be a while, so we stop spinning and go
+ * to sleep waiting for the lock to be available.
+ * This should strike the optimum balance between
+ * spinning and sleeping waiting for a lock.
+ */
+ while (task_curr(owner) && (count <= mutex_spin_max)) {
+ if (down_trylock(mp->km_sem) == 0) {
+ MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+ GOTO(out, count);
+ }
+ count++;
+ }
+
+ /* The lock is held by a sleeping task, so it's going to
+ * cost us at least one context switch. We might as
+ * well sleep and yield the processor to other tasks.
+ */
+ down(mp->km_sem);
+ MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
+ }
+out:
+ MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
+}
+
+void
+__mutex_enter(kmutex_t *mp)
+{
+ ENTRY;
+ ASSERT(mp);
+ ASSERT(mp->km_magic == KM_MAGIC);
+
+ switch (mp->km_type) {
+ case MUTEX_SPIN:
+ while (down_trylock(mp->km_sem));
+ MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+ MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+ break;
+ case MUTEX_ADAPTIVE:
+ mutex_enter_adaptive(mp);
+ break;
+ }
+
+ ASSERT(mp->km_owner == NULL);
+ mp->km_owner = current;
+
+ EXIT;
+}
+EXPORT_SYMBOL(__mutex_enter);
+
+void
+__mutex_exit(kmutex_t *mp)
+{
+ ENTRY;
+ ASSERT(mp);
+ ASSERT(mp->km_magic == KM_MAGIC);
+ ASSERT(mp->km_owner == current);
+ mp->km_owner = NULL;
+ up(mp->km_sem);
+ EXIT;
+}
+EXPORT_SYMBOL(__mutex_exit);
+
+/* Return 1 if mutex is held by current process, else zero. */
+int
+__mutex_owned(kmutex_t *mp)
+{
+ ENTRY;
+ ASSERT(mp);
+ ASSERT(mp->km_magic == KM_MAGIC);
+ RETURN(mp->km_owner == current);
+}
+EXPORT_SYMBOL(__mutex_owned);
+
+/* Return owner if mutex is owned, else NULL. */
+kthread_t *
+__spl_mutex_owner(kmutex_t *mp)
+{
+ ENTRY;
+ ASSERT(mp);
+ ASSERT(mp->km_magic == KM_MAGIC);
+ RETURN(mp->km_owner);
+}
+EXPORT_SYMBOL(__spl_mutex_owner);
+
+int
+spl_mutex_init(void)
+{
+ ENTRY;
+ RETURN(0);
+}
+
+void
+spl_mutex_fini(void)
+{
+ ENTRY;
+#ifdef DEBUG_MUTEX
+ ASSERT(list_empty(&mutex_stats_list));
+#endif
+ EXIT;
+}
+
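
To make the type and tryenter semantics above concrete, a short hedged sketch (the lock names are hypothetical): MUTEX_DEFAULT/MUTEX_ADAPTIVE spins only while the owner is on a CPU and sleeps otherwise, MUTEX_SPIN busy-waits in a down_trylock() loop, and mutex_tryenter() returns 1 on success and 0 if the lock is already held:

	kmutex_t io_lock;	/* adaptive: spin while owner runs, else sleep */
	kmutex_t poll_lock;	/* spin: busy-wait until acquired */

	mutex_init(&io_lock, "io_lock", MUTEX_DEFAULT, NULL);
	mutex_init(&poll_lock, "poll_lock", MUTEX_SPIN, NULL);

	if (mutex_tryenter(&io_lock)) {
		/* got the lock without blocking */
		mutex_exit(&io_lock);
	}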
diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c
index 94dd937a1..64423c186 100644
--- a/modules/spl/spl-proc.c
+++ b/modules/spl/spl-proc.c
@@ -3,8 +3,10 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/sysctl.h>
+#include <linux/seq_file.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
+#include <sys/mutex.h>
#include <sys/debug.h>
#include "config.h"
@@ -18,10 +20,17 @@ static struct ctl_table_header *spl_header = NULL;
static unsigned long table_min = 0;
static unsigned long table_max = ~0;
-#define CTL_SPL 0x87
+#define CTL_SPL 0x87
+#define CTL_SPL_DEBUG 0x88
+#define CTL_SPL_MUTEX 0x89
+#define CTL_SPL_KMEM 0x90
+
enum {
CTL_VERSION = 1, /* Version */
- CTL_DEBUG_SUBSYS, /* Debug subsystem */
+ CTL_HOSTID, /* Host id reported by /usr/bin/hostid */
+ CTL_HW_SERIAL, /* Hardware serial number from hostid */
+
+ CTL_DEBUG_SUBSYS, /* Debug subsystem */
CTL_DEBUG_MASK, /* Debug mask */
CTL_DEBUG_PRINTK, /* Force all messages to console */
CTL_DEBUG_MB, /* Debug buffer size */
@@ -31,19 +40,23 @@ enum {
CTL_DEBUG_PATH, /* Dump log location */
CTL_DEBUG_DUMP, /* Dump debug buffer to file */
CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */
- CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */
+ CTL_DEBUG_STACK_SIZE, /* Max observed stack size */
+
+ CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */
CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */
CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */
CTL_CONSOLE_BACKOFF, /* Delay increase factor */
- CTL_STACK_SIZE, /* Max observed stack size */
+
#ifdef DEBUG_KMEM
CTL_KMEM_KMEMUSED, /* Crrently alloc'd kmem bytes */
CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */
CTL_KMEM_VMEMUSED, /* Currently alloc'd vmem bytes */
CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */
#endif
- CTL_HOSTID, /* Host id reported by /usr/bin/hostid */
- CTL_HW_SERIAL, /* Hardware serial number from hostid */
+
+ CTL_MUTEX_STATS, /* Global mutex statistics */
+ CTL_MUTEX_STATS_PER, /* Per mutex statistics */
+ CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */
};
static int
@@ -368,21 +381,107 @@ proc_dohostid(struct ctl_table *table, int write, struct file *filp,
RETURN(rc);
}
-static struct ctl_table spl_table[] = {
- /* NB No .strategy entries have been provided since
- * sysctl(8) prefers to go via /proc for portability.
- */
- {
- .ctl_name = CTL_VERSION,
- .procname = "version",
- .data = spl_version,
- .maxlen = sizeof(spl_version),
- .mode = 0444,
- .proc_handler = &proc_dostring,
- },
+#ifdef DEBUG_MUTEX
+static void
+mutex_seq_show_headers(struct seq_file *f)
+{
+ seq_printf(f, "%-36s %-4s %-16s\t"
+ "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n",
+ "name", "type", "owner");
+}
+
+static int
+mutex_seq_show(struct seq_file *f, void *p)
+{
+ kmutex_t *mp = p;
+ char t = 'X';
+ int i;
+
+ ASSERT(mp->km_magic == KM_MAGIC);
+
+ switch (mp->km_type) {
+ case MUTEX_DEFAULT: t = 'D'; break;
+ case MUTEX_SPIN: t = 'S'; break;
+ case MUTEX_ADAPTIVE: t = 'A'; break;
+ default:
+ SBUG();
+ }
+ seq_printf(f, "%-36s %c ", mp->km_name, t);
+ if (mp->km_owner)
+ seq_printf(f, "%p\t", mp->km_owner);
+ else
+ seq_printf(f, "%-16s\t", "<not held>");
+
+ for (i = 0; i < MUTEX_STATS_SIZE; i++)
+ seq_printf(f, "%d%c", mp->km_stats[i],
+ (i + 1 == MUTEX_STATS_SIZE) ? '\n' : '\t');
+
+ return 0;
+}
+
+static void *
+mutex_seq_start(struct seq_file *f, loff_t *pos)
+{
+ struct list_head *p;
+ loff_t n = *pos;
+ ENTRY;
+
+ mutex_lock(&mutex_stats_lock);
+ if (!n)
+ mutex_seq_show_headers(f);
+
+ p = mutex_stats_list.next;
+ while (n--) {
+ p = p->next;
+ if (p == &mutex_stats_list)
+ RETURN(NULL);
+ }
+
+ RETURN(list_entry(p, kmutex_t, km_list));
+}
+
+static void *
+mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+ kmutex_t *mp = p;
+ ENTRY;
+
+ ++*pos;
+ RETURN((mp->km_list.next == &mutex_stats_list) ?
+ NULL : list_entry(mp->km_list.next, kmutex_t, km_list));
+}
+
+static void
+mutex_seq_stop(struct seq_file *f, void *v)
+{
+ mutex_unlock(&mutex_stats_lock);
+}
+
+static struct seq_operations mutex_seq_ops = {
+ .show = mutex_seq_show,
+ .start = mutex_seq_start,
+ .next = mutex_seq_next,
+ .stop = mutex_seq_stop,
+};
+
+static int
+proc_mutex_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &mutex_seq_ops);
+}
+
+static struct file_operations proc_mutex_operations = {
+ .open = proc_mutex_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+#endif /* DEBUG_MUTEX */
+
+static struct ctl_table spl_debug_table[] = {
{
.ctl_name = CTL_DEBUG_SUBSYS,
- .procname = "debug_subsystem",
+ .procname = "subsystem",
.data = &spl_debug_subsys,
.maxlen = sizeof(unsigned long),
.mode = 0644,
@@ -390,7 +489,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_MASK,
- .procname = "debug_mask",
+ .procname = "mask",
.data = &spl_debug_mask,
.maxlen = sizeof(unsigned long),
.mode = 0644,
@@ -398,7 +497,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_PRINTK,
- .procname = "debug_printk",
+ .procname = "printk",
.data = &spl_debug_printk,
.maxlen = sizeof(unsigned long),
.mode = 0644,
@@ -406,13 +505,13 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_MB,
- .procname = "debug_mb",
+ .procname = "mb",
.mode = 0644,
.proc_handler = &proc_debug_mb,
},
{
.ctl_name = CTL_DEBUG_BINARY,
- .procname = "debug_binary",
+ .procname = "binary",
.data = &spl_debug_binary,
.maxlen = sizeof(int),
.mode = 0644,
@@ -436,7 +535,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_PATH,
- .procname = "debug_path",
+ .procname = "path",
.data = spl_debug_file_path,
.maxlen = sizeof(spl_debug_file_path),
.mode = 0644,
@@ -444,7 +543,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_DUMP,
- .procname = "debug_dump",
+ .procname = "dump",
.mode = 0200,
.proc_handler = &proc_dump_kernel,
},
@@ -483,14 +582,40 @@ static struct ctl_table spl_table[] = {
.proc_handler = &proc_console_backoff,
},
{
- .ctl_name = CTL_STACK_SIZE,
+ .ctl_name = CTL_DEBUG_STACK_SIZE,
.procname = "stack_max",
.data = &spl_debug_stack,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = &proc_dointvec,
},
+ {0},
+};
+
+#ifdef DEBUG_MUTEX
+static struct ctl_table spl_mutex_table[] = {
+ {
+ .ctl_name = CTL_MUTEX_STATS,
+ .procname = "stats",
+ .data = &mutex_stats,
+ .maxlen = sizeof(int) * MUTEX_STATS_SIZE,
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_MUTEX_SPIN_MAX,
+ .procname = "spin_max",
+ .data = &mutex_spin_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {0},
+};
+#endif /* DEBUG_MUTEX */
+
#ifdef DEBUG_KMEM
+static struct ctl_table spl_kmem_table[] = {
{
.ctl_name = CTL_KMEM_KMEMUSED,
.procname = "kmem_used",
@@ -527,7 +652,22 @@ static struct ctl_table spl_table[] = {
.mode = 0444,
.proc_handler = &proc_doulongvec_minmax,
},
-#endif
+ {0},
+};
+#endif /* DEBUG_KMEM */
+
+static struct ctl_table spl_table[] = {
+ /* NB No .strategy entries have been provided since
+ * sysctl(8) prefers to go via /proc for portability.
+ */
+ {
+ .ctl_name = CTL_VERSION,
+ .procname = "version",
+ .data = spl_version,
+ .maxlen = sizeof(spl_version),
+ .mode = 0444,
+ .proc_handler = &proc_dostring,
+ },
{
.ctl_name = CTL_HOSTID,
.procname = "hostid",
@@ -544,10 +684,32 @@ static struct ctl_table spl_table[] = {
.mode = 0444,
.proc_handler = &proc_dostring,
},
+ {
+ .ctl_name = CTL_SPL_DEBUG,
+ .procname = "debug",
+ .mode = 0555,
+ .child = spl_debug_table,
+ },
+#ifdef DEBUG_MUTEX
+ {
+ .ctl_name = CTL_SPL_MUTEX,
+ .procname = "mutex",
+ .mode = 0555,
+ .child = spl_mutex_table,
+ },
+#endif
+#ifdef DEBUG_KMEM
+ {
+ .ctl_name = CTL_SPL_KMEM,
+ .procname = "kmem",
+ .mode = 0555,
+ .child = spl_kmem_table,
+ },
+#endif
{ 0 },
};
-static struct ctl_table spl_dir_table[] = {
+static struct ctl_table spl_dir[] = {
{
.ctl_name = CTL_SPL,
.procname = "spl",
@@ -563,9 +725,22 @@ proc_init(void)
ENTRY;
#ifdef CONFIG_SYSCTL
- spl_header = register_sysctl_table(spl_dir_table, 0);
+ spl_header = register_sysctl_table(spl_dir, 0);
if (spl_header == NULL)
RETURN(-EUNATCH);
+
+#ifdef DEBUG_MUTEX
+ {
+ struct proc_dir_entry *entry = create_proc_entry("mutex_stats",
+ 0444, NULL);
+ if (entry) {
+ entry->proc_fops = &proc_mutex_operations;
+ } else {
+ unregister_sysctl_table(spl_header);
+ RETURN(-EUNATCH);
+ }
+ }
+#endif /* DEBUG_MUTEX */
#endif
RETURN(0);
}
@@ -577,6 +752,7 @@ proc_fini(void)
#ifdef CONFIG_SYSCTL
ASSERT(spl_header != NULL);
+ remove_proc_entry("mutex_stats", NULL);
unregister_sysctl_table(spl_header);
#endif
EXIT;
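
The new proc plumbing exposes two user-visible interfaces when the module is built with DEBUG_MUTEX: the per-mutex counters through /proc/mutex_stats (registered with a NULL parent, so it lands at the top of /proc) and the spin tunable under the new sysctl hierarchy at /proc/sys/spl/mutex/spin_max. A minimal userspace sketch, assuming those paths and with error handling kept short:

#include <stdio.h>

int
main(void)
{
	char line[256];
	FILE *f;

	/* Dump the per-mutex statistics produced by the seq_file handlers. */
	f = fopen("/proc/mutex_stats", "r");
	if (f != NULL) {
		while (fgets(line, sizeof(line), f) != NULL)
			fputs(line, stdout);
		fclose(f);
	}

	/* Raise the adaptive-mutex spin limit from its default of 100. */
	f = fopen("/proc/sys/spl/mutex/spin_max", "w");
	if (f != NULL) {
		fprintf(f, "%d\n", 200);
		fclose(f);
	}

	return 0;
}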
diff --git a/modules/spl/spl-taskq.c b/modules/spl/spl-taskq.c
index ad9be695b..70deb0aea 100644
--- a/modules/spl/spl-taskq.c
+++ b/modules/spl/spl-taskq.c
@@ -106,7 +106,7 @@ task_done(taskq_t *tq, task_t *t)
t->t_id = 0;
t->t_func = NULL;
t->t_arg = NULL;
- list_add(&t->t_list, &tq->tq_free_list);
+ list_add_tail(&t->t_list, &tq->tq_free_list);
} else {
task_free(tq, t);
}
@@ -209,7 +209,7 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
spin_lock(&t->t_lock);
- list_add(&t->t_list, &tq->tq_pend_list);
+ list_add_tail(&t->t_list, &tq->tq_pend_list);
t->t_id = rc = tq->tq_next_id;
tq->tq_next_id++;
t->t_func = func;
@@ -282,7 +282,7 @@ taskq_thread(void *args)
if (!list_empty(&tq->tq_pend_list)) {
t = list_entry(tq->tq_pend_list.next, task_t, t_list);
list_del_init(&t->t_list);
- list_add(&t->t_list, &tq->tq_work_list);
+ list_add_tail(&t->t_list, &tq->tq_work_list);
tq->tq_nactive++;
spin_unlock_irq(&tq->tq_lock);
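
The taskq hunks switch list_add() to list_add_tail() so tasks are serviced in dispatch order: list_add() pushes at the head of the list (LIFO), while list_add_tail() appends (FIFO). A standalone kernel-list sketch of the difference, with a hypothetical item type:

#include <linux/list.h>

struct item {
	int id;
	struct list_head link;
};

static LIST_HEAD(pending);

static void
queue_item(struct item *it)
{
	/* FIFO: append so the oldest dispatch is picked up first, which is
	 * what the pend/work/free list handling above now relies on. */
	list_add_tail(&it->link, &pending);
}

static struct item *
next_item(void)
{
	if (list_empty(&pending))
		return NULL;
	return list_entry(pending.next, struct item, link);
}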