Diffstat (limited to 'module/spl')
-rw-r--r--  module/spl/spl-mutex.c  295
-rw-r--r--  module/spl/spl-proc.c   172
2 files changed, 38 insertions(+), 429 deletions(-)
diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c
index f0389f5d1..0af74571d 100644
--- a/module/spl/spl-mutex.c
+++ b/module/spl/spl-mutex.c
@@ -1,7 +1,7 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
- * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2009 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <[email protected]>,
@@ -32,277 +32,46 @@
#define DEBUG_SUBSYSTEM S_MUTEX
-/* Mutex implementation based on those found in Solaris. This means
- * they the MUTEX_DEFAULT type is an adaptive mutex. When calling
- * mutex_enter() your process will spin waiting for the lock if it's
- * likely the lock will be free'd shortly. If it looks like the
- * lock will be held for a longer time we schedule and sleep waiting
- * for it. This determination is made by checking if the holder of
- * the lock is currently running on cpu or sleeping waiting to be
- * scheduled. If the holder is currently running it's likely the
- * lock will be shortly dropped.
+/*
+ * While a standard mutex implementation has been available in the kernel
+ * for quite some time, it was not until the 2.6.29 and later kernels that
+ * adaptive mutexes were embraced and integrated with the scheduler. This
+ * brought a significant performance improvement, but just as importantly
+ * it added a lock owner to the generic mutex outside CONFIG_DEBUG_MUTEXES
+ * builds. This is critical for correctly supporting the mutex_owner()
+ * Solaris primitive. When the owner is available we use a pure Linux
+ * mutex implementation. When the owner is not available we still use
+ * Linux mutexes as a base, but also reserve space for an owner field
+ * right after the mutex structure.
*
- * XXX: This is basically a rough implementation to see if this
- * helps our performance. If it does a more careful implementation
- * should be done, perhaps in assembly.
+ * In the case when HAVE_MUTEX_OWNER is not defined, your code may
+ * still be able to leverage adaptive mutexes. As long as the task_curr()
+ * symbol is exported, this code will provide a poor man's adaptive mutex
+ * implementation. However, this is not required, and if the symbol is
+ * unavailable we provide a standard mutex.
*/
-/* 0: Never spin when trying to aquire lock
- * -1: Spin until aquired or holder yeilds without dropping lock
+#ifndef HAVE_MUTEX_OWNER
+#ifdef HAVE_TASK_CURR
+/*
+ * mutex_spin_max = { 0, -1, 1-MAX_INT }
+ * 0: Never spin when trying to acquire lock
+ * -1: Spin until acquired or holder yields without dropping lock
* 1-MAX_INT: Spin for N attempts before sleeping for lock
*/
int mutex_spin_max = 0;
-
-#ifdef DEBUG_MUTEX
-int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
-spinlock_t mutex_stats_lock;
-struct list_head mutex_stats_list;
-#endif
-
-int
-__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
-{
- int flags = KM_SLEEP;
-
- ASSERT(mp);
- ASSERT(name);
- ASSERT(ibc == NULL);
-
- mp->km_name = NULL;
- mp->km_name_size = strlen(name) + 1;
-
- switch (type) {
- case MUTEX_DEFAULT:
- mp->km_type = MUTEX_ADAPTIVE;
- break;
- case MUTEX_SPIN:
- case MUTEX_ADAPTIVE:
- mp->km_type = type;
- break;
- default:
- SBUG();
- }
-
- /* We may be called when there is a non-zero preempt_count or
- * interrupts are disabled is which case we must not sleep.
- */
- if (current_thread_info()->preempt_count || irqs_disabled())
- flags = KM_NOSLEEP;
-
- /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
- mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags);
- if (mp->km_sem == NULL)
- return -ENOMEM;
-
- mp->km_name = kmem_alloc(mp->km_name_size, flags);
- if (mp->km_name == NULL) {
- kmem_free(mp->km_sem, sizeof(struct semaphore));
- return -ENOMEM;
- }
-
- sema_init(mp->km_sem, 1);
- strncpy(mp->km_name, name, mp->km_name_size);
-
-#ifdef DEBUG_MUTEX
- mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags);
- if (mp->km_stats == NULL) {
- kmem_free(mp->km_name, mp->km_name_size);
- kmem_free(mp->km_sem, sizeof(struct semaphore));
- return -ENOMEM;
- }
-
- /* XXX - This appears to be a much more contended lock than I
- * would have expected. To run with this debugging enabled and
- * get reasonable performance we may need to be more clever and
- * do something like hash the mutex ptr on to one of several
- * lists to ease this single point of contention.
- */
- spin_lock(&mutex_stats_lock);
- list_add_tail(&mp->km_list, &mutex_stats_list);
- spin_unlock(&mutex_stats_lock);
-#endif
- mp->km_magic = KM_MAGIC;
- mp->km_owner = NULL;
-
- return 0;
-}
-EXPORT_SYMBOL(__spl_mutex_init);
-
-void
-__spl_mutex_destroy(kmutex_t *mp)
-{
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
-
-#ifdef DEBUG_MUTEX
- spin_lock(&mutex_stats_lock);
- list_del_init(&mp->km_list);
- spin_unlock(&mutex_stats_lock);
-
- kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
-#endif
- kmem_free(mp->km_name, mp->km_name_size);
- kmem_free(mp->km_sem, sizeof(struct semaphore));
-
- memset(mp, KM_POISON, sizeof(*mp));
-}
-EXPORT_SYMBOL(__spl_mutex_destroy);
-
-/* Return 1 if we acquired the mutex, else zero. */
-int
-__mutex_tryenter(kmutex_t *mp)
-{
- int rc;
- ENTRY;
-
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);
-
- rc = down_trylock(mp->km_sem);
- if (rc == 0) {
- ASSERT(mp->km_owner == NULL);
- mp->km_owner = current;
- MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
- }
-
- RETURN(!rc);
-}
-EXPORT_SYMBOL(__mutex_tryenter);
-
-#ifndef HAVE_TASK_CURR
-#define task_curr(owner) 0
-#endif
-
-
-static void
-mutex_enter_adaptive(kmutex_t *mp)
-{
- struct task_struct *owner;
- int count = 0;
-
- /* Lock is not held so we expect to aquire the lock */
- if ((owner = mp->km_owner) == NULL) {
- down(mp->km_sem);
- MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
- } else {
- /* The lock is held by a currently running task which
- * we expect will drop the lock before leaving the
- * head of the runqueue. So the ideal thing to do
- * is spin until we aquire the lock and avoid a
- * context switch. However it is also possible the
- * task holding the lock yields the processor with
- * out dropping lock. In which case, we know it's
- * going to be a while so we stop spinning and go
- * to sleep waiting for the lock to be available.
- * This should strike the optimum balance between
- * spinning and sleeping waiting for a lock.
- */
- while (task_curr(owner) && (count <= mutex_spin_max)) {
- if (down_trylock(mp->km_sem) == 0) {
- MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
- GOTO(out, count);
- }
- count++;
- }
-
- /* The lock is held by a sleeping task so it's going to
- * cost us minimally one context switch. We might as
- * well sleep and yield the processor to other tasks.
- */
- down(mp->km_sem);
- MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
- }
-out:
- MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
-}
-
-void
-__mutex_enter(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
-
- switch (mp->km_type) {
- case MUTEX_SPIN:
- while (down_trylock(mp->km_sem));
- MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
- MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
- break;
- case MUTEX_ADAPTIVE:
- mutex_enter_adaptive(mp);
- break;
- }
-
- ASSERT(mp->km_owner == NULL);
- mp->km_owner = current;
-
- EXIT;
-}
-EXPORT_SYMBOL(__mutex_enter);
-
-void
-__mutex_exit(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- ASSERT(mp->km_owner == current);
- mp->km_owner = NULL;
- up(mp->km_sem);
- EXIT;
-}
-EXPORT_SYMBOL(__mutex_exit);
-
-/* Return 1 if mutex is held by current process, else zero. */
-int
-__mutex_owned(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- RETURN(mp->km_owner == current);
-}
-EXPORT_SYMBOL(__mutex_owned);
-
-/* Return owner if mutex is owned, else NULL. */
-kthread_t *
-__spl_mutex_owner(kmutex_t *mp)
-{
- ENTRY;
- ASSERT(mp);
- ASSERT(mp->km_magic == KM_MAGIC);
- RETURN(mp->km_owner);
-}
-EXPORT_SYMBOL(__spl_mutex_owner);
+module_param(mutex_spin_max, int, 0644);
+MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to acquire lock");
int
-spl_mutex_init(void)
+spl_mutex_spin_max(void)
{
- ENTRY;
-#ifdef DEBUG_MUTEX
- spin_lock_init(&mutex_stats_lock);
- INIT_LIST_HEAD(&mutex_stats_list);
-#endif
- RETURN(0);
+ return mutex_spin_max;
}
+EXPORT_SYMBOL(spl_mutex_spin_max);
-void
-spl_mutex_fini(void)
-{
- ENTRY;
-#ifdef DEBUG_MUTEX
- ASSERT(list_empty(&mutex_stats_list));
-#endif
- EXIT;
-}
+#endif /* HAVE_TASK_CURR */
+#endif /* !HAVE_MUTEX_OWNER */
-module_param(mutex_spin_max, int, 0644);
-MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to aquire lock");
+int spl_mutex_init(void) { return 0; }
+void spl_mutex_fini(void) { }
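
Editor's note: the following is a minimal sketch of the design described in
the new header comment, not the verbatim SPL code. The kmutex_t field names
(km_mutex, km_owner) and the helper example_mutex_enter() are illustrative;
only struct mutex, mutex_lock(), mutex_trylock(), task_curr(), and current
are real kernel interfaces.

    #include <linux/mutex.h>
    #include <linux/sched.h>

    /* Owner slot reserved right after the mutex when the generic
     * struct mutex does not expose one (illustrative layout). */
    typedef struct {
            struct mutex            km_mutex;
    #ifndef HAVE_MUTEX_OWNER
            struct task_struct      *km_owner;
    #endif
    } kmutex_t;

    static inline void
    example_mutex_enter(kmutex_t *mp)
    {
    #if !defined(HAVE_MUTEX_OWNER) && defined(HAVE_TASK_CURR)
            int spin = 0;
            int max = spl_mutex_spin_max();

            /* Poor man's adaptive mutex: spin only while the holder is
             * on cpu. Per mutex_spin_max, 0 means never spin, -1 means
             * spin until acquired or the holder yields, and N means at
             * most N attempts before sleeping. */
            while (mp->km_owner && task_curr(mp->km_owner) &&
                (max == -1 || spin++ < max)) {
                    if (mutex_trylock(&mp->km_mutex)) {
                            mp->km_owner = current;
                            return;
                    }
            }
    #endif
            mutex_lock(&mp->km_mutex);
    #ifndef HAVE_MUTEX_OWNER
            mp->km_owner = current;
    #endif
    }

Since mutex_spin_max is registered with module_param(..., 0644), it should
also be tunable at runtime, typically via
/sys/module/spl/parameters/mutex_spin_max.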
diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c
index 5dd7884f6..690f2991e 100644
--- a/module/spl/spl-proc.c
+++ b/module/spl/spl-proc.c
@@ -41,12 +41,8 @@ static unsigned long table_max = ~0;
static struct ctl_table_header *spl_header = NULL;
#endif /* CONFIG_SYSCTL */
-#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
+#if defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
static struct proc_dir_entry *proc_spl = NULL;
-#ifdef DEBUG_MUTEX
-static struct proc_dir_entry *proc_spl_mutex = NULL;
-static struct proc_dir_entry *proc_spl_mutex_stats = NULL;
-#endif /* DEBUG_MUTEX */
#ifdef DEBUG_KMEM
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
@@ -54,7 +50,7 @@ static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
#ifdef DEBUG_KSTAT
struct proc_dir_entry *proc_spl_kstat = NULL;
#endif /* DEBUG_KSTAT */
-#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */
+#endif /* DEBUG_KMEM || DEBUG_KSTAT */
#ifdef HAVE_CTL_UNNUMBERED
@@ -105,10 +101,6 @@ struct proc_dir_entry *proc_spl_kstat = NULL;
#define CTL_KMEM_ALLOC_FAILED CTL_UNNUMBERED /* Cache allocations failed */
#endif
-#define CTL_MUTEX_STATS CTL_UNNUMBERED /* Global mutex statistics */
-#define CTL_MUTEX_STATS_PER CTL_UNNUMBERED /* Per mutex statistics */
-#define CTL_MUTEX_SPIN_MAX CTL_UNNUMBERED /* Max mutex spin iterations */
-
#else /* HAVE_CTL_UNNUMBERED */
enum {
@@ -159,10 +151,6 @@ enum {
CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */
CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */
#endif
-
- CTL_MUTEX_STATS, /* Global mutex statistics */
- CTL_MUTEX_STATS_PER, /* Per mutex statistics */
- CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */
};
#endif /* HAVE_CTL_UNNUMBERED */
@@ -589,103 +577,6 @@ proc_dofreemem(struct ctl_table *table, int write, struct file *filp,
RETURN(rc);
}
-#ifdef DEBUG_MUTEX
-static void
-mutex_seq_show_headers(struct seq_file *f)
-{
- seq_printf(f, "%-36s %-4s %-16s\t"
- "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n",
- "name", "type", "owner");
-}
-
-static int
-mutex_seq_show(struct seq_file *f, void *p)
-{
- kmutex_t *mp = p;
- char t = 'X';
- int i;
-
- ASSERT(mp->km_magic == KM_MAGIC);
-
- switch (mp->km_type) {
- case MUTEX_DEFAULT: t = 'D'; break;
- case MUTEX_SPIN: t = 'S'; break;
- case MUTEX_ADAPTIVE: t = 'A'; break;
- default:
- SBUG();
- }
- seq_printf(f, "%-36s %c ", mp->km_name, t);
- if (mp->km_owner)
- seq_printf(f, "%p\t", mp->km_owner);
- else
- seq_printf(f, "%-16s\t", "<not held>");
-
- for (i = 0; i < MUTEX_STATS_SIZE; i++)
- seq_printf(f, "%d%c", mp->km_stats[i],
- (i + 1 == MUTEX_STATS_SIZE) ? '\n' : '\t');
-
- return 0;
-}
-
-static void *
-mutex_seq_start(struct seq_file *f, loff_t *pos)
-{
- struct list_head *p;
- loff_t n = *pos;
- ENTRY;
-
- spin_lock(&mutex_stats_lock);
- if (!n)
- mutex_seq_show_headers(f);
-
- p = mutex_stats_list.next;
- while (n--) {
- p = p->next;
- if (p == &mutex_stats_list)
- RETURN(NULL);
- }
-
- RETURN(list_entry(p, kmutex_t, km_list));
-}
-
-static void *
-mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
- kmutex_t *mp = p;
- ENTRY;
-
- ++*pos;
- RETURN((mp->km_list.next == &mutex_stats_list) ?
- NULL : list_entry(mp->km_list.next, kmutex_t, km_list));
-}
-
-static void
-mutex_seq_stop(struct seq_file *f, void *v)
-{
- spin_unlock(&mutex_stats_lock);
-}
-
-static struct seq_operations mutex_seq_ops = {
- .show = mutex_seq_show,
- .start = mutex_seq_start,
- .next = mutex_seq_next,
- .stop = mutex_seq_stop,
-};
-
-static int
-proc_mutex_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &mutex_seq_ops);
-}
-
-static struct file_operations proc_mutex_operations = {
- .open = proc_mutex_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-#endif /* DEBUG_MUTEX */
-
#ifdef DEBUG_KMEM
static void
slab_seq_show_headers(struct seq_file *f)
@@ -968,28 +859,6 @@ static struct ctl_table spl_vm_table[] = {
{0},
};
-#ifdef DEBUG_MUTEX
-static struct ctl_table spl_mutex_table[] = {
- {
- .ctl_name = CTL_MUTEX_STATS,
- .procname = "stats",
- .data = &mutex_stats,
- .maxlen = sizeof(int) * MUTEX_STATS_SIZE,
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = CTL_MUTEX_SPIN_MAX,
- .procname = "spin_max",
- .data = &mutex_spin_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {0},
-};
-#endif /* DEBUG_MUTEX */
-
#ifdef DEBUG_KMEM
static struct ctl_table spl_kmem_table[] = {
{
@@ -1088,14 +957,6 @@ static struct ctl_table spl_table[] = {
.mode = 0555,
.child = spl_vm_table,
},
-#ifdef DEBUG_MUTEX
- {
- .ctl_name = CTL_SPL_MUTEX,
- .procname = "mutex",
- .mode = 0555,
- .child = spl_mutex_table,
- },
-#endif
#ifdef DEBUG_KMEM
{
.ctl_name = CTL_SPL_KMEM,
@@ -1180,24 +1041,11 @@ proc_init(void)
RETURN(-EUNATCH);
#endif /* CONFIG_SYSCTL */
-#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
+#if defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
proc_spl = proc_mkdir("spl", NULL);
if (proc_spl == NULL)
GOTO(out, rc = -EUNATCH);
-#ifdef DEBUG_MUTEX
- proc_spl_mutex = proc_mkdir("mutex", proc_spl);
- if (proc_spl_mutex == NULL)
- GOTO(out, rc = -EUNATCH);
-
- proc_spl_mutex_stats = create_proc_entry("stats_per", 0444,
- proc_spl_mutex);
- if (proc_spl_mutex_stats == NULL)
- GOTO(out, rc = -EUNATCH);
-
- proc_spl_mutex_stats->proc_fops = &proc_mutex_operations;
-#endif /* DEBUG_MUTEX */
-
#ifdef DEBUG_KMEM
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
if (proc_spl_kmem == NULL)
@@ -1223,16 +1071,12 @@ out:
remove_proc_entry("slab", proc_spl_kmem);
#endif
remove_proc_entry("kmem", proc_spl);
-#ifdef DEBUG_MUTEX
- remove_proc_entry("stats_per", proc_spl_mutex);
-#endif
- remove_proc_entry("mutex", proc_spl);
remove_proc_entry("spl", NULL);
#ifdef CONFIG_SYSCTL
spl_unregister_sysctl_table(spl_header);
#endif /* CONFIG_SYSCTL */
}
-#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */
+#endif /* DEBUG_KMEM || DEBUG_KSTAT */
RETURN(rc);
}
@@ -1242,18 +1086,14 @@ proc_fini(void)
{
ENTRY;
-#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
+#if defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
remove_proc_entry("kstat", proc_spl);
#ifdef DEBUG_KMEM
remove_proc_entry("slab", proc_spl_kmem);
#endif
remove_proc_entry("kmem", proc_spl);
-#ifdef DEBUG_MUTEX
- remove_proc_entry("stats_per", proc_spl_mutex);
-#endif
- remove_proc_entry("mutex", proc_spl);
remove_proc_entry("spl", NULL);
-#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */
+#endif /* DEBUG_KMEM || DEBUG_KSTAT */
#ifdef CONFIG_SYSCTL
ASSERT(spl_header != NULL);
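
Editor's note: regardless of which backing implementation is selected, the
consumer-facing Solaris primitives are unchanged. A brief usage sketch
(illustrative caller, not part of this diff; the signatures follow the
removed __spl_mutex_init()/__mutex_owned() implementations above):

    static kmutex_t example_lock;

    static void
    example_init(void)
    {
            mutex_init(&example_lock, "example_lock", MUTEX_DEFAULT, NULL);
    }

    static void
    example_work(void)
    {
            mutex_enter(&example_lock);
            ASSERT(mutex_owned(&example_lock)); /* requires a valid owner */
            /* ... critical section ... */
            mutex_exit(&example_lock);
    }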