author    Brian Behlendorf <[email protected]>    2009-09-25 14:47:01 -0700
committer Brian Behlendorf <[email protected]>    2009-09-25 14:47:01 -0700
commit    4d54fdee1d774ddaef381893434a3721067e2c56 (patch)
tree      7139adfd73794aec7103361539b30903a6500572 /include/sys
parent    d28db80fd0fd4fd63aec09037c44408e51a222d6 (diff)
Reimplement mutexes for Linux lock profiling/analysis
For a generic explanation of why mutexes needed to be reimplemented to work with the kernel lock profiling, see commits:

e811949a57044d60d12953c5c3b808a79a7d36ef and
d28db80fd0fd4fd63aec09037c44408e51a222d6

The specific changes made to the mutex implementation are as follows.

The Linux mutex structure is now directly embedded in the kmutex_t. This allows a kmutex_t to be cast directly to a mutex struct and passed straight to the Linux primitive. Just as with the rwlocks, it is critical that these functions be implemented as #defines to ensure the location information is preserved. The preprocessor can then do a direct replacement of the Solaris primitive with the Linux primitive.

Just as with the rwlocks, we need to track the lock owner. Here things get a little more interesting because, depending on your kernel version and how you've built your kernel, Linux may already do this for you. If you're running a 2.6.29 or newer kernel on an SMP system, the lock owner will be tracked. This was added to Linux to support adaptive mutexes; more on that shortly. Alternately, your kernel might track the lock owner if you've set CONFIG_DEBUG_MUTEXES in the kernel build. If neither of the above is true for your kernel, the kmutex_t type will include and track the lock owner itself to ensure correct behavior. This is all handled by a new autoconf check called SPL_AC_MUTEX_OWNER.

Concerning adaptive mutexes, these are a very recent development and they did not make it into either the latest FC11 or SLES11 kernels. Ideally, I'd love to see this kernel change appear in one of these distros because it does help performance. From Linux kernel commit 0d66bf6d3514b35eb6897629059443132992dbd7: "Testing with Ingo's test-mutex application... gave a 345% boost for VFS scalability on my testbox." However, if you don't want to backport this change yourself, you can still simply export the task_curr() symbol. The kmutex_t implementation will use this symbol, when it's available, to provide its own adaptive mutexes.

Finally, DEBUG_MUTEX support was removed, including the proc handlers. Now that we are cleanly integrated with the kernel lock profiling, all of this information and much more is available in debug kernel builds, so this code was redundant.

Updated mutexes validated on:
- SLES10   (ppc64)
- SLES11   (x86_64)
- CHAOS4.2 (x86_64)
- RHEL5.3  (x86_64)
- RHEL6    (x86_64)
- FC11     (x86_64)
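To make the new interface concrete, below is a minimal, illustrative sketch (not part of this commit) of how an SPL consumer would use the kmutex_t API declared in include/sys/mutex.h. The structure and function names (my_data_t, md_lock, my_data_set, etc.) are hypothetical; ASSERT is the SPL debug macro already relied upon by mutex.h itself, and the code assumes an SPL kernel-module build context.

#include <sys/types.h>
#include <sys/mutex.h>

/* Hypothetical structure whose fields are protected by a kmutex_t. */
typedef struct my_data {
        kmutex_t        md_lock;
        int             md_value;
} my_data_t;

static void
my_data_init(my_data_t *mdp)
{
        /* Only MUTEX_DEFAULT is supported; the name and ibc arguments are unused. */
        mutex_init(&mdp->md_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
my_data_set(my_data_t *mdp, int value)
{
        mutex_enter(&mdp->md_lock);             /* a #define, so the lock is attributed to this call site */
        ASSERT(MUTEX_HELD(&mdp->md_lock));      /* owner tracking makes this check possible */
        mdp->md_value = value;
        mutex_exit(&mdp->md_lock);              /* likewise a direct preprocessor replacement */
}

static void
my_data_fini(my_data_t *mdp)
{
        mutex_destroy(&mdp->md_lock);           /* the non-HAVE_MUTEX_OWNER path VERIFYs the lock is not held */
}

Because mutex_enter()/mutex_exit() are preprocessor replacements rather than wrapper functions, the kernel's lock profiling attributes any contention to my_data_set() itself rather than to a common SPL helper, which is the point of the reimplementation described above.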
Diffstat (limited to 'include/sys')
-rw-r--r--   include/sys/condvar.h      1
-rw-r--r--   include/sys/mutex.h      225
2 files changed, 158 insertions, 68 deletions
diff --git a/include/sys/condvar.h b/include/sys/condvar.h
index 40b6e4948..9a2e8b5a0 100644
--- a/include/sys/condvar.h
+++ b/include/sys/condvar.h
@@ -33,6 +33,7 @@ extern "C" {
#include <linux/module.h>
#include <linux/wait.h>
+#include <sys/kmem.h>
#include <sys/mutex.h>
/* The kcondvar_t struct is protected by mutex taken externally before
diff --git a/include/sys/mutex.h b/include/sys/mutex.h
index a26b2116a..49d17659d 100644
--- a/include/sys/mutex.h
+++ b/include/sys/mutex.h
@@ -1,7 +1,7 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
- * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2009 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <[email protected]>,
@@ -25,88 +25,177 @@
*/
#ifndef _SPL_MUTEX_H
-#define _SPL_MUTEX_H
+#define _SPL_MUTEX_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <linux/module.h>
-#include <linux/hardirq.h>
#include <sys/types.h>
-#include <sys/kmem.h>
+#include <linux/mutex.h>
+
+typedef enum {
+ MUTEX_DEFAULT = 0,
+ MUTEX_SPIN = 1,
+ MUTEX_ADAPTIVE = 2
+} kmutex_type_t;
-#define MUTEX_DEFAULT 0
-#define MUTEX_SPIN 1
-#define MUTEX_ADAPTIVE 2
+#ifdef HAVE_MUTEX_OWNER
-#define MUTEX_ENTER_TOTAL 0
-#define MUTEX_ENTER_NOT_HELD 1
-#define MUTEX_ENTER_SPIN 2
-#define MUTEX_ENTER_SLEEP 3
-#define MUTEX_TRYENTER_TOTAL 4
-#define MUTEX_TRYENTER_NOT_HELD 5
-#define MUTEX_STATS_SIZE 6
+typedef struct mutex kmutex_t;
-#define KM_MAGIC 0x42424242
-#define KM_POISON 0x84
+static inline kthread_t *
+mutex_owner(kmutex_t *mp)
+{
+ if (mp->owner)
+ return (mp->owner)->task;
+
+ return NULL;
+}
+#define mutex_owned(mp) (mutex_owner(mp) == current)
+#define MUTEX_HELD(mp) mutex_owned(mp)
+#undef mutex_init
+#define mutex_init(mp, name, type, ibc) \
+({ \
+ static struct lock_class_key __key; \
+ ASSERT(type == MUTEX_DEFAULT); \
+ \
+ __mutex_init((mp), #mp, &__key); \
+})
+/* #define mutex_destroy(mp) ((void)0) */
+#define mutex_tryenter(mp) mutex_trylock(mp)
+#define mutex_enter(mp) mutex_lock(mp)
+#define mutex_exit(mp) mutex_unlock(mp)
+
+#else /* HAVE_MUTEX_OWNER */
typedef struct {
- int32_t km_magic;
- int16_t km_type;
- int16_t km_name_size;
- char *km_name;
- struct task_struct *km_owner;
- struct semaphore *km_sem;
-#ifdef DEBUG_MUTEX
- int *km_stats;
- struct list_head km_list;
-#endif
+ struct mutex m_mutex;
+ kthread_t *m_owner;
} kmutex_t;
-extern int mutex_spin_max;
+#ifdef HAVE_TASK_CURR
+extern int spl_mutex_spin_max(void);
+#else /* HAVE_TASK_CURR */
+# define task_curr(owner) 0
+# define spl_mutex_spin_max() 0
+#endif /* HAVE_TASK_CURR */
-#ifdef DEBUG_MUTEX
-extern int mutex_stats[MUTEX_STATS_SIZE];
-extern spinlock_t mutex_stats_lock;
-extern struct list_head mutex_stats_list;
-#define MUTEX_STAT_INC(stats, stat) ((stats)[stat]++)
-#else
-#define MUTEX_STAT_INC(stats, stat)
-#endif
+#define MUTEX(mp) ((struct mutex *)(mp))
-int spl_mutex_init(void);
-void spl_mutex_fini(void);
+static inline kthread_t *
+spl_mutex_get_owner(kmutex_t *mp)
+{
+ return mp->m_owner;
+}
+
+static inline void
+spl_mutex_set_owner(kmutex_t *mp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&MUTEX(mp)->wait_lock, flags);
+ mp->m_owner = current;
+ spin_unlock_irqrestore(&MUTEX(mp)->wait_lock, flags);
+}
+
+static inline void
+spl_mutex_clear_owner(kmutex_t *mp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&MUTEX(mp)->wait_lock, flags);
+ mp->m_owner = NULL;
+ spin_unlock_irqrestore(&MUTEX(mp)->wait_lock, flags);
+}
+
+static inline kthread_t *
+mutex_owner(kmutex_t *mp)
+{
+ unsigned long flags;
+ kthread_t *owner;
+
+ spin_lock_irqsave(&MUTEX(mp)->wait_lock, flags);
+ owner = spl_mutex_get_owner(mp);
+ spin_unlock_irqrestore(&MUTEX(mp)->wait_lock, flags);
-extern int __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc);
-extern void __spl_mutex_destroy(kmutex_t *mp);
-extern int __mutex_tryenter(kmutex_t *mp);
-extern void __mutex_enter(kmutex_t *mp);
-extern void __mutex_exit(kmutex_t *mp);
-extern int __mutex_owned(kmutex_t *mp);
-extern kthread_t *__spl_mutex_owner(kmutex_t *mp);
+ return owner;
+}
+
+#define mutex_owned(mp) (mutex_owner(mp) == current)
+#define MUTEX_HELD(mp) mutex_owned(mp)
+/*
+ * The following functions must be a #define and not static inline.
+ * This ensures that the native Linux mutex functions (lock/unlock)
+ * will be correctly located in the user's code, which is important
+ * for the built-in kernel lock analysis tools.
+ */
#undef mutex_init
+#define mutex_init(mp, name, type, ibc) \
+({ \
+ static struct lock_class_key __key; \
+ ASSERT(type == MUTEX_DEFAULT); \
+ \
+ __mutex_init(MUTEX(mp), #mp, &__key); \
+ spl_mutex_clear_owner(mp); \
+})
+
#undef mutex_destroy
+#define mutex_destroy(mp) \
+({ \
+ VERIFY(!MUTEX_HELD(mp)); \
+})
-#define mutex_init(mp, name, type, ibc) \
-({ \
- /* May never fail or all subsequent mutex_* calls will ASSERT */\
- if ((name) == NULL) \
- while(__spl_mutex_init(mp, #mp, type, ibc)); \
- else \
- while(__spl_mutex_init(mp, name, type, ibc)); \
+#define mutex_tryenter(mp) \
+({ \
+ int _rc_; \
+ \
+ if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1) \
+ spl_mutex_set_owner(mp); \
+ \
+ _rc_; \
})
-#define mutex_destroy(mp) __spl_mutex_destroy(mp)
-#define mutex_tryenter(mp) __mutex_tryenter(mp)
-#define mutex_enter(mp) __mutex_enter(mp)
-#define mutex_exit(mp) __mutex_exit(mp)
-#define mutex_owned(mp) __mutex_owned(mp)
-#define mutex_owner(mp) __spl_mutex_owner(mp)
-#define MUTEX_HELD(mp) mutex_owned(mp)
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* _SPL_MUTEX_H */
+/*
+ * Adaptive mutexes assume that the lock may be held by a task running
+ * on a different CPU.  The expectation is that the task will drop the
+ * lock before leaving the head of the run queue.  So the ideal thing
+ * to do is spin until we acquire the lock and avoid a context switch.
+ * However, it is also possible the task holding the lock yields the
+ * processor without dropping the lock.  In this case, we know it's going
+ * to be a while so we stop spinning and go to sleep waiting for the
+ * lock to be available. This should strike the optimum balance
+ * between spinning and sleeping waiting for a lock.
+ */
+#define mutex_enter(mp) \
+({ \
+ kthread_t *_owner_; \
+ int _rc_, _count_; \
+ \
+ _rc_ = 0; \
+ _count_ = 0; \
+ _owner_ = mutex_owner(mp); \
+ \
+ while (_owner_ && task_curr(_owner_) && \
+ _count_ <= spl_mutex_spin_max()) { \
+ if ((_rc_ = mutex_trylock(MUTEX(mp)))) \
+ break; \
+ \
+ _count_++; \
+ } \
+ \
+ if (!_rc_) \
+ mutex_lock(MUTEX(mp)); \
+ \
+ spl_mutex_set_owner(mp); \
+})
+
+#define mutex_exit(mp) \
+({ \
+ spl_mutex_clear_owner(mp); \
+ mutex_unlock(MUTEX(mp)); \
+})
+
+#endif /* HAVE_MUTEX_OWNER */
+
+int spl_mutex_init(void);
+void spl_mutex_fini(void);
+
+#endif /* _SPL_MUTEX_H */