5 files changed, 201 insertions, 55 deletions
diff --git a/modules/spl/spl-kmem.c b/modules/spl/spl-kmem.c
index 69e9ca7c4..7384a79f2 100644
--- a/modules/spl/spl-kmem.c
+++ b/modules/spl/spl-kmem.c
@@ -17,12 +17,20 @@ atomic64_t vmem_alloc_used;
 unsigned long vmem_alloc_max = 0;
 int kmem_warning_flag = 1;
 
+spinlock_t kmem_lock;
+struct hlist_head kmem_table[KMEM_TABLE_SIZE];
+struct list_head kmem_list;
+
 EXPORT_SYMBOL(kmem_alloc_used);
 EXPORT_SYMBOL(kmem_alloc_max);
 EXPORT_SYMBOL(vmem_alloc_used);
 EXPORT_SYMBOL(vmem_alloc_max);
 EXPORT_SYMBOL(kmem_warning_flag);
 
+EXPORT_SYMBOL(kmem_lock);
+EXPORT_SYMBOL(kmem_table);
+EXPORT_SYMBOL(kmem_list);
+
 int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
 #else
 int kmem_set_warning(int flag) { return 0; }
@@ -44,7 +52,11 @@ EXPORT_SYMBOL(kmem_set_warning);
  * solaris style callback is needed.  There is some overhead in this
  * operation which isn't horibile but it needs to be kept in mind.
  */
+#define KCC_MAGIC                0x7a7a7a7a
+#define KCC_POISON               0x77
+
 typedef struct kmem_cache_cb {
+        int                 kcc_magic;
         struct list_head    kcc_list;
         kmem_cache_t *      kcc_cache;
         kmem_constructor_t  kcc_constructor;
@@ -52,14 +64,14 @@ typedef struct kmem_cache_cb {
         kmem_reclaim_t      kcc_reclaim;
         void *              kcc_private;
         void *              kcc_vmp;
+	atomic_t            kcc_ref;
 } kmem_cache_cb_t;
 
-
-static spinlock_t kmem_cache_cb_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(kmem_cache_cb_list);
+static struct rw_semaphore kmem_cache_cb_sem;
+static struct list_head kmem_cache_cb_list;
 static struct shrinker *kmem_cache_shrinker;
 
-/* Function must be called while holding the kmem_cache_cb_lock
+/* Function must be called while holding the kmem_cache_cb_sem
  * Because kmem_cache_t is an opaque datatype we're forced to
  * match pointers to identify specific cache entires.
  */
@@ -67,6 +79,9 @@ static kmem_cache_cb_t *
 kmem_cache_find_cache_cb(kmem_cache_t *cache)
 {
         kmem_cache_cb_t *kcc;
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
+#endif
 
         list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
 		if (cache == kcc->kcc_cache)
@@ -83,19 +98,20 @@ kmem_cache_add_cache_cb(kmem_cache_t *cache,
                         void *priv, void *vmp)
 {
         kmem_cache_cb_t *kcc;
-	unsigned long flags;
 
         kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
         if (kcc) {
+		kcc->kcc_magic = KCC_MAGIC;
 		kcc->kcc_cache = cache;
                 kcc->kcc_constructor = constructor;
                 kcc->kcc_destructor = destructor;
                 kcc->kcc_reclaim = reclaim;
                 kcc->kcc_private = priv;
                 kcc->kcc_vmp = vmp;
-		spin_lock_irqsave(&kmem_cache_cb_lock, flags);
+		atomic_set(&kcc->kcc_ref, 0);
+		down_write(&kmem_cache_cb_sem);
                 list_add(&kcc->kcc_list, &kmem_cache_cb_list);
-		spin_unlock_irqrestore(&kmem_cache_cb_lock, flags);
+		up_write(&kmem_cache_cb_sem);
         }
 
         return kcc;
@@ -104,14 +120,15 @@ kmem_cache_add_cache_cb(kmem_cache_t *cache,
 static void
 kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&kmem_cache_cb_lock, flags);
+        down_write(&kmem_cache_cb_sem);
+	ASSERT(atomic_read(&kcc->kcc_ref) == 0);
         list_del(&kcc->kcc_list);
-	spin_unlock_irqrestore(&kmem_cache_cb_lock, flags);
+        up_write(&kmem_cache_cb_sem);
 
-       if (kcc)
-              kfree(kcc);
+        if (kcc){
+		memset(kcc, KCC_POISON, sizeof(*kcc));
+                kfree(kcc);
+	}
 }
 
 static void
@@ -119,23 +136,36 @@ kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long fla
 {
         kmem_cache_cb_t *kcc;
 	kmem_constructor_t constructor;
-	unsigned long irqflags;
 	void *private;
 
-	spin_lock_irqsave(&kmem_cache_cb_lock, irqflags);
+	/* Ensure constructor verifies are not passed to the registered
+	 * constructors.  This may not be safe due to the Solaris constructor
+	 * not being aware of how to handle the SLAB_CTOR_VERIFY flag
+	 */
+	if (flags & SLAB_CTOR_VERIFY)
+		return;
+
+	/* We can be called with interrupts disabled so it is critical that
+	 * this function and the registered constructor never sleep.
+	 */
+        while (!down_read_trylock(&kmem_cache_cb_sem));
 
         /* Callback list must be in sync with linux slab caches */
         kcc = kmem_cache_find_cache_cb(cache);
         ASSERT(kcc);
+	ASSERT(kcc->kcc_magic == KCC_MAGIC);
+	atomic_inc(&kcc->kcc_ref);
 
 	constructor = kcc->kcc_constructor;
 	private = kcc->kcc_private;
 
-	spin_unlock_irqrestore(&kmem_cache_cb_lock, irqflags);
+        up_read(&kmem_cache_cb_sem);
 
 	if (constructor)
 		constructor(ptr, private, (int)flags);
 
+	atomic_dec(&kcc->kcc_ref);
+
 	/* Linux constructor has no return code, silently eat it */
 }
 
@@ -144,23 +174,29 @@ kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flag
 {
         kmem_cache_cb_t *kcc;
         kmem_destructor_t destructor;
-	unsigned long irqflags;
 	void *private;
 
-	spin_lock_irqsave(&kmem_cache_cb_lock, irqflags);
+	/* We can be called with interrupts disabled so it is critical that
+	 * this function and the registered constructor never sleep.
+	 */
+        while (!down_read_trylock(&kmem_cache_cb_sem));
 
         /* Callback list must be in sync with linux slab caches */
         kcc = kmem_cache_find_cache_cb(cache);
 	ASSERT(kcc);
+	ASSERT(kcc->kcc_magic == KCC_MAGIC);
+	atomic_inc(&kcc->kcc_ref);
 
 	destructor = kcc->kcc_destructor;
 	private = kcc->kcc_private;
 
-	spin_unlock_irqrestore(&kmem_cache_cb_lock, irqflags);
+        up_read(&kmem_cache_cb_sem);
 
 	/* Solaris destructor takes no flags, silently eat them */
 	if (destructor)
 		destructor(ptr, private);
+
+	atomic_dec(&kcc->kcc_ref);
 }
 
 /* XXX - Arguments are ignored */
@@ -168,7 +204,6 @@ static int
 kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
 {
         kmem_cache_cb_t *kcc;
-	unsigned long flags;
         int total = 0;
 
 	/* Under linux a shrinker is not tightly coupled with a slab
@@ -178,9 +213,23 @@ kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
 	 * function in the shim layer for all slab caches.  And we always
 	 * attempt to shrink all caches when this generic shrinker is called.
 	 */
-	spin_lock_irqsave(&kmem_cache_cb_lock, flags);
+        down_read(&kmem_cache_cb_sem);
 
         list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
+	        ASSERT(kcc);
+                ASSERT(kcc->kcc_magic == KCC_MAGIC);
+
+		/* Take a reference on the cache in question.  If that
+		 * cache is contended simply skip it, it may already be
+		 * in the process of a reclaim or the ctor/dtor may be
+		 * running in either case it's best to skip it.
+		 */
+	        atomic_inc(&kcc->kcc_ref);
+		if (atomic_read(&kcc->kcc_ref) > 1) {
+	                atomic_dec(&kcc->kcc_ref);
+			continue;
+		}
+
 	        /* Under linux the desired number and gfp type of objects
 		 * is passed to the reclaiming function as a sugested reclaim
 		 * target.  I do not pass these args on because reclaim
@@ -190,6 +239,7 @@ kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
 		if (kcc->kcc_reclaim)
                         kcc->kcc_reclaim(kcc->kcc_private);
 
+	        atomic_dec(&kcc->kcc_ref);
 	        total += 1;
         }
 
@@ -199,7 +249,8 @@ kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
 	 * was registered with the generic shrinker.  This should fake out
 	 * the linux VM when it attempts to shrink caches.
 	 */
-	spin_unlock_irqrestore(&kmem_cache_cb_lock, flags);
+        up_read(&kmem_cache_cb_sem);
+
 	return total;
 }
 
@@ -238,18 +289,18 @@ __kmem_cache_create(char *name, size_t size, size_t align,
                 RETURN(NULL);
 
         /* Register shared shrinker function on initial cache create */
-	spin_lock(&kmem_cache_cb_lock);
+        down_read(&kmem_cache_cb_sem);
 	if (list_empty(&kmem_cache_cb_list)) {
                 kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                  kmem_cache_generic_shrinker);
                 if (kmem_cache_shrinker == NULL) {
                         kmem_cache_destroy(cache);
-			spin_unlock(&kmem_cache_cb_lock);
+                        up_read(&kmem_cache_cb_sem);
                         RETURN(NULL);
                 }
 
         }
-	spin_unlock(&kmem_cache_cb_lock);
+        up_read(&kmem_cache_cb_sem);
 
         kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                       reclaim, priv, vmp);
@@ -272,27 +323,31 @@ __kmem_cache_destroy(kmem_cache_t *cache)
 {
         kmem_cache_cb_t *kcc;
 	char *name;
-	unsigned long flags;
 	int rc;
 	ENTRY;
 
-	spin_lock_irqsave(&kmem_cache_cb_lock, flags);
+        down_read(&kmem_cache_cb_sem);
         kcc = kmem_cache_find_cache_cb(cache);
-	spin_unlock_irqrestore(&kmem_cache_cb_lock, flags);
-        if (kcc == NULL)
+        if (kcc == NULL) {
+                up_read(&kmem_cache_cb_sem);
                 RETURN(-EINVAL);
+        }
+	atomic_inc(&kcc->kcc_ref);
+        up_read(&kmem_cache_cb_sem);
 
 	name = (char *)kmem_cache_name(cache);
         rc = kmem_cache_destroy(cache);
+
+	atomic_dec(&kcc->kcc_ref);
         kmem_cache_remove_cache_cb(kcc);
 	kfree(name);
 
 	/* Unregister generic shrinker on removal of all caches */
-	spin_lock_irqsave(&kmem_cache_cb_lock, flags);
+        down_read(&kmem_cache_cb_sem);
 	if (list_empty(&kmem_cache_cb_list))
                 remove_shrinker(kmem_cache_shrinker);
 
-	spin_unlock_irqrestore(&kmem_cache_cb_lock, flags);
+        up_read(&kmem_cache_cb_sem);
 	RETURN(rc);
 }
 EXPORT_SYMBOL(__kmem_cache_destroy);
@@ -312,25 +367,101 @@ int
 kmem_init(void)
 {
         ENTRY;
+
+	init_rwsem(&kmem_cache_cb_sem);
+        INIT_LIST_HEAD(&kmem_cache_cb_list);
 #ifdef DEBUG_KMEM
-	atomic64_set(&kmem_alloc_used, 0);
-	atomic64_set(&vmem_alloc_used, 0);
+        {
+                int i;
+		atomic64_set(&kmem_alloc_used, 0);
+		atomic64_set(&vmem_alloc_used, 0);
+
+                spin_lock_init(&kmem_lock);
+                INIT_LIST_HEAD(&kmem_list);
+
+                for (i = 0; i < KMEM_TABLE_SIZE; i++)
+                        INIT_HLIST_HEAD(&kmem_table[i]);
+        }
 #endif
 	RETURN(0);
 }
 
+static char *sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
+{
+        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
+	int i, flag = 1;
+
+	ASSERT(str != NULL && len >= 17);
+        memset(str, 0, len);
+
+	/* Check for a fully printable string, and while we are at
+         * it place the printable characters in the passed buffer. */
+	for (i = 0; i < size; i++) {
+                str[i] = ((char *)(kd->kd_addr))[i];
+                if (isprint(str[i])) {
+                        continue;
+                } else {
+                        /* Minimum number of printable characters found
+                         * to make it worthwhile to print this as ascii. */
+                        if (i > min)
+                                break;
+
+                         flag = 0;
+                         break;
+                }
+
+	}
+
+	if (!flag) {
+		sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
+		        *((uint8_t *)kd->kd_addr),
+		        *((uint8_t *)kd->kd_addr + 2),
+		        *((uint8_t *)kd->kd_addr + 4),
+		        *((uint8_t *)kd->kd_addr + 6),
+		        *((uint8_t *)kd->kd_addr + 8),
+		        *((uint8_t *)kd->kd_addr + 10),
+		        *((uint8_t *)kd->kd_addr + 12),
+		        *((uint8_t *)kd->kd_addr + 14));
+	}
+
+	return str;
+}
+
 void
 kmem_fini(void)
 {
 	ENTRY;
 #ifdef DEBUG_KMEM
-        if (atomic64_read(&kmem_alloc_used) != 0)
-                CWARN("kmem leaked %ld/%ld bytes\n",
-                       atomic_read(&kmem_alloc_used), kmem_alloc_max);
-
-        if (atomic64_read(&vmem_alloc_used) != 0)
-                CWARN("vmem leaked %ld/%ld bytes\n",
-                       atomic_read(&vmem_alloc_used), vmem_alloc_max);
+        {
+                unsigned long flags;
+                kmem_debug_t *kd;
+		char str[17];
+
+                if (atomic64_read(&kmem_alloc_used) != 0)
+                        CWARN("kmem leaked %ld/%ld bytes\n",
+                               atomic_read(&kmem_alloc_used), kmem_alloc_max);
+
+		/* Display all unreclaimed memory addresses, including the
+		 * allocation size and the first few bytes of what's located
+		 * at that address to aid in debugging.  Performance is not
+		 * a serious concern here since it is module unload time. */
+                spin_lock_irqsave(&kmem_lock, flags);
+                if (!list_empty(&kmem_list))
+                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
+			       "address", "size", "data", "func", "line");
+
+                list_for_each_entry(kd, &kmem_list, kd_list) {
+                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
+		 	       kd->kd_addr, kd->kd_size,
+                               sprintf_addr(kd, str, 17, 8),
+			       kd->kd_func, kd->kd_line);
+		}
+                spin_unlock_irqrestore(&kmem_lock, flags);
+
+                if (atomic64_read(&vmem_alloc_used) != 0)
+                        CWARN("vmem leaked %ld/%ld bytes\n",
+                               atomic_read(&vmem_alloc_used), vmem_alloc_max);
+        }
 #endif
 	EXIT;
 }
diff --git a/modules/spl/spl-mutex.c b/modules/spl/spl-mutex.c
index 06a8f316b..5949283e6 100644
--- a/modules/spl/spl-mutex.c
+++ b/modules/spl/spl-mutex.c
@@ -29,13 +29,15 @@ int mutex_spin_max = 100;
 
 #ifdef DEBUG_MUTEX
 int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
-DEFINE_MUTEX(mutex_stats_lock);
+struct rw_semaphore mutex_stats_sem;
 LIST_HEAD(mutex_stats_list);
 #endif
 
 void
 __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
 {
+	int flags = KM_SLEEP;
+
 	ASSERT(mp);
 	ASSERT(name);
 	ASSERT(ibc == NULL);
@@ -58,12 +60,18 @@ __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
 			SBUG();
 	}
 
+	/* We may be called when there is a non-zero preempt_count or
+	 * interrupts are disabled is which case we must not sleep.
+	 */
+        if (current_thread_info()->preempt_count || irqs_disabled())
+		flags = KM_NOSLEEP;
+
 	/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
-	mp->km_sem = kmem_alloc(sizeof(struct semaphore), KM_SLEEP);
+	mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags);
 	if (mp->km_sem == NULL)
 		return;
 
-	mp->km_name = kmem_alloc(mp->km_name_size, KM_SLEEP);
+	mp->km_name = kmem_alloc(mp->km_name_size, flags);
 	if (mp->km_name == NULL) {
 		kmem_free(mp->km_sem, sizeof(struct semaphore));
 		return;
@@ -73,16 +81,19 @@ __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
 	strcpy(mp->km_name, name);
 
 #ifdef DEBUG_MUTEX
-	mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, KM_SLEEP);
+	mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags);
         if (mp->km_stats == NULL) {
 		kmem_free(mp->km_name, mp->km_name_size);
 		kmem_free(mp->km_sem, sizeof(struct semaphore));
 		return;
 	}
 
-	mutex_lock(&mutex_stats_lock);
+	/* We may be called when there is a non-zero preempt_count or
+	 * interrupts are disabled is which case we must not sleep.
+	 */
+	while (!down_write_trylock(&mutex_stats_sem));
 	list_add_tail(&mp->km_list, &mutex_stats_list);
-	mutex_unlock(&mutex_stats_lock);
+	up_write(&mutex_stats_sem);
 #endif
 }
 EXPORT_SYMBOL(__spl_mutex_init);
@@ -94,9 +105,12 @@ __spl_mutex_destroy(kmutex_t *mp)
 	ASSERT(mp->km_magic == KM_MAGIC);
 
 #ifdef DEBUG_MUTEX
-	mutex_lock(&mutex_stats_lock);
+	/* We may be called when there is a non-zero preempt_count or
+	 * interrupts are disabled is which case we must not sleep.
+	 */
+	while (!down_write_trylock(&mutex_stats_sem));
 	list_del_init(&mp->km_list);
-	mutex_unlock(&mutex_stats_lock);
+	up_write(&mutex_stats_sem);
 
 	kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
 #endif
diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c
index 64423c186..61fb38532 100644
--- a/modules/spl/spl-proc.c
+++ b/modules/spl/spl-proc.c
@@ -426,7 +426,7 @@ mutex_seq_start(struct seq_file *f, loff_t *pos)
         loff_t n = *pos;
         ENTRY;
 
-        mutex_lock(&mutex_stats_lock);
+        down_read(&mutex_stats_sem);
         if (!n)
                 mutex_seq_show_headers(f);
 
@@ -454,7 +454,7 @@ mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
 static void
 mutex_seq_stop(struct seq_file *f, void *v)
 {
-        mutex_unlock(&mutex_stats_lock);
+        up_read(&mutex_stats_sem);
 }
 
 static struct seq_operations mutex_seq_ops = {
diff --git a/modules/splat/splat-internal.h b/modules/splat/splat-internal.h
index 525df5902..25b2dd78c 100644
--- a/modules/splat/splat-internal.h
+++ b/modules/splat/splat-internal.h
@@ -138,7 +138,7 @@ typedef struct splat_info {
 #define sym2str(sym)			(char *)(#sym)
 
 #define splat_print(file, format, args...)				\
-({	splat_info_t *_info_ = (splat_info_t *)file->private_data;		\
+({	splat_info_t *_info_ = (splat_info_t *)file->private_data;	\
 	int _rc_;							\
 									\
 	ASSERT(_info_);							\
@@ -160,7 +160,7 @@ typedef struct splat_info {
 	_rc_;								\
 })
 
-#define splat_vprint(file, test, format, args...)				\
+#define splat_vprint(file, test, format, args...)			\
 	splat_print(file, "%*s: " format, SPLAT_NAME_SIZE, test, args)
 
 splat_subsystem_t * splat_condvar_init(void);
diff --git a/modules/splat/splat-kmem.c b/modules/splat/splat-kmem.c
index f9f6964de..9491a081a 100644
--- a/modules/splat/splat-kmem.c
+++ b/modules/splat/splat-kmem.c
@@ -148,7 +148,7 @@ splat_kmem_test34_constructor(void *ptr, void *priv, int flags)
 		kcd->kcd_flag = 1;
 
 		if (kcp) {
-	 		kcd->kcd_magic = kcp->kcp_magic;
+			kcd->kcd_magic = kcp->kcp_magic;
 			kcp->kcp_count++;
 		}
 	}
@@ -258,8 +258,8 @@ splat_kmem_test4_reclaim(void *priv)
 	int i;
 
 	splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST4_NAME,
-                   "Reaping %d objects from '%s'\n",
-	           SPLAT_KMEM_OBJ_RECLAIM, SPLAT_KMEM_CACHE_NAME);
+                     "Reaping %d objects from '%s'\n",
+	             SPLAT_KMEM_OBJ_RECLAIM, SPLAT_KMEM_CACHE_NAME);
 	for (i = 0; i < SPLAT_KMEM_OBJ_RECLAIM; i++) {
 		if (kcp->kcp_kcd[i]) {
 			kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]);
@@ -306,6 +306,7 @@ splat_kmem_test4(struct file *file, void *arg)
 	}
 
 	max = kcp.kcp_count;
+	ASSERT(max > 0);
 
 	/* Force shrinker to run */
 	kmem_reap();