author	Brian Behlendorf <[email protected]>	2011-02-04 14:38:11 -0800
committer	Brian Behlendorf <[email protected]>	2011-02-10 09:27:21 -0800
commit	8926ab7a50d60d855e4d49d2ed7bdef49dd56149 (patch)
tree	d87a6ca7624381f1a8c500ba7be772a3ba0499ca
parent	c0d35759c5ab1abaa6b72062cc4ecd0d86628de8 (diff)
Move cv_destroy() outside zp->z_range_lock()
With the recent SPL change (d599e4fa), which forces cv_destroy() to block
until all waiters have been woken, it is no longer safe to call cv_destroy()
under the zp->z_range_lock because that mutex is used as the condition
variable mutex. If there are waiters, cv_destroy() will block until they wake
up and acquire the mutex; however, they can never acquire it because
cv_destroy() will not return and allow its caller to drop the lock. Deadlock.

To avoid this, cv_destroy() is now run asynchronously in a taskq. This solves
two problems:

1) It is no longer run under the zp->z_range_lock, so there is no deadlock.

2) Since cv_destroy() may now block, we don't want it slowing down
   zfs_range_unlock() and throttling the system.

This was not as much of an issue under OpenSolaris because their cv_destroy()
implementation does not do anything. They do, however, risk a bad paging
request if cv_destroy() returns, the memory holding the condition variable is
freed, and only then do the waiters wake up and try to reference it. It's a
very small, unlikely race, but it is possible.
-rw-r--r--	module/zfs/zfs_rlock.c	59
1 file changed, 36 insertions, 23 deletions
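
The deadlock reduces to destroying a condition variable while still holding
the mutex its waiters must reacquire. The sketch below condenses the broken
and fixed unlock paths into two helpers for illustration only; the names
range_unlock_deadlocks() and range_unlock_deferred() are hypothetical and do
not exist in the tree, while the SPL primitives and the rl_t/znode_t fields
are the ones already used by zfs_rlock.c.

/*
 * Illustration only, not part of the patch: the broken pattern destroys the
 * CV while zp->z_range_lock (the mutex the waiters sleep on) is still held.
 */
static void
range_unlock_deadlocks(znode_t *zp, rl_t *rl)
{
	mutex_enter(&zp->z_range_lock);
	avl_remove(&zp->z_range_avl, rl);
	if (rl->r_write_wanted) {
		cv_broadcast(&rl->r_wr_cv);
		/*
		 * With SPL change d599e4fa, cv_destroy() blocks until every
		 * waiter has woken.  The waiters need zp->z_range_lock to
		 * return from cv_wait(), but it is held here: deadlock.
		 */
		cv_destroy(&rl->r_wr_cv);
	}
	mutex_exit(&zp->z_range_lock);	/* never reached if anyone was waiting */
	kmem_free(rl, sizeof (rl_t));
}

/*
 * Illustration only: the fixed pattern wakes waiters after dropping the lock
 * and defers the potentially blocking cv_destroy()/kmem_free() to a taskq,
 * which is what the zfs_range_free() helper added by this patch does.
 */
static void
range_unlock_deferred(znode_t *zp, rl_t *rl)
{
	mutex_enter(&zp->z_range_lock);
	avl_remove(&zp->z_range_avl, rl);
	mutex_exit(&zp->z_range_lock);

	if (rl->r_write_wanted)
		cv_broadcast(&rl->r_wr_cv);

	taskq_dispatch(system_taskq, zfs_range_free, rl, 0);
}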
diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c
index 9362fb4e8..26ad58de8 100644
--- a/module/zfs/zfs_rlock.c
+++ b/module/zfs/zfs_rlock.c
@@ -453,6 +453,20 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
 	return (new);
 }
 
+static void
+zfs_range_free(void *arg)
+{
+	rl_t *rl = arg;
+
+	if (rl->r_write_wanted)
+		cv_destroy(&rl->r_wr_cv);
+
+	if (rl->r_read_wanted)
+		cv_destroy(&rl->r_rd_cv);
+
+	kmem_free(rl, sizeof (rl_t));
+}
+
 /*
  * Unlock a reader lock
  */
@@ -472,14 +486,14 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
 	 */
 	if (remove->r_cnt == 1) {
 		avl_remove(tree, remove);
-		if (remove->r_write_wanted) {
+		mutex_exit(&zp->z_range_lock);
+		if (remove->r_write_wanted)
 			cv_broadcast(&remove->r_wr_cv);
-			cv_destroy(&remove->r_wr_cv);
-		}
-		if (remove->r_read_wanted) {
+
+		if (remove->r_read_wanted)
 			cv_broadcast(&remove->r_rd_cv);
-			cv_destroy(&remove->r_rd_cv);
-		}
+
+		taskq_dispatch(system_taskq, zfs_range_free, remove, 0);
 	} else {
 		ASSERT3U(remove->r_cnt, ==, 0);
 		ASSERT3U(remove->r_write_wanted, ==, 0);
@@ -505,19 +519,21 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
 			rl->r_cnt--;
 			if (rl->r_cnt == 0) {
 				avl_remove(tree, rl);
-				if (rl->r_write_wanted) {
+
+				if (rl->r_write_wanted)
 					cv_broadcast(&rl->r_wr_cv);
-					cv_destroy(&rl->r_wr_cv);
-				}
-				if (rl->r_read_wanted) {
+
+				if (rl->r_read_wanted)
 					cv_broadcast(&rl->r_rd_cv);
-					cv_destroy(&rl->r_rd_cv);
-				}
-				kmem_free(rl, sizeof (rl_t));
+
+				taskq_dispatch(system_taskq,
+				    zfs_range_free, rl, 0);
 			}
 		}
+
+		mutex_exit(&zp->z_range_lock);
+		kmem_free(remove, sizeof (rl_t));
 	}
-	kmem_free(remove, sizeof (rl_t));
 }
 
 /*
@@ -537,22 +553,19 @@ zfs_range_unlock(rl_t *rl)
 		/* writer locks can't be shared or split */
 		avl_remove(&zp->z_range_avl, rl);
 		mutex_exit(&zp->z_range_lock);
-		if (rl->r_write_wanted) {
+		if (rl->r_write_wanted)
 			cv_broadcast(&rl->r_wr_cv);
-			cv_destroy(&rl->r_wr_cv);
-		}
-		if (rl->r_read_wanted) {
+
+		if (rl->r_read_wanted)
 			cv_broadcast(&rl->r_rd_cv);
-			cv_destroy(&rl->r_rd_cv);
-		}
-		kmem_free(rl, sizeof (rl_t));
+
+		taskq_dispatch(system_taskq, zfs_range_free, rl, 0);
 	} else {
 		/*
 		 * lock may be shared, let zfs_range_unlock_reader()
-		 * release the lock and free the rl_t
+		 * release the zp->z_range_lock lock and free the rl_t
 		 */
 		zfs_range_unlock_reader(zp, rl);
-		mutex_exit(&zp->z_range_lock);
 	}
 }
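
For context, callers take and drop range locks through the zfs_range_lock()/
zfs_range_unlock() pair touched by this patch; a minimal usage sketch,
assuming a znode zp and a byte range off/len supplied by the caller (the
surrounding write path is omitted):

	rl_t *rl;

	/* Block until the byte range [off, off + len) is held for writing. */
	rl = zfs_range_lock(zp, off, len, RL_WRITER);

	/* ... perform the write against zp ... */

	/* Waiters are woken here; the rl_t itself is freed later via the taskq. */
	zfs_range_unlock(rl);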