summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2012-12-19 13:51:44 -0800
committerBrian Behlendorf <[email protected]>2012-12-20 09:57:39 -0800
commit65d56083b4617a4cade0cff68cbbaf68114169d6 (patch)
tree7707c7061ed4be1f0100649d0f7ff6fe6c3e756a
parentd5446cfc528262ae3a41da31a8524c9d2e793f45 (diff)
Fix zpool on zvol lock inversion deadlock
In all but one case the spa_namespace_lock is taken before the bdev->bd_mutex lock. But Linux __blkdev_get() function calls fops->open() with the bdev->bd_mutex lock held and we must somehow still safely acquire the spa_namespace_lock. To avoid a potential lock inversion deadlock we preemptively try to take the spa_namespace_lock(). Normally it will not be contended and this is safe because spa_open_common() handles the case where the caller already holds the spa_namespace_lock. When it is contended we risk a lock inversion if we were to block waiting for the lock. Luckily, the __blkdev_get() function allows us to return -ERESTARTSYS which will result in bdev->bd_mutex being dropped, reacquired, and fops->open() being called again. This process can be repeated safely until both locks are acquired. Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Jorgen Lundman <[email protected]> Closes #612
-rw-r--r--module/zfs/zvol.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 7a448f194..d4d533f02 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -891,11 +891,39 @@ zvol_first_open(zvol_state_t *zv)
{
objset_t *os;
uint64_t volsize;
+ int locked = 0;
int error;
uint64_t ro;
+ /*
+ * In all other cases the spa_namespace_lock is taken before the
+ * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
+ * function calls fops->open() with the bdev->bd_mutex lock held.
+ *
+ * To avoid a potential lock inversion deadlock we preemptively
+ * try to take the spa_namespace_lock(). Normally it will not
+ * be contended and this is safe because spa_open_common() handles
+ * the case where the caller already holds the spa_namespace_lock.
+ *
+ * When it is contended we risk a lock inversion if we were to
+ * block waiting for the lock. Luckily, the __blkdev_get()
+ * function allows us to return -ERESTARTSYS which will result in
+ * bdev->bd_mutex being dropped, reacquired, and fops->open() being
+ * called again. This process can be repeated safely until both
+ * locks are acquired.
+ */
+ if (!mutex_owned(&spa_namespace_lock)) {
+ locked = mutex_tryenter(&spa_namespace_lock);
+ if (!locked)
+ return (-ERESTARTSYS);
+ }
+
/* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
+
+ if (locked)
+ mutex_exit(&spa_namespace_lock);
+
if (error)
return (-error);