summary | refs | log | tree | commit | diff | stats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r-- module/zfs/zvol.c 31
1 files changed, 29 insertions, 2 deletions
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 6cd366602..3bf28e1d4 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -1150,12 +1150,36 @@ static int
zvol_first_open(zvol_state_t *zv)
{
objset_t *os;
- int error;
+ int error, locked = 0;
+
+ /*
+ * In all other cases the spa_namespace_lock is taken before the
+ * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
+ * function calls fops->open() with the bdev->bd_mutex lock held.
+ * This deadlock can be easily observed with zvols used as vdevs.
+ *
+ * To avoid a potential lock inversion deadlock we preemptively
+ * try to take the spa_namespace_lock. Normally it will not
+ * be contended and this is safe because spa_open_common() handles
+ * the case where the caller already holds the spa_namespace_lock.
+ *
+ * When it is contended we risk a lock inversion if we were to
+ * block waiting for the lock. Luckily, the __blkdev_get()
+ * function allows us to return -ERESTARTSYS which will result in
+ * bdev->bd_mutex being dropped, reacquired, and fops->open() being
+ * called again. This process can be repeated safely until both
+ * locks are acquired.
+ */
+ if (!mutex_owned(&spa_namespace_lock)) {
+ locked = mutex_tryenter(&spa_namespace_lock);
+ if (!locked)
+ return (-SET_ERROR(ERESTARTSYS));
+ }
/* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os);
if (error)
- return (SET_ERROR(-error));
+ goto out_mutex;
zv->zv_objset = os;
@@ -1166,6 +1190,9 @@ zvol_first_open(zvol_state_t *zv)
zv->zv_objset = NULL;
}
+out_mutex:
+ if (locked)
+ mutex_exit(&spa_namespace_lock);
return (SET_ERROR(-error));
}