diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/zvol.c | 31 |
1 files changed, 29 insertions, 2 deletions
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 6cd366602..3bf28e1d4 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -1150,12 +1150,36 @@ static int
 zvol_first_open(zvol_state_t *zv)
 {
 	objset_t *os;
-	int error;
+	int error, locked = 0;
+
+	/*
+	 * In all other cases the spa_namespace_lock is taken before the
+	 * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
+	 * function calls fops->open() with the bdev->bd_mutex lock held.
+	 * This deadlock can be easily observed with zvols used as vdevs.
+	 *
+	 * To avoid a potential lock inversion deadlock we preemptively
+	 * try to take the spa_namespace_lock(). Normally it will not
+	 * be contended and this is safe because spa_open_common() handles
+	 * the case where the caller already holds the spa_namespace_lock.
+	 *
+	 * When it is contended we risk a lock inversion if we were to
+	 * block waiting for the lock. Luckily, the __blkdev_get()
+	 * function allows us to return -ERESTARTSYS which will result in
+	 * bdev->bd_mutex being dropped, reacquired, and fops->open() being
+	 * called again. This process can be repeated safely until both
+	 * locks are acquired.
+	 */
+	if (!mutex_owned(&spa_namespace_lock)) {
+		locked = mutex_tryenter(&spa_namespace_lock);
+		if (!locked)
+			return (-SET_ERROR(ERESTARTSYS));
+	}
 
 	/* lie and say we're read-only */
 	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os);
 	if (error)
-		return (SET_ERROR(-error));
+		goto out_mutex;
 
 	zv->zv_objset = os;
 
@@ -1166,6 +1190,9 @@ zvol_first_open(zvol_state_t *zv)
 		zv->zv_objset = NULL;
 	}
 
+out_mutex:
+	if (locked)
+		mutex_exit(&spa_namespace_lock);
 	return (SET_ERROR(-error));
 }