aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBoris Protopopov <[email protected]>2015-09-23 12:34:51 -0400
committerBrian Behlendorf <[email protected]>2016-03-10 09:53:36 -0800
commit1ee159f423b5eb3c4646b0ba2dd0fb359503ba90 (patch)
tree4535141c208168c27716484bf273f70bcb41e294
parenta0bd735adb1b1eb81fef10b4db102ee051c4d4ff (diff)
Fix lock order inversion with zvol_open()
zfsonlinux issue #3681 - lock order inversion between zvol_open() and dsl_pool_sync()...zvol_rename_minors() Remove trylock of spa_namespace_lock as it is no longer needed when zvol minor operations are performed in a separate context with no prior locking state; the spa_namespace_lock is no longer held when bdev->bd_mutex or zfs_state_lock might be taken in the code paths originating from the zvol minor operation callbacks. Signed-off-by: Boris Protopopov <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #3681
-rw-r--r--module/zfs/zvol.c63
1 files changed, 20 insertions, 43 deletions
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index ab4d3ceb7..ba482a474 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -957,56 +957,27 @@ zvol_first_open(zvol_state_t *zv)
{
objset_t *os;
uint64_t volsize;
- int locked = 0;
int error;
uint64_t ro;
- /*
- * In all other cases the spa_namespace_lock is taken before the
- * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
- * function calls fops->open() with the bdev->bd_mutex lock held.
- *
- * To avoid a potential lock inversion deadlock we preemptively
- * try to take the spa_namespace_lock(). Normally it will not
- * be contended and this is safe because spa_open_common() handles
- * the case where the caller already holds the spa_namespace_lock.
- *
- * When it is contended we risk a lock inversion if we were to
- * block waiting for the lock. Luckily, the __blkdev_get()
- * function allows us to return -ERESTARTSYS which will result in
- * bdev->bd_mutex being dropped, reacquired, and fops->open() being
- * called again. This process can be repeated safely until both
- * locks are acquired.
- */
- if (!mutex_owned(&spa_namespace_lock)) {
- locked = mutex_tryenter(&spa_namespace_lock);
- if (!locked)
- return (-SET_ERROR(ERESTARTSYS));
- }
-
- error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
- if (error)
- goto out_mutex;
-
/* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
if (error)
- goto out_mutex;
+ return (SET_ERROR(-error));
+
+ zv->zv_objset = os;
+
+ error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
+ if (error)
+ goto out_owned;
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
- if (error) {
- dmu_objset_disown(os, zvol_tag);
- zv->zv_objset = NULL;
- goto out_mutex;
- }
+ if (error)
+ goto out_owned;
- zv->zv_objset = os;
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
- if (error) {
- dmu_objset_disown(os, zvol_tag);
- zv->zv_objset = NULL;
- goto out_mutex;
- }
+ if (error)
+ goto out_owned;
set_capacity(zv->zv_disk, volsize >> 9);
zv->zv_volsize = volsize;
@@ -1021,9 +992,11 @@ zvol_first_open(zvol_state_t *zv)
zv->zv_flags &= ~ZVOL_RDONLY;
}
-out_mutex:
- if (locked)
- mutex_exit(&spa_namespace_lock);
+out_owned:
+ if (error) {
+ dmu_objset_disown(os, zvol_tag);
+ zv->zv_objset = NULL;
+ }
return (SET_ERROR(-error));
}
@@ -1526,6 +1499,8 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
{
const char *name = (const char *)arg;
+ ASSERT0(MUTEX_HELD(&spa_namespace_lock));
+
/* skip the designated dataset */
if (name && strcmp(dsname, name) == 0)
return (0);
@@ -1550,6 +1525,8 @@ zvol_create_minors_cb(const char *dsname, void *arg)
uint64_t snapdev;
int error;
+ ASSERT0(MUTEX_HELD(&spa_namespace_lock));
+
error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL);
if (error)
return (0);