aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2021-12-17 09:52:13 -0800
committerTony Hutter <[email protected]>2021-12-22 09:31:58 -0800
commita4831fa023f07999570f1f200b1c856300315977 (patch)
treefdc49fd6b7666f050449f4de34e7596deef1bcef
parentb327131a1fe5340cf3c2e8b46ff4d80029c636b7 (diff)
Fix zvol_open() lock inversion
When restructuring the zvol_open() logic for the Linux 5.13 kernel a lock inversion was accidentally introduced. In the updated code the spa_namespace_lock is now taken before the zv_suspend_lock allowing the following scenario to occur: down_read <=== waiting for zv_suspend_lock zvol_open <=== holds spa_namespace_lock __blkdev_get blkdev_get_by_dev blkdev_open ... mutex_lock <== waiting for spa_namespace_lock spa_open_common spa_open dsl_pool_hold dmu_objset_hold_flags dmu_objset_hold dsl_prop_get dsl_prop_get_integer zvol_create_minor dmu_recv_end zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend() zfs_ioc_recv ... This commit resolves the issue by moving the acquisition of the spa_namespace_lock back to after the zv_suspend_lock which restores the original ordering. Additionally, as part of this change the error exit paths were simplified where possible. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Rich Ercolani <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #12863
-rw-r--r--module/os/linux/zfs/zvol_os.c121
1 files changed, 58 insertions, 63 deletions
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index 25732f5e1..5f20d092a 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -461,8 +461,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
{
zvol_state_t *zv;
int error = 0;
- boolean_t drop_suspend = B_TRUE;
- boolean_t drop_namespace = B_FALSE;
+ boolean_t drop_suspend = B_FALSE;
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
hrtime_t start = gethrtime();
@@ -482,7 +481,36 @@ retry:
return (SET_ERROR(-ENXIO));
}
- if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) {
+ mutex_enter(&zv->zv_state_lock);
+ /*
+ * Make sure zvol is not suspended during first open
+ * (hold zv_suspend_lock) and respect proper lock acquisition
+ * ordering - zv_suspend_lock before zv_state_lock
+ */
+ if (zv->zv_open_count == 0) {
+ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
+ mutex_exit(&zv->zv_state_lock);
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
+ mutex_enter(&zv->zv_state_lock);
+ /* check to see if zv_suspend_lock is needed */
+ if (zv->zv_open_count != 0) {
+ rw_exit(&zv->zv_suspend_lock);
+ } else {
+ drop_suspend = B_TRUE;
+ }
+ } else {
+ drop_suspend = B_TRUE;
+ }
+ }
+ rw_exit(&zvol_state_lock);
+
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+ if (zv->zv_open_count == 0) {
+ boolean_t drop_namespace = B_FALSE;
+
+ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
+
/*
* In all other call paths the spa_namespace_lock is taken
* before the bdev->bd_mutex lock. However, on open(2)
@@ -507,84 +535,51 @@ retry:
* the kernel so the only option is to return the error for
* the caller to handle it.
*/
- if (!mutex_tryenter(&spa_namespace_lock)) {
- rw_exit(&zvol_state_lock);
+ if (!mutex_owned(&spa_namespace_lock)) {
+ if (!mutex_tryenter(&spa_namespace_lock)) {
+ mutex_exit(&zv->zv_state_lock);
+ rw_exit(&zv->zv_suspend_lock);
#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
- schedule();
- return (SET_ERROR(-ERESTARTSYS));
-#else
- if ((gethrtime() - start) > timeout)
+ schedule();
return (SET_ERROR(-ERESTARTSYS));
+#else
+ if ((gethrtime() - start) > timeout)
+ return (SET_ERROR(-ERESTARTSYS));
- schedule_timeout(MSEC_TO_TICK(10));
- goto retry;
+ schedule_timeout(MSEC_TO_TICK(10));
+ goto retry;
#endif
- } else {
- drop_namespace = B_TRUE;
- }
- }
-
- mutex_enter(&zv->zv_state_lock);
- /*
- * make sure zvol is not suspended during first open
- * (hold zv_suspend_lock) and respect proper lock acquisition
- * ordering - zv_suspend_lock before zv_state_lock
- */
- if (zv->zv_open_count == 0) {
- if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
- mutex_exit(&zv->zv_state_lock);
- rw_enter(&zv->zv_suspend_lock, RW_READER);
- mutex_enter(&zv->zv_state_lock);
- /* check to see if zv_suspend_lock is needed */
- if (zv->zv_open_count != 0) {
- rw_exit(&zv->zv_suspend_lock);
- drop_suspend = B_FALSE;
+ } else {
+ drop_namespace = B_TRUE;
}
}
- } else {
- drop_suspend = B_FALSE;
- }
- rw_exit(&zvol_state_lock);
-
- ASSERT(MUTEX_HELD(&zv->zv_state_lock));
- if (zv->zv_open_count == 0) {
- ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
- if (error)
- goto out_mutex;
- }
- if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
- error = -EROFS;
- goto out_open_count;
+ if (drop_namespace)
+ mutex_exit(&spa_namespace_lock);
}
- zv->zv_open_count++;
-
- mutex_exit(&zv->zv_state_lock);
- if (drop_namespace)
- mutex_exit(&spa_namespace_lock);
- if (drop_suspend)
- rw_exit(&zv->zv_suspend_lock);
-
- zfs_check_media_change(bdev);
-
- return (0);
+ if (error == 0) {
+ if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
+ if (zv->zv_open_count == 0)
+ zvol_last_close(zv);
-out_open_count:
- if (zv->zv_open_count == 0)
- zvol_last_close(zv);
+ error = SET_ERROR(-EROFS);
+ } else {
+ zv->zv_open_count++;
+ }
+ }
-out_mutex:
mutex_exit(&zv->zv_state_lock);
- if (drop_namespace)
- mutex_exit(&spa_namespace_lock);
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
- return (SET_ERROR(error));
+ if (error == 0)
+ zfs_check_media_change(bdev);
+
+ return (error);
}
static void