diff options
author | Boris Protopopov <[email protected]> | 2016-02-16 14:52:55 -0500 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-02-24 11:54:24 -0800 |
commit | 5428dc51fb55145fbac1c142402dafc11d1e7d28 (patch) | |
tree | 10a844b20c4c7173222d85f0cb974541dcea531e /module | |
parent | 19a47cb1c2be6da9b9d4a395a7afc07da4813a5f (diff) |
Make zvol minor functionality more robust
Close the race window in zvol_open() to prevent removal of
zvol_state in the 'first open' code path. Move the call to
check_disk_change() under zvol_state_lock to make sure the
zvol_media_changed() and zvol_revalidate_disk() called by
check_disk_change() are invoked with positive zv_open_count.
Skip opened zvols when removing minors and set private_data
to NULL for zvols that are not in use whose minors are being
removed, to indicate to zvol_open() that the state is gone.
Skip opened zvols when renaming minors to avoid modifying
zv_name that might be in use, e.g. in zvol_ioctl().
Drop zvol_state_lock before calling add_disk() when creating
minors to avoid deadlocks with zvol_open().
Wrap dmu_objset_find() with spl_fstran_mark()/unmark().
Signed-off-by: Boris Protopopov <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Closes #4344
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/zvol.c | 93 |
1 files changed, 77 insertions, 16 deletions
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 52774a82c..c90e4ec6b 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -35,6 +35,7 @@ * needs to be run before opening and using a device. * * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ #include <sys/dbuf.h> @@ -607,6 +608,8 @@ zvol_write(zvol_state_t *zv, uio_t *uio, boolean_t sync) rl_t *rl; int error = 0; + ASSERT(zv && zv->zv_open_count > 0); + rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, RL_WRITER); @@ -675,6 +678,8 @@ zvol_discard(struct bio *bio) rl_t *rl; dmu_tx_t *tx; + ASSERT(zv && zv->zv_open_count > 0); + if (end > zv->zv_volsize) return (SET_ERROR(EIO)); @@ -722,6 +727,8 @@ zvol_read(zvol_state_t *zv, uio_t *uio) rl_t *rl; int error = 0; + ASSERT(zv && zv->zv_open_count > 0); + rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, RL_READER); while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { @@ -1020,7 +1027,7 @@ zvol_last_close(zvol_state_t *zv) static int zvol_open(struct block_device *bdev, fmode_t flag) { - zvol_state_t *zv = bdev->bd_disk->private_data; + zvol_state_t *zv; int error = 0, drop_mutex = 0; /* @@ -1034,7 +1041,17 @@ zvol_open(struct block_device *bdev, fmode_t flag) drop_mutex = 1; } - ASSERT3P(zv, !=, NULL); + /* + * Obtain a copy of private_data under the lock to make sure + * that either the result of zvol_freeg() setting + * bdev->bd_disk->private_data to NULL is observed, or zvol_free() + * is not called on this zv because of the positive zv_open_count. + */ + zv = bdev->bd_disk->private_data; + if (zv == NULL) { + error = -ENXIO; + goto out_mutex; + } if (zv->zv_open_count == 0) { error = zvol_first_open(zv); @@ -1049,6 +1066,8 @@ zvol_open(struct block_device *bdev, fmode_t flag) zv->zv_open_count++; + check_disk_change(bdev); + out_open_count: if (zv->zv_open_count == 0) zvol_last_close(zv); @@ -1057,8 +1076,6 @@ out_mutex: if (drop_mutex) mutex_exit(&zvol_state_lock); - check_disk_change(bdev); - return (SET_ERROR(error)); } @@ -1072,16 +1089,16 @@ zvol_release(struct gendisk *disk, fmode_t mode) zvol_state_t *zv = disk->private_data; int drop_mutex = 0; + ASSERT(zv && zv->zv_open_count > 0); + if (!mutex_owned(&zvol_state_lock)) { mutex_enter(&zvol_state_lock); drop_mutex = 1; } - if (zv->zv_open_count > 0) { - zv->zv_open_count--; - if (zv->zv_open_count == 0) - zvol_last_close(zv); - } + zv->zv_open_count--; + if (zv->zv_open_count == 0) + zvol_last_close(zv); if (drop_mutex) mutex_exit(&zvol_state_lock); @@ -1098,8 +1115,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, zvol_state_t *zv = bdev->bd_disk->private_data; int error = 0; - if (zv == NULL) - return (SET_ERROR(-ENXIO)); + ASSERT(zv && zv->zv_open_count > 0); switch (cmd) { case BLKFLSBUF: @@ -1133,6 +1149,8 @@ static int zvol_media_changed(struct gendisk *disk) { zvol_state_t *zv = disk->private_data; + ASSERT(zv && zv->zv_open_count > 0); + return (zv->zv_changed); } @@ -1140,6 +1158,8 @@ static int zvol_revalidate_disk(struct gendisk *disk) { zvol_state_t *zv = disk->private_data; + ASSERT(zv && zv->zv_open_count > 0); + zv->zv_changed = 0; set_capacity(zv->zv_disk, zv->zv_volsize >> 9); @@ -1156,7 +1176,11 @@ static int zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) { zvol_state_t *zv = bdev->bd_disk->private_data; - sector_t sectors = get_capacity(zv->zv_disk); + sector_t sectors; + + ASSERT(zv && zv->zv_open_count > 0); + + sectors = get_capacity(zv->zv_disk); if (sectors > 2048) { geo->heads = 16; @@ -1312,9 +1336,14 @@ out_kmem: static void zvol_free(zvol_state_t *zv) { + ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(zv->zv_open_count == 0); + avl_destroy(&zv->zv_znode.z_range_avl); mutex_destroy(&zv->zv_znode.z_range_lock); + zv->zv_disk->private_data = NULL; + del_gendisk(zv->zv_disk); blk_cleanup_queue(zv->zv_queue); put_disk(zv->zv_disk); @@ -1448,7 +1477,15 @@ out: if (error == 0) { zvol_insert(zv); + /* + * Drop the lock to prevent deadlock with sys_open() -> + * zvol_open(), which first takes bd_disk->bd_mutex and then + * takes zvol_state_lock, whereas this code path first takes + * zvol_state_lock, and then takes bd_disk->bd_mutex. + */ + mutex_exit(&zvol_state_lock); add_disk(zv->zv_disk); + mutex_enter(&zvol_state_lock); } return (SET_ERROR(error)); @@ -1545,10 +1582,15 @@ int zvol_create_minors(const char *name) { int error = 0; + fstrans_cookie_t cookie; - if (!zvol_inhibit_dev) - error = dmu_objset_find((char *)name, zvol_create_minors_cb, - NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + if (zvol_inhibit_dev) + return (0); + + cookie = spl_fstrans_mark(); + error = dmu_objset_find((char *)name, zvol_create_minors_cb, + NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + spl_fstrans_unmark(cookie); return (SET_ERROR(error)); } @@ -1572,7 +1614,13 @@ zvol_remove_minors(const char *name) if (name == NULL || strcmp(zv->zv_name, name) == 0 || (strncmp(zv->zv_name, name, namelen) == 0 && - zv->zv_name[namelen] == '/')) { + (zv->zv_name[namelen] == '/' || + zv->zv_name[namelen] == '@'))) { + + /* If in use, leave alone */ + if (zv->zv_open_count > 0) + continue; + zvol_remove(zv); zvol_free(zv); } @@ -1603,6 +1651,10 @@ zvol_rename_minors(const char *oldname, const char *newname) for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); + /* If in use, leave alone */ + if (zv->zv_open_count > 0) + continue; + if (strcmp(zv->zv_name, oldname) == 0) { __zvol_rename_minor(zv, newname); } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && @@ -1643,8 +1695,17 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) { int zvol_set_snapdev(const char *dsname, uint64_t snapdev) { + fstrans_cookie_t cookie; + + if (zvol_inhibit_dev) + /* caller should continue to modify snapdev property */ + return (-1); + + cookie = spl_fstrans_mark(); (void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb, &snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); + spl_fstrans_unmark(cookie); + /* caller should continue to modify snapdev property */ return (-1); } |