diff options
author | Brian Behlendorf <[email protected]> | 2013-12-06 14:20:22 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2013-12-16 09:15:57 -0800 |
commit | ba6a24026c6eb910188c24b5c921fb793d3c998e (patch) | |
tree | 65b71e719840b17406f217b2245daa087aa40c15 /module/zfs | |
parent | dda12da9f1ec714af0e468aa03c24f402961f135 (diff) |
Remove ZFS_IOC_*_MINOR ioctl()s
Early versions of ZFS coordinated the creation and destruction
of device minors from userspace. This was inherently racy and
in late 2009 these ioctl()s were removed leaving everything up
to the kernel. This significantly simplified the code.
However, we never picked up these changes in ZoL since we'd
already significantly adjusted this code for Linux. This patch
aims to rectify that by finally removing ZFS_IOC_*_MINOR ioctl()s
and moving all the functionality down into the kernel. Since
this cleanup will change the kernel/user ABI it's being done
in the same tag as the previous libzfs_core ABI changes. This
will minimize, but not eliminate, the disruption to end users.
Once merged, ZoL, Illumos, and FreeBSD will basically be back
in sync with regard to handling ZVOLs in the common code. While
each platform must have its own custom zvol.c implementation, the
interfaces provided are consistent.
NOTES:
1) This patch introduces one subtle change in behavior which
could not be easily avoided. Prior to this change callers
of 'zfs create -V ...' were guaranteed that upon exit the
/dev/zvol/ block device link would be created or an error
returned. That's no longer the case. The utilities will no
longer block waiting for the symlink to be created. Callers
are now responsible for blocking; this is why a 'udev_wait'
call was added to the 'label' function in scripts/common.sh.
2) The read-only behavior of a ZVOL now solely depends on if
the ZVOL_RDONLY bit is set in zv->zv_flags. The redundant
policy setting in the gendisk structure was removed. This
both simplifies the code and allows us to safely leverage
set_disk_ro() to issue a KOBJ_CHANGE uevent. See the
comment in the code for further details on this.
3) Because __zvol_create_minor() and zvol_alloc() may now be
called in a sync task they must use KM_PUSHPAGE.
References:
illumos/illumos-gate@681d9761e8516a7dc5ab6589e2dfe717777e1123
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Ned Bass <[email protected]>
Signed-off-by: Tim Chase <[email protected]>
Closes #1969
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dsl_dataset.c | 25 | ||||
-rw-r--r-- | module/zfs/dsl_dir.c | 5 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 66 | ||||
-rw-r--r-- | module/zfs/zvol.c | 128 |
4 files changed, 142 insertions, 82 deletions
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 9ee9508bf..52edbd3fa 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -1229,6 +1229,16 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) fnvlist_free(suspended); } +#ifdef _KERNEL + if (error == 0) { + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *snapname = nvpair_name(pair); + zvol_create_minors(snapname); + } + } +#endif + return (error); } @@ -1601,6 +1611,9 @@ static int dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { +#ifdef _KERNEL + char *oldname, *newname; +#endif dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; dsl_dataset_t *ds; uint64_t val; @@ -1627,6 +1640,18 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); +#ifdef _KERNEL + oldname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); + newname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); + snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, + ddrsa->ddrsa_oldsnapname); + snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, + ddrsa->ddrsa_newsnapname); + zvol_rename_minors(oldname, newname); + kmem_free(newname, MAXPATHLEN); + kmem_free(oldname, MAXPATHLEN); +#endif + dsl_dataset_rele(ds, FTAG); return (0); } diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 54a7dffb1..803a77c25 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -38,6 +38,7 @@ #include <sys/zio.h> #include <sys/arc.h> #include <sys/sunddi.h> +#include <sys/zvol.h> #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); @@ -1302,6 +1303,10 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx)); +#ifdef _KERNEL + zvol_rename_minors(ddra->ddra_oldname, 
ddra->ddra_newname); +#endif + dsl_prop_notify_all(dd); dsl_dir_rele(newparent, FTAG); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index ae82cb45b..9b084632f 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2089,7 +2089,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc) return (err); } -static boolean_t +boolean_t dataset_name_hidden(const char *name) { /* @@ -2810,30 +2810,6 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc) /* * inputs: - * zc_name name of volume - * - * outputs: none - */ -static int -zfs_ioc_create_minor(zfs_cmd_t *zc) -{ - return (zvol_create_minor(zc->zc_name)); -} - -/* - * inputs: - * zc_name name of volume - * - * outputs: none - */ -static int -zfs_ioc_remove_minor(zfs_cmd_t *zc) -{ - return (zvol_remove_minor(zc->zc_name)); -} - -/* - * inputs: * zc_name name of filesystem * zc_nvlist_src{_size} nvlist of delegated permissions * zc_perm_action allow/unallow flag @@ -3174,6 +3150,12 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) if (error != 0) (void) dsl_destroy_head(fsname); } + +#ifdef _KERNEL + if (error == 0 && type == DMU_OST_ZVOL) + zvol_create_minors(fsname); +#endif + return (error); } @@ -3216,6 +3198,12 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) if (error != 0) (void) dsl_destroy_head(fsname); } + +#ifdef _KERNEL + if (error == 0) + zvol_create_minors(fsname); +#endif + return (error); } @@ -3276,6 +3264,12 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) } error = dsl_dataset_snapshot(snaps, props, outnvl); + +#ifdef _KERNEL + if (error == 0) + zvol_create_minors(poolname); +#endif + return (error); } @@ -3427,10 +3421,10 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); - (void) zvol_remove_minor(name); error = zfs_unmount_snap(name); if (error != 0) return (error); + (void) zvol_remove_minor(name); } return 
(dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); @@ -3520,7 +3514,6 @@ zfs_ioc_rename(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie & 1; char *at; - int err; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || @@ -3550,12 +3543,7 @@ zfs_ioc_rename(zfs_cmd_t *zc) return (error); } else { - err = dsl_dir_rename(zc->zc_name, zc->zc_value); - if (!err && zc->zc_objset_type == DMU_OST_ZVOL) { - (void) zvol_remove_minor(zc->zc_name); - (void) zvol_create_minor(zc->zc_value); - } - return (err); + return (dsl_dir_rename(zc->zc_name, zc->zc_value)); } } @@ -4045,6 +4033,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) error = 1; } #endif + +#ifdef _KERNEL + if (error == 0) + zvol_create_minors(tofs); +#endif + /* * On error, restore the original props. */ @@ -5391,12 +5385,8 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); /* - * ZoL functions + * ZoL functions */ - zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor, - zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); - zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor, - zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear, diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 79c56cd78..cb3d18108 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -347,7 +347,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) goto out_doi; } - if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { + if (zv->zv_flags & ZVOL_RDONLY) { error = SET_ERROR(EROFS); goto out_doi; } @@ -396,7 +396,7 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize) goto out; } - if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { + if (zv->zv_flags & ZVOL_RDONLY) { error = SET_ERROR(EROFS); goto 
out; } @@ -770,8 +770,7 @@ zvol_request(struct request_queue *q) zvol_dispatch(zvol_read, req); break; case WRITE: - if (unlikely(get_disk_ro(zv->zv_disk)) || - unlikely(zv->zv_flags & ZVOL_RDONLY)) { + if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { __blk_end_request(req, -EROFS, size); break; } @@ -1019,8 +1018,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) goto out_mutex; } - if ((flag & FMODE_WRITE) && - (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))) { + if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) { error = -EROFS; goto out_open_count; } @@ -1235,7 +1233,7 @@ zvol_alloc(dev_t dev, const char *name) zvol_state_t *zv; int error = 0; - zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); + zv = kmem_zalloc(sizeof (zvol_state_t), KM_PUSHPAGE); spin_lock_init(&zv->zv_lock); list_link_init(&zv->zv_next); @@ -1315,7 +1313,7 @@ __zvol_snapdev_hidden(const char *name) char *atp; int error = 0; - parent = kmem_alloc(MAXPATHLEN, KM_SLEEP); + parent = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); (void) strlcpy(parent, name, MAXPATHLEN); if ((atp = strrchr(parent, '@')) != NULL) { @@ -1352,7 +1350,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev) goto out; } - doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); + doi = kmem_alloc(sizeof(dmu_object_info_t), KM_PUSHPAGE); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) @@ -1474,77 +1472,118 @@ zvol_remove_minor(const char *name) return (error); } +/* + * Rename a block device minor mode for the specified volume. + */ +static void +__zvol_rename_minor(zvol_state_t *zv, const char *newname) +{ + int readonly = get_disk_ro(zv->zv_disk); + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + + strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); + + /* + * The block device's read-only state is briefly changed causing + * a KOBJ_CHANGE uevent to be issued. This ensures udev detects + * the name change and fixes the symlinks. 
This does not change + * ZVOL_RDONLY in zv->zv_flags so the actual read-only state never + * changes. This would normally be done using kobject_uevent() but + * that is a GPL-only symbol which is why we need this workaround. + */ + set_disk_ro(zv->zv_disk, !readonly); + set_disk_ro(zv->zv_disk, readonly); +} + static int zvol_create_minors_cb(const char *dsname, void *arg) { - if (strchr(dsname, '/') == NULL) - return 0; + (void) zvol_create_minor(dsname); - (void) __zvol_create_minor(dsname, B_FALSE); return (0); } /* - * Create minors for specified pool, if pool is NULL create minors - * for all available pools. + * Create minors for specified dataset including children and snapshots. */ int -zvol_create_minors(char *pool) +zvol_create_minors(const char *name) { - spa_t *spa = NULL; int error = 0; + if (!zvol_inhibit_dev) + error = dmu_objset_find((char *)name, zvol_create_minors_cb, + NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + + return (SET_ERROR(error)); +} + +/* + * Remove minors for specified dataset including children and snapshots. + */ +void +zvol_remove_minors(const char *name) +{ + zvol_state_t *zv, *zv_next; + int namelen = ((name) ? 
strlen(name) : 0); + if (zvol_inhibit_dev) - return (0); + return; mutex_enter(&zvol_state_lock); - if (pool) { - error = dmu_objset_find(pool, zvol_create_minors_cb, - NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); - } else { - mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) { - error = dmu_objset_find(spa_name(spa), zvol_create_minors_cb, NULL, - DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); - if (error) - break; + + for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { + zv_next = list_next(&zvol_state_list, zv); + + if (name == NULL || strcmp(zv->zv_name, name) == 0 || + (strncmp(zv->zv_name, name, namelen) == 0 && + zv->zv_name[namelen] == '/')) { + zvol_remove(zv); + zvol_free(zv); } - mutex_exit(&spa_namespace_lock); } - mutex_exit(&zvol_state_lock); - return error; + mutex_exit(&zvol_state_lock); } /* - * Remove minors for specified pool, if pool is NULL remove all minors. + * Rename minors for specified dataset including children and snapshots. */ void -zvol_remove_minors(const char *pool) +zvol_rename_minors(const char *oldname, const char *newname) { zvol_state_t *zv, *zv_next; - char *str; + int oldnamelen, newnamelen; + char *name; if (zvol_inhibit_dev) return; - str = kmem_zalloc(MAXNAMELEN, KM_SLEEP); - if (pool) { - (void) strncpy(str, pool, strlen(pool)); - (void) strcat(str, "/"); - } + oldnamelen = strlen(oldname); + newnamelen = strlen(newname); + name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); mutex_enter(&zvol_state_lock); + for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); - if (pool == NULL || !strncmp(str, zv->zv_name, strlen(str))) { - zvol_remove(zv); - zvol_free(zv); + if (strcmp(zv->zv_name, oldname) == 0) { + __zvol_rename_minor(zv, newname); + } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && + (zv->zv_name[oldnamelen] == '/' || + zv->zv_name[oldnamelen] == '@')) { + snprintf(name, MAXNAMELEN, "%s%c%s", newname, + zv->zv_name[oldnamelen], 
+ zv->zv_name + oldnamelen + 1); + __zvol_rename_minor(zv, name); } } + mutex_exit(&zvol_state_lock); - kmem_free(str, MAXNAMELEN); + + kmem_free(name, MAXNAMELEN); } static int @@ -1552,7 +1591,7 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) { uint64_t snapdev = *(uint64_t *) arg; if (strchr(dsname, '@') == NULL) - return 0; + return (0); switch (snapdev) { case ZFS_SNAPDEV_VISIBLE: @@ -1564,7 +1603,8 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) { (void) zvol_remove_minor(dsname); break; } - return 0; + + return (0); } int |