aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2013-12-06 14:20:22 -0800
committerBrian Behlendorf <[email protected]>2013-12-16 09:15:57 -0800
commitba6a24026c6eb910188c24b5c921fb793d3c998e (patch)
tree65b71e719840b17406f217b2245daa087aa40c15 /module/zfs
parentdda12da9f1ec714af0e468aa03c24f402961f135 (diff)
Remove ZFS_IOC_*_MINOR ioctl()s
Early versions of ZFS coordinated the creation and destruction of device minors from userspace. This was inherently racy, and in late 2009 these ioctl()s were removed, leaving everything up to the kernel. This significantly simplified the code. However, we never picked up these changes in ZoL since we'd already significantly adjusted this code for Linux. This patch aims to rectify that by finally removing the ZFS_IOC_*_MINOR ioctl()s and moving all the functionality down into the kernel. Since this cleanup will change the kernel/user ABI, it's being done in the same tag as the previous libzfs_core ABI changes. This will minimize, but not eliminate, the disruption to end users. Once merged, ZoL, Illumos, and FreeBSD will basically be back in sync with regard to handling ZVOLs in the common code. While each platform must have its own custom zvol.c implementation, the interfaces provided are consistent. NOTES: 1) This patch introduces one subtle change in behavior which could not be easily avoided. Prior to this change, callers of 'zfs create -V ...' were guaranteed that upon exit the /dev/zvol/ block device link would be created or an error returned. That's no longer the case. The utilities will no longer block waiting for the symlink to be created. Callers are now responsible for blocking; this is why a 'udev_wait' call was added to the 'label' function in scripts/common.sh. 2) The read-only behavior of a ZVOL now depends solely on whether the ZVOL_RDONLY bit is set in zv->zv_flags. The redundant policy setting in the gendisk structure was removed. This both simplifies the code and allows us to safely leverage set_disk_ro() to issue a KOBJ_CHANGE uevent. See the comment in the code for further details on this. 3) Because __zvol_create_minor() and zvol_alloc() may now be called in a sync task, they must use KM_PUSHPAGE.
References: illumos/illumos-gate@681d9761e8516a7dc5ab6589e2dfe717777e1123 Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Ned Bass <[email protected]> Signed-off-by: Tim Chase <[email protected]> Closes #1969
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/dsl_dataset.c25
-rw-r--r--module/zfs/dsl_dir.c5
-rw-r--r--module/zfs/zfs_ioctl.c66
-rw-r--r--module/zfs/zvol.c128
4 files changed, 142 insertions, 82 deletions
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 9ee9508bf..52edbd3fa 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -1229,6 +1229,16 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
fnvlist_free(suspended);
}
+#ifdef _KERNEL
+ if (error == 0) {
+ for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(snaps, pair)) {
+ char *snapname = nvpair_name(pair);
+ zvol_create_minors(snapname);
+ }
+ }
+#endif
+
return (error);
}
@@ -1601,6 +1611,9 @@ static int
dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
dsl_dataset_t *hds, void *arg)
{
+#ifdef _KERNEL
+ char *oldname, *newname;
+#endif
dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
dsl_dataset_t *ds;
uint64_t val;
@@ -1627,6 +1640,18 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
+#ifdef _KERNEL
+ oldname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
+ newname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
+ snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
+ ddrsa->ddrsa_oldsnapname);
+ snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
+ ddrsa->ddrsa_newsnapname);
+ zvol_rename_minors(oldname, newname);
+ kmem_free(newname, MAXPATHLEN);
+ kmem_free(oldname, MAXPATHLEN);
+#endif
+
dsl_dataset_rele(ds, FTAG);
return (0);
}
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 54a7dffb1..803a77c25 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -38,6 +38,7 @@
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/sunddi.h>
+#include <sys/zvol.h>
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
@@ -1302,6 +1303,10 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));
+#ifdef _KERNEL
+ zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
+#endif
+
dsl_prop_notify_all(dd);
dsl_dir_rele(newparent, FTAG);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index ae82cb45b..9b084632f 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -2089,7 +2089,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
return (err);
}
-static boolean_t
+boolean_t
dataset_name_hidden(const char *name)
{
/*
@@ -2810,30 +2810,6 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc)
/*
* inputs:
- * zc_name name of volume
- *
- * outputs: none
- */
-static int
-zfs_ioc_create_minor(zfs_cmd_t *zc)
-{
- return (zvol_create_minor(zc->zc_name));
-}
-
-/*
- * inputs:
- * zc_name name of volume
- *
- * outputs: none
- */
-static int
-zfs_ioc_remove_minor(zfs_cmd_t *zc)
-{
- return (zvol_remove_minor(zc->zc_name));
-}
-
-/*
- * inputs:
* zc_name name of filesystem
* zc_nvlist_src{_size} nvlist of delegated permissions
* zc_perm_action allow/unallow flag
@@ -3174,6 +3150,12 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
+
+#ifdef _KERNEL
+ if (error == 0 && type == DMU_OST_ZVOL)
+ zvol_create_minors(fsname);
+#endif
+
return (error);
}
@@ -3216,6 +3198,12 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
(void) dsl_destroy_head(fsname);
}
+
+#ifdef _KERNEL
+ if (error == 0)
+ zvol_create_minors(fsname);
+#endif
+
return (error);
}
@@ -3276,6 +3264,12 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
}
error = dsl_dataset_snapshot(snaps, props, outnvl);
+
+#ifdef _KERNEL
+ if (error == 0)
+ zvol_create_minors(poolname);
+#endif
+
return (error);
}
@@ -3427,10 +3421,10 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
(name[poollen] != '/' && name[poollen] != '@'))
return (SET_ERROR(EXDEV));
- (void) zvol_remove_minor(name);
error = zfs_unmount_snap(name);
if (error != 0)
return (error);
+ (void) zvol_remove_minor(name);
}
return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
@@ -3520,7 +3514,6 @@ zfs_ioc_rename(zfs_cmd_t *zc)
{
boolean_t recursive = zc->zc_cookie & 1;
char *at;
- int err;
zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
@@ -3550,12 +3543,7 @@ zfs_ioc_rename(zfs_cmd_t *zc)
return (error);
} else {
- err = dsl_dir_rename(zc->zc_name, zc->zc_value);
- if (!err && zc->zc_objset_type == DMU_OST_ZVOL) {
- (void) zvol_remove_minor(zc->zc_name);
- (void) zvol_create_minor(zc->zc_value);
- }
- return (err);
+ return (dsl_dir_rename(zc->zc_name, zc->zc_value));
}
}
@@ -4045,6 +4033,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)
error = 1;
}
#endif
+
+#ifdef _KERNEL
+ if (error == 0)
+ zvol_create_minors(tofs);
+#endif
+
/*
* On error, restore the original props.
*/
@@ -5391,12 +5385,8 @@ zfs_ioctl_init(void)
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
/*
- * ZoL functions
+ * ZoL functions
*/
- zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor,
- zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
- zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor,
- zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE);
zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 79c56cd78..cb3d18108 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -347,7 +347,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
goto out_doi;
}
- if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) {
+ if (zv->zv_flags & ZVOL_RDONLY) {
error = SET_ERROR(EROFS);
goto out_doi;
}
@@ -396,7 +396,7 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
goto out;
}
- if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) {
+ if (zv->zv_flags & ZVOL_RDONLY) {
error = SET_ERROR(EROFS);
goto out;
}
@@ -770,8 +770,7 @@ zvol_request(struct request_queue *q)
zvol_dispatch(zvol_read, req);
break;
case WRITE:
- if (unlikely(get_disk_ro(zv->zv_disk)) ||
- unlikely(zv->zv_flags & ZVOL_RDONLY)) {
+ if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
__blk_end_request(req, -EROFS, size);
break;
}
@@ -1019,8 +1018,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
goto out_mutex;
}
- if ((flag & FMODE_WRITE) &&
- (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))) {
+ if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
error = -EROFS;
goto out_open_count;
}
@@ -1235,7 +1233,7 @@ zvol_alloc(dev_t dev, const char *name)
zvol_state_t *zv;
int error = 0;
- zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
+ zv = kmem_zalloc(sizeof (zvol_state_t), KM_PUSHPAGE);
spin_lock_init(&zv->zv_lock);
list_link_init(&zv->zv_next);
@@ -1315,7 +1313,7 @@ __zvol_snapdev_hidden(const char *name)
char *atp;
int error = 0;
- parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ parent = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
(void) strlcpy(parent, name, MAXPATHLEN);
if ((atp = strrchr(parent, '@')) != NULL) {
@@ -1352,7 +1350,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
goto out;
}
- doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP);
+ doi = kmem_alloc(sizeof(dmu_object_info_t), KM_PUSHPAGE);
error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
if (error)
@@ -1474,77 +1472,118 @@ zvol_remove_minor(const char *name)
return (error);
}
+/*
+ * Rename a block device minor node for the specified volume.
+ */
+static void
+__zvol_rename_minor(zvol_state_t *zv, const char *newname)
+{
+ int readonly = get_disk_ro(zv->zv_disk);
+
+ ASSERT(MUTEX_HELD(&zvol_state_lock));
+
+ strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+
+ /*
+ * The block device's read-only state is briefly changed causing
+ * a KOBJ_CHANGE uevent to be issued. This ensures udev detects
+ * the name change and fixes the symlinks. This does not change
+ * ZVOL_RDONLY in zv->zv_flags so the actual read-only state never
+ * changes. This would normally be done using kobject_uevent() but
+ * that is a GPL-only symbol which is why we need this workaround.
+ */
+ set_disk_ro(zv->zv_disk, !readonly);
+ set_disk_ro(zv->zv_disk, readonly);
+}
+
static int
zvol_create_minors_cb(const char *dsname, void *arg)
{
- if (strchr(dsname, '/') == NULL)
- return 0;
+ (void) zvol_create_minor(dsname);
- (void) __zvol_create_minor(dsname, B_FALSE);
return (0);
}
/*
- * Create minors for specified pool, if pool is NULL create minors
- * for all available pools.
+ * Create minors for specified dataset including children and snapshots.
*/
int
-zvol_create_minors(char *pool)
+zvol_create_minors(const char *name)
{
- spa_t *spa = NULL;
int error = 0;
+ if (!zvol_inhibit_dev)
+ error = dmu_objset_find((char *)name, zvol_create_minors_cb,
+ NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
+
+ return (SET_ERROR(error));
+}
+
+/*
+ * Remove minors for specified dataset including children and snapshots.
+ */
+void
+zvol_remove_minors(const char *name)
+{
+ zvol_state_t *zv, *zv_next;
+ int namelen = ((name) ? strlen(name) : 0);
+
if (zvol_inhibit_dev)
- return (0);
+ return;
mutex_enter(&zvol_state_lock);
- if (pool) {
- error = dmu_objset_find(pool, zvol_create_minors_cb,
- NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
- } else {
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- error = dmu_objset_find(spa_name(spa), zvol_create_minors_cb, NULL,
- DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
- if (error)
- break;
+
+ for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
+ zv_next = list_next(&zvol_state_list, zv);
+
+ if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
+ (strncmp(zv->zv_name, name, namelen) == 0 &&
+ zv->zv_name[namelen] == '/')) {
+ zvol_remove(zv);
+ zvol_free(zv);
}
- mutex_exit(&spa_namespace_lock);
}
- mutex_exit(&zvol_state_lock);
- return error;
+ mutex_exit(&zvol_state_lock);
}
/*
- * Remove minors for specified pool, if pool is NULL remove all minors.
+ * Rename minors for specified dataset including children and snapshots.
*/
void
-zvol_remove_minors(const char *pool)
+zvol_rename_minors(const char *oldname, const char *newname)
{
zvol_state_t *zv, *zv_next;
- char *str;
+ int oldnamelen, newnamelen;
+ char *name;
if (zvol_inhibit_dev)
return;
- str = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
- if (pool) {
- (void) strncpy(str, pool, strlen(pool));
- (void) strcat(str, "/");
- }
+ oldnamelen = strlen(oldname);
+ newnamelen = strlen(newname);
+ name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE);
mutex_enter(&zvol_state_lock);
+
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
zv_next = list_next(&zvol_state_list, zv);
- if (pool == NULL || !strncmp(str, zv->zv_name, strlen(str))) {
- zvol_remove(zv);
- zvol_free(zv);
+ if (strcmp(zv->zv_name, oldname) == 0) {
+ __zvol_rename_minor(zv, newname);
+ } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
+ (zv->zv_name[oldnamelen] == '/' ||
+ zv->zv_name[oldnamelen] == '@')) {
+ snprintf(name, MAXNAMELEN, "%s%c%s", newname,
+ zv->zv_name[oldnamelen],
+ zv->zv_name + oldnamelen + 1);
+ __zvol_rename_minor(zv, name);
}
}
+
mutex_exit(&zvol_state_lock);
- kmem_free(str, MAXNAMELEN);
+
+ kmem_free(name, MAXNAMELEN);
}
static int
@@ -1552,7 +1591,7 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
uint64_t snapdev = *(uint64_t *) arg;
if (strchr(dsname, '@') == NULL)
- return 0;
+ return (0);
switch (snapdev) {
case ZFS_SNAPDEV_VISIBLE:
@@ -1564,7 +1603,8 @@ snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
(void) zvol_remove_minor(dsname);
break;
}
- return 0;
+
+ return (0);
}
int