diff options
author | Brian Behlendorf <behlendorf1@llnl.gov> | 2013-12-06 14:20:22 -0800 |
---|---|---|
committer | Brian Behlendorf <behlendorf1@llnl.gov> | 2013-12-16 09:15:57 -0800 |
commit | ba6a24026c6eb910188c24b5c921fb793d3c998e (patch) | |
tree | 65b71e719840b17406f217b2245daa087aa40c15 /lib | |
parent | dda12da9f1ec714af0e468aa03c24f402961f135 (diff) |
Remove ZFC_IOC_*_MINOR ioctl()s
Early versions of ZFS coordinated the creation and destruction
of device minors from userspace. This was inherently racy and
in late 2009 these ioctl()s were removed leaving everything up
to the kernel. This significantly simplified the code.
However, we never picked up these changes in ZoL since we'd
already significantly adjusted this code for Linux. This patch
aims to rectify that by finally removing ZFC_IOC_*_MINOR ioctl()s
and moving all the functionality down in to the kernel. Since
this cleanup will change the kernel/user ABI it's being done
in the same tag as the previous libzfs_core ABI changes. This
will minimize, but not eliminate, the disruption to end users.
Once merged ZoL, Illumos, and FreeBSD will basically be back
in sync in regards to handling ZVOLs in the common code. While
each platform must have its own custom zvol.c implemenation the
interfaces provided are consistent.
NOTES:
1) This patch introduces one subtle change in behavior which
could not be easily avoided. Prior to this change callers
of 'zfs create -V ...' were guaranteed that upon exit the
/dev/zvol/ block device link would be created or an error
returned. That's no longer the case. The utilities will no
longer block waiting for the symlink to be created. Callers
are now responsible for blocking, this is why a 'udev_wait'
call was added to the 'label' function in scripts/common.sh.
2) The read-only behavior of a ZVOL now solely depends on if
the ZVOL_RDONLY bit is set in zv->zv_flags. The redundant
policy setting in the gendisk structure was removed. This
both simplifies the code and allows us to safely leverage
set_disk_ro() to issue a KOBJ_CHANGE uevent. See the
comment in the code for futher details on this.
3) Because __zvol_create_minor() and zvol_alloc() may now be
called in a sync task they must use KM_PUSHPAGE.
References:
illumos/illumos-gate@681d9761e8516a7dc5ab6589e2dfe717777e1123
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes #1969
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libzfs/libzfs_changelist.c | 18 | ||||
-rw-r--r-- | lib/libzfs/libzfs_dataset.c | 351 | ||||
-rw-r--r-- | lib/libzfs/libzfs_sendrecv.c | 10 |
3 files changed, 15 insertions, 364 deletions
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 3a83e2d71..0bcfc0423 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -291,30 +291,22 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) for (cn = uu_list_first(clp->cl_list); cn != NULL; cn = uu_list_next(clp->cl_list, cn)) { - zfs_handle_t *hdl; - - hdl = cn->cn_handle; - /* * Do not rename a clone that's not in the source hierarchy. */ - if (!isa_child_of(hdl->zfs_name, src)) + if (!isa_child_of(cn->cn_handle->zfs_name, src)) continue; /* * Destroy the previous mountpoint if needed. */ - remove_mountpoint(hdl); + remove_mountpoint(cn->cn_handle); (void) strlcpy(newname, dst, sizeof (newname)); - (void) strcat(newname, hdl->zfs_name + strlen(src)); - - if (ZFS_IS_VOLUME(hdl)) { - (void) zvol_remove_link(hdl->zfs_hdl, hdl->zfs_name); - (void) zvol_create_link(hdl->zfs_hdl, newname); - } + (void) strcat(newname, cn->cn_handle->zfs_name + strlen(src)); - (void) strlcpy(hdl->zfs_name, newname, sizeof (hdl->zfs_name)); + (void) strlcpy(cn->cn_handle->zfs_name, newname, + sizeof (cn->cn_handle->zfs_name)); } } diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 9810b2e2d..90b6572b1 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -63,7 +63,6 @@ #include "libzfs_impl.h" #include "zfs_deleg.h" -static int zvol_create_link_common(libzfs_handle_t *, const char *, int); static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); @@ -418,8 +417,7 @@ make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) - return (-1); /* zpios' and other testing datasets are - of this type, ignore if encountered */ + return (-1); else abort(); @@ -2121,7 +2119,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, localtime_r(&time, &t) == NULL || strftime(propbuf, proplen, "%a %b %e %k:%M %Y", &t) == 0) - (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t) val); + (void) snprintf(propbuf, proplen, "%llu", + (u_longlong_t) val); } break; @@ -2628,7 +2627,7 @@ zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, if (literal) { (void) snprintf(propbuf, proplen, "%llu", - (u_longlong_t)propvalue); + (u_longlong_t)propvalue); } else if (propvalue == 0 && (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) { (void) strlcpy(propbuf, "none", proplen); @@ -2685,7 +2684,8 @@ zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, return (err); if (literal) { - (void) snprintf(propbuf, proplen, "%llu", (long long unsigned int)propvalue); + (void) snprintf(propbuf, proplen, "%llu", + (u_longlong_t)propvalue); } else { zfs_nicenum(propvalue, propbuf, proplen); } @@ -3056,17 +3056,6 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, ret = lzc_create(path, ost, props); nvlist_free(props); - if (ret == 0 && type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(hdl, path); - if (ret) { - (void) zfs_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, - "Volume successfully created, but device links " - "were not created")); - return (-1); - } - } - /* check for failure */ if (ret != 0) { char parent[ZFS_MAXNAMELEN]; @@ -3128,9 +3117,6 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer) (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) { - if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) - return (-1); - zc.zc_objset_type = DMU_OST_ZVOL; } else { zc.zc_objset_type = DMU_OST_ZFS; @@ -3167,15 +3153,6 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) if (lzc_exists(name)) verify(nvlist_add_boolean(dd->nvl, name) == 0); - if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - (void) zvol_remove_link(zhp->zfs_hdl, name); - /* - * NB: this is simply a best-effort. We don't want to - * return an error, because then we wouldn't visit all - * the volumes. - */ - } - rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); zfs_close(zhp); return (rv); @@ -3320,70 +3297,11 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } - } else if (ZFS_IS_VOLUME(zhp)) { - ret = zvol_create_link(zhp->zfs_hdl, target); } return (ret); } -typedef struct promote_data { - char cb_mountpoint[MAXPATHLEN]; - const char *cb_target; - const char *cb_errbuf; - uint64_t cb_pivot_txg; -} promote_data_t; - -static int -promote_snap_cb(zfs_handle_t *zhp, void *data) -{ - promote_data_t *pd = data; - zfs_handle_t *szhp; - char snapname[MAXPATHLEN]; - int rv = 0; - - /* We don't care about snapshots after the pivot point */ - if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) { - zfs_close(zhp); - return (0); - } - - /* Remove the device link if it's a zvol. */ - if (ZFS_IS_VOLUME(zhp)) - (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); - - /* Check for conflicting names */ - (void) strlcpy(snapname, pd->cb_target, sizeof (snapname)); - (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname)); - szhp = make_dataset_handle(zhp->zfs_hdl, snapname); - if (szhp != NULL) { - zfs_close(szhp); - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "snapshot name '%s' from origin \n" - "conflicts with '%s' from target"), - zhp->zfs_name, snapname); - rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf); - } - zfs_close(zhp); - return (rv); -} - -static int -promote_snap_done_cb(zfs_handle_t *zhp, void *data) -{ - promote_data_t *pd = data; - - /* We don't care about snapshots after the pivot point */ - if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) { - /* Create the device link if it's a zvol. */ - if (ZFS_IS_VOLUME(zhp)) - (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); - } - - zfs_close(zhp); - return (0); -} - /* * Promotes the given clone fs to be the clone parent. */ @@ -3393,10 +3311,7 @@ zfs_promote(zfs_handle_t *zhp) libzfs_handle_t *hdl = zhp->zfs_hdl; zfs_cmd_t zc = {"\0"}; char parent[MAXPATHLEN]; - char *cp; int ret; - zfs_handle_t *pzhp; - promote_data_t pd; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -3414,29 +3329,7 @@ zfs_promote(zfs_handle_t *zhp) "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } - cp = strchr(parent, '@'); - *cp = '\0'; - /* Walk the snapshots we will be moving */ - pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); - if (pzhp == NULL) - return (-1); - pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); - zfs_close(pzhp); - pd.cb_target = zhp->zfs_name; - pd.cb_errbuf = errbuf; - pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET); - if (pzhp == NULL) - return (-1); - (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint, - sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE); - ret = zfs_iter_snapshots(pzhp, B_FALSE, promote_snap_cb, &pd); - if (ret != 0) { - zfs_close(pzhp); - return (-1); - } - - /* issue the ioctl */ (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -3445,17 +3338,9 @@ zfs_promote(zfs_handle_t *zhp) if (ret != 0) { int save_errno = errno; - (void) zfs_iter_snapshots(pzhp, B_FALSE, promote_snap_done_cb, - &pd); - zfs_close(pzhp); - switch (save_errno) { case EEXIST: - /* - * There is a conflicting snapshot name. We - * should have caught this above, but they could - * have renamed something in the mean time. - */ + /* There is a conflicting snapshot name. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "conflicting snapshot '%s' from parent '%s'"), zc.zc_string, parent); @@ -3464,45 +3349,7 @@ zfs_promote(zfs_handle_t *zhp) default: return (zfs_standard_error(hdl, save_errno, errbuf)); } - } else { - (void) zfs_iter_snapshots(zhp, B_FALSE, promote_snap_done_cb, - &pd); } - - zfs_close(pzhp); - return (ret); -} - -struct createdata { - const char *cd_snapname; - int cd_ifexists; -}; - -static int -zfs_create_link_cb(zfs_handle_t *zhp, void *arg) -{ - struct createdata *cd = arg; - int ret; - - if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - char name[MAXPATHLEN]; - - (void) strlcpy(name, zhp->zfs_name, sizeof (name)); - (void) strlcat(name, "@", sizeof (name)); - (void) strlcat(name, cd->cd_snapname, sizeof (name)); - (void) zvol_create_link_common(zhp->zfs_hdl, name, - cd->cd_ifexists); - /* - * NB: this is simply a best-effort. We don't want to - * return an error, because then we wouldn't visit all - * the volumes. - */ - } - - ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd); - - zfs_close(zhp); - return (ret); } @@ -3593,31 +3440,6 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) (void) zfs_standard_error(hdl, ret, errbuf); } } - } else { - zfs_handle_t *zhp; - int linktries = 0, linkok = 0, linkfail = 0; - nvpair_t *snap; - - for (snap = nvlist_next_nvpair(snaps, NULL); snap != NULL; - snap = nvlist_next_nvpair(snaps, snap)) { - char *cp, *snapname; - - snapname = nvpair_name(snap); - cp = strchr(snapname, '@'); - *cp = '\0'; - - if ((zhp = zfs_open(hdl, snapname, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) != NULL) { - if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - ++linktries; - *cp = '@'; - if (zvol_create_link(zhp->zfs_hdl, nvpair_name(snap))) - ++linkfail; - else - ++linkok; - } - } - } } nvlist_free(props); @@ -3641,7 +3463,7 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - + (void) strlcpy(fsname, path, sizeof (fsname)); cp = strchr(fsname, '@'); *cp = '\0'; @@ -3756,8 +3578,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) - return (-1); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); @@ -3788,10 +3608,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) */ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { - if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) { - zfs_close(zhp); - return (err); - } if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) @@ -3905,7 +3721,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } if (recursive) { - struct destroydata dd; parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { @@ -3920,15 +3735,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, goto error; } - dd.snapname = delim + 1; - - /* We remove any zvol links prior to renaming them */ - verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); - ret = zfs_iter_filesystems(zhrp, zfs_check_snap_cb, &dd); - nvlist_free(dd.nvl); - if (ret) { - goto error; - } } else { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, force_unmount ? MS_FORCE : 0)) == NULL) @@ -3978,27 +3784,10 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ - if (recursive) { - struct createdata cd; - - /* only create links for datasets that had existed */ - cd.cd_snapname = delim + 1; - cd.cd_ifexists = B_TRUE; - (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb, - &cd); - } else { + if (!recursive) (void) changelist_postfix(cl); - } } else { - if (recursive) { - struct createdata cd; - - /* only create links for datasets that had existed */ - cd.cd_snapname = strchr(target, '@') + 1; - cd.cd_ifexists = B_TRUE; - ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb, - &cd); - } else { + if (!recursive) { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } @@ -4017,126 +3806,6 @@ error: return (ret); } -/* - * Given a zvol dataset, issue the ioctl to create the appropriate minor node, - * and wait briefly for udev to create the /dev link. - */ -int -zvol_create_link(libzfs_handle_t *hdl, const char *dataset) -{ - return (zvol_create_link_common(hdl, dataset, B_FALSE)); -} - -static int -zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) -{ - zfs_cmd_t zc = {"\0"}; - char path[MAXPATHLEN]; - int error; - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - - /* - * Issue the appropriate ioctl. - */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) { - switch (errno) { - case EEXIST: - /* - * Silently ignore the case where the link already - * exists. This allows 'zfs volinit' to be run multiple - * times without errors. - */ - return (0); - - case ENODEV: - /* - * snapdev set to hidden : - * device creation was not permitted (see zvol.c) - * ignore error quietly - */ - return (0); - - case ENOENT: - /* - * Dataset does not exist in the kernel. If we - * don't care (see zfs_rename), then ignore the - * error quietly. - */ - if (ifexists) { - return (0); - } - - /* FALLTHROUGH */ - - default: - return (zfs_standard_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot create device links " - "for '%s'"), dataset)); - } - } - - /* - * Wait for udev to create the device. - */ - (void) snprintf(path, sizeof (path), "%s/%s", ZVOL_DIR, dataset); - error = zpool_label_disk_wait(path, DISK_LABEL_WAIT); - if (error) - (void) printf(gettext("%s may not be immediately " - "available\n"), path); - - return (0); -} - -/* - * Remove a minor node for the given zvol and the associated /dev links. - */ -int -zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) -{ - zfs_cmd_t zc = {"\0"}; - int timeout = 3000; /* in milliseconds */ - int error = 0; - int i; - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - - /* - * Due to concurrent updates by udev the device may be reported as - * busy. In this case don't immediately fail. Instead briefly delay - * and retry the ioctl() which is now likely to succeed. If unable - * remove the link after timeout milliseconds return the failure. - */ - for (i = 0; i < timeout; i++) { - error = ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); - if (error && errno == EBUSY) { - usleep(1000); - continue; - } else { - break; - } - } - - if (error) { - switch (errno) { - case ENXIO: - /* - * Silently ignore the case where the link no longer - * exists, so that 'zfs volfini' can be run multiple - * times without errors. - */ - return (0); - - default: - return (zfs_standard_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot remove device " - "links for '%s': %s"), dataset, strerror(errno))); - } - } - - return (0); -} - nvlist_t * zfs_get_user_props(zfs_handle_t *zhp) { diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 9f366eea1..12ac9bdf9 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -2790,12 +2790,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (-1); } } - if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME && - zvol_remove_link(hdl, zhp->zfs_name) != 0) { - zfs_close(zhp); - zcmd_free_nvlists(&zc); - return (-1); - } zfs_close(zhp); } else { /* @@ -3001,10 +2995,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; - err = zvol_create_link(hdl, h->zfs_name); - if (err == 0 && ioctl_err == 0) - err = zvol_create_link(hdl, - zc.zc_value); } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, |