diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/Kbuild.in | 3 | ||||
-rw-r--r-- | module/os/linux/spl/spl-generic.c | 6 | ||||
-rw-r--r-- | module/os/linux/spl/spl-zone.c | 424 | ||||
-rw-r--r-- | module/os/linux/zfs/policy.c | 2 | ||||
-rw-r--r-- | module/os/linux/zfs/zfs_ioctl_os.c | 47 | ||||
-rw-r--r-- | module/os/linux/zfs/zfs_vfsops.c | 20 | ||||
-rw-r--r-- | module/os/linux/zfs/zpl_super.c | 1 |
7 files changed, 501 insertions, 2 deletions
diff --git a/module/Kbuild.in b/module/Kbuild.in index ed8dc23a9..14f236281 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -65,7 +65,8 @@ SPL_OBJS := \ spl-tsd.o \ spl-vmem.o \ spl-xdr.o \ - spl-zlib.o + spl-zlib.o \ + spl-zone.o spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS)) diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index f99a2f966..5179100d1 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -780,8 +780,13 @@ spl_init(void) if ((rc = spl_zlib_init())) goto out7; + if ((rc = spl_zone_init())) + goto out8; + return (rc); +out8: + spl_zlib_fini(); out7: spl_kstat_fini(); out6: @@ -801,6 +806,7 @@ out1: static void __exit spl_fini(void) { + spl_zone_fini(); spl_zlib_fini(); spl_kstat_fini(); spl_proc_fini(); diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c new file mode 100644 index 000000000..804c8010c --- /dev/null +++ b/module/os/linux/spl/spl-zone.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2021 Klara Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/mutex.h> +#include <sys/sysmacros.h> +#include <sys/kmem.h> +#include <linux/file.h> +#include <linux/magic.h> +#include <sys/zone.h> + +#if defined(CONFIG_USER_NS) +#include <linux/statfs.h> +#include <linux/proc_ns.h> +#endif + +static kmutex_t zone_datasets_lock; +static struct list_head zone_datasets; + +typedef struct zone_datasets { + struct list_head zds_list; /* zone_datasets linkage */ + struct user_namespace *zds_userns; /* namespace reference */ + struct list_head zds_datasets; /* datasets for the namespace */ +} zone_datasets_t; + +typedef struct zone_dataset { + struct list_head zd_list; /* zone_dataset linkage */ + size_t zd_dsnamelen; /* length of name */ + char zd_dsname[0]; /* name of the member dataset */ +} zone_dataset_t; + +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +/* + * Returns: + * - 0 on success + * - EBADF if it cannot open the provided file descriptor + * - ENOTTY if the file itself is not a user namespace file. We want to + * intercept this error in the ZFS layer. We cannot just return one of the + * ZFS_ERR_* errors here as we want to preserve the separation of the ZFS + * and the SPL layers.
+ */ +static int +user_ns_get(int fd, struct user_namespace **userns) +{ + struct kstatfs st; + struct file *nsfile; + struct ns_common *ns; + int error; + + if ((nsfile = fget(fd)) == NULL) + return (EBADF); + if (vfs_statfs(&nsfile->f_path, &st) != 0) { + error = ENOTTY; + goto done; + } + if (st.f_type != NSFS_MAGIC) { + error = ENOTTY; + goto done; + } + ns = get_proc_ns(file_inode(nsfile)); + if (ns->ops->type != CLONE_NEWUSER) { + error = ENOTTY; + goto done; + } + *userns = container_of(ns, struct user_namespace, ns); + + error = 0; +done: + fput(nsfile); + + return (error); +} +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ + +static unsigned int +user_ns_zoneid(struct user_namespace *user_ns) +{ + unsigned int r; + +#if defined(HAVE_USER_NS_COMMON_INUM) + r = user_ns->ns.inum; +#else + r = user_ns->proc_inum; +#endif + + return (r); +} + +static struct zone_datasets * +zone_datasets_lookup(unsigned int nsinum) +{ + zone_datasets_t *zds; + + list_for_each_entry(zds, &zone_datasets, zds_list) { + if (user_ns_zoneid(zds->zds_userns) == nsinum) + return (zds); + } + return (NULL); +} + +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +static struct zone_dataset * +zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) +{ + zone_dataset_t *zd; + + list_for_each_entry(zd, &zds->zds_datasets, zd_list) { + if (zd->zd_dsnamelen != dsnamelen) + continue; + if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) + return (zd); + } + + return (NULL); +} + +static int +zone_dataset_cred_check(cred_t *cred) +{ + + if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) + return (EPERM); + + return (0); +} +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ + +static int +zone_dataset_name_check(const char *dataset, size_t *dsnamelen) +{ + + if (dataset[0] == '\0' || dataset[0] == '/') + return (ENOENT); + + *dsnamelen = strlen(dataset); + /* Ignore trailing slash, if supplied. 
*/ + if (dataset[*dsnamelen - 1] == '/') + (*dsnamelen)--; + + return (0); +} + +int +zone_dataset_attach(cred_t *cred, const char *dataset, int cleanup_fd) +{ +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) + struct user_namespace *userns; + zone_datasets_t *zds; + zone_dataset_t *zd; + int error; + size_t dsnamelen; + + if ((error = zone_dataset_cred_check(cred)) != 0) + return (error); + if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) + return (error); + if ((error = user_ns_get(cleanup_fd, &userns)) != 0) + return (error); + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(user_ns_zoneid(userns)); + if (zds == NULL) { + zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); + INIT_LIST_HEAD(&zds->zds_list); + INIT_LIST_HEAD(&zds->zds_datasets); + zds->zds_userns = userns; + /* + * Lock the namespace by increasing its refcount to prevent + * the namespace ID from being reused. + */ + get_user_ns(userns); + list_add_tail(&zds->zds_list, &zone_datasets); + } else { + zd = zone_dataset_lookup(zds, dataset, dsnamelen); + if (zd != NULL) { + mutex_exit(&zone_datasets_lock); + return (EEXIST); + } + } + + zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); + zd->zd_dsnamelen = dsnamelen; + strncpy(zd->zd_dsname, dataset, dsnamelen); + zd->zd_dsname[dsnamelen] = '\0'; + INIT_LIST_HEAD(&zd->zd_list); + list_add_tail(&zd->zd_list, &zds->zds_datasets); + + mutex_exit(&zone_datasets_lock); + return (0); +#else + return (ENXIO); +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +} +EXPORT_SYMBOL(zone_dataset_attach); + +int +zone_dataset_detach(cred_t *cred, const char *dataset, int cleanup_fd) +{ +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) + struct user_namespace *userns; + zone_datasets_t *zds; + zone_dataset_t *zd; + int error; + size_t dsnamelen; + + if ((error = zone_dataset_cred_check(cred)) != 0) + return (error); + if ((error = 
zone_dataset_name_check(dataset, &dsnamelen)) != 0) + return (error); + if ((error = user_ns_get(cleanup_fd, &userns)) != 0) + return (error); + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(user_ns_zoneid(userns)); + if (zds != NULL) + zd = zone_dataset_lookup(zds, dataset, dsnamelen); + if (zds == NULL || zd == NULL) { + mutex_exit(&zone_datasets_lock); + return (ENOENT); + } + + list_del(&zd->zd_list); + kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); + + /* Prune the namespace entry if it has no more delegations. */ + if (list_empty(&zds->zds_datasets)) { + /* + * Decrease the refcount now that the namespace is no longer + * used. It is no longer necessary to prevent the namespace ID + * from being reused. + */ + put_user_ns(userns); + list_del(&zds->zds_list); + kmem_free(zds, sizeof (*zds)); + } + + mutex_exit(&zone_datasets_lock); + return (0); +#else + return (ENXIO); +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +} +EXPORT_SYMBOL(zone_dataset_detach); + +/* + * A dataset is visible if: + * - It is a parent of a namespace entry. + * - It is one of the namespace entries. + * - It is a child of a namespace entry. + * + * A dataset is writable if: + * - It is one of the namespace entries. + * - It is a child of a namespace entry. + * + * The parent datasets of namespace entries are visible and + * read-only to provide a path back to the root of the pool. + */ +int +zone_dataset_visible(const char *dataset, int *write) +{ + zone_datasets_t *zds; + zone_dataset_t *zd; + size_t dsnamelen, zd_len; + int visible; + + /* Default to read-only, in case visible is returned. 
*/ + if (write != NULL) + *write = 0; + if (zone_dataset_name_check(dataset, &dsnamelen) != 0) + return (0); + if (INGLOBALZONE(curproc)) { + if (write != NULL) + *write = 1; + return (1); + } + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); + if (zds == NULL) { + mutex_exit(&zone_datasets_lock); + return (0); + } + + visible = 0; + list_for_each_entry(zd, &zds->zds_datasets, zd_list) { + zd_len = strlen(zd->zd_dsname); + if (zd_len > dsnamelen) { + /* + * The name of the namespace entry is longer than that + * of the dataset, so it could be that the dataset is a + * parent of the namespace entry. + */ + visible = memcmp(zd->zd_dsname, dataset, + dsnamelen) == 0 && + zd->zd_dsname[dsnamelen] == '/'; + if (visible) + break; + } else if (zd_len == dsnamelen) { + /* + * The name of the namespace entry is as long as that + * of the dataset, so perhaps the dataset itself is the + * namespace entry. + */ + visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; + if (visible) { + if (write != NULL) + *write = 1; + break; + } + } else { + /* + * The name of the namespace entry is shorter than that + * of the dataset, so perhaps the dataset is a child of + * the namespace entry. 
+ */ + visible = memcmp(zd->zd_dsname, dataset, + zd_len) == 0 && dataset[zd_len] == '/'; + if (visible) { + if (write != NULL) + *write = 1; + break; + } + } + } + + mutex_exit(&zone_datasets_lock); + return (visible); +} +EXPORT_SYMBOL(zone_dataset_visible); + +unsigned int +global_zoneid(void) +{ + unsigned int z = 0; + +#if defined(CONFIG_USER_NS) + z = user_ns_zoneid(&init_user_ns); +#endif + + return (z); +} +EXPORT_SYMBOL(global_zoneid); + +unsigned int +crgetzoneid(const cred_t *cr) +{ + unsigned int r = 0; + +#if defined(CONFIG_USER_NS) + r = user_ns_zoneid(cr->user_ns); +#endif + + return (r); +} +EXPORT_SYMBOL(crgetzoneid); + +boolean_t +inglobalzone(proc_t *proc) +{ +#if defined(CONFIG_USER_NS) + return (proc->cred->user_ns == &init_user_ns); +#else + return (B_TRUE); +#endif +} +EXPORT_SYMBOL(inglobalzone); + +int +spl_zone_init(void) +{ + mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); + INIT_LIST_HEAD(&zone_datasets); + return (0); +} + +void +spl_zone_fini(void) +{ + zone_datasets_t *zds; + zone_dataset_t *zd; + + /* + * It would be better to assert an empty zone_datasets, but since + * there's no automatic mechanism for cleaning them up if the user + * namespace is destroyed, just do it here, since spl is about to go + * out of context. 
+ */ + while (!list_empty(&zone_datasets)) { + zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); + while (!list_empty(&zds->zds_datasets)) { + zd = list_entry(zds->zds_datasets.next, + zone_dataset_t, zd_list); + list_del(&zd->zd_list); + kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); + } + put_user_ns(zds->zds_userns); /* one ref per namespace */ + list_del(&zds->zds_list); + kmem_free(zds, sizeof (*zds)); + } + mutex_destroy(&zone_datasets_lock); +} diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 5a52092bb..ab00d2ae1 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err, static int priv_policy(const cred_t *cr, int capability, int err) { - return (priv_policy_ns(cr, capability, err, NULL)); + return (priv_policy_ns(cr, capability, err, cr->user_ns)); } static int diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c index c65702e1a..67b864aa7 100644 --- a/module/os/linux/zfs/zfs_ioctl_os.c +++ b/module/os/linux/zfs/zfs_ioctl_os.c @@ -37,6 +37,7 @@ * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2021 Klara, Inc. */ #include <sys/types.h> @@ -150,6 +151,48 @@ out: } +static int +zfs_ioc_userns_attach(zfs_cmd_t *zc) +{ + int error; + + if (zc == NULL) + return (SET_ERROR(EINVAL)); + + error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd); + + /* + * Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived + * back from the SPL layer, which does not know about ZFS_ERR_* errors. + * See the comment at the user_ns_get() function in spl-zone.c for + * details. 
+ */ + if (error == ENOTTY) + error = ZFS_ERR_NOT_USER_NAMESPACE; + + return (error); +} + +static int +zfs_ioc_userns_detach(zfs_cmd_t *zc) +{ + int error; + + if (zc == NULL) + return (SET_ERROR(EINVAL)); + + error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd); + + /* + * See the comment in zfs_ioc_userns_attach() for details on what is + * going on here. + */ + if (error == ENOTTY) + error = ZFS_ERR_NOT_USER_NAMESPACE; + + return (error); +} + uint64_t zfs_max_nvlist_src_size_os(void) { @@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname) void zfs_ioctl_init_os(void) { + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH, + zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH, + zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE); } #ifdef CONFIG_COMPAT diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 81a059651..a67ba821d 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) int error = 0; zfsvfs_t *zfsvfs = NULL; vfs_t *vfs = NULL; + int canwrite; + int dataset_visible_zone; ASSERT(zm); ASSERT(osname); + dataset_visible_zone = zone_dataset_visible(osname, &canwrite); + + /* + * Refuse to mount a filesystem if we are in a namespace and the + * dataset is not visible or writable in that namespace. + */ + if (!INGLOBALZONE(curproc) && + (!dataset_visible_zone || !canwrite)) { + return (SET_ERROR(EPERM)); + } + error = zfsvfs_parse_options(zm->mnt_data, &vfs); if (error) return (error); + /* + * If a non-writable filesystem is being mounted without the + * read-only flag, pretend it was set, as done for snapshots. 
+ */ + if (!canwrite) + vfs->vfs_readonly = true; + error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs); if (error) { zfsvfs_vfs_free(vfs); diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index c2fd3fee1..b18efde9b 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = { struct file_system_type zpl_fs_type = { .owner = THIS_MODULE, .name = ZFS_DRIVER, + .fs_flags = FS_USERNS_MOUNT, .mount = zpl_mount, .kill_sb = zpl_kill_sb, }; |