aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r--module/Kbuild.in3
-rw-r--r--module/os/linux/spl/spl-generic.c6
-rw-r--r--module/os/linux/spl/spl-zone.c424
-rw-r--r--module/os/linux/zfs/policy.c2
-rw-r--r--module/os/linux/zfs/zfs_ioctl_os.c47
-rw-r--r--module/os/linux/zfs/zfs_vfsops.c20
-rw-r--r--module/os/linux/zfs/zpl_super.c1
7 files changed, 501 insertions, 2 deletions
diff --git a/module/Kbuild.in b/module/Kbuild.in
index ed8dc23a9..14f236281 100644
--- a/module/Kbuild.in
+++ b/module/Kbuild.in
@@ -65,7 +65,8 @@ SPL_OBJS := \
spl-tsd.o \
spl-vmem.o \
spl-xdr.o \
- spl-zlib.o
+ spl-zlib.o \
+ spl-zone.o
spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index f99a2f966..5179100d1 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -780,8 +780,13 @@ spl_init(void)
if ((rc = spl_zlib_init()))
goto out7;
+ if ((rc = spl_zone_init()))
+ goto out8;
+
return (rc);
+out8:
+ spl_zlib_fini();
out7:
spl_kstat_fini();
out6:
@@ -801,6 +806,7 @@ out1:
static void __exit
spl_fini(void)
{
+ spl_zone_fini();
spl_zlib_fini();
spl_kstat_fini();
spl_proc_fini();
diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c
new file mode 100644
index 000000000..804c8010c
--- /dev/null
+++ b/module/os/linux/spl/spl-zone.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2021 Klara Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mutex.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <linux/file.h>
+#include <linux/magic.h>
+#include <sys/zone.h>
+
+#if defined(CONFIG_USER_NS)
+#include <linux/statfs.h>
+#include <linux/proc_ns.h>
+#endif
+
+/*
+ * Held by zone_dataset_attach(), zone_dataset_detach() and
+ * zone_dataset_visible() while they walk or modify the lists below.
+ */
+static kmutex_t zone_datasets_lock;
+/* List of zone_datasets_t entries, one per user namespace. */
+static struct list_head zone_datasets;
+
+/*
+ * Per-user-namespace entry, linked on the global zone_datasets list.
+ * zone_dataset_attach() creates it for the first dataset delegated to a
+ * namespace and zone_dataset_detach() frees it when the last one is removed.
+ */
+typedef struct zone_datasets {
+ struct list_head zds_list; /* zone_datasets linkage */
+ struct user_namespace *zds_userns; /* namespace reference */
+ struct list_head zds_datasets; /* datasets for the namespace */
+} zone_datasets_t;
+
+/*
+ * One delegated dataset, linked on its namespace entry's zds_datasets list.
+ * The name is stored inline after the fixed fields: the structure is
+ * allocated with sizeof (zone_dataset_t) + zd_dsnamelen + 1 bytes.
+ */
+typedef struct zone_dataset {
+	struct list_head zd_list;	/* zone_dataset linkage */
+	size_t zd_dsnamelen;		/* length of name, excluding the NUL */
+	char zd_dsname[];		/* NUL-terminated dataset name */
+} zone_dataset_t;
+
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+/*
+ * Resolve a file descriptor to the user namespace it refers to.
+ *
+ * On success *userns is set to the namespace; this function does not take
+ * a reference on it, and the file reference obtained from fd is dropped
+ * before returning. On failure *userns is left untouched.
+ *
+ * Returns:
+ * - 0 on success
+ * - EBADF if the file descriptor cannot be opened
+ * - ENOTTY if the file is not a user namespace file. The ZFS layer is
+ *   expected to intercept this value; returning one of the ZFS_ERR_*
+ *   errors here would break the separation of the ZFS and SPL layers.
+ */
+static int
+user_ns_get(int fd, struct user_namespace **userns)
+{
+	struct kstatfs sfs;
+	struct ns_common *nsc;
+	struct file *fp;
+	int rc = ENOTTY;
+
+	fp = fget(fd);
+	if (fp == NULL)
+		return (EBADF);
+
+	/* Reject anything that does not live on the nsfs filesystem. */
+	if (vfs_statfs(&fp->f_path, &sfs) != 0 || sfs.f_type != NSFS_MAGIC)
+		goto out;
+
+	/* Of the namespace files, only user namespaces are accepted. */
+	nsc = get_proc_ns(file_inode(fp));
+	if (nsc->ops->type != CLONE_NEWUSER)
+		goto out;
+
+	*userns = container_of(nsc, struct user_namespace, ns);
+	rc = 0;
+out:
+	fput(fp);
+
+	return (rc);
+}
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+
+/*
+ * Return the value used as the zone ID of a user namespace: its inode
+ * number, read from whichever field this kernel's structure provides.
+ */
+static unsigned int
+user_ns_zoneid(struct user_namespace *user_ns)
+{
+#if defined(HAVE_USER_NS_COMMON_INUM)
+	return (user_ns->ns.inum);
+#else
+	return (user_ns->proc_inum);
+#endif
+}
+
+/*
+ * Find the namespace entry whose user namespace has zone ID nsinum.
+ * Returns the entry, or NULL if the zone_datasets list has no match.
+ */
+static struct zone_datasets *
+zone_datasets_lookup(unsigned int nsinum)
+{
+	zone_datasets_t *entry;
+	zone_datasets_t *match = NULL;
+
+	list_for_each_entry(entry, &zone_datasets, zds_list) {
+		if (user_ns_zoneid(entry->zds_userns) != nsinum)
+			continue;
+		match = entry;
+		break;
+	}
+
+	return (match);
+}
+
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+/*
+ * Find the delegation named by the first dsnamelen bytes of dataset within
+ * the namespace entry zds. Returns the entry, or NULL if there is none.
+ */
+static struct zone_dataset *
+zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
+{
+	zone_dataset_t *cur;
+
+	list_for_each_entry(cur, &zds->zds_datasets, zd_list) {
+		/* Compare the bytes only when the lengths already agree. */
+		if (cur->zd_dsnamelen == dsnamelen &&
+		    strncmp(cur->zd_dsname, dataset, dsnamelen) == 0)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Permission check for attach/detach: returns 0 when the credential's uid
+ * is GLOBAL_ROOT_UID and EPERM otherwise.
+ */
+static int
+zone_dataset_cred_check(cred_t *cred)
+{
+	return (uid_eq(cred->uid, GLOBAL_ROOT_UID) ? 0 : EPERM);
+}
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+
+/*
+ * Validate a dataset name and report its effective length.
+ *
+ * Returns ENOENT, leaving *dsnamelen untouched, when the name is empty or
+ * begins with '/'. Otherwise returns 0 and stores in *dsnamelen the length
+ * of the name with a single trailing '/' (if present) not counted.
+ */
+static int
+zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
+{
+	size_t len;
+
+	switch (dataset[0]) {
+	case '\0':
+	case '/':
+		return (ENOENT);
+	default:
+		break;
+	}
+
+	/* len is at least 1 here, so len - 1 is a valid index. */
+	len = strlen(dataset);
+	if (dataset[len - 1] == '/')
+		len--;
+	*dsnamelen = len;
+
+	return (0);
+}
+
+/*
+ * Delegate a dataset to the user namespace referred to by cleanup_fd.
+ *
+ * Returns:
+ * - 0 on success
+ * - EPERM if cred's uid is not GLOBAL_ROOT_UID
+ * - ENOENT if dataset is empty or begins with '/'
+ * - EBADF or ENOTTY if cleanup_fd does not yield a user namespace
+ *   (see user_ns_get())
+ * - EEXIST if the dataset is already attached to that namespace
+ * - ENXIO if CONFIG_USER_NS or HAVE_USER_NS_COMMON_INUM is not defined
+ */
+int
+zone_dataset_attach(cred_t *cred, const char *dataset, int cleanup_fd)
+{
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+ struct user_namespace *userns;
+ zone_datasets_t *zds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+ if ((error = user_ns_get(cleanup_fd, &userns)) != 0)
+ return (error);
+
+ mutex_enter(&zone_datasets_lock);
+ zds = zone_datasets_lookup(user_ns_zoneid(userns));
+ if (zds == NULL) {
+ /* First delegation for this namespace: create its entry. */
+ zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
+ INIT_LIST_HEAD(&zds->zds_list);
+ INIT_LIST_HEAD(&zds->zds_datasets);
+ zds->zds_userns = userns;
+ /*
+ * Lock the namespace by increasing its refcount to prevent
+ * the namespace ID from being reused.
+ */
+ get_user_ns(userns);
+ list_add_tail(&zds->zds_list, &zone_datasets);
+ } else {
+ zd = zone_dataset_lookup(zds, dataset, dsnamelen);
+ if (zd != NULL) {
+ mutex_exit(&zone_datasets_lock);
+ return (EEXIST);
+ }
+ }
+
+ /*
+ * Store only the first dsnamelen bytes (a trailing slash, if any, is
+ * not part of dsnamelen) and terminate the copy explicitly.
+ */
+ zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
+ zd->zd_dsnamelen = dsnamelen;
+ strncpy(zd->zd_dsname, dataset, dsnamelen);
+ zd->zd_dsname[dsnamelen] = '\0';
+ INIT_LIST_HEAD(&zd->zd_list);
+ list_add_tail(&zd->zd_list, &zds->zds_datasets);
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+#else
+ return (ENXIO);
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+}
+EXPORT_SYMBOL(zone_dataset_attach);
+
+/*
+ * Remove the delegation of a dataset from the user namespace referred to
+ * by cleanup_fd.
+ *
+ * Returns:
+ * - 0 on success
+ * - EPERM if cred's uid is not GLOBAL_ROOT_UID
+ * - ENOENT if dataset is empty or begins with '/', or if the dataset is
+ *   not attached to that namespace
+ * - EBADF or ENOTTY if cleanup_fd does not yield a user namespace
+ *   (see user_ns_get())
+ * - ENXIO if CONFIG_USER_NS or HAVE_USER_NS_COMMON_INUM is not defined
+ */
+int
+zone_dataset_detach(cred_t *cred, const char *dataset, int cleanup_fd)
+{
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+ struct user_namespace *userns;
+ zone_datasets_t *zds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+ if ((error = user_ns_get(cleanup_fd, &userns)) != 0)
+ return (error);
+
+ mutex_enter(&zone_datasets_lock);
+ zds = zone_datasets_lookup(user_ns_zoneid(userns));
+ if (zds != NULL)
+ zd = zone_dataset_lookup(zds, dataset, dsnamelen);
+ /* zd is only assigned when zds != NULL; the || keeps it unread otherwise. */
+ if (zds == NULL || zd == NULL) {
+ mutex_exit(&zone_datasets_lock);
+ return (ENOENT);
+ }
+
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+
+ /* Prune the namespace entry if it has no more delegations. */
+ if (list_empty(&zds->zds_datasets)) {
+ /*
+ * Decrease the refcount now that the namespace is no longer
+ * used. It is no longer necessary to prevent the namespace ID
+ * from being reused.
+ */
+ put_user_ns(userns);
+ list_del(&zds->zds_list);
+ kmem_free(zds, sizeof (*zds));
+ }
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+#else
+ return (ENXIO);
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+}
+EXPORT_SYMBOL(zone_dataset_detach);
+
+/*
+ * Report whether a dataset is visible to the current process and, through
+ * *write (when non-NULL), whether it is writable.
+ *
+ * A dataset is visible if:
+ * - It is a parent of a namespace entry.
+ * - It is one of the namespace entries.
+ * - It is a child of a namespace entry.
+ *
+ * A dataset is writable if:
+ * - It is one of the namespace entries.
+ * - It is a child of a namespace entry.
+ *
+ * The parent datasets of namespace entries are visible and
+ * read-only to provide a path back to the root of the pool.
+ *
+ * A process in the global zone sees every validly named dataset as visible
+ * and writable. Returns 1 if visible, 0 otherwise; a name rejected by
+ * zone_dataset_name_check() is never visible.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+	size_t dsnamelen, zd_len;
+	int visible;
+
+	/* Default to read-only, in case visible is returned. */
+	if (write != NULL)
+		*write = 0;
+	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
+		return (0);
+	if (INGLOBALZONE(curproc)) {
+		if (write != NULL)
+			*write = 1;
+		return (1);
+	}
+
+	mutex_enter(&zone_datasets_lock);
+	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
+	if (zds == NULL) {
+		mutex_exit(&zone_datasets_lock);
+		return (0);
+	}
+
+	visible = 0;
+	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
+		/* The length was recorded at attach time; no need to rescan. */
+		zd_len = zd->zd_dsnamelen;
+		if (zd_len > dsnamelen) {
+			/*
+			 * The name of the namespace entry is longer than that
+			 * of the dataset, so it could be that the dataset is a
+			 * parent of the namespace entry.
+			 */
+			visible = memcmp(zd->zd_dsname, dataset,
+			    dsnamelen) == 0 &&
+			    zd->zd_dsname[dsnamelen] == '/';
+			if (visible)
+				break;
+		} else if (zd_len == dsnamelen) {
+			/*
+			 * The name of the namespace entry is as long as that
+			 * of the dataset, so perhaps the dataset itself is the
+			 * namespace entry.
+			 */
+			visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
+			if (visible) {
+				if (write != NULL)
+					*write = 1;
+				break;
+			}
+		} else {
+			/*
+			 * The name of the namespace entry is shorter than that
+			 * of the dataset, so perhaps the dataset is a child of
+			 * the namespace entry.
+			 */
+			visible = memcmp(zd->zd_dsname, dataset,
+			    zd_len) == 0 && dataset[zd_len] == '/';
+			if (visible) {
+				if (write != NULL)
+					*write = 1;
+				break;
+			}
+		}
+	}
+
+	mutex_exit(&zone_datasets_lock);
+	return (visible);
+}
+EXPORT_SYMBOL(zone_dataset_visible);
+
+/*
+ * Zone ID of the global zone: the ID of init_user_ns when the kernel has
+ * CONFIG_USER_NS, otherwise 0.
+ */
+unsigned int
+global_zoneid(void)
+{
+#if defined(CONFIG_USER_NS)
+	return (user_ns_zoneid(&init_user_ns));
+#else
+	return (0);
+#endif
+}
+EXPORT_SYMBOL(global_zoneid);
+
+/*
+ * Zone ID of a credential: the ID of the user namespace it belongs to when
+ * the kernel has CONFIG_USER_NS, otherwise 0.
+ */
+unsigned int
+crgetzoneid(const cred_t *cr)
+{
+#if defined(CONFIG_USER_NS)
+	return (user_ns_zoneid(cr->user_ns));
+#else
+	return (0);
+#endif
+}
+EXPORT_SYMBOL(crgetzoneid);
+
+/*
+ * Report whether a process is in the global zone, i.e. whether its
+ * credential's user namespace is init_user_ns. Without CONFIG_USER_NS
+ * every process is considered to be in the global zone.
+ */
+boolean_t
+inglobalzone(proc_t *proc)
+{
+	boolean_t global = B_TRUE;
+
+#if defined(CONFIG_USER_NS)
+	global = (proc->cred->user_ns == &init_user_ns);
+#endif
+
+	return (global);
+}
+EXPORT_SYMBOL(inglobalzone);
+
+/*
+ * Set up the zone bookkeeping: an empty list of namespace entries and the
+ * mutex that protects it. Always returns 0.
+ */
+int
+spl_zone_init(void)
+{
+	INIT_LIST_HEAD(&zone_datasets);
+	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	return (0);
+}
+
+/*
+ * Tear down the zone bookkeeping: free every remaining delegation, drop
+ * the namespace reference held by each namespace entry, free the entries
+ * and destroy the lock.
+ */
+void
+spl_zone_fini(void)
+{
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+
+	/*
+	 * It would be better to assert an empty zone_datasets, but since
+	 * there's no automatic mechanism for cleaning them up if the user
+	 * namespace is destroyed, just do it here, since spl is about to go
+	 * out of context.
+	 */
+	while (!list_empty(&zone_datasets)) {
+		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
+		while (!list_empty(&zds->zds_datasets)) {
+			zd = list_entry(zds->zds_datasets.next,
+			    zone_dataset_t, zd_list);
+			list_del(&zd->zd_list);
+			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+		}
+		/*
+		 * zone_dataset_attach() takes one reference per namespace
+		 * entry, not one per dataset, so release exactly one here.
+		 * Releasing inside the loop above would underflow the
+		 * refcount for any namespace with more than one dataset.
+		 */
+		put_user_ns(zds->zds_userns);
+		list_del(&zds->zds_list);
+		kmem_free(zds, sizeof (*zds));
+	}
+	mutex_destroy(&zone_datasets_lock);
+}
diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c
index 5a52092bb..ab00d2ae1 100644
--- a/module/os/linux/zfs/policy.c
+++ b/module/os/linux/zfs/policy.c
@@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
static int
priv_policy(const cred_t *cr, int capability, int err)
{
- return (priv_policy_ns(cr, capability, err, NULL));
+ return (priv_policy_ns(cr, capability, err, cr->user_ns));
}
static int
diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c
index c65702e1a..67b864aa7 100644
--- a/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/module/os/linux/zfs/zfs_ioctl_os.c
@@ -37,6 +37,7 @@
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2021 Klara, Inc.
*/
#include <sys/types.h>
@@ -150,6 +151,48 @@ out:
}
+/*
+ * ZFS_IOC_USERNS_ATTACH handler: attach the dataset zc->zc_name to the
+ * user namespace referred to by zc->zc_cleanup_fd. Returns EINVAL for a
+ * NULL zc, otherwise the result of zone_dataset_attach() with ENOTTY
+ * translated to ZFS_ERR_NOT_USER_NAMESPACE.
+ */
+static int
+zfs_ioc_userns_attach(zfs_cmd_t *zc)
+{
+	int rc;
+
+	if (zc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	rc = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
+
+	/*
+	 * The SPL layer does not know about ZFS_ERR_* errors and reports a
+	 * file that is not a user namespace as ENOTTY (see user_ns_get() in
+	 * spl-zone.c); translate that into the ZFS-specific error here.
+	 */
+	return (rc == ENOTTY ? ZFS_ERR_NOT_USER_NAMESPACE : rc);
+}
+
+/*
+ * ZFS_IOC_USERNS_DETACH handler: detach the dataset zc->zc_name from the
+ * user namespace referred to by zc->zc_cleanup_fd. Returns EINVAL for a
+ * NULL zc, otherwise the result of zone_dataset_detach() with ENOTTY
+ * translated to ZFS_ERR_NOT_USER_NAMESPACE.
+ */
+static int
+zfs_ioc_userns_detach(zfs_cmd_t *zc)
+{
+	int rc;
+
+	if (zc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	rc = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
+
+	/*
+	 * The SPL layer does not know about ZFS_ERR_* errors and reports a
+	 * file that is not a user namespace as ENOTTY (see user_ns_get() in
+	 * spl-zone.c); translate that into the ZFS-specific error here.
+	 */
+	return (rc == ENOTTY ? ZFS_ERR_NOT_USER_NAMESPACE : rc);
+}
+
uint64_t
zfs_max_nvlist_src_size_os(void)
{
@@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
void
zfs_ioctl_init_os(void)
{
+ zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
+ zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
+ zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
+ zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
}
#ifdef CONFIG_COMPAT
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 81a059651..a67ba821d 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
int error = 0;
zfsvfs_t *zfsvfs = NULL;
vfs_t *vfs = NULL;
+ int canwrite;
+ int dataset_visible_zone;
ASSERT(zm);
ASSERT(osname);
+ dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
+
+ /*
+ * Refuse to mount a filesystem if we are in a namespace and the
+ * dataset is not visible or writable in that namespace.
+ */
+ if (!INGLOBALZONE(curproc) &&
+ (!dataset_visible_zone || !canwrite)) {
+ return (SET_ERROR(EPERM));
+ }
+
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
if (error)
return (error);
+ /*
+ * If a non-writable filesystem is being mounted without the
+ * read-only flag, pretend it was set, as done for snapshots.
+ */
+ if (!canwrite)
+ vfs->vfs_readonly = true;
+
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {
zfsvfs_vfs_free(vfs);
diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c
index c2fd3fee1..b18efde9b 100644
--- a/module/os/linux/zfs/zpl_super.c
+++ b/module/os/linux/zfs/zpl_super.c
@@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
+ .fs_flags = FS_USERNS_MOUNT,
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};