aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cmd/zfs/zfs_main.c60
-rw-r--r--config/kernel-user-ns-inum.m423
-rw-r--r--config/kernel.m42
-rw-r--r--contrib/pyzfs/libzfs_core/_constants.py1
-rw-r--r--include/libzfs.h10
-rw-r--r--include/os/linux/spl/sys/zone.h31
-rw-r--r--include/sys/fs/zfs.h3
-rw-r--r--lib/libspl/include/sys/types.h2
-rw-r--r--lib/libspl/include/zone.h12
-rw-r--r--lib/libspl/os/linux/zone.c32
-rw-r--r--lib/libuutil/libuutil.abi2
-rw-r--r--lib/libzfs/libzfs.abi9
-rw-r--r--lib/libzfs/libzfs_util.c6
-rw-r--r--lib/libzfs/os/linux/libzfs_util_os.c69
-rw-r--r--lib/libzfs_core/libzfs_core.abi2
-rw-r--r--man/Makefile.am2
-rw-r--r--man/man7/zfsprops.73
l---------man/man8/zfs-unzone.81
-rw-r--r--man/man8/zfs-zone.8116
-rw-r--r--module/Kbuild.in3
-rw-r--r--module/os/linux/spl/spl-generic.c6
-rw-r--r--module/os/linux/spl/spl-zone.c424
-rw-r--r--module/os/linux/zfs/policy.c2
-rw-r--r--module/os/linux/zfs/zfs_ioctl_os.c47
-rw-r--r--module/os/linux/zfs/zfs_vfsops.c20
-rw-r--r--module/os/linux/zfs/zpl_super.c1
-rw-r--r--tests/runfiles/linux.run3
-rw-r--r--tests/zfs-tests/include/commands.cfg2
-rw-r--r--tests/zfs-tests/tests/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh5
-rwxr-xr-xtests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh115
-rwxr-xr-xtests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh97
-rwxr-xr-xtests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh67
33 files changed, 1166 insertions, 15 deletions
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 6282d8946..30b2ae0c4 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif
+#ifdef __linux__
+static int zfs_do_zone(int argc, char **argv);
+static int zfs_do_unzone(int argc, char **argv);
+#endif
+
/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
@@ -184,6 +189,8 @@ typedef enum {
HELP_JAIL,
HELP_UNJAIL,
HELP_WAIT,
+ HELP_ZONE,
+ HELP_UNZONE,
} zfs_help_t;
typedef struct zfs_command {
@@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
#endif
+
+#ifdef __linux__
+ { "zone", zfs_do_zone, HELP_ZONE },
+ { "unzone", zfs_do_unzone, HELP_UNZONE },
+#endif
};
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@@ -415,6 +427,10 @@ get_usage(zfs_help_t idx)
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
+ case HELP_ZONE:
+ return (gettext("\tzone <nsfile> <filesystem>\n"));
+ case HELP_UNZONE:
+ return (gettext("\tunzone <nsfile> <filesystem>\n"));
default:
__builtin_unreachable();
}
@@ -8692,6 +8708,50 @@ main(int argc, char **argv)
return (ret);
}
+/*
+ * zfs zone nsfile filesystem
+ * zfs unzone nsfile filesystem
+ *
+ * Attach the given filesystem to, or detach it from, the user namespace
+ * identified by nsfile.
+ */
+#ifdef __linux__
+/*
+ * Shared implementation of "zfs zone" and "zfs unzone".
+ *
+ * argv[1] is the namespace file and argv[2] the filesystem; any other
+ * operand count prints a diagnostic and calls usage().  The filesystem is
+ * opened as ZFS_TYPE_FILESYSTEM and handed to zfs_userns() together with
+ * the attach flag (B_TRUE attaches, B_FALSE detaches).
+ *
+ * Returns 0 on success and 1 if the dataset cannot be opened or
+ * zfs_userns() reports a failure.
+ */
+static int
+zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
+{
+	if (argc < 3) {
+		(void) fprintf(stderr, gettext("missing argument(s)\n"));
+		usage(B_FALSE);
+	} else if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	zfs_handle_t *fs = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
+	if (fs == NULL)
+		return (1);
+
+	/* Collapse any non-zero libzfs status into exit status 1. */
+	int failed = (zfs_userns(fs, argv[1], attach) != 0);
+
+	zfs_close(fs);
+
+	return (failed);
+}
+
+/*
+ * "zfs zone" entry point: attach the filesystem to the user namespace.
+ * Returns whatever zfs_do_zone_impl() returns.
+ */
+static int
+zfs_do_zone(int argc, char **argv)
+{
+	const boolean_t attach = B_TRUE;
+
+	return (zfs_do_zone_impl(argc, argv, attach));
+}
+
+/*
+ * "zfs unzone" entry point: detach the filesystem from the user namespace.
+ * Returns whatever zfs_do_zone_impl() returns.
+ */
+static int
+zfs_do_unzone(int argc, char **argv)
+{
+	const boolean_t attach = B_FALSE;
+
+	return (zfs_do_zone_impl(argc, argv, attach));
+}
+#endif
+
#ifdef __FreeBSD__
#include <sys/jail.h>
#include <jail.h>
diff --git a/config/kernel-user-ns-inum.m4 b/config/kernel-user-ns-inum.m4
new file mode 100644
index 000000000..2207a4aa6
--- /dev/null
+++ b/config/kernel-user-ns-inum.m4
@@ -0,0 +1,23 @@
+dnl #
+dnl # 3.18 API change
+dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
+dnl #
+dnl # Test source: a snippet that assigns to the ns.inum member of
+dnl # struct user_namespace, so it only builds when that member exists.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
+ ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
+ #include <linux/user_namespace.h>
+ ], [
+ struct user_namespace uns;
+ uns.ns.inum = 0;
+ ])
+])
+
+dnl #
+dnl # Report the outcome of the user_ns_common_inum test and define
+dnl # HAVE_USER_NS_COMMON_INUM in the "yes" branch.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
+ AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
+ ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
+ [user_namespace->ns.inum exists])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 953036750..1f274cbe4 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_KTHREAD
ZFS_AC_KERNEL_SRC_ZERO_PAGE
ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
+ ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_KTHREAD
ZFS_AC_KERNEL_ZERO_PAGE
ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
+ ZFS_AC_KERNEL_USER_NS_COMMON_INUM
])
dnl #
diff --git a/contrib/pyzfs/libzfs_core/_constants.py b/contrib/pyzfs/libzfs_core/_constants.py
index 3273652f7..7ee2ef87d 100644
--- a/contrib/pyzfs/libzfs_core/_constants.py
+++ b/contrib/pyzfs/libzfs_core/_constants.py
@@ -100,6 +100,7 @@ zfs_errno = enum_with_offset(1024, [
'ZFS_ERR_REBUILD_IN_PROGRESS',
'ZFS_ERR_BADPROP',
'ZFS_ERR_VDEV_NOTSUP',
+ 'ZFS_ERR_NOT_USER_NAMESPACE',
],
{}
)
diff --git a/include/libzfs.h b/include/libzfs.h
index 2c2aa3faf..fe420de4d 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -150,6 +150,7 @@ typedef enum zfs_error {
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
EZFS_REBUILDING, /* resilvering (sequential reconstrution) */
EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */
+ EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */
EZFS_UNKNOWN
} zfs_error_t;
@@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,
#endif /* __FreeBSD__ */
+#ifdef __linux__
+
+/*
+ * Add or delete the given filesystem to/from the given user namespace.
+ */
+_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach);
+
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/include/os/linux/spl/sys/zone.h b/include/os/linux/spl/sys/zone.h
index 00e30f690..5978a6285 100644
--- a/include/os/linux/spl/sys/zone.h
+++ b/include/os/linux/spl/sys/zone.h
@@ -25,11 +25,34 @@
#define _SPL_ZONE_H
#include <sys/byteorder.h>
+#include <sys/cred.h>
-#define GLOBAL_ZONEID 0
+#include <linux/cred.h>
+#include <linux/user_namespace.h>
-#define zone_dataset_visible(x, y) (1)
-#define crgetzoneid(x) (GLOBAL_ZONEID)
-#define INGLOBALZONE(z) (1)
+/*
+ * Attach the given dataset to the given user namespace.
+ */
+extern int zone_dataset_attach(cred_t *, const char *, int);
+
+/*
+ * Detach the given dataset from the given user namespace.
+ */
+extern int zone_dataset_detach(cred_t *, const char *, int);
+
+/*
+ * Returns true if the named pool/dataset is visible in the current zone.
+ */
+extern int zone_dataset_visible(const char *dataset, int *write);
+
+int spl_zone_init(void);
+void spl_zone_fini(void);
+
+extern unsigned int crgetzoneid(const cred_t *);
+extern unsigned int global_zoneid(void);
+extern boolean_t inglobalzone(proc_t *);
+
+#define INGLOBALZONE(x) inglobalzone(x)
+#define GLOBAL_ZONEID global_zoneid()
#endif /* SPL_ZONE_H */
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 9cd1e32cd..bc6666a2a 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1450,7 +1450,9 @@ typedef enum zfs_ioc {
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
+ ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
+ ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
ZFS_IOC_SET_BOOTENV, /* 0x87 */
ZFS_IOC_GET_BOOTENV, /* 0x88 */
ZFS_IOC_LAST
@@ -1531,6 +1533,7 @@ typedef enum {
ZFS_ERR_REBUILD_IN_PROGRESS,
ZFS_ERR_BADPROP,
ZFS_ERR_VDEV_NOTSUP,
+ ZFS_ERR_NOT_USER_NAMESPACE,
} zfs_errno_t;
/*
diff --git a/lib/libspl/include/sys/types.h b/lib/libspl/include/sys/types.h
index f32c2188a..8dc38ae33 100644
--- a/lib/libspl/include/sys/types.h
+++ b/lib/libspl/include/sys/types.h
@@ -44,7 +44,7 @@
#include <inttypes.h>
#endif /* HAVE_INTTYPES */
-typedef int zoneid_t;
+typedef uint_t zoneid_t;
typedef int projid_t;
/*
diff --git a/lib/libspl/include/zone.h b/lib/libspl/include/zone.h
index b0ac2d9bc..0af4e7a2f 100644
--- a/lib/libspl/include/zone.h
+++ b/lib/libspl/include/zone.h
@@ -33,7 +33,17 @@
extern "C" {
#endif
-#define GLOBAL_ZONEID 0
+#ifdef __FreeBSD__
+#define GLOBAL_ZONEID 0
+#else
+/*
+ * Hardcoded in the kernel's root user namespace. A "better" way to get
+ * this would be by using ioctl_ns(2), but this would need to be performed
+ * recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
+ * supported since Linux 4.9.
+ */
+#define GLOBAL_ZONEID 4026531837U
+#endif
extern zoneid_t getzoneid(void);
diff --git a/lib/libspl/os/linux/zone.c b/lib/libspl/os/linux/zone.c
index 393a16ad5..65c02dfe7 100644
--- a/lib/libspl/os/linux/zone.c
+++ b/lib/libspl/os/linux/zone.c
@@ -23,10 +23,40 @@
* Use is subject to license terms.
*/
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
#include <zone.h>
zoneid_t
getzoneid(void)
{
- return (GLOBAL_ZONEID);
+ char path[PATH_MAX];
+ char buf[128] = { '\0' };
+ char *cp;
+
+ int c = snprintf(path, sizeof (path), "/proc/self/ns/user");
+ /* This API doesn't have any error checking... */
+ if (c < 0)
+ return (0);
+
+ ssize_t r = readlink(path, buf, sizeof (buf) - 1);
+ if (r < 0)
+ return (0);
+
+ cp = strchr(buf, '[');
+ if (cp == NULL)
+ return (0);
+ cp++;
+
+ unsigned long n = strtoul(cp, NULL, 10);
+ if (n == ULONG_MAX && errno == ERANGE)
+ return (0);
+ zoneid_t z = (zoneid_t)n;
+
+ return (z);
}
diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi
index 86220b44b..766d88430 100644
--- a/lib/libuutil/libuutil.abi
+++ b/lib/libuutil/libuutil.abi
@@ -1081,7 +1081,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
- <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+ <typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 9f9a2f907..fb5e01b82 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -433,6 +433,7 @@
<elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -1537,7 +1538,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libspl/os/linux/zone.c' language='LANG_C99'>
- <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+ <typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
@@ -4414,6 +4415,12 @@
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
<return type-id='26a90f95'/>
</function-decl>
+ <function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
+ <parameter type-id='9200a744' name='zhp'/>
+ <parameter type-id='80f4b756' name='nspath'/>
+ <parameter type-id='95e97e5e' name='attach'/>
+ <return type-id='95e97e5e'/>
+ </function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzutil/os/linux/zutil_device_path_os.c' language='LANG_C99'>
<function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 1c067e214..1d40cbbfc 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -299,6 +299,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_VDEV_NOTSUP:
return (dgettext(TEXT_DOMAIN, "operation not supported "
"on this type of vdev"));
+ case EZFS_NOT_USER_NAMESPACE:
+ return (dgettext(TEXT_DOMAIN, "the provided file "
+ "was not a user namespace file"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -485,6 +488,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_BADPROP:
zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
break;
+ case ZFS_ERR_NOT_USER_NAMESPACE:
+ zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, "%s", strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
diff --git a/lib/libzfs/os/linux/libzfs_util_os.c b/lib/libzfs/os/linux/libzfs_util_os.c
index 9d6f574a5..7bd26ea98 100644
--- a/lib/libzfs/os/linux/libzfs_util_os.c
+++ b/lib/libzfs/os/linux/libzfs_util_os.c
@@ -19,6 +19,9 @@
* CDDL HEADER END
*/
+/*
+ * Copyright (c) 2021 Klara, Inc.
+ */
#include <alloca.h>
#include <errno.h>
+#include <unistd.h>
@@ -207,3 +210,69 @@ zfs_version_kernel(void)
ret[read - 1] = '\0';
return (ret);
}
+
+
+/*
+ * Add or delete the given filesystem to/from the given user namespace.
+ *
+ *	zhp	handle of the dataset; only ZFS_TYPE_FILESYSTEM is accepted,
+ *		every other type is rejected with EZFS_BADTYPE
+ *	nspath	path of the user namespace file; it is opened read-only and
+ *		the descriptor is passed to the kernel in zc_cleanup_fd
+ *	attach	non-zero issues ZFS_IOC_USERNS_ATTACH, zero issues
+ *		ZFS_IOC_USERNS_DETACH
+ *
+ * Returns the result of zfs_ioctl() (0 on success).  Failures detected
+ * before the ioctl return the value of zfs_error(); a namespace file that
+ * cannot be opened is reported as EZFS_NOT_USER_NAMESPACE.
+ */
+int
+zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = {"\0"};
+	char errbuf[1024];
+	unsigned long cmd;
+	int ret;
+
+	if (attach) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
+		    zhp->zfs_name);
+	} else {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
+		    zhp->zfs_name);
+	}
+
+	switch (zhp->zfs_type) {
+	case ZFS_TYPE_VOLUME:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "volumes can not be namespaced"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_SNAPSHOT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots can not be namespaced"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_BOOKMARK:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "bookmarks can not be namespaced"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_VDEV:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "vdevs can not be namespaced"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_INVALID:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid zfs_type_t: ZFS_TYPE_INVALID"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_POOL:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pools can not be namespaced"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	case ZFS_TYPE_FILESYSTEM:
+		/* The only type that may be delegated to a namespace. */
+		break;
+	}
+	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	zc.zc_objset_type = DMU_OST_ZFS;
+	zc.zc_cleanup_fd = open(nspath, O_RDONLY);
+	if (zc.zc_cleanup_fd < 0) {
+		return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf));
+	}
+
+	cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH;
+	if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
+		zfs_standard_error(hdl, errno, errbuf);
+
+	/*
+	 * The descriptor is only an argument of the ioctl; close it on both
+	 * the success and the failure path so that it is not leaked.  errno
+	 * has already been consumed above.
+	 */
+	(void) close(zc.zc_cleanup_fd);
+
+	return (ret);
+}
diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi
index 266007e4d..fae98469a 100644
--- a/lib/libzfs_core/libzfs_core.abi
+++ b/lib/libzfs_core/libzfs_core.abi
@@ -939,7 +939,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
- <typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
+ <typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
diff --git a/man/Makefile.am b/man/Makefile.am
index 8fa21d2fd..12f818372 100644
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -59,9 +59,11 @@ dist_man_MANS = \
%D%/man8/zfs-unjail.8 \
%D%/man8/zfs-unload-key.8 \
%D%/man8/zfs-unmount.8 \
+ %D%/man8/zfs-unzone.8 \
%D%/man8/zfs-upgrade.8 \
%D%/man8/zfs-userspace.8 \
%D%/man8/zfs-wait.8 \
+ %D%/man8/zfs-zone.8 \
%D%/man8/zfs_ids_to_path.8 \
%D%/man8/zgenhostid.8 \
%D%/man8/zinject.8 \
diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7
index b1e1ce377..4d6fc613c 100644
--- a/man/man7/zfsprops.7
+++ b/man/man7/zfsprops.7
@@ -1885,8 +1885,7 @@ feature and are not relevant on other platforms.
The default value is
.Sy off .
.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
-Controls whether the dataset is managed from a non-global zone.
-Zones are a Solaris feature and are not relevant on other platforms.
+Controls whether the dataset is managed from a non-global zone or namespace.
The default value is
.Sy off .
.El
diff --git a/man/man8/zfs-unzone.8 b/man/man8/zfs-unzone.8
new file mode 120000
index 000000000..9052b28aa
--- /dev/null
+++ b/man/man8/zfs-unzone.8
@@ -0,0 +1 @@
+zfs-zone.8 \ No newline at end of file
diff --git a/man/man8/zfs-zone.8 b/man/man8/zfs-zone.8
new file mode 100644
index 000000000..2f975dde6
--- /dev/null
+++ b/man/man8/zfs-zone.8
@@ -0,0 +1,116 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2011 Joshua M. Clulow <[email protected]>
+.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
+.\" Copyright (c) 2011, Pawel Jakub Dawidek <[email protected]>
+.\" Copyright (c) 2012, Glen Barber <[email protected]>
+.\" Copyright (c) 2012, Bryan Drewery <[email protected]>
+.\" Copyright (c) 2013, Steven Hartland <[email protected]>
+.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
+.\" Copyright (c) 2014 Integros [integros.com]
+.\" Copyright (c) 2014, Xin LI <[email protected]>
+.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
+.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
+.\" Copyright 2019 Richard Laager. All rights reserved.
+.\" Copyright 2018 Nexenta Systems, Inc.
+.\" Copyright 2019 Joyent, Inc.
+.\" Copyright 2021 Klara, Inc.
+.\"
+.Dd June 3, 2022
+.Dt ZFS-ZONE 8
+.Os
+.
+.Sh NAME
+.Nm zfs-zone ,
+.Nm zfs-unzone
+.Nd attach ZFS filesystems to and detach them from user namespaces
+.Sh SYNOPSIS
+.Nm zfs Cm zone
+.Ar nsfile
+.Ar filesystem
+.Nm zfs Cm unzone
+.Ar nsfile
+.Ar filesystem
+.
+.Sh DESCRIPTION
+.Bl -tag -width ""
+.It Xo
+.Nm zfs
+.Cm zone
+.Ar nsfile
+.Ar filesystem
+.Xc
+Attach the specified
+.Ar filesystem
+to the user namespace identified by
+.Ar nsfile .
+From now on this file system tree can be managed from within a user namespace
+if the
+.Sy zoned
+property has been set.
+.Pp
+You cannot attach a zoned dataset's children to another user namespace.
+You also cannot attach the root file system
+of the user namespace or any dataset
+which needs to be mounted before the zfs service
+is run inside the user namespace,
+as it would be attached unmounted until it is
+mounted from the service inside the user namespace.
+.Pp
+To allow management of the dataset from within a user namespace, the
+.Sy zoned
+property has to be set and the user namespace needs access to the
+.Pa /dev/zfs
+device.
+The
+.Sy quota
+property cannot be changed from within a user namespace.
+.Pp
+After a dataset is attached to a user namespace and the
+.Sy zoned
+property is set,
+a zoned file system cannot be mounted outside the user namespace,
+since the user namespace administrator might have set the mount point
+to an unacceptable value.
+.It Xo
+.Nm zfs
+.Cm unzone
+.Ar nsfile
+.Ar filesystem
+.Xc
+Detach the specified
+.Ar filesystem
+from the user namespace identified by
+.Ar nsfile .
+.El
+.Sh EXAMPLES
+.Ss Example 1 : No Delegating a Dataset to a User Namespace
+The following example delegates the
+.Ar tank/users
+dataset to a user namespace identified by user namespace file
+.Pa /proc/1234/ns/user .
+.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users
+.
+.Sh SEE ALSO
+.Xr zfsprops 7
diff --git a/module/Kbuild.in b/module/Kbuild.in
index ed8dc23a9..14f236281 100644
--- a/module/Kbuild.in
+++ b/module/Kbuild.in
@@ -65,7 +65,8 @@ SPL_OBJS := \
spl-tsd.o \
spl-vmem.o \
spl-xdr.o \
- spl-zlib.o
+ spl-zlib.o \
+ spl-zone.o
spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index f99a2f966..5179100d1 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -780,8 +780,13 @@ spl_init(void)
if ((rc = spl_zlib_init()))
goto out7;
+ if ((rc = spl_zone_init()))
+ goto out8;
+
return (rc);
+out8:
+ spl_zlib_fini();
out7:
spl_kstat_fini();
out6:
@@ -801,6 +806,7 @@ out1:
static void __exit
spl_fini(void)
{
+ spl_zone_fini();
spl_zlib_fini();
spl_kstat_fini();
spl_proc_fini();
diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c
new file mode 100644
index 000000000..804c8010c
--- /dev/null
+++ b/module/os/linux/spl/spl-zone.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2021 Klara Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mutex.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <linux/file.h>
+#include <linux/magic.h>
+#include <sys/zone.h>
+
+#if defined(CONFIG_USER_NS)
+#include <linux/statfs.h>
+#include <linux/proc_ns.h>
+#endif
+
+/* Held around every lookup and modification of the lists below. */
+static kmutex_t zone_datasets_lock;
+/* List of zone_datasets_t entries, linked through zds_list. */
+static struct list_head zone_datasets;
+
+/*
+ * Per-namespace record: the user namespace that datasets were attached to
+ * and the list of those datasets.  An entry is created by the first attach
+ * for a namespace and freed when its last dataset is detached.
+ */
+typedef struct zone_datasets {
+ struct list_head zds_list; /* zone_datasets linkage */
+ struct user_namespace *zds_userns; /* namespace reference */
+ struct list_head zds_datasets; /* datasets for the namespace */
+} zone_datasets_t;
+
+/*
+ * One dataset attached to a user namespace.  The NUL-terminated name is
+ * stored inline after the fixed fields, so an entry is allocated (and
+ * freed) with sizeof (zone_dataset_t) + zd_dsnamelen + 1 bytes.
+ */
+typedef struct zone_dataset {
+	struct list_head zd_list;	/* zone_dataset linkage */
+	size_t zd_dsnamelen;		/* length of name */
+	char zd_dsname[];		/* name of the member dataset */
+} zone_dataset_t;
+
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+/*
+ * Translate an open file descriptor into the user namespace it refers to.
+ *
+ * Returns:
+ * - 0 on success, with *userns set; *userns is left untouched otherwise
+ * - EBADF if the file descriptor cannot be resolved to an open file
+ * - ENOTTY if the file is not a user namespace file. We want to intercept
+ *   this error in the ZFS layer. We cannot just return one of the
+ *   ZFS_ERR_* errors here as we want to preserve the separation of the ZFS
+ *   and the SPL layers.
+ *
+ * NOTE(review): no reference is taken on the namespace here and the file
+ * reference is dropped before returning, so a caller that keeps the
+ * pointer has to take its own reference.
+ */
+static int
+user_ns_get(int fd, struct user_namespace **userns)
+{
+	struct file *fp = fget(fd);
+	struct kstatfs sfs;
+	struct ns_common *nsc;
+	int rc = ENOTTY;
+
+	if (fp == NULL)
+		return (EBADF);
+
+	/* Must live on nsfs and be a user (not net, pid, ...) namespace. */
+	if (vfs_statfs(&fp->f_path, &sfs) == 0 && sfs.f_type == NSFS_MAGIC) {
+		nsc = get_proc_ns(file_inode(fp));
+		if (nsc->ops->type == CLONE_NEWUSER) {
+			*userns = container_of(nsc, struct user_namespace, ns);
+			rc = 0;
+		}
+	}
+
+	fput(fp);
+
+	return (rc);
+}
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+
+/*
+ * Return the inode number of the given user namespace, which serves as its
+ * zone ID.  The member holding it depends on HAVE_USER_NS_COMMON_INUM.
+ */
+static unsigned int
+user_ns_zoneid(struct user_namespace *user_ns)
+{
+#if defined(HAVE_USER_NS_COMMON_INUM)
+	return (user_ns->ns.inum);
+#else
+	return (user_ns->proc_inum);
+#endif
+}
+
+/*
+ * Find the zone_datasets entry whose namespace has zone ID nsinum.
+ * Returns NULL if there is none.  Walks the global list, so callers in
+ * this file hold zone_datasets_lock.
+ */
+static struct zone_datasets *
+zone_datasets_lookup(unsigned int nsinum)
+{
+	zone_datasets_t *cur;
+	zone_datasets_t *match = NULL;
+
+	list_for_each_entry(cur, &zone_datasets, zds_list) {
+		if (user_ns_zoneid(cur->zds_userns) == nsinum) {
+			match = cur;
+			break;
+		}
+	}
+
+	return (match);
+}
+
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+/*
+ * Find the entry of zds whose name is exactly the first dsnamelen bytes of
+ * dataset.  Returns NULL if the namespace has no such dataset.
+ */
+static struct zone_dataset *
+zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
+{
+	zone_dataset_t *cur;
+	zone_dataset_t *match = NULL;
+
+	list_for_each_entry(cur, &zds->zds_datasets, zd_list) {
+		/* Cheap length test first, then compare the bytes. */
+		if (cur->zd_dsnamelen == dsnamelen &&
+		    strncmp(cur->zd_dsname, dataset, dsnamelen) == 0) {
+			match = cur;
+			break;
+		}
+	}
+
+	return (match);
+}
+
+/*
+ * Only a credential whose uid equals GLOBAL_ROOT_UID may attach or detach
+ * datasets.  Returns 0 if permitted, EPERM otherwise.
+ */
+static int
+zone_dataset_cred_check(cred_t *cred)
+{
+	return (uid_eq(cred->uid, GLOBAL_ROOT_UID) ? 0 : EPERM);
+}
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+
+/*
+ * Validate a dataset name and compute its effective length.
+ *
+ * An empty name or one starting with '/' is rejected with ENOENT and
+ * *dsnamelen is left untouched.  Otherwise 0 is returned and *dsnamelen is
+ * set to the length of the name, not counting one trailing '/', if any.
+ */
+static int
+zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
+{
+	size_t len;
+
+	switch (dataset[0]) {
+	case '\0':
+	case '/':
+		return (ENOENT);
+	default:
+		break;
+	}
+
+	len = strlen(dataset);
+	/* Ignore trailing slash, if supplied. */
+	if (dataset[len - 1] == '/')
+		len--;
+	*dsnamelen = len;
+
+	return (0);
+}
+
+/*
+ * Attach the named dataset to the user namespace referred to by the file
+ * descriptor cleanup_fd.
+ *
+ * Returns 0 on success, the error of the credential, name or descriptor
+ * check if one of them fails, EEXIST if the dataset is already attached to
+ * that namespace, and ENXIO when built without CONFIG_USER_NS or
+ * HAVE_USER_NS_COMMON_INUM.
+ */
+int
+zone_dataset_attach(cred_t *cred, const char *dataset, int cleanup_fd)
+{
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+	struct user_namespace *userns;
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+	size_t dsnamelen;
+	int error;
+
+	error = zone_dataset_cred_check(cred);
+	if (error != 0)
+		return (error);
+	error = zone_dataset_name_check(dataset, &dsnamelen);
+	if (error != 0)
+		return (error);
+	error = user_ns_get(cleanup_fd, &userns);
+	if (error != 0)
+		return (error);
+
+	mutex_enter(&zone_datasets_lock);
+
+	zds = zone_datasets_lookup(user_ns_zoneid(userns));
+	if (zds != NULL &&
+	    zone_dataset_lookup(zds, dataset, dsnamelen) != NULL) {
+		mutex_exit(&zone_datasets_lock);
+		return (EEXIST);
+	}
+
+	if (zds == NULL) {
+		/* First dataset for this namespace: create its entry. */
+		zds = kmem_alloc(sizeof (*zds), KM_SLEEP);
+		INIT_LIST_HEAD(&zds->zds_list);
+		INIT_LIST_HEAD(&zds->zds_datasets);
+		zds->zds_userns = userns;
+		/*
+		 * Lock the namespace by increasing its refcount to prevent
+		 * the namespace ID from being reused.
+		 */
+		get_user_ns(userns);
+		list_add_tail(&zds->zds_list, &zone_datasets);
+	}
+
+	/* Name is stored inline, without a trailing slash, NUL-terminated. */
+	zd = kmem_alloc(sizeof (*zd) + dsnamelen + 1, KM_SLEEP);
+	INIT_LIST_HEAD(&zd->zd_list);
+	zd->zd_dsnamelen = dsnamelen;
+	memcpy(zd->zd_dsname, dataset, dsnamelen);
+	zd->zd_dsname[dsnamelen] = '\0';
+	list_add_tail(&zd->zd_list, &zds->zds_datasets);
+
+	mutex_exit(&zone_datasets_lock);
+	return (0);
+#else
+	return (ENXIO);
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+}
+EXPORT_SYMBOL(zone_dataset_attach);
+
+/*
+ * Detach the named dataset from the user namespace referred to by the file
+ * descriptor cleanup_fd.
+ *
+ * Returns 0 on success, the error of the credential, name or descriptor
+ * check if one of them fails, ENOENT if the dataset is not attached to
+ * that namespace, and ENXIO when built without CONFIG_USER_NS or
+ * HAVE_USER_NS_COMMON_INUM.
+ */
+int
+zone_dataset_detach(cred_t *cred, const char *dataset, int cleanup_fd)
+{
+#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
+	struct user_namespace *userns;
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+	size_t dsnamelen;
+	int error;
+
+	error = zone_dataset_cred_check(cred);
+	if (error != 0)
+		return (error);
+	error = zone_dataset_name_check(dataset, &dsnamelen);
+	if (error != 0)
+		return (error);
+	error = user_ns_get(cleanup_fd, &userns);
+	if (error != 0)
+		return (error);
+
+	mutex_enter(&zone_datasets_lock);
+
+	zds = zone_datasets_lookup(user_ns_zoneid(userns));
+	zd = (zds == NULL) ? NULL :
+	    zone_dataset_lookup(zds, dataset, dsnamelen);
+	if (zd == NULL) {
+		mutex_exit(&zone_datasets_lock);
+		return (ENOENT);
+	}
+
+	list_del(&zd->zd_list);
+	kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+
+	/* Prune the namespace entry if it has no more delegations. */
+	if (list_empty(&zds->zds_datasets)) {
+		/*
+		 * Decrease the refcount now that the namespace is no longer
+		 * used. It is no longer necessary to prevent the namespace ID
+		 * from being reused.
+		 */
+		put_user_ns(userns);
+		list_del(&zds->zds_list);
+		kmem_free(zds, sizeof (*zds));
+	}
+
+	mutex_exit(&zone_datasets_lock);
+	return (0);
+#else
+	return (ENXIO);
+#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
+}
+EXPORT_SYMBOL(zone_dataset_detach);
+
+/*
+ * Report whether the named dataset is visible to the current process and,
+ * through *write (if not NULL), whether it is writable.
+ *
+ * In the global zone every validly named dataset is visible and writable.
+ * Elsewhere the datasets attached to the caller's user namespace decide:
+ * - an attached dataset and its children are visible and writable;
+ * - the parents of an attached dataset are visible but read-only, to
+ *   provide a path back to the root of the pool;
+ * - everything else, including invalid names, is not visible.
+ *
+ * Returns 1 if visible and 0 if not; *write is 0 unless writable.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+	size_t dsnamelen;
+	int visible = 0;
+	int writable = 0;
+
+	/* Default to read-only, in case visible is returned. */
+	if (write != NULL)
+		*write = 0;
+	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
+		return (0);
+	if (INGLOBALZONE(curproc)) {
+		if (write != NULL)
+			*write = 1;
+		return (1);
+	}
+
+	mutex_enter(&zone_datasets_lock);
+	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
+	if (zds == NULL) {
+		mutex_exit(&zone_datasets_lock);
+		return (0);
+	}
+
+	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
+		const char *entry = zd->zd_dsname;
+		size_t entry_len = strlen(entry);
+		size_t common = (entry_len < dsnamelen) ? entry_len : dsnamelen;
+
+		/* The shorter name must be a prefix of the longer one. */
+		if (memcmp(entry, dataset, common) != 0)
+			continue;
+
+		if (entry_len > dsnamelen) {
+			/*
+			 * The dataset may be a parent of the entry: the entry
+			 * must continue with a path separator.  Parents stay
+			 * read-only.
+			 */
+			if (entry[dsnamelen] != '/')
+				continue;
+		} else {
+			/*
+			 * Same length: the dataset is the entry itself.
+			 * Entry shorter: the dataset is a child only if it
+			 * continues with a path separator.
+			 */
+			if (entry_len < dsnamelen && dataset[entry_len] != '/')
+				continue;
+			writable = 1;
+		}
+
+		visible = 1;
+		break;
+	}
+
+	if (writable && write != NULL)
+		*write = 1;
+
+	mutex_exit(&zone_datasets_lock);
+	return (visible);
+}
+EXPORT_SYMBOL(zone_dataset_visible);
+
+/*
+ * Zone ID of the global zone: the ID of init_user_ns when the kernel is
+ * built with CONFIG_USER_NS, 0 otherwise.
+ */
+unsigned int
+global_zoneid(void)
+{
+#if defined(CONFIG_USER_NS)
+	return (user_ns_zoneid(&init_user_ns));
+#else
+	return (0);
+#endif
+}
+EXPORT_SYMBOL(global_zoneid);
+
+/*
+ * Zone ID of a credential: the ID of its user namespace when the kernel is
+ * built with CONFIG_USER_NS, 0 otherwise.
+ */
+unsigned int
+crgetzoneid(const cred_t *cr)
+{
+#if defined(CONFIG_USER_NS)
+	return (user_ns_zoneid(cr->user_ns));
+#else
+	return (0);
+#endif
+}
+EXPORT_SYMBOL(crgetzoneid);
+
+/*
+ * Return whether the process runs in the global zone, i.e. whether its
+ * credential's user namespace is init_user_ns.  Always B_TRUE when the
+ * kernel is built without CONFIG_USER_NS.
+ */
+boolean_t
+inglobalzone(proc_t *proc)
+{
+	boolean_t global = B_TRUE;
+
+#if defined(CONFIG_USER_NS)
+	global = (proc->cred->user_ns == &init_user_ns);
+#endif
+
+	return (global);
+}
+EXPORT_SYMBOL(inglobalzone);
+
+/*
+ * Set up the (initially empty) list of namespace entries and the lock that
+ * protects it.  Always returns 0.
+ */
+int
+spl_zone_init(void)
+{
+	INIT_LIST_HEAD(&zone_datasets);
+	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	return (0);
+}
+
+/*
+ * Free every remaining namespace entry together with its datasets, drop
+ * the namespace reference held by each entry and destroy the lock.
+ */
+void
+spl_zone_fini(void)
+{
+	zone_datasets_t *zds;
+	zone_dataset_t *zd;
+
+	/*
+	 * It would be better to assert an empty zone_datasets, but since
+	 * there's no automatic mechanism for cleaning them up if the user
+	 * namespace is destroyed, just do it here, since spl is about to go
+	 * out of context.
+	 */
+	while (!list_empty(&zone_datasets)) {
+		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
+		while (!list_empty(&zds->zds_datasets)) {
+			zd = list_entry(zds->zds_datasets.next,
+			    zone_dataset_t, zd_list);
+			list_del(&zd->zd_list);
+			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+		}
+		/*
+		 * zone_dataset_attach() takes a single reference when it
+		 * creates the entry, no matter how many datasets are added
+		 * later, so drop exactly one here rather than one per dataset.
+		 */
+		put_user_ns(zds->zds_userns);
+		list_del(&zds->zds_list);
+		kmem_free(zds, sizeof (*zds));
+	}
+	mutex_destroy(&zone_datasets_lock);
+}
diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c
index 5a52092bb..ab00d2ae1 100644
--- a/module/os/linux/zfs/policy.c
+++ b/module/os/linux/zfs/policy.c
@@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
static int
priv_policy(const cred_t *cr, int capability, int err)
{
- return (priv_policy_ns(cr, capability, err, NULL));
+ return (priv_policy_ns(cr, capability, err, cr->user_ns));
}
static int
diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c
index c65702e1a..67b864aa7 100644
--- a/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/module/os/linux/zfs/zfs_ioctl_os.c
@@ -37,6 +37,7 @@
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2021 Klara, Inc.
*/
#include <sys/types.h>
@@ -150,6 +151,48 @@ out:
}
+/*
+ * Handler for ZFS_IOC_USERNS_ATTACH: attach the dataset named in
+ * zc->zc_name via zone_dataset_attach(), passing zc->zc_cleanup_fd along
+ * (presumably the descriptor identifying the user namespace -- confirm
+ * against spl-zone.c).
+ *
+ * Returns EINVAL for a NULL command, otherwise the result of
+ * zone_dataset_attach() with ENOTTY mapped to ZFS_ERR_NOT_USER_NAMESPACE.
+ */
+static int
+zfs_ioc_userns_attach(zfs_cmd_t *zc)
+{
+	int rc;
+
+	if (zc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	rc = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
+
+	/*
+	 * The SPL layer we just returned from knows nothing about
+	 * ZFS_ERR_* codes and reports ENOTTY instead, so translate it to
+	 * ZFS_ERR_NOT_USER_NAMESPACE here.  The comment at the
+	 * user_ns_get() function in spl-zone.c has the details.
+	 */
+	return (rc == ENOTTY ? ZFS_ERR_NOT_USER_NAMESPACE : rc);
+}
+
+/*
+ * Handler for ZFS_IOC_USERNS_DETACH: detach the dataset named in
+ * zc->zc_name via zone_dataset_detach(), passing zc->zc_cleanup_fd along.
+ *
+ * Returns EINVAL for a NULL command, otherwise the result of
+ * zone_dataset_detach() with ENOTTY mapped to ZFS_ERR_NOT_USER_NAMESPACE.
+ */
+static int
+zfs_ioc_userns_detach(zfs_cmd_t *zc)
+{
+	int rc;
+
+	if (zc == NULL)
+		return (SET_ERROR(EINVAL));
+
+	rc = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
+
+	/*
+	 * Same error translation as in zfs_ioc_userns_attach(): the SPL
+	 * layer reports ENOTTY because it does not know ZFS_ERR_* codes.
+	 */
+	return (rc == ENOTTY ? ZFS_ERR_NOT_USER_NAMESPACE : rc);
+}
+
uint64_t
zfs_max_nvlist_src_size_os(void)
{
@@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
void
zfs_ioctl_init_os(void)
{
+ zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
+ zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
+ zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
+ zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
}
#ifdef CONFIG_COMPAT
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 81a059651..a67ba821d 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
int error = 0;
zfsvfs_t *zfsvfs = NULL;
vfs_t *vfs = NULL;
+ int canwrite;
+ int dataset_visible_zone;
ASSERT(zm);
ASSERT(osname);
+ dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
+
+ /*
+ * Refuse to mount a filesystem if we are in a namespace and the
+ * dataset is not visible or writable in that namespace.
+ */
+ if (!INGLOBALZONE(curproc) &&
+ (!dataset_visible_zone || !canwrite)) {
+ return (SET_ERROR(EPERM));
+ }
+
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
if (error)
return (error);
+ /*
+ * If a non-writable filesystem is being mounted without the
+ * read-only flag, pretend it was set, as done for snapshots.
+ */
+ if (!canwrite)
+ vfs->vfs_readonly = true;
+
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {
zfsvfs_vfs_free(vfs);
diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c
index c2fd3fee1..b18efde9b 100644
--- a/module/os/linux/zfs/zpl_super.c
+++ b/module/os/linux/zfs/zpl_super.c
@@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
+ .fs_flags = FS_USERNS_MOUNT,
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index fa71f412b..9b32e73af 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -177,7 +177,8 @@ tests = ['upgrade_projectquota_001_pos']
tags = ['functional', 'upgrade']
[tests/functional/user_namespace:Linux]
-tests = ['user_namespace_001']
+tests = ['user_namespace_001', 'user_namespace_002', 'user_namespace_003',
+ 'user_namespace_004']
tags = ['functional', 'user_namespace']
[tests/functional/userquota:Linux]
diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
index 1ee786d13..47357dca5 100644
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@@ -146,11 +146,13 @@ export SYSTEM_FILES_LINUX='attr
mkswap
modprobe
mpstat
+ nsenter
parted
perf
setfattr
sha256sum
udevadm
+ unshare
useradd
userdel
usermod
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index d759e5196..e65a8bba2 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1895,6 +1895,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/user_namespace/cleanup.ksh \
functional/user_namespace/setup.ksh \
functional/user_namespace/user_namespace_001.ksh \
+ functional/user_namespace/user_namespace_002.ksh \
+ functional/user_namespace/user_namespace_003.ksh \
+ functional/user_namespace/user_namespace_004.ksh \
functional/userquota/cleanup.ksh \
functional/userquota/groupspace_001_pos.ksh \
functional/userquota/groupspace_002_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh
index 3d19c4273..39aad91d0 100755
--- a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh
+++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh
@@ -47,6 +47,11 @@ function cleanup
done
}
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+ log_unsupported "Failed to create user namespace"
+fi
+
log_onexit cleanup
log_assert "Check root in user namespaces"
diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
new file mode 100755
index 000000000..a5f76014a
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
@@ -0,0 +1,115 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
+
+#
+# DESCRIPTION:
+# Regression test for delegation of datasets to user namespaces.
+#
+# STRATEGY:
+# 1. Delegate a dataset to a user namespace.
+# 2. Check that 'zfs list' is only able to see inside the delegation.
+# 3. Check that 'zfs create' is able to create only inside the delegation.
+# 4. Check that the filesystems can be mounted inside the delegation,
+# and that file permissions are appropriate.
+# 5. Check that 'zfs destroy' is able to destroy only inside the delegation.
+# 6. Check that 'zfs unzone' has a desirable effect.
+#
+
+verify_runnable "both"
+
+# Test-exit hook: revoke the delegation if one is recorded in
+# $proc_ns_added, stop the process that keeps the user namespace alive,
+# and destroy the datasets this test creates.
+user_ns_cleanup() {
+	if [ -n "$proc_ns_added" ]; then
+		log_must zfs unzone $proc_ns_added $TESTPOOL/userns
+	fi
+	if [ -n "$unshared_pid" ]; then
+		kill -9 $unshared_pid
+		# Give it a sec to make the global cleanup more reliable.
+		sleep 1
+	fi
+	log_must zfs destroy -r $TESTPOOL/userns
+	# $TESTPOOL/user is created by this test as well; do not leave it
+	# behind for later tests or reruns.
+	log_must zfs destroy -r $TESTPOOL/user
+}
+
+log_onexit user_ns_cleanup
+
+log_assert "Check zfs/zpool command delegation in user namespaces"
+
+# Create the baseline datasets.
+log_must zfs create -o zoned=on $TESTPOOL/userns
+log_must zfs create -o zoned=on $TESTPOOL/userns/testds
+# Partial match should be denied; hence we also set this to be 'zoned'.
+log_must zfs create -o zoned=on $TESTPOOL/user
+
+# 1. Create a user namespace with a cloned mount namespace, then delegate.
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+	log_unsupported "Failed to create user namespace"
+fi
+# No exit-status check after the background launch: "$?" is always 0 after
+# starting a job with '&' and assigning "$!". A failed unshare is detected
+# by the nsenter probe below instead.
+unshare -Urm /usr/bin/sleep 1h &
+unshared_pid=$!
+proc_ns=/proc/$unshared_pid/ns/user
+sleep 2 # Wait for unshare to acquire user namespace
+log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}"
+
+NSENTER="nsenter -t $unshared_pid --all"
+
+$NSENTER echo test
+if [ "$?" -ne "0" ]; then
+	log_unsupported "Failed to enter user namespace"
+fi
+
+# 1b. Pre-test by checking that 'zone' does something new.
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
+log_must test -z "$list"
+log_must zfs zone $proc_ns $TESTPOOL/userns
+# Record the namespace so user_ns_cleanup can revoke the delegation if the
+# test stops before step 6.
+proc_ns_added="$proc_ns"
+
+# 2. 'zfs list'
+list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
+log_must test "$list" = "$TESTPOOL $TESTPOOL/userns $TESTPOOL/userns/testds "
+
+# 3. 'zfs create'
+log_must $NSENTER zfs create $TESTPOOL/userns/created
+log_mustnot $NSENTER zfs create $TESTPOOL/user/created
+
+# 4. Check file permissions (create mounts the filesystem). The 'permissions'
+# check is simply, does it get mapped to user namespace's root/root?
+log_must $NSENTER df -h /$TESTPOOL/userns/created
+log_must $NSENTER mkfile 8192 /$TESTPOOL/userns/created/testfile
+uidgid=$($NSENTER stat -c '%u %g' /$TESTPOOL/userns/created/testfile)
+log_must test "${uidgid}" = "0 0"
+
+# 5. 'zfs destroy'
+log_must $NSENTER zfs destroy $TESTPOOL/userns/created
+log_mustnot $NSENTER zfs destroy $TESTPOOL/user
+
+# 6. 'zfs unzone' should have an effect
+log_must zfs unzone $proc_ns $TESTPOOL/userns
+proc_ns_added=""
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
+log_must test -z "$list"
+
+log_pass "Check zfs/zpool command delegation in user namespaces"
diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
new file mode 100755
index 000000000..20a7f6677
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
@@ -0,0 +1,97 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
+
+#
+# DESCRIPTION:
+# Regression test for delegation of datasets to user namespaces.
+#
+# STRATEGY:
+# 1. Delegate two datasets with distinctive names to a user namespace.
+# 2. Check that 'zfs list' is not able to see datasets outside of the
+# delegation, which have a prefix matching one of the delegated sets.
+# Also, check that all the delegated sets are visible.
+#
+
+verify_runnable "both"
+
+# Test-exit hook: revoke both delegations if they are recorded in
+# $proc_ns_added, stop the process that keeps the user namespace alive,
+# then destroy the three datasets this test creates.
+user_ns_cleanup() {
+	if [[ -n "$proc_ns_added" ]]; then
+		log_must zfs unzone $proc_ns_added $TESTPOOL/userns
+		log_must zfs unzone $proc_ns_added $TESTPOOL/otheruserns
+	fi
+
+	if [[ -n "$unshared_pid" ]]; then
+		kill -9 $unshared_pid
+		# Brief pause so the global cleanup that follows is more
+		# reliable.
+		sleep 1
+	fi
+
+	log_must zfs destroy -r $TESTPOOL/userns
+	log_must zfs destroy -r $TESTPOOL/usernsisitnot
+	log_must zfs destroy -r $TESTPOOL/otheruserns
+}
+
+log_onexit user_ns_cleanup
+
+log_assert "Check zfs list command handling of dataset visibility in user namespaces"
+
+# Create the baseline dataset.
+log_must zfs create -o zoned=on $TESTPOOL/userns
+# Datasets with a prefix matching the delegated dataset should not be
+# automatically considered visible.
+log_must zfs create -o zoned=on $TESTPOOL/usernsisitnot
+# All delegated datasets should be visible.
+log_must zfs create -o zoned=on $TESTPOOL/otheruserns
+
+# 1. Create a user namespace with a cloned mount namespace, then delegate.
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+	log_unsupported "Failed to create user namespace"
+fi
+# No exit-status check after the background launch: "$?" is always 0 after
+# starting a job with '&' and assigning "$!". A failed unshare is detected
+# by the nsenter probe below instead.
+unshare -Urm /usr/bin/sleep 1h &
+unshared_pid=$!
+proc_ns=/proc/$unshared_pid/ns/user
+sleep 2 # Wait for unshare to acquire user namespace
+log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}"
+
+NSENTER="nsenter -t $unshared_pid --all"
+
+$NSENTER echo test
+if [ "$?" -ne "0" ]; then
+	log_unsupported "Failed to enter user namespace"
+fi
+
+# 1b. Pre-test by checking that 'zone' does something new.
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
+log_must test -z "$list"
+log_must zfs zone $proc_ns $TESTPOOL/userns
+log_must zfs zone $proc_ns $TESTPOOL/otheruserns
+# Record the namespace so user_ns_cleanup can revoke both delegations.
+proc_ns_added="$proc_ns"
+
+# 2. 'zfs list'
+list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
+log_must test "$list" = "$TESTPOOL $TESTPOOL/otheruserns $TESTPOOL/userns "
+
+log_pass "Check zfs list command handling of dataset visibility in user namespaces"
diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh
new file mode 100755
index 000000000..6edb0413c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh
@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
+
+#
+# DESCRIPTION:
+# Regression test for safeguards around the delegation of datasets to
+# user namespaces.
+#
+# STRATEGY:
+# 1. Check that 'zfs zone' correctly handles the case of the first
+# argument being a non-namespace file.
+# 2. Check that 'zfs zone' correctly handles the case of the first
+# argument being a non-namespace and non-existent file.
+#
+
+verify_runnable "both"
+
+# Test-exit hook: remove the temporary file and the test dataset, each only
+# if it was actually created.
+user_ns_cleanup() {
+	if [ -n "$temp_file" ]; then
+		log_must rm -f "$temp_file"
+	fi
+
+	# The dataset is created only after the user namespace support
+	# check, so it does not exist yet when the test bails out as
+	# unsupported; a log_must destroy would then fail in cleanup.
+	if zfs list -H -o name "$TESTPOOL/userns" >/dev/null 2>&1; then
+		log_must zfs destroy -r "$TESTPOOL/userns"
+	fi
+}
+
+log_onexit user_ns_cleanup
+
+log_assert "Check zfs zone command handling of non-namespace files"
+
+# Report the test as unsupported if user namespaces are not available.
+unshare -Urm echo test
+if [ "$?" -ne "0" ]; then
+	log_unsupported "Failed to create user namespace"
+fi
+
+# Create the baseline datasets.
+log_must zfs create -o zoned=on "$TESTPOOL/userns"
+
+# 1. Try to pass a non-namespace file to zfs zone.
+temp_file="$(TMPDIR=$TEST_BASE_DIR mktemp)"
+log_mustnot zfs zone "$temp_file" "$TESTPOOL/userns"
+
+# 2. Try to pass a non-namespace and non-existent file to zfs zone.
+# Use the same base directory as the temporary file above.
+log_mustnot zfs zone "$TEST_BASE_DIR/nonexistent" "$TESTPOOL/userns"
+
+log_pass "Check zfs zone command handling of non-namespace files"