path: root/module/zfs/zfs_vfsops.c
Diffstat (limited to 'module/zfs/zfs_vfsops.c')
-rw-r--r--  module/zfs/zfs_vfsops.c  505
1 file changed, 350 insertions, 155 deletions
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index d03f92ba0..f68dde85f 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/* Portions Copyright 2010 Robert Milkowski */
+
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -46,6 +47,7 @@
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
+#include <sys/sa.h>
#include <sys/varargs.h>
#include <sys/policy.h>
#include <sys/atomic.h>
@@ -60,6 +62,8 @@
#include <sys/dnlc.h>
#include <sys/dmu_objset.h>
#include <sys/spa_boot.h>
+#include <sys/sa.h>
+#include "zfs_comutil.h"
int zfsfstype;
vfsops_t *zfs_vfsops = NULL;
@@ -163,8 +167,7 @@ zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
if (zfsvfs->z_log != NULL)
zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
- else
- txg_wait_synced(dp, 0);
+
ZFS_EXIT(zfsvfs);
} else {
/*
@@ -381,14 +384,6 @@ vscan_changed_cb(void *arg, uint64_t newval)
}
static void
-acl_mode_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_acl_mode = newval;
-}
-
-static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
zfsvfs_t *zfsvfs = arg;
@@ -518,8 +513,6 @@ zfs_register_callbacks(vfs_t *vfsp)
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "aclmode", acl_mode_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zfsvfs);
@@ -560,7 +553,6 @@ unregister:
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
@@ -568,69 +560,59 @@ unregister:
}
-static void
-uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
- int64_t delta, dmu_tx_t *tx)
-{
- uint64_t used = 0;
- char buf[32];
- int err;
- uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
-
- if (delta == 0)
- return;
-
- (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
- err = zap_lookup(os, obj, buf, 8, 1, &used);
- ASSERT(err == 0 || err == ENOENT);
- /* no underflow/overflow */
- ASSERT(delta > 0 || used >= -delta);
- ASSERT(delta < 0 || used + delta > used);
- used += delta;
- if (used == 0)
- err = zap_remove(os, obj, buf, tx);
- else
- err = zap_update(os, obj, buf, 8, 1, &used, tx);
- ASSERT(err == 0);
-}
-
-static void
-zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
- void *oldbonus, void *newbonus,
- uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
+static int
+zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
+ uint64_t *userp, uint64_t *groupp)
{
- znode_phys_t *oldznp = oldbonus;
- znode_phys_t *newznp = newbonus;
+ znode_phys_t *znp = data;
+ int error = 0;
- if (bonustype != DMU_OT_ZNODE)
- return;
+ /*
+ * Is it a valid type of object to track?
+ */
+ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
+ return (ENOENT);
- /* We charge 512 for the dnode (if it's allocated). */
- if (oldznp->zp_gen != 0)
- oldused += DNODE_SIZE;
- if (newznp->zp_gen != 0)
- newused += DNODE_SIZE;
+ /*
+ * If we have a NULL data pointer
+ * then assume the ids aren't changing and
+ * return EEXIST to the dmu to let it know to
+ * use the same ids
+ */
+ if (data == NULL)
+ return (EEXIST);
- if (oldznp->zp_uid == newznp->zp_uid) {
- uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
+ if (bonustype == DMU_OT_ZNODE) {
+ *userp = znp->zp_uid;
+ *groupp = znp->zp_gid;
} else {
- uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
- uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
- }
+ int hdrsize;
- if (oldznp->zp_gid == newznp->zp_gid) {
- uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
- } else {
- uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
- uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
+ ASSERT(bonustype == DMU_OT_SA);
+ hdrsize = sa_hdrsize(data);
+
+ if (hdrsize != 0) {
+ *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_UID_OFFSET));
+ *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_GID_OFFSET));
+ } else {
+ /*
+ * This should only happen for newly created
+ * files that haven't had the znode data filled
+ * in yet.
+ */
+ *userp = 0;
+ *groupp = 0;
+ }
}
+ return (error);
}
static void
fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
char *domainbuf, int buflen, uid_t *ridp)
{
- extern uint64_t strtonum(const char *str, char **nptr);
uint64_t fuid;
const char *domain;
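The reworked zfs_space_delta_cb() above no longer applies accounting deltas itself (that was the removed uidacct() path); it only tells the DMU which user and group ids a bonus buffer charges to, via three results: ENOENT for untracked object types, EEXIST for "ids unchanged", and 0 with *userp/*groupp filled in. A minimal userland sketch of that contract follows; the types and values are hypothetical stand-ins, not the kernel definitions:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's object types and bonus layout. */
typedef enum { OT_PLAIN, OT_ZNODE, OT_SA } ot_stub_t;
typedef struct { uint64_t zp_uid, zp_gid; } znode_phys_stub_t;

/*
 * Same contract as the new callback: ENOENT = object type isn't tracked,
 * EEXIST = ids unchanged (reuse the previous ones), 0 = ids returned.
 */
static int
space_delta_stub(ot_stub_t bonustype, void *data,
    uint64_t *userp, uint64_t *groupp)
{
        if (bonustype != OT_ZNODE && bonustype != OT_SA)
                return (ENOENT);
        if (data == NULL)
                return (EEXIST);
        /* The DMU_OT_SA case would first skip a variable-size SA header. */
        *userp = ((znode_phys_stub_t *)data)->zp_uid;
        *groupp = ((znode_phys_stub_t *)data)->zp_gid;
        return (0);
}

int
main(void)
{
        znode_phys_stub_t z = { 1001, 100 };
        uint64_t u, g;

        if (space_delta_stub(OT_ZNODE, &z, &u, &g) == 0)
                printf("charge uid=%llu gid=%llu\n",
                    (unsigned long long)u, (unsigned long long)g);
        return (0);
}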
@@ -811,7 +793,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
}
boolean_t
-zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
+zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
{
char buf[32];
uint64_t used, quota, usedobj, quotaobj;
@@ -834,33 +816,57 @@ zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
return (used >= quota);
}
+boolean_t
+zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
+{
+ uint64_t fuid;
+ uint64_t quotaobj;
+ uid_t id;
+
+ quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
+
+ id = isgroup ? zp->z_gid : zp->z_uid;
+
+ if (quotaobj == 0 || zfsvfs->z_replay)
+ return (B_FALSE);
+
+ if (IS_EPHEMERAL(id)) {
+ VERIFY(0 == sa_lookup(zp->z_sa_hdl,
+ isgroup ? SA_ZPL_GID(zfsvfs) : SA_ZPL_UID(zfsvfs),
+ &fuid, sizeof (fuid)));
+ } else {
+ fuid = (uint64_t)id;
+ }
+
+ return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
+}
+
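The new zfs_owner_overquota() normalizes the owner id before calling the renamed zfs_fuid_overquota(): a plain POSIX id is used as the FUID directly, while an ephemeral (SID-mapped) id forces an SA lookup of the real 64-bit FUID stored with the znode. A compressed userland sketch of that branch, with hypothetical stand-ins for IS_EPHEMERAL() and the sa_lookup():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical: treat ids at or above 2^31 as ephemeral, as a stand-in. */
#define	EPHEMERAL_MIN_STUB	0x80000000u

/* Stand-in for the sa_lookup() of SA_ZPL_UID/SA_ZPL_GID. */
static uint64_t
sa_lookup_fuid_stub(void)
{
        return (((uint64_t)1 << 32) | 501);     /* idx<<32 | rid encoding */
}

static uint64_t
resolve_quota_fuid(uint32_t id)
{
        if (id >= EPHEMERAL_MIN_STUB)
                return (sa_lookup_fuid_stub()); /* ephemeral: fetch real FUID */
        return ((uint64_t)id);                  /* plain POSIX id */
}

int
main(void)
{
        printf("%llu\n", (unsigned long long)resolve_quota_fuid(1001));
        printf("%llu\n", (unsigned long long)resolve_quota_fuid(0x80000001u));
        return (0);
}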
int
-zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
+zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
{
objset_t *os;
zfsvfs_t *zfsvfs;
uint64_t zval;
int i, error;
+ uint64_t sa_obj;
- if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
- return (error);
- if (zval)
- mode |= DS_MODE_READONLY;
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
- if (error == EROFS) {
- mode |= DS_MODE_READONLY;
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
- }
- if (error)
+ /*
+ * We claim to always be readonly so we can open snapshots;
+ * other ZPL code will prevent us from writing to snapshots.
+ */
+ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+ if (error) {
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
+ }
/*
* Initialize the zfs-specific filesystem structure.
* Should probably make this a kmem cache, shuffle fields,
* and just bzero up to z_hold_mtx[].
*/
- zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
zfsvfs->z_vfs = NULL;
zfsvfs->z_parent = zfsvfs;
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
@@ -870,15 +876,15 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
if (error) {
goto out;
- } else if (zfsvfs->z_version > ZPL_VERSION) {
- (void) printf("Mismatched versions: File system "
- "is version %llu on-disk format, which is "
- "incompatible with this software version %lld!",
- (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
+ } else if (zfsvfs->z_version >
+ zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
+ (void) printf("Can't mount a version %lld file system "
+ "on a version %lld pool\n. Pool must be upgraded to mount "
+ "this file system.", (u_longlong_t)zfsvfs->z_version,
+ (u_longlong_t)spa_version(dmu_objset_spa(os)));
error = ENOTSUP;
goto out;
}
-
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
goto out;
zfsvfs->z_norm = (int)zval;
@@ -900,6 +906,26 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+
+ if (zfsvfs->z_use_sa) {
+ /* should either have both of these objects or none */
+ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+ &sa_obj);
+ if (error)
+ return (error);
+ } else {
+ /*
+ * Pre-SA version file systems should never touch
+ * either the attribute registration or layout objects.
+ */
+ sa_obj = 0;
+ }
+
+ zfsvfs->z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END);
+
+ if (zfsvfs->z_version >= ZPL_VERSION_SA)
+ sa_register_update_callback(os, zfs_sa_upgrade);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
&zfsvfs->z_root);
@@ -944,12 +970,12 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
- *zvp = zfsvfs;
+ *zfvp = zfsvfs;
return (0);
out:
- dmu_objset_close(os);
- *zvp = NULL;
+ dmu_objset_disown(os, zfsvfs);
+ *zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
}
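With this change zfsvfs_create() takes ownership of the objset (dmu_objset_own) instead of merely opening it, so the two failure paths release it differently: before the own succeeds only the zfsvfs is freed, while the out: path must dmu_objset_disown() first. A generic sketch of that acquire/undo shape using stub functions (not the DMU API):

#include <errno.h>
#include <stdlib.h>

typedef struct objset_stub { int dummy; } objset_stub_t;
typedef struct fs_stub { objset_stub_t *os; } fs_stub_t;

static int
objset_own_stub(const char *name, objset_stub_t **osp)
{
        (void) name;
        *osp = malloc(sizeof (objset_stub_t));
        return (*osp == NULL ? ENOMEM : 0);
}

static void
objset_disown_stub(objset_stub_t *os)
{
        free(os);
}

static int
fs_create_stub(const char *name, fs_stub_t **outp)
{
        fs_stub_t *fs = calloc(1, sizeof (*fs));
        int err;

        if (fs == NULL)
                return (ENOMEM);
        if ((err = objset_own_stub(name, &fs->os)) != 0) {
                free(fs);                       /* nothing owned yet */
                return (err);
        }
        if (0 /* later setup failed */) {
                objset_disown_stub(fs->os);     /* undo the own first */
                free(fs);
                return (EIO);
        }
        *outp = fs;
        return (0);
}

int
main(void)
{
        fs_stub_t *fs;

        if (fs_create_stub("tank/fs", &fs) == 0) {
                objset_disown_stub(fs->os);
                free(fs);
        }
        return (0);
}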
@@ -966,15 +992,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/*
* Set the objset user_ptr to track its zfsvfs.
*/
- mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
- mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
- if (zil_disable) {
- zil_destroy(zfsvfs->z_log, 0);
- zfsvfs->z_log = NULL;
- }
/*
* If we are not mounting (ie: online recv), then we don't
@@ -994,34 +1016,36 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
else
zfs_unlinked_drain(zfsvfs);
- if (zfsvfs->z_log) {
- /*
- * Parse and replay the intent log.
- *
- * Because of ziltest, this must be done after
- * zfs_unlinked_drain(). (Further note: ziltest
- * doesn't use readonly mounts, where
- * zfs_unlinked_drain() isn't called.) This is because
- * ziltest causes spa_sync() to think it's committed,
- * but actually it is not, so the intent log contains
- * many txg's worth of changes.
- *
- * In particular, if object N is in the unlinked set in
- * the last txg to actually sync, then it could be
- * actually freed in a later txg and then reallocated
- * in a yet later txg. This would write a "create
- * object N" record to the intent log. Normally, this
- * would be fine because the spa_sync() would have
- * written out the fact that object N is free, before
- * we could write the "create object N" intent log
- * record.
- *
- * But when we are in ziltest mode, we advance the "open
- * txg" without actually spa_sync()-ing the changes to
- * disk. So we would see that object N is still
- * allocated and in the unlinked set, and there is an
- * intent log record saying to allocate it.
- */
+ /*
+ * Parse and replay the intent log.
+ *
+ * Because of ziltest, this must be done after
+ * zfs_unlinked_drain(). (Further note: ziltest
+ * doesn't use readonly mounts, where
+ * zfs_unlinked_drain() isn't called.) This is because
+ * ziltest causes spa_sync() to think it's committed,
+ * but actually it is not, so the intent log contains
+ * many txg's worth of changes.
+ *
+ * In particular, if object N is in the unlinked set in
+ * the last txg to actually sync, then it could be
+ * actually freed in a later txg and then reallocated
+ * in a yet later txg. This would write a "create
+ * object N" record to the intent log. Normally, this
+ * would be fine because the spa_sync() would have
+ * written out the fact that object N is free, before
+ * we could write the "create object N" intent log
+ * record.
+ *
+ * But when we are in ziltest mode, we advance the "open
+ * txg" without actually spa_sync()-ing the changes to
+ * disk. So we would see that object N is still
+ * allocated and in the unlinked set, and there is an
+ * intent log record saying to allocate it.
+ */
+ if (zil_replay_disable) {
+ zil_destroy(zfsvfs->z_log, B_FALSE);
+ } else {
zfsvfs->z_replay = B_TRUE;
zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
zfsvfs->z_replay = B_FALSE;
@@ -1070,7 +1094,9 @@ zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
+ vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
}
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}
static int
@@ -1084,7 +1110,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
ASSERT(vfsp);
ASSERT(osname);
- error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
+ error = zfsvfs_create(osname, &zfsvfs);
if (error)
return (error);
zfsvfs->z_vfs = vfsp;
@@ -1135,6 +1161,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
}
+ vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
uint64_t pval;
@@ -1146,9 +1173,9 @@ zfs_domount(vfs_t *vfsp, char *osname)
xattr_changed_cb(zfsvfs, pval);
zfsvfs->z_issnap = B_TRUE;
- mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
- mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
} else {
error = zfsvfs_setup(zfsvfs, B_TRUE);
}
@@ -1157,7 +1184,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
zfsctl_create(zfsvfs);
out:
if (error) {
- dmu_objset_close(zfsvfs->z_os);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
zfsvfs_free(zfsvfs);
} else {
atomic_add_32(&zfs_active_fs_count, 1);
@@ -1201,9 +1228,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
zfsvfs) == 0);
- VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
- zfsvfs) == 0);
-
VERIFY(dsl_prop_unregister(ds, "aclinherit",
acl_inherit_changed_cb, zfsvfs) == 0);
@@ -1267,6 +1291,139 @@ zfs_parse_bootfs(char *bpath, char *outpath)
return (error);
}
+/*
+ * zfs_check_global_label:
+ * Check that the hex label string is appropriate for the dataset
+ * being mounted into the global_zone proper.
+ *
+ * Return an error if the hex label string is not default or
+ * admin_low/admin_high. For admin_low labels, the corresponding
+ * dataset must be readonly.
+ */
+int
+zfs_check_global_label(const char *dsname, const char *hexsl)
+{
+ if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
+ return (0);
+ if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
+ return (0);
+ if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
+ /* must be readonly */
+ uint64_t rdonly;
+
+ if (dsl_prop_get_integer(dsname,
+ zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
+ return (EACCES);
+ return (rdonly ? 0 : EACCES);
+ }
+ return (EACCES);
+}
+
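The function above reduces to a small truth table: the default label and admin_high always pass, admin_low passes only if the dataset is read-only, and any other label is refused. A userland sketch with readable stand-ins for the label strings (the real ZFS_MLSLABEL_DEFAULT/ADMIN_LOW/ADMIN_HIGH constants are hex label encodings):

#include <errno.h>
#include <stdio.h>
#include <stdbool.h>
#include <strings.h>

/* Hypothetical stand-ins for the label string constants. */
#define	LABEL_DEFAULT	"none"
#define	LABEL_LOW	"admin_low"
#define	LABEL_HIGH	"admin_high"

static int
check_global_label_stub(const char *hexsl, bool ds_readonly)
{
        if (strcasecmp(hexsl, LABEL_DEFAULT) == 0)
                return (0);
        if (strcasecmp(hexsl, LABEL_HIGH) == 0)
                return (0);
        if (strcasecmp(hexsl, LABEL_LOW) == 0)
                return (ds_readonly ? 0 : EACCES);
        return (EACCES);
}

int
main(void)
{
        printf("default rw -> %d\n",
            check_global_label_stub(LABEL_DEFAULT, false));
        printf("admin_low rw -> %d\n",
            check_global_label_stub(LABEL_LOW, false));	/* EACCES */
        printf("admin_low ro -> %d\n",
            check_global_label_stub(LABEL_LOW, true));
        return (0);
}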
+/*
+ * zfs_mount_label_policy:
+ * Determine whether the mount is allowed according to MAC check.
+ * by comparing (where appropriate) label of the dataset against
+ * the label of the zone being mounted into. If the dataset has
+ * no label, create one.
+ *
+ * Returns:
+ * 0 : access allowed
+ * >0 : error code, such as EACCES
+ */
+static int
+zfs_mount_label_policy(vfs_t *vfsp, char *osname)
+{
+ int error, retv;
+ zone_t *mntzone = NULL;
+ ts_label_t *mnt_tsl;
+ bslabel_t *mnt_sl;
+ bslabel_t ds_sl;
+ char ds_hexsl[MAXNAMELEN];
+
+ retv = EACCES; /* assume the worst */
+
+ /*
+ * Start by getting the dataset label if it exists.
+ */
+ error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
+ 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
+ if (error)
+ return (EACCES);
+
+ /*
+ * If labeling is NOT enabled, then disallow the mount of datasets
+ * which have a non-default label already. No other label checks
+ * are needed.
+ */
+ if (!is_system_labeled()) {
+ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
+ return (0);
+ return (EACCES);
+ }
+
+ /*
+ * Get the label of the mountpoint. If mounting into the global
+ * zone (i.e. mountpoint is not within an active zone and the
+ * zoned property is off), the label must be default or
+ * admin_low/admin_high only; no other checks are needed.
+ */
+ mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
+ if (mntzone->zone_id == GLOBAL_ZONEID) {
+ uint64_t zoned;
+
+ zone_rele(mntzone);
+
+ if (dsl_prop_get_integer(osname,
+ zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
+ return (EACCES);
+ if (!zoned)
+ return (zfs_check_global_label(osname, ds_hexsl));
+ else
+ /*
+ * This is the case of a zone dataset being mounted
+ * initially, before the zone has been fully created;
+ * allow this mount into global zone.
+ */
+ return (0);
+ }
+
+ mnt_tsl = mntzone->zone_slabel;
+ ASSERT(mnt_tsl != NULL);
+ label_hold(mnt_tsl);
+ mnt_sl = label2bslabel(mnt_tsl);
+
+ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
+ /*
+ * The dataset doesn't have a real label, so fabricate one.
+ */
+ char *str = NULL;
+
+ if (l_to_str_internal(mnt_sl, &str) == 0 &&
+ dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
+ ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
+ retv = 0;
+ if (str != NULL)
+ kmem_free(str, strlen(str) + 1);
+ } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
+ /*
+ * Now compare labels to complete the MAC check. If the
+ * labels are equal then allow access. If the mountpoint
+ * label dominates the dataset label, allow readonly access.
+ * Otherwise, access is denied.
+ */
+ if (blequal(mnt_sl, &ds_sl))
+ retv = 0;
+ else if (bldominates(mnt_sl, &ds_sl)) {
+ vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
+ retv = 0;
+ }
+ }
+
+ label_rele(mnt_tsl);
+ zone_rele(mntzone);
+ return (retv);
+}
+
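The label comparison at the end of zfs_mount_label_policy() is the classic MAC read-down rule: equal labels get read-write, a mountpoint label that strictly dominates the dataset label gets read-only, anything else is denied. A sketch with a single integer sensitivity level standing in for bslabel_t (real labels also carry compartment bits, so dominance is a partial order, not simply >=):

#include <errno.h>
#include <stdio.h>

enum verdict_stub { V_RW, V_RO };

static int
mount_label_policy_stub(int mnt_level, int ds_level, enum verdict_stub *v)
{
        if (mnt_level == ds_level) {            /* blequal() */
                *v = V_RW;
                return (0);
        }
        if (mnt_level > ds_level) {             /* bldominates(): read-down */
                *v = V_RO;                      /* upstream sets MNTOPT_RO */
                return (0);
        }
        return (EACCES);                        /* deny everything else */
}

int
main(void)
{
        enum verdict_stub v;

        if (mount_label_policy_stub(2, 1, &v) == 0)
                printf("mount allowed, %s\n", v == V_RO ? "ro" : "rw");
        return (0);
}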
static int
zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
{
@@ -1419,8 +1576,7 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
*/
error = secpolicy_fs_mount(cr, mvp, vfsp);
if (error) {
- error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
- if (error == 0) {
+ if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) {
vattr_t vattr;
/*
@@ -1430,16 +1586,14 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
vattr.va_mask = AT_UID;
- if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
+ if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
goto out;
}
if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
- error = EPERM;
goto out;
}
-
secpolicy_fs_mount_clearopts(cr, vfsp);
} else {
goto out;
@@ -1456,6 +1610,10 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
goto out;
}
+ error = zfs_mount_label_policy(vfsp, osname);
+ if (error)
+ goto out;
+
/*
* When doing a remount, we simply refresh our temporary properties
* according to those options set in the current VFS options.
@@ -1617,7 +1775,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
mutex_enter(&zfsvfs->z_znodes_lock);
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
zp = list_next(&zfsvfs->z_all_znodes, zp))
- if (zp->z_dbuf) {
+ if (zp->z_sa_hdl) {
ASSERT(ZTOV(zp)->v_count > 0);
zfs_znode_dmu_fini(zp);
}
@@ -1668,9 +1826,8 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
ret = secpolicy_fs_unmount(cr, vfsp);
if (ret) {
- ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
- ZFS_DELEG_PERM_MOUNT, cr);
- if (ret)
+ if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
+ ZFS_DELEG_PERM_MOUNT, cr))
return (ret);
}
@@ -1725,14 +1882,14 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
/*
* Unset the objset user_ptr.
*/
- mutex_enter(&os->os->os_user_ptr_lock);
+ mutex_enter(&os->os_user_ptr_lock);
dmu_objset_set_user(os, NULL);
- mutex_exit(&os->os->os_user_ptr_lock);
+ mutex_exit(&os->os_user_ptr_lock);
/*
* Finally release the objset
*/
- dmu_objset_close(os);
+ dmu_objset_disown(os, zfsvfs);
}
/*
@@ -1813,7 +1970,9 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
ZFS_EXIT(zfsvfs);
return (err);
}
- zp_gen = zp->z_phys->zp_gen & gen_mask;
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+ sizeof (uint64_t));
+ zp_gen = zp_gen & gen_mask;
if (zp_gen == 0)
zp_gen = 1;
if (zp->z_unlinked || zp_gen != fid_gen) {
@@ -1835,17 +1994,13 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
* 'z_teardown_inactive_lock' write held.
*/
int
-zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
int error;
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
return (error);
-
- *modep = zfsvfs->z_os->os_mode;
- if (name)
- dmu_objset_name(zfsvfs->z_os, name);
- dmu_objset_close(zfsvfs->z_os);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
return (0);
}
@@ -1854,18 +2009,30 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
* Reopen zfsvfs_t::z_os and release VOPs.
*/
int
-zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
+zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
- int err;
+ int err, err2;
ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
- err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
+ err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
+ &zfsvfs->z_os);
if (err) {
zfsvfs->z_os = NULL;
} else {
znode_t *zp;
+ uint64_t sa_obj = 0;
+
+ err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
+ ZFS_SA_ATTRS, 8, 1, &sa_obj);
+
+ if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA)
+ goto bail;
+
+
+ zfsvfs->z_attr_table = sa_setup(zfsvfs->z_os, sa_obj,
+ zfs_attr_table, ZPL_END);
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
@@ -1884,6 +2051,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
}
+bail:
/* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
@@ -1906,9 +2074,11 @@ zfs_freevfs(vfs_t *vfsp)
/*
* If this is a snapshot, we have an extra VFS_HOLD on our parent
- * from zfs_mount(). Release it here.
+ * from zfs_mount(). Release it here. If we came through
+ * zfs_mountroot() instead, we didn't grab an extra hold, so
+ * skip the VFS_RELE for rootvfs.
*/
- if (zfsvfs->z_issnap)
+ if (zfsvfs->z_issnap && (vfsp != rootvfs))
VFS_RELE(zfsvfs->z_parent->z_vfs);
zfsvfs_free(zfsvfs);
@@ -2000,13 +2170,23 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
if (newvers < zfsvfs->z_version)
return (EINVAL);
+ if (zfs_spa_version_map(newvers) >
+ spa_version(dmu_objset_spa(zfsvfs->z_os)))
+ return (ENOTSUP);
+
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+ ZFS_SA_ATTRS);
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+ }
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (error);
}
+
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
8, 1, &newvers, tx);
@@ -2015,9 +2195,24 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
return (error);
}
- spa_history_internal_log(LOG_DS_UPGRADE,
- dmu_objset_spa(os), tx, CRED(),
- "oldver=%llu newver=%llu dataset = %llu",
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+ uint64_t sa_obj;
+
+ ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
+ SPA_VERSION_SA);
+ sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+ DMU_OT_NONE, 0, tx);
+
+ error = zap_add(os, MASTER_NODE_OBJ,
+ ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+ ASSERT3U(error, ==, 0);
+
+ VERIFY(0 == sa_set_sa_object(os, sa_obj));
+ sa_register_update_callback(os, zfs_sa_upgrade);
+ }
+
+ spa_history_log_internal(LOG_DS_UPGRADE,
+ dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
zfsvfs->z_version, newvers, dmu_objset_id(os));
dmu_tx_commit(tx);
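The version-upgrade path above shows the standard ZFS transaction discipline: every object the change might touch is declared with dmu_tx_hold_zap() before dmu_tx_assign(), and only then are the ZAP writes performed (zap_update of ZPL_VERSION_STR, plus zap_create/zap_add of the SA master node when crossing ZPL_VERSION_SA) and the tx committed. A schematic of that hold/assign/write/commit sequence with stub calls, not the DMU API:

#include <stdio.h>

typedef struct tx_stub { int aborted; } tx_stub_t;

static tx_stub_t tx_storage;
static tx_stub_t *tx_create_stub(void) { return (&tx_storage); }
static void tx_hold_stub(tx_stub_t *t, const char *obj)
{ (void)t; printf("hold %s\n", obj); }
static int tx_assign_stub(tx_stub_t *t) { (void)t; return (0); }
static void tx_abort_stub(tx_stub_t *t) { t->aborted = 1; }
static void tx_commit_stub(tx_stub_t *t) { (void)t; printf("commit\n"); }
static void zap_write_stub(tx_stub_t *t, const char *what)
{ (void)t; printf("write %s\n", what); }

static int
upgrade_to_sa_stub(int need_sa_upgrade)
{
        tx_stub_t *tx = tx_create_stub();

        /* 1. Declare everything the tx may dirty, before assigning. */
        tx_hold_stub(tx, "MASTER_NODE_OBJ/ZPL_VERSION_STR");
        if (need_sa_upgrade) {
                tx_hold_stub(tx, "MASTER_NODE_OBJ/ZFS_SA_ATTRS");
                tx_hold_stub(tx, "DMU_NEW_OBJECT");
        }
        /* 2. Assign to a txg; abort (never commit) on failure. */
        if (tx_assign_stub(tx) != 0) {
                tx_abort_stub(tx);
                return (-1);
        }
        /* 3. Perform the writes inside the assigned tx. */
        zap_write_stub(tx, "ZPL_VERSION_STR = newvers");
        if (need_sa_upgrade)
                zap_write_stub(tx, "ZFS_SA_ATTRS -> new SA master node");
        /* 4. Commit. */
        tx_commit_stub(tx);
        return (0);
}

int
main(void)
{
        return (upgrade_to_sa_stub(1));
}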