diff options
Diffstat (limited to 'module/zfs/zfs_vfsops.c')
-rw-r--r-- | module/zfs/zfs_vfsops.c | 505 |
1 files changed, 350 insertions, 155 deletions
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index d03f92ba0..f68dde85f 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -19,10 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* Portions Copyright 2010 Robert Milkowski */ + #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> @@ -46,6 +47,7 @@ #include <sys/dsl_deleg.h> #include <sys/spa.h> #include <sys/zap.h> +#include <sys/sa.h> #include <sys/varargs.h> #include <sys/policy.h> #include <sys/atomic.h> @@ -60,6 +62,8 @@ #include <sys/dnlc.h> #include <sys/dmu_objset.h> #include <sys/spa_boot.h> +#include <sys/sa.h> +#include "zfs_comutil.h" int zfsfstype; vfsops_t *zfs_vfsops = NULL; @@ -163,8 +167,7 @@ zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, UINT64_MAX, 0); - else - txg_wait_synced(dp, 0); + ZFS_EXIT(zfsvfs); } else { /* @@ -381,14 +384,6 @@ vscan_changed_cb(void *arg, uint64_t newval) } static void -acl_mode_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - - zfsvfs->z_acl_mode = newval; -} - -static void acl_inherit_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; @@ -518,8 +513,6 @@ zfs_register_callbacks(vfs_t *vfsp) error = error ? error : dsl_prop_register(ds, "snapdir", snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "aclmode", acl_mode_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, "vscan", vscan_changed_cb, zfsvfs); @@ -560,7 +553,6 @@ unregister: (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); @@ -568,69 +560,59 @@ unregister: } -static void -uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid, - int64_t delta, dmu_tx_t *tx) -{ - uint64_t used = 0; - char buf[32]; - int err; - uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; - - if (delta == 0) - return; - - (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid); - err = zap_lookup(os, obj, buf, 8, 1, &used); - ASSERT(err == 0 || err == ENOENT); - /* no underflow/overflow */ - ASSERT(delta > 0 || used >= -delta); - ASSERT(delta < 0 || used + delta > used); - used += delta; - if (used == 0) - err = zap_remove(os, obj, buf, tx); - else - err = zap_update(os, obj, buf, 8, 1, &used, tx); - ASSERT(err == 0); -} - -static void -zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype, - void *oldbonus, void *newbonus, - uint64_t oldused, uint64_t newused, dmu_tx_t *tx) +static int +zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, + uint64_t *userp, uint64_t *groupp) { - znode_phys_t *oldznp = oldbonus; - znode_phys_t *newznp = newbonus; + znode_phys_t *znp = data; + int error = 0; - if (bonustype != DMU_OT_ZNODE) - return; + /* + * Is it a valid type of object to track? + */ + if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) + return (ENOENT); - /* We charge 512 for the dnode (if it's allocated). */ - if (oldznp->zp_gen != 0) - oldused += DNODE_SIZE; - if (newznp->zp_gen != 0) - newused += DNODE_SIZE; + /* + * If we have a NULL data pointer + * then assume the id's aren't changing and + * return EEXIST to the dmu to let it know to + * use the same ids + */ + if (data == NULL) + return (EEXIST); - if (oldznp->zp_uid == newznp->zp_uid) { - uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx); + if (bonustype == DMU_OT_ZNODE) { + *userp = znp->zp_uid; + *groupp = znp->zp_gid; } else { - uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx); - uidacct(os, B_FALSE, newznp->zp_uid, newused, tx); - } + int hdrsize; - if (oldznp->zp_gid == newznp->zp_gid) { - uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx); - } else { - uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx); - uidacct(os, B_TRUE, newznp->zp_gid, newused, tx); + ASSERT(bonustype == DMU_OT_SA); + hdrsize = sa_hdrsize(data); + + if (hdrsize != 0) { + *userp = *((uint64_t *)((uintptr_t)data + hdrsize + + SA_UID_OFFSET)); + *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + + SA_GID_OFFSET)); + } else { + /* + * This should only happen for newly created + * files that haven't had the znode data filled + * in yet. + */ + *userp = 0; + *groupp = 0; + } } + return (error); } static void fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, char *domainbuf, int buflen, uid_t *ridp) { - extern uint64_t strtonum(const char *str, char **nptr); uint64_t fuid; const char *domain; @@ -811,7 +793,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, } boolean_t -zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) +zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) { char buf[32]; uint64_t used, quota, usedobj, quotaobj; @@ -834,33 +816,57 @@ zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) return (used >= quota); } +boolean_t +zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) +{ + uint64_t fuid; + uint64_t quotaobj; + uid_t id; + + quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; + + id = isgroup ? zp->z_gid : zp->z_uid; + + if (quotaobj == 0 || zfsvfs->z_replay) + return (B_FALSE); + + if (IS_EPHEMERAL(id)) { + VERIFY(0 == sa_lookup(zp->z_sa_hdl, + isgroup ? SA_ZPL_GID(zfsvfs) : SA_ZPL_UID(zfsvfs), + &fuid, sizeof (fuid))); + } else { + fuid = (uint64_t)id; + } + + return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); +} + int -zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) +zfsvfs_create(const char *osname, zfsvfs_t **zfvp) { objset_t *os; zfsvfs_t *zfsvfs; uint64_t zval; int i, error; + uint64_t sa_obj; - if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL)) - return (error); - if (zval) - mode |= DS_MODE_READONLY; + zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); - error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); - if (error == EROFS) { - mode |= DS_MODE_READONLY; - error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); - } - if (error) + /* + * We claim to always be readonly so we can open snapshots; + * other ZPL code will prevent us from writing to snapshots. + */ + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + if (error) { + kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); + } /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ - zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = NULL; zfsvfs->z_parent = zfsvfs; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; @@ -870,15 +876,15 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); if (error) { goto out; - } else if (zfsvfs->z_version > ZPL_VERSION) { - (void) printf("Mismatched versions: File system " - "is version %llu on-disk format, which is " - "incompatible with this software version %lld!", - (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); + } else if (zfsvfs->z_version > + zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { + (void) printf("Can't mount a version %lld file system " + "on a version %lld pool\n. Pool must be upgraded to mount " + "this file system.", (u_longlong_t)zfsvfs->z_version, + (u_longlong_t)spa_version(dmu_objset_spa(os))); error = ENOTSUP; goto out; } - if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) goto out; zfsvfs->z_norm = (int)zval; @@ -900,6 +906,26 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); + + if (zfsvfs->z_use_sa) { + /* should either have both of these objects or none */ + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, + &sa_obj); + if (error) + return (error); + } else { + /* + * Pre SA versions file systems should never touch + * either the attribute registration or layout objects. + */ + sa_obj = 0; + } + + zfsvfs->z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END); + + if (zfsvfs->z_version >= ZPL_VERSION_SA) + sa_register_update_callback(os, zfs_sa_upgrade); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zfsvfs->z_root); @@ -944,12 +970,12 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); - *zvp = zfsvfs; + *zfvp = zfsvfs; return (0); out: - dmu_objset_close(os); - *zvp = NULL; + dmu_objset_disown(os, zfsvfs); + *zfvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); } @@ -966,15 +992,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) /* * Set the objset user_ptr to track its zfsvfs. */ - mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); - if (zil_disable) { - zil_destroy(zfsvfs->z_log, 0); - zfsvfs->z_log = NULL; - } /* * If we are not mounting (ie: online recv), then we don't @@ -994,34 +1016,36 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) else zfs_unlinked_drain(zfsvfs); - if (zfsvfs->z_log) { - /* - * Parse and replay the intent log. - * - * Because of ziltest, this must be done after - * zfs_unlinked_drain(). (Further note: ziltest - * doesn't use readonly mounts, where - * zfs_unlinked_drain() isn't called.) This is because - * ziltest causes spa_sync() to think it's committed, - * but actually it is not, so the intent log contains - * many txg's worth of changes. - * - * In particular, if object N is in the unlinked set in - * the last txg to actually sync, then it could be - * actually freed in a later txg and then reallocated - * in a yet later txg. This would write a "create - * object N" record to the intent log. Normally, this - * would be fine because the spa_sync() would have - * written out the fact that object N is free, before - * we could write the "create object N" intent log - * record. - * - * But when we are in ziltest mode, we advance the "open - * txg" without actually spa_sync()-ing the changes to - * disk. So we would see that object N is still - * allocated and in the unlinked set, and there is an - * intent log record saying to allocate it. - */ + /* + * Parse and replay the intent log. + * + * Because of ziltest, this must be done after + * zfs_unlinked_drain(). (Further note: ziltest + * doesn't use readonly mounts, where + * zfs_unlinked_drain() isn't called.) This is because + * ziltest causes spa_sync() to think it's committed, + * but actually it is not, so the intent log contains + * many txg's worth of changes. + * + * In particular, if object N is in the unlinked set in + * the last txg to actually sync, then it could be + * actually freed in a later txg and then reallocated + * in a yet later txg. This would write a "create + * object N" record to the intent log. Normally, this + * would be fine because the spa_sync() would have + * written out the fact that object N is free, before + * we could write the "create object N" intent log + * record. + * + * But when we are in ziltest mode, we advance the "open + * txg" without actually spa_sync()-ing the changes to + * disk. So we would see that object N is still + * allocated and in the unlinked set, and there is an + * intent log record saying to allocate it. + */ + if (zil_replay_disable) { + zil_destroy(zfsvfs->z_log, B_FALSE); + } else { zfsvfs->z_replay = B_TRUE; zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); zfsvfs->z_replay = B_FALSE; @@ -1070,7 +1094,9 @@ zfs_set_fuid_feature(zfsvfs_t *zfsvfs) vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); } + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); } static int @@ -1084,7 +1110,7 @@ zfs_domount(vfs_t *vfsp, char *osname) ASSERT(vfsp); ASSERT(osname); - error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs); + error = zfsvfs_create(osname, &zfsvfs); if (error) return (error); zfsvfs->z_vfs = vfsp; @@ -1135,6 +1161,7 @@ zfs_domount(vfs_t *vfsp, char *osname) vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); } + vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; @@ -1146,9 +1173,9 @@ zfs_domount(vfs_t *vfsp, char *osname) xattr_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; - mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); } else { error = zfsvfs_setup(zfsvfs, B_TRUE); } @@ -1157,7 +1184,7 @@ zfs_domount(vfs_t *vfsp, char *osname) zfsctl_create(zfsvfs); out: if (error) { - dmu_objset_close(zfsvfs->z_os); + dmu_objset_disown(zfsvfs->z_os, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_add_32(&zfs_active_fs_count, 1); @@ -1201,9 +1228,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs) VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs) == 0); - VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, - zfsvfs) == 0); - VERIFY(dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs) == 0); @@ -1267,6 +1291,139 @@ zfs_parse_bootfs(char *bpath, char *outpath) return (error); } +/* + * zfs_check_global_label: + * Check that the hex label string is appropriate for the dataset + * being mounted into the global_zone proper. + * + * Return an error if the hex label string is not default or + * admin_low/admin_high. For admin_low labels, the corresponding + * dataset must be readonly. + */ +int +zfs_check_global_label(const char *dsname, const char *hexsl) +{ + if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) + return (0); + if (strcasecmp(hexsl, ADMIN_HIGH) == 0) + return (0); + if (strcasecmp(hexsl, ADMIN_LOW) == 0) { + /* must be readonly */ + uint64_t rdonly; + + if (dsl_prop_get_integer(dsname, + zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) + return (EACCES); + return (rdonly ? 0 : EACCES); + } + return (EACCES); +} + +/* + * zfs_mount_label_policy: + * Determine whether the mount is allowed according to MAC check. + * by comparing (where appropriate) label of the dataset against + * the label of the zone being mounted into. If the dataset has + * no label, create one. + * + * Returns: + * 0 : access allowed + * >0 : error code, such as EACCES + */ +static int +zfs_mount_label_policy(vfs_t *vfsp, char *osname) +{ + int error, retv; + zone_t *mntzone = NULL; + ts_label_t *mnt_tsl; + bslabel_t *mnt_sl; + bslabel_t ds_sl; + char ds_hexsl[MAXNAMELEN]; + + retv = EACCES; /* assume the worst */ + + /* + * Start by getting the dataset label if it exists. + */ + error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), + 1, sizeof (ds_hexsl), &ds_hexsl, NULL); + if (error) + return (EACCES); + + /* + * If labeling is NOT enabled, then disallow the mount of datasets + * which have a non-default label already. No other label checks + * are needed. + */ + if (!is_system_labeled()) { + if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) + return (0); + return (EACCES); + } + + /* + * Get the label of the mountpoint. If mounting into the global + * zone (i.e. mountpoint is not within an active zone and the + * zoned property is off), the label must be default or + * admin_low/admin_high only; no other checks are needed. + */ + mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); + if (mntzone->zone_id == GLOBAL_ZONEID) { + uint64_t zoned; + + zone_rele(mntzone); + + if (dsl_prop_get_integer(osname, + zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) + return (EACCES); + if (!zoned) + return (zfs_check_global_label(osname, ds_hexsl)); + else + /* + * This is the case of a zone dataset being mounted + * initially, before the zone has been fully created; + * allow this mount into global zone. + */ + return (0); + } + + mnt_tsl = mntzone->zone_slabel; + ASSERT(mnt_tsl != NULL); + label_hold(mnt_tsl); + mnt_sl = label2bslabel(mnt_tsl); + + if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { + /* + * The dataset doesn't have a real label, so fabricate one. + */ + char *str = NULL; + + if (l_to_str_internal(mnt_sl, &str) == 0 && + dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), + ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) + retv = 0; + if (str != NULL) + kmem_free(str, strlen(str) + 1); + } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { + /* + * Now compare labels to complete the MAC check. If the + * labels are equal then allow access. If the mountpoint + * label dominates the dataset label, allow readonly access. + * Otherwise, access is denied. + */ + if (blequal(mnt_sl, &ds_sl)) + retv = 0; + else if (bldominates(mnt_sl, &ds_sl)) { + vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); + retv = 0; + } + } + + label_rele(mnt_tsl); + zone_rele(mntzone); + return (retv); +} + static int zfs_mountroot(vfs_t *vfsp, enum whymountroot why) { @@ -1419,8 +1576,7 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) */ error = secpolicy_fs_mount(cr, mvp, vfsp); if (error) { - error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); - if (error == 0) { + if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) { vattr_t vattr; /* @@ -1430,16 +1586,14 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) vattr.va_mask = AT_UID; - if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { + if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { goto out; } if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 && VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) { - error = EPERM; goto out; } - secpolicy_fs_mount_clearopts(cr, vfsp); } else { goto out; @@ -1456,6 +1610,10 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) goto out; } + error = zfs_mount_label_policy(vfsp, osname); + if (error) + goto out; + /* * When doing a remount, we simply refresh our temporary properties * according to those options set in the current VFS options. @@ -1617,7 +1775,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; zp = list_next(&zfsvfs->z_all_znodes, zp)) - if (zp->z_dbuf) { + if (zp->z_sa_hdl) { ASSERT(ZTOV(zp)->v_count > 0); zfs_znode_dmu_fini(zp); } @@ -1668,9 +1826,8 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) ret = secpolicy_fs_unmount(cr, vfsp); if (ret) { - ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), - ZFS_DELEG_PERM_MOUNT, cr); - if (ret) + if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), + ZFS_DELEG_PERM_MOUNT, cr)) return (ret); } @@ -1725,14 +1882,14 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) /* * Unset the objset user_ptr. */ - mutex_enter(&os->os->os_user_ptr_lock); + mutex_enter(&os->os_user_ptr_lock); dmu_objset_set_user(os, NULL); - mutex_exit(&os->os->os_user_ptr_lock); + mutex_exit(&os->os_user_ptr_lock); /* * Finally release the objset */ - dmu_objset_close(os); + dmu_objset_disown(os, zfsvfs); } /* @@ -1813,7 +1970,9 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) ZFS_EXIT(zfsvfs); return (err); } - zp_gen = zp->z_phys->zp_gen & gen_mask; + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, + sizeof (uint64_t)); + zp_gen = zp_gen & gen_mask; if (zp_gen == 0) zp_gen = 1; if (zp->z_unlinked || zp_gen != fid_gen) { @@ -1835,17 +1994,13 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) * 'z_teardown_inactive_lock' write held. */ int -zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) +zfs_suspend_fs(zfsvfs_t *zfsvfs) { int error; if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) return (error); - - *modep = zfsvfs->z_os->os_mode; - if (name) - dmu_objset_name(zfsvfs->z_os, name); - dmu_objset_close(zfsvfs->z_os); + dmu_objset_disown(zfsvfs->z_os, zfsvfs); return (0); } @@ -1854,18 +2009,30 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) * Reopen zfsvfs_t::z_os and release VOPs. */ int -zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) +zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) { - int err; + int err, err2; ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); - err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); + err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, + &zfsvfs->z_os); if (err) { zfsvfs->z_os = NULL; } else { znode_t *zp; + uint64_t sa_obj = 0; + + err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj); + + if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA) + goto bail; + + + zfsvfs->z_attr_table = sa_setup(zfsvfs->z_os, sa_obj, + zfs_attr_table, ZPL_END); VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); @@ -1884,6 +2051,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) } +bail: /* release the VOPs */ rw_exit(&zfsvfs->z_teardown_inactive_lock); rrw_exit(&zfsvfs->z_teardown_lock, FTAG); @@ -1906,9 +2074,11 @@ zfs_freevfs(vfs_t *vfsp) /* * If this is a snapshot, we have an extra VFS_HOLD on our parent - * from zfs_mount(). Release it here. + * from zfs_mount(). Release it here. If we came through + * zfs_mountroot() instead, we didn't grab an extra hold, so + * skip the VFS_RELE for rootvfs. */ - if (zfsvfs->z_issnap) + if (zfsvfs->z_issnap && (vfsp != rootvfs)) VFS_RELE(zfsvfs->z_parent->z_vfs); zfsvfs_free(zfsvfs); @@ -2000,13 +2170,23 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) if (newvers < zfsvfs->z_version) return (EINVAL); + if (zfs_spa_version_map(newvers) > + spa_version(dmu_objset_spa(zfsvfs->z_os))) + return (ENOTSUP); + tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); + if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, + ZFS_SA_ATTRS); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (error); } + error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &newvers, tx); @@ -2015,9 +2195,24 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) return (error); } - spa_history_internal_log(LOG_DS_UPGRADE, - dmu_objset_spa(os), tx, CRED(), - "oldver=%llu newver=%llu dataset = %llu", + if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + uint64_t sa_obj; + + ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, + SPA_VERSION_SA); + sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + + error = zap_add(os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT3U(error, ==, 0); + + VERIFY(0 == sa_set_sa_object(os, sa_obj)); + sa_register_update_callback(os, zfs_sa_upgrade); + } + + spa_history_log_internal(LOG_DS_UPGRADE, + dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", zfsvfs->z_version, newvers, dmu_objset_id(os)); dmu_tx_commit(tx); |