diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/Makefile.in  |   2
-rw-r--r-- | module/zfs/dmu_objset.c |  35
-rw-r--r-- | module/zfs/dsl_dataset.c |   3
-rw-r--r-- | module/zfs/zfs_ctldir.c | 984
-rw-r--r-- | module/zfs/zfs_dir.c    |  11
-rw-r--r-- | module/zfs/zfs_ioctl.c  |  88
-rw-r--r-- | module/zfs/zfs_vfsops.c |  47
-rw-r--r-- | module/zfs/zfs_vnops.c  |  10
-rw-r--r-- | module/zfs/zfs_znode.c  |  14
-rw-r--r-- | module/zfs/zpl_ctldir.c | 519
-rw-r--r-- | module/zfs/zpl_export.c |   6
-rw-r--r-- | module/zfs/zpl_inode.c  |  15
-rw-r--r-- | module/zfs/zpl_super.c  |  24
13 files changed, 1675 insertions, 83 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index b303168c8..5ec75a03a 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -64,6 +64,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zap_leaf.o $(MODULE)-objs += @top_srcdir@/module/zfs/zap_micro.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_acl.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_byteswap.o +$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_ctldir.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_debug.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_dir.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_fm.o @@ -83,6 +84,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zio_checksum.o $(MODULE)-objs += @top_srcdir@/module/zfs/zio_compress.o $(MODULE)-objs += @top_srcdir@/module/zfs/zio_inject.o $(MODULE)-objs += @top_srcdir@/module/zfs/zle.o +$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_ctldir.o $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_export.o $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_file.o $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_inode.o diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 0703a9466..1d0b4619f 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1584,6 +1584,41 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name, return (0); } +/* + * Determine the objset id for a given snapshot name. 
+ */ +int +dmu_snapshot_id(objset_t *os, const char *snapname, uint64_t *idp) +{ + dsl_dataset_t *ds = os->os_dsl_dataset; + zap_cursor_t cursor; + zap_attribute_t attr; + int error; + + if (ds->ds_phys->ds_snapnames_zapobj == 0) + return (ENOENT); + + zap_cursor_init(&cursor, ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj); + + error = zap_cursor_move_to_key(&cursor, snapname, MT_EXACT); + if (error) { + zap_cursor_fini(&cursor); + return (error); + } + + error = zap_cursor_retrieve(&cursor, &attr); + if (error) { + zap_cursor_fini(&cursor); + return (error); + } + + *idp = attr.za_first_integer; + zap_cursor_fini(&cursor); + + return (0); +} + int dmu_dir_list_next(objset_t *os, int namelen, char *name, uint64_t *idp, uint64_t *offp) diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 718c3ad52..2deec8cf1 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -2373,8 +2373,7 @@ dsl_snapshot_rename_one(const char *name, void *arg) return (err == ENOENT ? 0 : err); } -/* XXX: Ignore for SPL version until mounting the FS is supported */ -#if defined(_KERNEL) && !defined(HAVE_SPL) +#ifdef _KERNEL /* * For all filesystems undergoing rename, we'll need to unmount it. */ diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c new file mode 100644 index 000000000..01bf52fe9 --- /dev/null +++ b/module/zfs/zfs_ctldir.c @@ -0,0 +1,984 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (C) 2011 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * LLNL-CODE-403049. + * Rewritten for Linux by: + * Rohan Puri <[email protected]> + * Brian Behlendorf <[email protected]> + */ + +/* + * ZFS control directory (a.k.a. ".zfs") + * + * This directory provides a common location for all ZFS meta-objects. + * Currently, this is only the 'snapshot' and 'shares' directory, but this may + * expand in the future. The elements are built dynamically, as the hierarchy + * does not actually exist on disk. + * + * For 'snapshot', we don't want to have all snapshots always mounted, because + * this would take up a huge amount of space in /etc/mnttab. We have three + * types of objects: + * + * ctldir ------> snapshotdir -------> snapshot + * | + * | + * V + * mounted fs + * + * The 'snapshot' node contains just enough information to lookup '..' and act + * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we + * perform an automount of the underlying filesystem and return the + * corresponding inode. + * + * All mounts are handled automatically by an user mode helper which invokes + * the mount mount procedure. Unmounts are handled by allowing the mount + * point to expire so the kernel may automatically unmount it. 
+ * + * The '.zfs', '.zfs/snapshot', and all directories created under + * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same + * share the same zfs_sb_t as the head filesystem (what '.zfs' lives under). + * + * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths + * (ie: snapshots) are complete ZFS filesystems and have their own unique + * zfs_sb_t. However, the fsid reported by these mounts will be the same + * as that used by the parent zfs_sb_t to make NFS happy. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/pathname.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> +#include <sys/zfs_ctldir.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_vfsops.h> +#include <sys/zfs_vnops.h> +#include <sys/stat.h> +#include <sys/dmu.h> +#include <sys/dsl_deleg.h> +#include <sys/mount.h> +#include <sys/zpl.h> +#include "zfs_namecheck.h" + +/* + * Control Directory Tunables (.zfs) + */ +int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; + +static zfs_snapentry_t * +zfsctl_sep_alloc(void) +{ + return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); +} + +void +zfsctl_sep_free(zfs_snapentry_t *sep) +{ + kmem_free(sep->se_name, MAXNAMELEN); + kmem_free(sep->se_path, PATH_MAX); + kmem_free(sep, sizeof (zfs_snapentry_t)); +} + +/* + * Attempt to expire an automounted snapshot, unmounts are attempted every + * 'zfs_expire_snapshot' seconds until they succeed. The work request is + * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t. 
+ */ +static void +zfsctl_expire_snapshot(void *data) +{ + zfs_snapentry_t *sep; + zfs_sb_t *zsb; + int error; + + sep = spl_get_work_data(data, zfs_snapentry_t, se_work.work); + zsb = ITOZSB(sep->se_inode); + + error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE); + if (error == EBUSY) + schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ); +} + +int +snapentry_compare(const void *a, const void *b) +{ + const zfs_snapentry_t *sa = a; + const zfs_snapentry_t *sb = b; + int ret = strcmp(sa->se_name, sb->se_name); + + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +boolean_t +zfsctl_is_node(struct inode *ip) +{ + return (ITOZ(ip)->z_is_ctldir); +} + +boolean_t +zfsctl_is_snapdir(struct inode *ip) +{ + return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); +} + +/* + * Allocate a new inode with the passed id and ops. + */ +static struct inode * +zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, + const struct file_operations *fops, const struct inode_operations *ops) +{ + struct timespec now = current_fs_time(zsb->z_sb); + struct inode *ip; + znode_t *zp; + + ip = new_inode(zsb->z_sb); + if (ip == NULL) + return (NULL); + + zp = ITOZ(ip); + ASSERT3P(zp->z_dirlocks, ==, NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); + zp->z_id = id; + zp->z_unlinked = 0; + zp->z_atime_dirty = 0; + zp->z_zn_prefetch = 0; + zp->z_moved = 0; + zp->z_sa_hdl = NULL; + zp->z_blksz = 0; + zp->z_seq = 0; + zp->z_mapcnt = 0; + zp->z_gen = 0; + zp->z_size = 0; + zp->z_atime[0] = 0; + zp->z_atime[1] = 0; + zp->z_links = 0; + zp->z_pflags = 0; + zp->z_uid = 0; + zp->z_gid = 0; + zp->z_mode = 0; + zp->z_sync_cnt = 0; + zp->z_is_zvol = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_TRUE; + zp->z_is_sa = B_FALSE; + ip->i_ino = id; + ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO); + ip->i_uid = 0; + ip->i_gid = 0; + ip->i_blkbits = SPA_MINBLOCKSHIFT; + ip->i_atime = now; + 
ip->i_mtime = now; + ip->i_ctime = now; + ip->i_fop = fops; + ip->i_op = ops; + + if (insert_inode_locked(ip)) { + unlock_new_inode(ip); + iput(ip); + return (NULL); + } + + mutex_enter(&zsb->z_znodes_lock); + list_insert_tail(&zsb->z_all_znodes, zp); + membar_producer(); + mutex_exit(&zsb->z_znodes_lock); + + unlock_new_inode(ip); + + return (ip); +} + +/* + * Lookup the inode with given id, it will be allocated if needed. + */ +static struct inode * +zfsctl_inode_lookup(zfs_sb_t *zsb, unsigned long id, + const struct file_operations *fops, const struct inode_operations *ops) +{ + struct inode *ip = NULL; + + while (ip == NULL) { + ip = ilookup(zsb->z_sb, id); + if (ip) + break; + + /* May fail due to concurrent zfsctl_inode_alloc() */ + ip = zfsctl_inode_alloc(zsb, id, fops, ops); + } + + return (ip); +} + +/* + * Free zfsctl inode specific structures, currently there are none. + */ +void +zfsctl_inode_destroy(struct inode *ip) +{ + return; +} + +/* + * An inode is being evicted from the cache. + */ +void +zfsctl_inode_inactive(struct inode *ip) +{ + if (zfsctl_is_snapdir(ip)) + zfsctl_snapdir_inactive(ip); +} + +/* + * Create the '.zfs' directory. This directory is cached as part of the VFS + * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount() + * therefore checks against a vfs_count of 2 instead of 1. This reference + * is removed when the ctldir is destroyed in the unmount. All other entities + * under the '.zfs' directory are created dynamically as needed. + */ +int +zfsctl_create(zfs_sb_t *zsb) +{ + ASSERT(zsb->z_ctldir == NULL); + + zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT, + &zpl_fops_root, &zpl_ops_root); + if (zsb->z_ctldir == NULL) + return (ENOENT); + + return (0); +} + +/* + * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. 
+ */ +void +zfsctl_destroy(zfs_sb_t *zsb) +{ + iput(zsb->z_ctldir); + zsb->z_ctldir = NULL; +} + +/* + * Given a root znode, retrieve the associated .zfs directory. + * Add a hold to the vnode and return it. + */ +struct inode * +zfsctl_root(znode_t *zp) +{ + ASSERT(zfs_has_ctldir(zp)); + igrab(ZTOZSB(zp)->z_ctldir); + return (ZTOZSB(zp)->z_ctldir); +} + +/*ARGSUSED*/ +int +zfsctl_fid(struct inode *ip, fid_t *fidp) +{ + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); + uint64_t object = zp->z_id; + zfid_short_t *zfid; + int i; + + ZFS_ENTER(zsb); + + if (fidp->fid_len < SHORT_FID_LEN) { + fidp->fid_len = SHORT_FID_LEN; + ZFS_EXIT(zsb); + return (ENOSPC); + } + + zfid = (zfid_short_t *)fidp; + + zfid->zf_len = SHORT_FID_LEN; + + for (i = 0; i < sizeof (zfid->zf_object); i++) + zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); + + /* .zfs znodes always have a generation number of 0 */ + for (i = 0; i < sizeof (zfid->zf_gen); i++) + zfid->zf_gen[i] = 0; + + ZFS_EXIT(zsb); + return (0); +} + +static int +zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname) +{ + objset_t *os = ITOZSB(ip)->z_os; + + if (snapshot_namecheck(name, NULL, NULL) != 0) + return (EILSEQ); + + dmu_objset_name(os, zname); + if ((strlen(zname) + 1 + strlen(name)) >= len) + return (ENAMETOOLONG); + + (void) strcat(zname, "@"); + (void) strcat(zname, name); + + return (0); +} + +static int +zfsctl_snapshot_zpath(struct path *path, int len, char *zpath) +{ + char *path_buffer, *path_ptr; + int path_len, error = 0; + + path_buffer = kmem_alloc(len, KM_SLEEP); + + path_ptr = d_path(path, path_buffer, len); + if (IS_ERR(path_ptr)) { + error = -PTR_ERR(path_ptr); + goto out; + } + + path_len = path_buffer + len - 1 - path_ptr; + if (path_len > len) { + error = EFAULT; + goto out; + } + + memcpy(zpath, path_ptr, path_len); + zpath[path_len] = '\0'; +out: + kmem_free(path_buffer, len); + + return (error); +} + +/* + * Special case the handling of "..". 
+ */ +/* ARGSUSED */ +int +zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, + int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) +{ + zfs_sb_t *zsb = ITOZSB(dip); + int error = 0; + + ZFS_ENTER(zsb); + + if (strcmp(name, "..") == 0) { + *ipp = dip->i_sb->s_root->d_inode; + } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { + *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR, + &zpl_fops_snapdir, &zpl_ops_snapdir); + } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { + *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES, + &zpl_fops_shares, &zpl_ops_shares); + } else { + *ipp = NULL; + } + + if (*ipp == NULL) + error = ENOENT; + + ZFS_EXIT(zsb); + + return (error); +} + +/* + * Lookup entry point for the 'snapshot' directory. Try to open the + * snapshot if it exist, creating the pseudo filesystem inode as necessary. + * Perform a mount of the associated dataset on top of the inode. + */ +/* ARGSUSED */ +int +zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, + int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) +{ + zfs_sb_t *zsb = ITOZSB(dip); + uint64_t id; + int error; + + ZFS_ENTER(zsb); + + error = dmu_snapshot_id(zsb->z_os, name, &id); + if (error) { + ZFS_EXIT(zsb); + return (error); + } + + *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id, + &simple_dir_operations, &simple_dir_inode_operations); + if (*ipp) { +#ifdef HAVE_AUTOMOUNT + (*ipp)->i_flags |= S_AUTOMOUNT; +#endif /* HAVE_AUTOMOUNT */ + } else { + error = ENOENT; + } + + ZFS_EXIT(zsb); + + return (error); +} + +static void +zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name) +{ + avl_index_t where; + + ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock)); + ASSERT(sep != NULL); + + /* + * Change the name in the AVL tree. 
+ */ + avl_remove(&zsb->z_ctldir_snaps, sep); + (void) strcpy(sep->se_name, name); + VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL); + avl_insert(&zsb->z_ctldir_snaps, sep, where); +} + +/* + * Renaming a directory under '.zfs/snapshot' will automatically trigger + * a rename of the snapshot to the new given name. The rename is confined + * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. + */ +/*ARGSUSED*/ +int +zfsctl_snapdir_rename(struct inode *sdip, char *sname, + struct inode *tdip, char *tname, cred_t *cr, int flags) +{ + zfs_sb_t *zsb = ITOZSB(sdip); + zfs_snapentry_t search, *sep; + avl_index_t where; + char *to, *from, *real; + int error; + + ZFS_ENTER(zsb); + + to = kmem_alloc(MAXNAMELEN, KM_SLEEP); + from = kmem_alloc(MAXNAMELEN, KM_SLEEP); + real = kmem_alloc(MAXNAMELEN, KM_SLEEP); + + if (zsb->z_case == ZFS_CASE_INSENSITIVE) { + error = dmu_snapshot_realname(zsb->z_os, sname, real, + MAXNAMELEN, NULL); + if (error == 0) { + sname = real; + } else if (error != ENOTSUP) { + goto out; + } + } + + error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from); + if (!error) + error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to); + if (!error) + error = zfs_secpolicy_rename_perms(from, to, cr); + if (error) + goto out; + + /* + * Cannot move snapshots out of the snapdir. + */ + if (sdip != tdip) { + error = EINVAL; + goto out; + } + + /* + * No-op when names are identical. 
+ */ + if (strcmp(sname, tname) == 0) { + error = 0; + goto out; + } + + mutex_enter(&zsb->z_ctldir_lock); + + error = dmu_objset_rename(from, to, B_FALSE); + if (error) + goto out_unlock; + + search.se_name = (char *)sname; + sep = avl_find(&zsb->z_ctldir_snaps, &search, &where); + if (sep) + zfsctl_rename_snap(zsb, sep, tname); + +out_unlock: + mutex_exit(&zsb->z_ctldir_lock); +out: + kmem_free(from, MAXNAMELEN); + kmem_free(to, MAXNAMELEN); + kmem_free(real, MAXNAMELEN); + + ZFS_EXIT(zsb); + + return (error); +} + +/* + * Removing a directory under '.zfs/snapshot' will automatically trigger + * the removal of the snapshot with the given name. + */ +/* ARGSUSED */ +int +zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) +{ + zfs_sb_t *zsb = ITOZSB(dip); + char *snapname, *real; + int error; + + ZFS_ENTER(zsb); + + snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + real = kmem_alloc(MAXNAMELEN, KM_SLEEP); + + if (zsb->z_case == ZFS_CASE_INSENSITIVE) { + error = dmu_snapshot_realname(zsb->z_os, name, real, + MAXNAMELEN, NULL); + if (error == 0) { + name = real; + } else if (error != ENOTSUP) { + goto out; + } + } + + error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname); + if (!error) + error = zfs_secpolicy_destroy_perms(snapname, cr); + if (error) + goto out; + + error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE); + if ((error == 0) || (error == ENOENT)) + error = dmu_objset_destroy(snapname, B_FALSE); +out: + kmem_free(snapname, MAXNAMELEN); + kmem_free(real, MAXNAMELEN); + + ZFS_EXIT(zsb); + + return (error); +} + +/* + * Creating a directory under '.zfs/snapshot' will automatically trigger + * the creation of a new snapshot with the given name. 
+ */ +/* ARGSUSED */ +int +zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, + struct inode **ipp, cred_t *cr, int flags) +{ + zfs_sb_t *zsb = ITOZSB(dip); + char *dsname; + int error; + + dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + + if (snapshot_namecheck(dirname, NULL, NULL) != 0) { + error = EILSEQ; + goto out; + } + + dmu_objset_name(zsb->z_os, dsname); + + error = zfs_secpolicy_snapshot_perms(dsname, cr); + if (error) + goto out; + + if (error == 0) { + error = dmu_objset_snapshot(dsname, dirname, + NULL, NULL, B_FALSE, B_FALSE, -1); + if (error) + goto out; + + error = zfsctl_snapdir_lookup(dip, dirname, ipp, + 0, cr, NULL, NULL); + } +out: + kmem_free(dsname, MAXNAMELEN); + + return (error); +} + +/* + * When a .zfs/snapshot/<snapshot> inode is evicted they must be removed + * from the snapshot list. This will normally happen as part of the auto + * unmount, however in the case of a manual snapshot unmount this will be + * the only notification we receive. + */ +void +zfsctl_snapdir_inactive(struct inode *ip) +{ + zfs_sb_t *zsb = ITOZSB(ip); + zfs_snapentry_t *sep, *next; + + mutex_enter(&zsb->z_ctldir_lock); + + sep = avl_first(&zsb->z_ctldir_snaps); + while (sep != NULL) { + next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); + + if (sep->se_inode == ip) { + avl_remove(&zsb->z_ctldir_snaps, sep); + cancel_delayed_work_sync(&sep->se_work); + zfsctl_sep_free(sep); + break; + } + sep = next; + } + + mutex_exit(&zsb->z_ctldir_lock); +} + +/* + * Attempt to unmount a snapshot by making a call to user space. + * There is no assurance that this can or will succeed, is just a + * best effort. In the case where it does fail, perhaps because + * it's in use, the unmount will fail harmlessly. 
+ */ +#define SET_UNMOUNT_CMD \ + "exec 0</dev/null " \ + " 1>/dev/null " \ + " 2>/dev/null; " \ + "umount -t zfs -n %s%s" + +static int +__zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags) +{ + char *argv[] = { "/bin/sh", "-c", NULL, NULL }; + char *envp[] = { NULL }; + int error; + + argv[2] = kmem_asprintf(SET_UNMOUNT_CMD, + flags & MNT_FORCE ? "-f " : "", sep->se_path); + error = call_usermodehelper(argv[0], argv, envp, 1); + strfree(argv[2]); + + /* + * The umount system utility will return 256 on error. We must + * assume this error is because the file system is busy so it is + * converted to the more sensible EBUSY. + */ + if (error) + error = EBUSY; + + /* + * This was the result of a manual unmount, cancel the delayed work + * to prevent zfsctl_expire_snapshot() from attempting a unmount. + */ + if ((error == 0) && !(flags & MNT_EXPIRE)) + cancel_delayed_work(&sep->se_work); + + return (error); +} + +int +zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags) +{ + zfs_snapentry_t search; + zfs_snapentry_t *sep; + int error = 0; + + mutex_enter(&zsb->z_ctldir_lock); + + search.se_name = name; + sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); + if (sep) { + avl_remove(&zsb->z_ctldir_snaps, sep); + error = __zfsctl_unmount_snapshot(sep, flags); + if (error == EBUSY) + avl_add(&zsb->z_ctldir_snaps, sep); + else + zfsctl_sep_free(sep); + } else { + error = ENOENT; + } + + mutex_exit(&zsb->z_ctldir_lock); + ASSERT3S(error, >=, 0); + + return (error); +} + +/* + * Traverse all mounted snapshots and attempt to unmount them. This + * is best effort, on failure EEXIST is returned and count will be set + * to the number of file snapshots which could not be unmounted. 
+ */ +int +zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count) +{ + zfs_snapentry_t *sep, *next; + int error = 0; + + *count = 0; + + ASSERT(zsb->z_ctldir != NULL); + mutex_enter(&zsb->z_ctldir_lock); + + sep = avl_first(&zsb->z_ctldir_snaps); + while (sep != NULL) { + next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); + avl_remove(&zsb->z_ctldir_snaps, sep); + error = __zfsctl_unmount_snapshot(sep, flags); + if (error == EBUSY) { + avl_add(&zsb->z_ctldir_snaps, sep); + (*count)++; + } else { + zfsctl_sep_free(sep); + } + + sep = next; + } + + mutex_exit(&zsb->z_ctldir_lock); + + return ((*count > 0) ? EEXIST : 0); +} + +#define SET_MOUNT_CMD \ + "exec 0</dev/null " \ + " 1>/dev/null " \ + " 2>/dev/null; " \ + "mount -t zfs -n %s %s" + +int +zfsctl_mount_snapshot(struct path *path, int flags) +{ + struct dentry *dentry = path->dentry; + struct inode *ip = dentry->d_inode; + zfs_sb_t *zsb = ITOZSB(ip); + char *full_name, *full_path; + zfs_snapentry_t *sep; + zfs_snapentry_t search; + char *argv[] = { "/bin/sh", "-c", NULL, NULL }; + char *envp[] = { NULL }; + int error; + + ZFS_ENTER(zsb); + + full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); + full_path = kmem_zalloc(PATH_MAX, KM_SLEEP); + + error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name); + if (error) + goto error; + + error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path); + if (error) + goto error; + + /* + * Attempt to mount the snapshot from user space. Normally this + * would be done using the vfs_kern_mount() function, however that + * function is marked GPL-only and cannot be used. On error we + * careful to log the real error to the console and return EISDIR + * to safely abort the automount. This should be very rare. 
+ */ + argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path); + error = call_usermodehelper(argv[0], argv, envp, 1); + strfree(argv[2]); + if (error) { + printk("ZFS: Unable to automount %s at %s: %d\n", + full_name, full_path, error); + error = EISDIR; + goto error; + } + + mutex_enter(&zsb->z_ctldir_lock); + + /* + * Ensure a previous entry does not exist, if it does safely remove + * it any cancel the outstanding expiration. This can occur when a + * snapshot is manually unmounted and then an automount is triggered. + */ + search.se_name = full_name; + sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); + if (sep) { + avl_remove(&zsb->z_ctldir_snaps, sep); + cancel_delayed_work_sync(&sep->se_work); + zfsctl_sep_free(sep); + } + + sep = zfsctl_sep_alloc(); + sep->se_name = full_name; + sep->se_path = full_path; + sep->se_inode = ip; + avl_add(&zsb->z_ctldir_snaps, sep); + + spl_init_delayed_work(&sep->se_work, zfsctl_expire_snapshot, sep); + schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ); + + mutex_exit(&zsb->z_ctldir_lock); +error: + if (error) { + kmem_free(full_name, MAXNAMELEN); + kmem_free(full_path, PATH_MAX); + } + + ZFS_EXIT(zsb); + + return (error); +} + +/* + * Check if this super block has a matching objset id. + */ +static int +zfsctl_test_super(struct super_block *sb, void *objsetidp) +{ + zfs_sb_t *zsb = sb->s_fs_info; + uint64_t objsetid = *(uint64_t *)objsetidp; + + return (dmu_objset_id(zsb->z_os) == objsetid); +} + +/* + * Prevent a new super block from being allocated if an existing one + * could not be located. We only want to preform a lookup operation. 
+ */ +static int +zfsctl_set_super(struct super_block *sb, void *objsetidp) +{ + return (-EEXIST); +} + +int +zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) +{ + zfs_sb_t *zsb = sb->s_fs_info; + struct super_block *sbp; + zfs_snapentry_t *sep; + uint64_t id; + int error; + + ASSERT(zsb->z_ctldir != NULL); + + mutex_enter(&zsb->z_ctldir_lock); + + /* + * Verify that the snapshot is mounted. + */ + sep = avl_first(&zsb->z_ctldir_snaps); + while (sep != NULL) { + error = dmu_snapshot_id(zsb->z_os, sep->se_name, &id); + if (error) + goto out; + + if (id == objsetid) + break; + + sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep); + } + + if (sep != NULL) { + /* + * Lookup the mounted root rather than the covered mount + * point. This may fail if the snapshot has just been + * unmounted by an unrelated user space process. This + * race cannot occur to an expired mount point because + * we hold the zsb->z_ctldir_lock to prevent the race. + */ + sbp = sget(&zpl_fs_type, zfsctl_test_super, + zfsctl_set_super, &id); + if (IS_ERR(sbp)) { + error = -PTR_ERR(sbp); + } else { + *zsbp = sbp->s_fs_info; + deactivate_super(sbp); + } + } else { + error = EINVAL; + } +out: + mutex_exit(&zsb->z_ctldir_lock); + ASSERT3S(error, >=, 0); + + return (error); +} + +/* ARGSUSED */ +int +zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, + int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) +{ + zfs_sb_t *zsb = ITOZSB(dip); + struct inode *ip; + znode_t *dzp; + int error; + + ZFS_ENTER(zsb); + + if (zsb->z_shares_dir == 0) { + ZFS_EXIT(zsb); + return (-ENOTSUP); + } + + error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); + if (error) { + ZFS_EXIT(zsb); + return (error); + } + + error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); + + iput(ZTOI(dzp)); + ZFS_EXIT(zsb); + + return (error); +} + + +/* + * Initialize the various pieces we'll need to create and manipulate .zfs + * directories. Currently this is unused but available. 
+ */ +void +zfsctl_init(void) +{ +} + +/* + * Cleanup the various pieces we needed for .zfs directories. In particular + * ensure the expiry timer is canceled safely. + */ +void +zfsctl_fini(void) +{ +} + +module_param(zfs_expire_snapshot, int, 0644); +MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 8f1a0c2cc..6cd9c8508 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -50,6 +50,7 @@ #include <sys/zap.h> #include <sys/dmu.h> #include <sys/atomic.h> +#include <sys/zfs_ctldir.h> #include <sys/zfs_fuid.h> #include <sys/sa.h> #include <sys/zfs_sa.h> @@ -415,28 +416,24 @@ zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags, /* * If we are a snapshot mounted under .zfs, return - * the vp for the snapshot directory. + * the inode pointer for the snapshot directory. */ if ((error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0) return (error); -#ifdef HAVE_SNAPSHOT + if (parent == dzp->z_id && zsb->z_parent != zsb) { error = zfsctl_root_lookup(zsb->z_parent->z_ctldir, - "snapshot", ipp, NULL, 0, NULL, kcred, - NULL, NULL, NULL); + "snapshot", ipp, 0, kcred, NULL, NULL); return (error); } -#endif /* HAVE_SNAPSHOT */ rw_enter(&dzp->z_parent_lock, RW_READER); error = zfs_zget(zsb, parent, &zp); if (error == 0) *ipp = ZTOI(zp); rw_exit(&dzp->z_parent_lock); -#ifdef HAVE_SNAPSHOT } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { *ipp = zfsctl_root(dzp); -#endif /* HAVE_SNAPSHOT */ } else { int zf; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 532f17aa1..d2ad1af71 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -58,6 +58,7 @@ #include <sys/mount.h> #include <sys/sdt.h> #include <sys/fs/zfs.h> +#include <sys/zfs_ctldir.h> #include <sys/zfs_dir.h> #include <sys/zfs_onexit.h> #include <sys/zvol.h> @@ -2690,33 +2691,6 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc) return (error); } 
-#ifdef HAVE_SNAPSHOT -/* - * Search the vfs list for a specified resource. Returns a pointer to it - * or NULL if no suitable entry is found. The caller of this routine - * is responsible for releasing the returned vfs pointer. - */ -static vfs_t * -zfs_get_vfs(const char *resource) -{ - struct vfs *vfsp; - struct vfs *vfs_found = NULL; - - vfs_list_read_lock(); - vfsp = rootvfs; - do { - if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) { - mntget(vfsp); - vfs_found = vfsp; - break; - } - vfsp = vfsp->vfs_next; - } while (vfsp != rootvfs); - vfs_list_unlock(); - return (vfs_found); -} -#endif /* HAVE_SNAPSHOT */ - /* ARGSUSED */ static void zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) @@ -3067,38 +3041,52 @@ out: return (error); } +/* + * inputs: + * name dataset name, or when 'arg == NULL' the full snapshot name + * arg short snapshot name (i.e. part after the '@') + */ int zfs_unmount_snap(const char *name, void *arg) { -#ifdef HAVE_SNAPSHOT - vfs_t *vfsp = NULL; + zfs_sb_t *zsb = NULL; + char *dsname; + char *snapname; + char *fullname; + char *ptr; + int error; if (arg) { - char *snapname = arg; - char *fullname = kmem_asprintf("%s@%s", name, snapname); - vfsp = zfs_get_vfs(fullname); - strfree(fullname); - } else if (strchr(name, '@')) { - vfsp = zfs_get_vfs(name); + dsname = strdup(name); + snapname = strdup(arg); + } else { + ptr = strchr(name, '@'); + if (ptr) { + dsname = strdup(name); + dsname[ptr - name] = '\0'; + snapname = strdup(ptr + 1); + } else { + return (0); + } } - if (vfsp) { - /* - * Always force the unmount for snapshots. 
- */ - int flag = MS_FORCE; - int err; + fullname = kmem_asprintf("%s@%s", dsname, snapname); - if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { - mntput(vfsp); - return (err); - } - mntput(vfsp); - if ((err = dounmount(vfsp, flag, kcred)) != 0) - return (err); + error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); + if (error == 0) { + error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); + zfs_sb_rele(zsb, FTAG); + + /* Allow ENOENT for consistency with upstream */ + if (error == ENOENT) + error = 0; } -#endif /* HAVE_SNAPSHOT */ - return (0); + + strfree(dsname); + strfree(snapname); + strfree(fullname); + + return (error); } /* diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index fb319a547..8f1c713c0 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -56,6 +56,7 @@ #include <sys/modctl.h> #include <sys/refstr.h> #include <sys/zfs_ioctl.h> +#include <sys/zfs_ctldir.h> #include <sys/zfs_fuid.h> #include <sys/bootconf.h> #include <sys/sunddi.h> @@ -710,6 +711,10 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); + avl_create(&zsb->z_ctldir_snaps, snapentry_compare, + sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); + mutex_init(&zsb->z_ctldir_lock, NULL, MUTEX_DEFAULT, NULL); + *zsbp = zsb; return (0); @@ -819,6 +824,8 @@ zfs_sb_free(zfs_sb_t *zsb) rw_destroy(&zsb->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zsb->z_hold_mtx[i]); + mutex_destroy(&zsb->z_ctldir_lock); + avl_destroy(&zsb->z_ctldir_snaps); kmem_free(zsb, sizeof (zfs_sb_t)); } EXPORT_SYMBOL(zfs_sb_free); @@ -1183,9 +1190,6 @@ zfs_domount(struct super_block *sb, void *data, int silent) mutex_exit(&zsb->z_os->os_user_ptr_lock); } else { error = zfs_sb_setup(zsb, B_TRUE); -#ifdef HAVE_SNAPSHOT - (void) zfs_snap_create(zsb); -#endif /* HAVE_SNAPSHOT */ } /* Allocate a root inode for the filesystem. 
*/ @@ -1202,6 +1206,9 @@ zfs_domount(struct super_block *sb, void *data, int silent) error = ENOMEM; goto out; } + + if (!zsb->z_issnap) + zfsctl_create(zsb); out: if (error) { dmu_objset_disown(zsb->z_os, zsb); @@ -1212,6 +1219,27 @@ out: } EXPORT_SYMBOL(zfs_domount); +/* + * Called when an unmount is requested and certain sanity checks have + * already passed. At this point no dentries or inodes have been reclaimed + * from their respective caches. We drop the extra reference on the .zfs + * control directory to allow everything to be reclaimed. All snapshots + * must already have been unmounted to reach this point. + */ +void +zfs_preumount(struct super_block *sb) +{ + zfs_sb_t *zsb = sb->s_fs_info; + + if (zsb->z_ctldir != NULL) + zfsctl_destroy(zsb); +} +EXPORT_SYMBOL(zfs_preumount); + +/* + * Called once all other unmount released tear down has occurred. + * It is our responsibility to release any remaining infrastructure. + */ /*ARGSUSED*/ int zfs_umount(struct super_block *sb) @@ -1288,11 +1316,10 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) ZFS_EXIT(zsb); -#ifdef HAVE_SNAPSHOT - err = zfsctl_lookup_objset(vfsp, objsetid, &zsb); + err = zfsctl_lookup_objset(sb, objsetid, &zsb); if (err) return (EINVAL); -#endif /* HAVE_SNAPSHOT */ + ZFS_ENTER(zsb); } @@ -1309,22 +1336,20 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) return (EINVAL); } -#ifdef HAVE_SNAPSHOT /* A zero fid_gen means we are in the .zfs control directories */ if (fid_gen == 0 && (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { *ipp = zsb->z_ctldir; ASSERT(*ipp != NULL); if (object == ZFSCTL_INO_SNAPDIR) { - VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, NULL, - 0, NULL, NULL, NULL, NULL, NULL) == 0); + VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, + 0, kcred, NULL, NULL) == 0); } else { igrab(*ipp); } ZFS_EXIT(zsb); return (0); } -#endif /* HAVE_SNAPSHOT */ gen_mask = -1ULL >> (64 - 8 * i); @@ -1550,6 +1575,7 @@ 
EXPORT_SYMBOL(zfs_get_zplprop); void zfs_init(void) { + zfsctl_init(); zfs_znode_init(); dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); register_filesystem(&zpl_fs_type); @@ -1561,4 +1587,5 @@ zfs_fini(void) { unregister_filesystem(&zpl_fs_type); zfs_znode_fini(); + zfsctl_fini(); } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 74b96b8d7..2da5fec86 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -63,6 +63,7 @@ #include <sys/sid.h> #include <sys/mode.h> #include "fs/fs_subr.h" +#include <sys/zfs_ctldir.h> #include <sys/zfs_fuid.h> #include <sys/zfs_sa.h> #include <sys/zfs_vnops.h> @@ -2045,7 +2046,7 @@ zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir, dmu_prefetch(os, objnum, 0, 0); } - if (*pos >= 2) { + if (*pos > 2 || (*pos == 2 && !zfs_show_ctldir(zp))) { zap_cursor_advance(&zc); *pos = zap_cursor_serialize(&zc); } else { @@ -3876,9 +3877,10 @@ zfs_inactive(struct inode *ip) zfs_sb_t *zsb = ITOZSB(ip); int error; -#ifdef HAVE_SNAPSHOT - /* Early return for snapshot inode? 
*/ -#endif /* HAVE_SNAPSHOT */ + if (zfsctl_is_node(ip)) { + zfsctl_inode_inactive(ip); + return; + } rw_enter(&zsb->z_teardown_inactive_lock, RW_READER); if (zp->z_sa_hdl == NULL) { diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 709ae74f8..3a6872f3e 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -52,6 +52,7 @@ #include <sys/zfs_rlock.h> #include <sys/zfs_fuid.h> #include <sys/zfs_vnops.h> +#include <sys/zfs_ctldir.h> #include <sys/dnode.h> #include <sys/fs/zfs.h> #include <sys/kidmap.h> @@ -267,6 +268,9 @@ zfs_inode_destroy(struct inode *ip) znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ZTOZSB(zp); + if (zfsctl_is_node(ip)) + zfsctl_inode_destroy(ip); + mutex_enter(&zsb->z_znodes_lock); list_remove(&zsb->z_all_znodes, zp); zsb->z_nr_znodes--; @@ -353,6 +357,8 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, zp = ITOZ(ip); ASSERT(zp->z_dirlocks == NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_moved = 0; zp->z_sa_hdl = NULL; zp->z_unlinked = 0; @@ -362,7 +368,9 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - zp->z_is_zvol = 0; + zp->z_is_zvol = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_FALSE; zfs_znode_sa_init(zsb, zp, db, obj_type, hdl); @@ -434,6 +442,10 @@ zfs_inode_update(znode_t *zp) zsb = ZTOZSB(zp); ip = ZTOI(zp); + /* Skip .zfs control nodes which do not exist on disk. 
*/ + if (zfsctl_is_node(ip)) + return; + sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16); sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16); sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16); diff --git a/module/zfs/zpl_ctldir.c b/module/zfs/zpl_ctldir.c new file mode 100644 index 000000000..6c742c9e8 --- /dev/null +++ b/module/zfs/zpl_ctldir.c @@ -0,0 +1,519 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (C) 2011 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * LLNL-CODE-403049. + * Rewritten for Linux by: + * Rohan Puri <[email protected]> + * Brian Behlendorf <[email protected]> + */ + +#include <sys/zfs_vfsops.h> +#include <sys/zfs_vnops.h> +#include <sys/zfs_znode.h> +#include <sys/zfs_ctldir.h> +#include <sys/zpl.h> + +/* + * Common open routine. Disallow any write access. 
+ */ +/* ARGSUSED */ +static int +zpl_common_open(struct inode *ip, struct file *filp) +{ + if (filp->f_mode & FMODE_WRITE) + return (-EACCES); + + return generic_file_open(ip, filp); +} + +static int +zpl_common_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *ip = dentry->d_inode; + int error = 0; + + switch (filp->f_pos) { + case 0: + error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR); + if (error) + break; + + filp->f_pos++; + /* fall-thru */ + case 1: + error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR); + if (error) + break; + + filp->f_pos++; + /* fall-thru */ + default: + break; + } + + return (error); +} + +/* + * Get root directory contents. + */ +static int +zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *ip = dentry->d_inode; + zfs_sb_t *zsb = ITOZSB(ip); + int error = 0; + + ZFS_ENTER(zsb); + + switch (filp->f_pos) { + case 0: + error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR); + if (error) + goto out; + + filp->f_pos++; + /* fall-thru */ + case 1: + error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR); + if (error) + goto out; + + filp->f_pos++; + /* fall-thru */ + case 2: + error = filldir(dirent, ZFS_SNAPDIR_NAME, + strlen(ZFS_SNAPDIR_NAME), 2, ZFSCTL_INO_SNAPDIR, DT_DIR); + if (error) + goto out; + + filp->f_pos++; + /* fall-thru */ + case 3: + error = filldir(dirent, ZFS_SHAREDIR_NAME, + strlen(ZFS_SHAREDIR_NAME), 3, ZFSCTL_INO_SHARES, DT_DIR); + if (error) + goto out; + + filp->f_pos++; + /* fall-thru */ + } +out: + ZFS_EXIT(zsb); + + return (error); +} + +/* + * Get root directory attributes. 
+ */ +/* ARGSUSED */ +static int +zpl_root_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + int error; + + error = simple_getattr(mnt, dentry, stat); + stat->atime = CURRENT_TIME; + + return (error); +} + +static struct dentry * +zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) +{ + cred_t *cr = CRED(); + struct inode *ip; + int error; + + crhold(cr); + error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL); + ASSERT3S(error, <=, 0); + crfree(cr); + + if (error) { + if (error == -ENOENT) + return d_splice_alias(NULL, dentry); + else + return ERR_PTR(error); + } + + return d_splice_alias(ip, dentry); +} + +/* + * The '.zfs' control directory file and inode operations. + */ +const struct file_operations zpl_fops_root = { + .open = zpl_common_open, + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = zpl_root_readdir, +}; + +const struct inode_operations zpl_ops_root = { + .lookup = zpl_root_lookup, + .getattr = zpl_root_getattr, +}; + +static struct dentry * +zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, + struct nameidata *nd) +{ + cred_t *cr = CRED(); + struct inode *ip; + int error; + + crhold(cr); + error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip, + 0, cr, NULL, NULL); + ASSERT3S(error, <=, 0); + crfree(cr); + + if (error) { + if (error == -ENOENT) + return d_splice_alias(NULL, dentry); + else + return ERR_PTR(error); + } + + /* + * Auto mounting of snapshots is only supported for 2.6.37 and + * newer kernels. Prior to this kernel the ops->follow_link() + * callback was used as a hack to trigger the mount. The + * resulting vfsmount was then explicitly grafted in to the + * name space. While it might be possible to add compatibility + * code to accomplish this it would require considerable care. 
+ */ +#ifdef HAVE_AUTOMOUNT + dentry->d_op = &zpl_dops_snapdirs; +#endif /* HAVE_AUTOMOUNT */ + + return d_splice_alias(ip, dentry); +} + +/* ARGSUSED */ +static int +zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *dip = dentry->d_inode; + zfs_sb_t *zsb = ITOZSB(dip); + char snapname[MAXNAMELEN]; + uint64_t id, cookie; + boolean_t case_conflict; + int error = 0; + + ZFS_ENTER(zsb); + + cookie = filp->f_pos; + switch (filp->f_pos) { + case 0: + error = filldir(dirent, ".", 1, 0, dip->i_ino, DT_DIR); + if (error) + goto out; + + filp->f_pos++; + /* fall-thru */ + case 1: + error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR); + if (error) + goto out; + + filp->f_pos++; + /* fall-thru */ + default: + while (error == 0) { + error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN, + snapname, &id, &cookie, &case_conflict); + if (error) + goto out; + + error = filldir(dirent, snapname, strlen(snapname), + filp->f_pos, ZFSCTL_INO_SHARES - id, DT_DIR); + if (error) + goto out; + + filp->f_pos = cookie; + } + } +out: + ZFS_EXIT(zsb); + + if (error == -ENOENT) + return (0); + + return (error); +} + +int +zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry, + struct inode *tdip, struct dentry *tdentry) +{ + cred_t *cr = CRED(); + int error; + + crhold(cr); + error = -zfsctl_snapdir_rename(sdip, dname(sdentry), + tdip, dname(tdentry), cr, 0); + ASSERT3S(error, <=, 0); + crfree(cr); + + return (error); +} + +static int +zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry) +{ + cred_t *cr = CRED(); + int error; + + crhold(cr); + error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0); + ASSERT3S(error, <=, 0); + crfree(cr); + + return (error); +} + +static int +zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, int mode) +{ + cred_t *cr = CRED(); + vattr_t *vap; + struct inode *ip; + int error; + + crhold(cr); + vap = kmem_zalloc(sizeof(vattr_t), 
KM_SLEEP); + zpl_vap_init(vap, dip, dentry, mode | S_IFDIR, cr); + + error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0); + if (error == 0) { +#ifdef HAVE_AUTOMOUNT + dentry->d_op = &zpl_dops_snapdirs; +#endif /* HAVE_AUTOMOUNT */ + d_instantiate(dentry, ip); + } + + kmem_free(vap, sizeof(vattr_t)); + ASSERT3S(error, <=, 0); + crfree(cr); + + return (error); +} + +#ifdef HAVE_AUTOMOUNT +static struct vfsmount * +zpl_snapdir_automount(struct path *path) +{ + struct dentry *dentry = path->dentry; + int error; + + /* + * We must briefly disable automounts for this dentry because the + * user space mount utility will trigger another lookup on this + * directory. That will result in zpl_snapdir_automount() being + * called repeatedly. The DCACHE_NEED_AUTOMOUNT flag can be + * safely reset once the mount completes. + */ + dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT; + error = -zfsctl_mount_snapshot(path, 0); + dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; + if (error) + return ERR_PTR(error); + + /* + * Rather than returning the new vfsmount for the snapshot we must + * return NULL to indicate a mount collision. This is done because + * the user space mount calls do_add_mount() which adds the vfsmount + * to the name space. If we returned the new mount here it would be + * added again to the vfsmount list resulting in list corruption. + */ + return (NULL); +} +#endif /* HAVE_AUTOMOUNT */ + +/* + * Get snapshot directory attributes. + */ +/* ARGSUSED */ +static int +zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + zfs_sb_t *zsb = ITOZSB(dentry->d_inode); + int error; + + ZFS_ENTER(zsb); + error = simple_getattr(mnt, dentry, stat); + stat->nlink = stat->size = avl_numnodes(&zsb->z_ctldir_snaps) + 2; + stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os); + stat->atime = CURRENT_TIME; + ZFS_EXIT(zsb); + + return (error); +} + +/* + * The '.zfs/snapshot' directory file operations. 
These mainly control + * generating the list of available snapshots when doing an 'ls' in the + * directory. See zpl_snapdir_readdir(). + */ +const struct file_operations zpl_fops_snapdir = { + .open = zpl_common_open, + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = zpl_snapdir_readdir, +}; + +/* + * The '.zfs/snapshot' directory inode operations. These mainly control + * creating an inode for a snapshot directory and initializing the needed + * infrastructure to automount the snapshot. See zpl_snapdir_lookup(). + */ +const struct inode_operations zpl_ops_snapdir = { + .lookup = zpl_snapdir_lookup, + .getattr = zpl_snapdir_getattr, + .rename = zpl_snapdir_rename, + .rmdir = zpl_snapdir_rmdir, + .mkdir = zpl_snapdir_mkdir, +}; + +#ifdef HAVE_AUTOMOUNT +const struct dentry_operations zpl_dops_snapdirs = { + .d_automount = zpl_snapdir_automount, +}; +#endif /* HAVE_AUTOMOUNT */ + +static struct dentry * +zpl_shares_lookup(struct inode *dip, struct dentry *dentry, + struct nameidata *nd) +{ + cred_t *cr = CRED(); + struct inode *ip = NULL; + int error; + + crhold(cr); + error = -zfsctl_shares_lookup(dip, dname(dentry), &ip, + 0, cr, NULL, NULL); + ASSERT3S(error, <=, 0); + crfree(cr); + + if (error) { + if (error == -ENOENT) + return d_splice_alias(NULL, dentry); + else + return ERR_PTR(error); + } + + return d_splice_alias(ip, dentry); +} + +/* ARGSUSED */ +static int +zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + cred_t *cr = CRED(); + struct dentry *dentry = filp->f_path.dentry; + struct inode *ip = dentry->d_inode; + zfs_sb_t *zsb = ITOZSB(ip); + znode_t *dzp; + int error; + + ZFS_ENTER(zsb); + + if (zsb->z_shares_dir == 0) { + error = zpl_common_readdir(filp, dirent, filldir); + ZFS_EXIT(zsb); + return (error); + } + + error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp); + if (error) { + ZFS_EXIT(zsb); + return (error); + } + + crhold(cr); + error = -zfs_readdir(ZTOI(dzp), dirent, filldir, &filp->f_pos, cr); + 
crfree(cr); + + iput(ZTOI(dzp)); + ZFS_EXIT(zsb); + ASSERT3S(error, <=, 0); + + return (error); +} + +/* ARGSUSED */ +static int +zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *ip = dentry->d_inode; + zfs_sb_t *zsb = ITOZSB(ip); + znode_t *dzp; + int error; + + ZFS_ENTER(zsb); + + if (zsb->z_shares_dir == 0) { + error = simple_getattr(mnt, dentry, stat); + stat->nlink = stat->size = 2; + stat->atime = CURRENT_TIME; + ZFS_EXIT(zsb); + return (error); + } + + error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp); + if (error == 0) + error = -zfs_getattr_fast(dentry->d_inode, stat); + + iput(ZTOI(dzp)); + ZFS_EXIT(zsb); + ASSERT3S(error, <=, 0); + + return (error); +} + +/* + * The '.zfs/shares' directory file operations. + */ +const struct file_operations zpl_fops_shares = { + .open = zpl_common_open, + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = zpl_shares_readdir, +}; + +/* + * The '.zfs/shares' directory inode operations. 
+ */ +const struct inode_operations zpl_ops_shares = { + .lookup = zpl_shares_lookup, + .getattr = zpl_shares_getattr, +}; diff --git a/module/zfs/zpl_export.c b/module/zfs/zpl_export.c index 4fe998437..f82ee3088 100644 --- a/module/zfs/zpl_export.c +++ b/module/zfs/zpl_export.c @@ -25,6 +25,7 @@ #include <sys/zfs_vnops.h> #include <sys/zfs_znode.h> +#include <sys/zfs_ctldir.h> #include <sys/zpl.h> @@ -42,7 +43,10 @@ zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) fid->fid_len = len_bytes - offsetof(fid_t, fid_data); - rc = zfs_fid(ip, fid); + if (zfsctl_is_node(ip)) + rc = zfsctl_fid(ip, fid); + else + rc = zfs_fid(ip, fid); len_bytes = offsetof(fid_t, fid_data) + fid->fid_len; *max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32); diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 9b5533755..d9b918b43 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -25,6 +25,7 @@ #include <sys/zfs_vfsops.h> #include <sys/zfs_vnops.h> +#include <sys/zfs_znode.h> #include <sys/vfs.h> #include <sys/zpl.h> @@ -51,7 +52,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return d_splice_alias(ip, dentry); } -static void +void zpl_vap_init(vattr_t *vap, struct inode *dir, struct dentry *dentry, mode_t mode, cred_t *cr) { @@ -171,8 +172,20 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry) static int zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { + boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap; int error; + /* + * Ensure MNT_SHRINKABLE is set on snapshots to ensure they are + * unmounted automatically with the parent file system. This + * is done on the first getattr because it's not easy to get the + * vfsmount structure at mount time. This call path is explicitly + * marked unlikely to avoid any performance impact. FWIW, ext4 + * resorts to a similar trick for sysadmin convenience. 
+ */ + if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE))) + mnt->mnt_flags |= MNT_SHRINKABLE; + error = -zfs_getattr_fast(dentry->d_inode, stat); ASSERT3S(error, <=, 0); diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c index 0e6e9360f..98d0a0312 100644 --- a/module/zfs/zpl_super.c +++ b/module/zfs/zpl_super.c @@ -26,6 +26,7 @@ #include <sys/zfs_vfsops.h> #include <sys/zfs_vnops.h> #include <sys/zfs_znode.h> +#include <sys/zfs_ctldir.h> #include <sys/zpl.h> @@ -139,6 +140,20 @@ zpl_remount_fs(struct super_block *sb, int *flags, char *data) return (error); } +static void +zpl_umount_begin(struct super_block *sb) +{ + zfs_sb_t *zsb = sb->s_fs_info; + int count; + + /* + * Best effort to unmount snapshots in .zfs/snapshot/. Normally this + * isn't required because snapshots have the MNT_SHRINKABLE flag set. + */ + if (zsb->z_ctldir) + (void) zfsctl_unmount_snapshots(zsb, MNT_FORCE, &count); +} + /* * The Linux VFS automatically handles the following flags: * MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, MNT_NOATIME, MNT_READONLY @@ -199,13 +214,7 @@ zpl_get_sb(struct file_system_type *fs_type, int flags, static void zpl_kill_sb(struct super_block *sb) { -#ifdef HAVE_SNAPSHOT - zfs_sb_t *zsb = sb->s_fs_info; - - if (zsb && dmu_objset_is_snapshot(zsb->z_os)) - zfs_snap_destroy(zsb); -#endif /* HAVE_SNAPSHOT */ - + zfs_preumount(sb); kill_anon_super(sb); } @@ -306,6 +315,7 @@ const struct super_operations zpl_super_operations = { .sync_fs = zpl_sync_fs, .statfs = zpl_statfs, .remount_fs = zpl_remount_fs, + .umount_begin = zpl_umount_begin, .show_options = zpl_show_options, .show_stats = NULL, #ifdef HAVE_NR_CACHED_OBJECTS |