13 files changed, 1675 insertions, 83 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index b303168c8..5ec75a03a 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -64,6 +64,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zap_leaf.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zap_micro.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_acl.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_byteswap.o
+$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_ctldir.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_debug.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_dir.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_fm.o
@@ -83,6 +84,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zio_checksum.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zio_compress.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zio_inject.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zle.o
+$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_ctldir.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_export.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_file.o
 $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_inode.o
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 0703a9466..1d0b4619f 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1584,6 +1584,41 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
 	return (0);
 }
 
+/*
+ * Look up the snapshot named 'snapname' of the dataset backing 'os' and
+ * store its objset id in '*idp'.  Returns 0 on success, ENOENT when
+ * ds_snapnames_zapobj is 0, or the error reported by the ZAP cursor.
+ */
+int
+dmu_snapshot_id(objset_t *os, const char *snapname, uint64_t *idp)
+{
+	dsl_dataset_t *dataset = os->os_dsl_dataset;
+	zap_attribute_t entry;
+	zap_cursor_t zc;
+	int err;
+
+	/* Without a snapshot name ZAP object there is nothing to search. */
+	if (dataset->ds_phys->ds_snapnames_zapobj == 0)
+		return (ENOENT);
+
+	zap_cursor_init(&zc, dataset->ds_dir->dd_pool->dp_meta_objset,
+	    dataset->ds_phys->ds_snapnames_zapobj);
+
+	/* Position on the exact key, then read the entry stored there. */
+	err = zap_cursor_move_to_key(&zc, snapname, MT_EXACT);
+	if (err == 0)
+		err = zap_cursor_retrieve(&zc, &entry);
+
+	/* The first integer of the entry is reported as the objset id. */
+	if (err == 0)
+		*idp = entry.za_first_integer;
+
+	/* Every path past zap_cursor_init() must release the cursor. */
+	zap_cursor_fini(&zc);
+
+	return (err);
+}
+
 int
 dmu_dir_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp)
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 718c3ad52..2deec8cf1 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -2373,8 +2373,7 @@ dsl_snapshot_rename_one(const char *name, void *arg)
 		return (err == ENOENT ? 0 : err);
 	}
 
-/* XXX: Ignore for SPL version until mounting the FS is supported */
-#if defined(_KERNEL) && !defined(HAVE_SPL)
+#ifdef _KERNEL
 	/*
 	 * For all filesystems undergoing rename, we'll need to unmount it.
 	 */
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
new file mode 100644
index 000000000..01bf52fe9
--- /dev/null
+++ b/module/zfs/zfs_ctldir.c
@@ -0,0 +1,984 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ *
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * LLNL-CODE-403049.
+ * Rewritten for Linux by:
+ *   Rohan Puri <[email protected]>
+ *   Brian Behlendorf <[email protected]>
+ */
+
+/*
+ * ZFS control directory (a.k.a. ".zfs")
+ *
+ * This directory provides a common location for all ZFS meta-objects.
+ * Currently, this is only the 'snapshot' and 'shares' directory, but this may
+ * expand in the future.  The elements are built dynamically, as the hierarchy
+ * does not actually exist on disk.
+ *
+ * For 'snapshot', we don't want to have all snapshots always mounted, because
+ * this would take up a huge amount of space in /etc/mnttab.  We have three
+ * types of objects:
+ *
+ *	ctldir ------> snapshotdir -------> snapshot
+ *                                             |
+ *                                             |
+ *                                             V
+ *                                         mounted fs
+ *
+ * The 'snapshot' node contains just enough information to lookup '..' and act
+ * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
+ * perform an automount of the underlying filesystem and return the
+ * corresponding inode.
+ *
+ * All mounts are handled automatically by a user mode helper which invokes
+ * the mount procedure.  Unmounts are handled by allowing the mount
+ * point to expire so the kernel may automatically unmount it.
+ *
+ * The '.zfs', '.zfs/snapshot', and all directories created under
+ * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
+ * zfs_sb_t as the head filesystem (what '.zfs' lives under).
+ *
+ * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
+ * (ie: snapshots) are complete ZFS filesystems and have their own unique
+ * zfs_sb_t.  However, the fsid reported by these mounts will be the same
+ * as that used by the parent zfs_sb_t to make NFS happy.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/stat.h>
+#include <sys/dmu.h>
+#include <sys/dsl_deleg.h>
+#include <sys/mount.h>
+#include <sys/zpl.h>
+#include "zfs_namecheck.h"
+
+/*
+ * Control Directory Tunables (.zfs): seconds between snapshot expire attempts
+ */
+int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
+
+static zfs_snapentry_t *
+zfsctl_sep_alloc(void)
+{
+	return (kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP));
+}
+
+void
+zfsctl_sep_free(zfs_snapentry_t *sep)
+{
+	kmem_free(sep->se_path, PATH_MAX);	/* mount point path */
+	kmem_free(sep->se_name, MAXNAMELEN);	/* snapshot name */
+	kmem_free(sep, sizeof (zfs_snapentry_t));	/* the entry itself */
+}
+
+/*
+ * Delayed work handler which tries to expire (unmount) an automounted
+ * snapshot.  While the unmount reports EBUSY the work item reschedules
+ * itself to retry after another 'zfs_expire_snapshot' seconds.
+ */
+static void
+zfsctl_expire_snapshot(void *data)
+{
+	zfs_snapentry_t *entry;
+	int rc;
+
+	entry = spl_get_work_data(data, zfs_snapentry_t, se_work.work);
+	rc = zfsctl_unmount_snapshot(ITOZSB(entry->se_inode),
+	    entry->se_name, MNT_EXPIRE);
+	if (rc != EBUSY)
+		return;
+
+	schedule_delayed_work(&entry->se_work, zfs_expire_snapshot * HZ);
+}
+
+int
+snapentry_compare(const void *a, const void *b)
+{
+	const zfs_snapentry_t *lhs = a;
+	const zfs_snapentry_t *rhs = b;
+	int order;
+
+	/*
+	 * Order entries by se_name and clamp the strcmp() result to
+	 * exactly -1, 0 or 1.
+	 */
+	order = strcmp(lhs->se_name, rhs->se_name);
+	return ((order > 0) - (order < 0));
+}
+
+boolean_t
+zfsctl_is_node(struct inode *ip)
+{
+	return (ITOZ(ip)->z_is_ctldir);	/* set in zfsctl_inode_alloc() */
+}
+
+boolean_t
+zfsctl_is_snapdir(struct inode *ip)
+{
+	return ((ip->i_ino <= ZFSCTL_INO_SNAPDIRS) && zfsctl_is_node(ip));
+}
+
+/*
+ * Allocate a ctldir inode with the passed id and ops, or return NULL.
+ */
+static struct inode *
+zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
+    const struct file_operations *fops, const struct inode_operations *ops)
+{
+	struct timespec now = current_fs_time(zsb->z_sb);
+	struct inode *ip;
+	znode_t *zp;
+
+	ip = new_inode(zsb->z_sb);
+	if (ip == NULL)
+		return (NULL);
+
+	zp = ITOZ(ip);
+	ASSERT3P(zp->z_dirlocks, ==, NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+	zp->z_id = id;
+	zp->z_unlinked = 0;
+	zp->z_atime_dirty = 0;
+	zp->z_zn_prefetch = 0;
+	zp->z_moved = 0;
+	zp->z_sa_hdl = NULL;
+	zp->z_blksz = 0;
+	zp->z_seq = 0;
+	zp->z_mapcnt = 0;
+	zp->z_gen = 0;
+	zp->z_size = 0;
+	zp->z_atime[0] = 0;
+	zp->z_atime[1] = 0;
+	zp->z_links = 0;
+	zp->z_pflags = 0;
+	zp->z_uid = 0;
+	zp->z_gid = 0;
+	zp->z_mode = 0;
+	zp->z_sync_cnt = 0;
+	zp->z_is_zvol = B_FALSE;
+	zp->z_is_mapped = B_FALSE;
+	zp->z_is_ctldir = B_TRUE;	/* tested by zfsctl_is_node() */
+	zp->z_is_sa = B_FALSE;
+	ip->i_ino = id;
+	ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
+	ip->i_uid = 0;
+	ip->i_gid = 0;
+	ip->i_blkbits = SPA_MINBLOCKSHIFT;
+	ip->i_atime = now;
+	ip->i_mtime = now;
+	ip->i_ctime = now;
+	ip->i_fop = fops;
+	ip->i_op = ops;
+	/* Non-zero is treated as failure, e.g. a concurrent allocation. */
+	if (insert_inode_locked(ip)) {
+		unlock_new_inode(ip);
+		iput(ip);
+		return (NULL);
+	}
+	/* Track the znode on the list of all znodes of this super block. */
+	mutex_enter(&zsb->z_znodes_lock);
+	list_insert_tail(&zsb->z_all_znodes, zp);
+	membar_producer();
+	mutex_exit(&zsb->z_znodes_lock);
+
+	unlock_new_inode(ip);
+
+	return (ip);
+}
+
+/*
+ * Return the inode for 'id', allocating it when it is not yet cached.
+ */
+static struct inode *
+zfsctl_inode_lookup(zfs_sb_t *zsb, unsigned long id,
+    const struct file_operations *fops, const struct inode_operations *ops)
+{
+	struct inode *found;
+
+	do {
+		found = ilookup(zsb->z_sb, id);
+		/*
+		 * May fail due to concurrent zfsctl_inode_alloc(), retry.
+		 */
+		if (found == NULL)
+			found = zfsctl_inode_alloc(zsb, id, fops, ops);
+	} while (found == NULL);
+
+	return (found);
+}
+
+/*
+ * Release zfsctl specific inode state; no such state exists at present.
+ */
+void
+zfsctl_inode_destroy(struct inode *ip)
+{
+	/* Intentionally empty. */
+}
+
+/* Eviction hook: only snapshot mount point inodes need extra cleanup. */
+void
+zfsctl_inode_inactive(struct inode *ip)
+{
+	if (!zfsctl_is_snapdir(ip))
+		return;
+
+	zfsctl_snapdir_inactive(ip);
+}
+
+/*
+ * Create the '.zfs' directory inode and cache it in zsb->z_ctldir.  The
+ * reference obtained here is held until zfsctl_destroy() drops it at
+ * unmount time.  All other entities under '.zfs' are created dynamically
+ * as needed.  Returns ENOENT when the inode could not be allocated.
+ */
+int
+zfsctl_create(zfs_sb_t *zsb)
+{
+	struct inode *root;
+
+	ASSERT(zsb->z_ctldir == NULL);
+
+	root = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT,
+	    &zpl_fops_root, &zpl_ops_root);
+	zsb->z_ctldir = root;
+
+	return ((root == NULL) ? ENOENT : 0);
+}
+
+/* Destroy the '.zfs' directory; only called when unmounting. */
+void
+zfsctl_destroy(zfs_sb_t *zsb)
+{
+	struct inode *root = zsb->z_ctldir;
+
+	iput(root);
+	zsb->z_ctldir = NULL;
+}
+
+/* Return the '.zfs' inode for the root znode 'zp' after igrab()ing it. */
+struct inode *
+zfsctl_root(znode_t *zp)
+{
+	struct inode *ctldir;
+
+	ASSERT(zfs_has_ctldir(zp));
+	ctldir = ZTOZSB(zp)->z_ctldir;
+	igrab(ctldir);
+	return (ctldir);
+}
+
+/* Encode the short file id (object number, generation 0) of a .zfs inode. */
+/*ARGSUSED*/
+int
+zfsctl_fid(struct inode *ip, fid_t *fidp)
+{
+	zfs_sb_t	*zsb = ITOZSB(ip);
+	uint64_t	objnum = ITOZ(ip)->z_id;
+	zfid_short_t	*sfid = (zfid_short_t *)fidp;
+	int		error = 0;
+	int		n;
+
+	ZFS_ENTER(zsb);
+
+	if (fidp->fid_len < SHORT_FID_LEN) {
+		/* Report the required length back to the caller. */
+		fidp->fid_len = SHORT_FID_LEN;
+		error = ENOSPC;
+	} else {
+		sfid->zf_len = SHORT_FID_LEN;
+
+		/* Store the object number least significant byte first. */
+		for (n = 0; n < sizeof (sfid->zf_object); n++)
+			sfid->zf_object[n] = (uint8_t)(objnum >> (8 * n));
+
+		/* .zfs znodes always have a generation number of 0 */
+		for (n = 0; n < sizeof (sfid->zf_gen); n++)
+			sfid->zf_gen[n] = 0;
+	}
+
+	ZFS_EXIT(zsb);
+	return (error);
+}
+
+static int
+zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname)
+{
+	/* Build "<dataset>@<name>" in zname, a buffer of 'len' bytes. */
+	if (snapshot_namecheck(name, NULL, NULL) != 0)
+		return (EILSEQ);
+
+	dmu_objset_name(ITOZSB(ip)->z_os, zname);
+	/* Dataset name, '@', snapshot name and the NUL must all fit. */
+	if ((strlen(zname) + 1 + strlen(name)) >= len)
+		return (ENAMETOOLONG);
+
+	(void) strcat(zname, "@");
+	(void) strcat(zname, name);
+
+	return (0);
+}
+
+/* Copy the full path of 'path' into zpath, a buffer of 'len' bytes. */
+static int
+zfsctl_snapshot_zpath(struct path *path, int len, char *zpath)
+{
+	char *scratch, *start;
+	int nbytes, error = 0;
+
+	scratch = kmem_alloc(len, KM_SLEEP);
+
+	/* The length math relies on the string ending at the buffer end. */
+	start = d_path(path, scratch, len);
+	if (IS_ERR(start)) {
+		error = -PTR_ERR(start);
+	} else {
+		nbytes = scratch + len - 1 - start;
+		if (nbytes > len) {
+			error = EFAULT;
+		} else {
+			memcpy(zpath, start, nbytes);
+			zpath[nbytes] = '\0';
+		}
+	}
+
+	kmem_free(scratch, len);
+
+	return (error);
+}
+
+/*
+ * Lookup entry point for the '.zfs' directory.  Only "..", the snapshot
+ * directory and the shares directory can be resolved here; every other
+ * name results in ENOENT.
+ */
+/* ARGSUSED */
+int
+zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
+    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
+{
+	zfs_sb_t *zsb = ITOZSB(dip);
+	struct inode *found = NULL;
+
+	ZFS_ENTER(zsb);
+
+	/* ".." is the root inode of this super block. */
+	if (strcmp(name, "..") == 0)
+		found = dip->i_sb->s_root->d_inode;
+	else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0)
+		found = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR,
+		    &zpl_fops_snapdir, &zpl_ops_snapdir);
+	else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0)
+		found = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES,
+		    &zpl_fops_shares, &zpl_ops_shares);
+
+	/* Unknown names leave 'found' NULL. */
+	*ipp = found;
+
+	ZFS_EXIT(zsb);
+
+	return ((found == NULL) ? ENOENT : 0);
+}
+
+/*
+ * Lookup entry point for the 'snapshot' directory.  The snapshot name is
+ * translated to its objset id and the matching pseudo directory inode is
+ * looked up or created.  With HAVE_AUTOMOUNT it is flagged S_AUTOMOUNT.
+ */
+/* ARGSUSED */
+int
+zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
+    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
+{
+	zfs_sb_t *zsb = ITOZSB(dip);
+	uint64_t snapid;
+	int error;
+
+	ZFS_ENTER(zsb);
+
+	error = dmu_snapshot_id(zsb->z_os, name, &snapid);
+	if (error)
+		goto out;
+
+	/* Each snapshot directory gets an inode number derived from its id. */
+	*ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - snapid,
+	    &simple_dir_operations, &simple_dir_inode_operations);
+	if (*ipp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+#ifdef HAVE_AUTOMOUNT
+	(*ipp)->i_flags |= S_AUTOMOUNT;
+#endif /* HAVE_AUTOMOUNT */
+out:
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+static void
+zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name)
+{
+	avl_index_t where;
+	/* Caller holds z_ctldir_lock; 'sep' is currently in the tree. */
+	ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock));
+	ASSERT(sep != NULL);
+
+	/*
+	 * Keyed by se_name: remove, rename in place, then insert again.
+	 */
+	avl_remove(&zsb->z_ctldir_snaps, sep);
+	(void) strcpy(sep->se_name, name);
+	VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL);
+	avl_insert(&zsb->z_ctldir_snaps, sep, where);
+}
+
+/*
+ * Renaming a directory under '.zfs/snapshot' will automatically trigger
+ * a rename of the snapshot to the new given name.  The rename is confined
+ * to the '.zfs/snapshot' directory; snapshots cannot be moved elsewhere.
+ */
+/*ARGSUSED*/
+int
+zfsctl_snapdir_rename(struct inode *sdip, char *sname,
+    struct inode *tdip, char *tname, cred_t *cr, int flags)
+{
+	zfs_sb_t *zsb = ITOZSB(sdip);
+	zfs_snapentry_t search, *sep;
+	avl_index_t where;
+	char *to, *from, *real;
+	int error;
+
+	ZFS_ENTER(zsb);
+
+	to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+	from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+	real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+	/* Case insensitive: continue with the name the lookup returned. */
+	if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
+		error = dmu_snapshot_realname(zsb->z_os, sname, real,
+		    MAXNAMELEN, NULL);
+		if (error == 0) {
+			sname = real;
+		} else if (error != ENOTSUP) {
+			goto out;
+		}
+	}
+	/* Build both full names, then check the rename permission. */
+	error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
+	if (!error)
+		error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
+	if (!error)
+		error = zfs_secpolicy_rename_perms(from, to, cr);
+	if (error)
+		goto out;
+
+	/*
+	 * Cannot move snapshots out of the snapdir.
+	 */
+	if (sdip != tdip) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * No-op when names are identical.
+	 */
+	if (strcmp(sname, tname) == 0) {
+		error = 0;
+		goto out;
+	}
+
+	mutex_enter(&zsb->z_ctldir_lock);
+
+	error = dmu_objset_rename(from, to, B_FALSE);
+	if (error)
+		goto out_unlock;
+	/* NOTE(review): se_name is set to 'dataset@snap', can sname match? */
+	search.se_name = (char *)sname;
+	sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
+	if (sep)
+		zfsctl_rename_snap(zsb, sep, tname);
+
+out_unlock:
+	mutex_exit(&zsb->z_ctldir_lock);
+out:
+	kmem_free(from, MAXNAMELEN);
+	kmem_free(to, MAXNAMELEN);
+	kmem_free(real, MAXNAMELEN);
+
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+/*
+ * Removing a directory under '.zfs/snapshot' will automatically trigger
+ * the removal of the snapshot with the given name.
+ */
+/* ARGSUSED */
+int
+zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
+{
+	zfs_sb_t *zsb = ITOZSB(dip);
+	char *full, *canonical;
+	int error = 0;
+
+	ZFS_ENTER(zsb);
+
+	full = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+	canonical = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+
+	/* Case insensitive: continue with the name the lookup returned. */
+	if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
+		error = dmu_snapshot_realname(zsb->z_os, name, canonical,
+		    MAXNAMELEN, NULL);
+		if (error == 0)
+			name = canonical;
+		else if (error == ENOTSUP)
+			error = 0;
+	}
+
+	if (error == 0)
+		error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, full);
+	if (error == 0)
+		error = zfs_secpolicy_destroy_perms(full, cr);
+	if (error == 0) {
+		error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE);
+		if ((error == 0) || (error == ENOENT))
+			error = dmu_objset_destroy(full, B_FALSE);
+	}
+
+	kmem_free(full, MAXNAMELEN);
+	kmem_free(canonical, MAXNAMELEN);
+
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+/*
+ * Creating a directory under '.zfs/snapshot' will automatically trigger
+ * the creation of a new snapshot with the given name.  On success '*ipp'
+ * is set by zfsctl_snapdir_lookup() to the inode of the new snapshot
+ * directory.  Returns EILSEQ for an invalid snapshot name, otherwise the
+ * error of the permission check, snapshot creation or lookup.
+ */
+/* ARGSUSED */
+int
+zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
+    struct inode **ipp, cred_t *cr, int flags)
+{
+	zfs_sb_t *zsb = ITOZSB(dip);
+	char *owner;
+	int error;
+
+	owner = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+
+	/* Reject names which are not valid snapshot names. */
+	if (snapshot_namecheck(dirname, NULL, NULL) != 0) {
+		error = EILSEQ;
+	} else {
+		/* The caller needs snapshot permission on the dataset. */
+		dmu_objset_name(zsb->z_os, owner);
+		error = zfs_secpolicy_snapshot_perms(owner, cr);
+	}
+
+	/* Take the snapshot, then return the inode of its new directory. */
+	if (error == 0)
+		error = dmu_objset_snapshot(owner, dirname,
+		    NULL, NULL, B_FALSE, B_FALSE, -1);
+
+	if (error == 0)
+		error = zfsctl_snapdir_lookup(dip, dirname, ipp,
+		    0, cr, NULL, NULL);
+
+	kmem_free(owner, MAXNAMELEN);
+
+	return (error);
+}
+
+/*
+ * When a .zfs/snapshot/<snapshot> inode is evicted its entry must be removed
+ * from the snapshot list.  Normally the auto unmount does this, but after a
+ * manual snapshot unmount this is the only notification we receive.
+ */
+void
+zfsctl_snapdir_inactive(struct inode *ip)
+{
+	zfs_sb_t *zsb = ITOZSB(ip);
+	zfs_snapentry_t *sep;
+
+	mutex_enter(&zsb->z_ctldir_lock);
+	for (sep = avl_first(&zsb->z_ctldir_snaps); sep != NULL;
+	    sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep)) {
+		if (sep->se_inode == ip) {
+			avl_remove(&zsb->z_ctldir_snaps, sep);
+			break;
+		}
+	}
+	mutex_exit(&zsb->z_ctldir_lock);
+	/*
+	 * Cancel outside z_ctldir_lock: the expire work takes that lock in
+	 * zfsctl_unmount_snapshot(), so waiting with it held may deadlock.
+	 */
+	if (sep != NULL) {
+		cancel_delayed_work_sync(&sep->se_work);
+		zfsctl_sep_free(sep);
+	}
+}
+
+/*
+ * Attempt to unmount a snapshot by making a call to user space.  This is
+ * best effort only; when it fails, perhaps because the snapshot is in use,
+ * it fails harmlessly.  The path is single quoted so that whitespace and
+ * shell metacharacters in the mount point are not interpreted by /bin/sh.
+ */
+#define SET_UNMOUNT_CMD \
+	"exec 0</dev/null " \
+	"     1>/dev/null " \
+	"     2>/dev/null; " \
+	"umount -t zfs -n %s'%s'"
+
+static int
+__zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
+{
+	char *argv[] = { "/bin/sh", "-c", NULL, NULL };
+	char *envp[] = { NULL };
+	int error;
+
+	argv[2] = kmem_asprintf(SET_UNMOUNT_CMD,
+	    flags & MNT_FORCE ? "-f " : "", sep->se_path);
+	error = call_usermodehelper(argv[0], argv, envp, 1);
+	strfree(argv[2]);
+
+	/*
+	 * The umount system utility will return 256 on error.  We must
+	 * assume this error is because the file system is busy so it is
+	 * converted to the more sensible EBUSY.
+	 */
+	if (error)
+		error = EBUSY;
+
+	/*
+	 * This was the result of a manual unmount, cancel the delayed work
+	 * to prevent zfsctl_expire_snapshot() from attempting an unmount.
+	 */
+	if ((error == 0) && !(flags & MNT_EXPIRE))
+		cancel_delayed_work(&sep->se_work);
+
+	return (error);
+}
+
+int
+zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags)
+{
+	zfs_snapentry_t key, *entry;
+	int error;
+
+	/* Only se_name is consulted when searching the tree. */
+	key.se_name = name;
+
+	mutex_enter(&zsb->z_ctldir_lock);
+	entry = avl_find(&zsb->z_ctldir_snaps, &key, NULL);
+	if (entry == NULL) {
+		error = ENOENT;
+	} else {
+		avl_remove(&zsb->z_ctldir_snaps, entry);
+		error = __zfsctl_unmount_snapshot(entry, flags);
+		if (error == EBUSY)
+			avl_add(&zsb->z_ctldir_snaps, entry);
+		else
+			zfsctl_sep_free(entry);
+	}
+
+	mutex_exit(&zsb->z_ctldir_lock);
+	ASSERT3S(error, >=, 0);
+
+	return (error);
+}
+
+/*
+ * Traverse all mounted snapshots and attempt to unmount them.  This
+ * is best effort, on failure EEXIST is returned and count will be set
+ * to the number of snapshots which could not be unmounted.
+ */
+int
+zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
+{
+	zfs_snapentry_t *cur, *following;
+
+	*count = 0;
+
+	ASSERT(zsb->z_ctldir != NULL);
+	mutex_enter(&zsb->z_ctldir_lock);
+
+	for (cur = avl_first(&zsb->z_ctldir_snaps); cur != NULL;
+	    cur = following) {
+		/* Fetch the successor first, 'cur' may be freed below. */
+		following = AVL_NEXT(&zsb->z_ctldir_snaps, cur);
+
+		avl_remove(&zsb->z_ctldir_snaps, cur);
+		if (__zfsctl_unmount_snapshot(cur, flags) != EBUSY) {
+			zfsctl_sep_free(cur);
+			continue;
+		}
+
+		/* Still busy, keep tracking it and report it. */
+		avl_add(&zsb->z_ctldir_snaps, cur);
+		(*count)++;
+	}
+
+	mutex_exit(&zsb->z_ctldir_lock);
+
+	return ((*count > 0) ? EEXIST : 0);
+}
+
+#define SET_MOUNT_CMD \
+	"exec 0</dev/null " \
+	"     1>/dev/null " \
+	"     2>/dev/null; " \
+	"mount -t zfs -n '%s' '%s'"
+
+/*
+ * Mount the snapshot backing 'path' from user space and schedule its expiry.
+ */
+int
+zfsctl_mount_snapshot(struct path *path, int flags)
+{
+	struct dentry *dentry = path->dentry;
+	struct inode *ip = dentry->d_inode;
+	zfs_sb_t *zsb = ITOZSB(ip);
+	char *full_name, *full_path;
+	zfs_snapentry_t search, *sep;
+	char *argv[] = { "/bin/sh", "-c", NULL, NULL };
+	char *envp[] = { NULL };
+	int error;
+
+	ZFS_ENTER(zsb);
+
+	full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
+	full_path = kmem_zalloc(PATH_MAX, KM_SLEEP);
+
+	error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name);
+	if (error == 0)
+		error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path);
+	if (error)
+		goto error;
+
+	/*
+	 * Mount the snapshot from user space since vfs_kern_mount() is
+	 * marked GPL-only and cannot be used.  The arguments are single
+	 * quoted so the shell passes them through unmodified.  On error
+	 * the real error is logged to the console and EISDIR is returned
+	 * to safely abort the automount.  This should be very rare.
+	 */
+	argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path);
+	error = call_usermodehelper(argv[0], argv, envp, 1);
+	strfree(argv[2]);
+	if (error) {
+		printk("ZFS: Unable to automount %s at %s: %d\n",
+		    full_name, full_path, error);
+		error = EISDIR;
+		goto error;
+	}
+
+	mutex_enter(&zsb->z_ctldir_lock);
+	/*
+	 * Ensure a previous entry does not exist, if it does safely remove
+	 * it and cancel the outstanding expiration.  This can occur when a
+	 * snapshot is manually unmounted and then an automount is triggered.
+	 * NOTE(review): the expire work also takes z_ctldir_lock, confirm
+	 * that waiting for it here with the lock held cannot deadlock.
+	 */
+	search.se_name = full_name;
+	sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
+	if (sep) {
+		avl_remove(&zsb->z_ctldir_snaps, sep);
+		cancel_delayed_work_sync(&sep->se_work);
+		zfsctl_sep_free(sep);
+	}
+
+	sep = zfsctl_sep_alloc();
+	sep->se_name = full_name;
+	sep->se_path = full_path;
+	sep->se_inode = ip;
+	avl_add(&zsb->z_ctldir_snaps, sep);
+	spl_init_delayed_work(&sep->se_work, zfsctl_expire_snapshot, sep);
+	schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ);
+
+	mutex_exit(&zsb->z_ctldir_lock);
+error:
+	if (error) {
+		kmem_free(full_name, MAXNAMELEN);
+		kmem_free(full_path, PATH_MAX);
+	}
+
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+/*
+ * sget() test callback, true when the super block has the wanted objset id.
+ */
+static int
+zfsctl_test_super(struct super_block *sb, void *objsetidp)
+{
+	const uint64_t *wanted = objsetidp;
+	zfs_sb_t *zsb = sb->s_fs_info;
+
+	return (dmu_objset_id(zsb->z_os) == *wanted);
+}
+
+/*
+ * Prevent a new super block from being allocated if an existing one
+ * could not be located.  We only want to perform a lookup operation.
+ */
+static int
+zfsctl_set_super(struct super_block *sb, void *objsetidp)
+{
+	return (-EEXIST);
+}
+
+int
+zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
+{
+	zfs_sb_t *zsb = sb->s_fs_info;
+	struct super_block *sbp;
+	zfs_snapentry_t *sep;
+	uint64_t id;
+	int error;
+
+	ASSERT(zsb->z_ctldir != NULL);
+	/* Map 'objsetid' to the zfs_sb_t of the matching mounted snapshot. */
+	mutex_enter(&zsb->z_ctldir_lock);
+	/*
+	 * Verify that the snapshot is mounted.  NOTE(review): se_name is set
+	 * to 'dataset@snap' at mount time, confirm dmu_snapshot_id() takes it.
+	 */
+	sep = avl_first(&zsb->z_ctldir_snaps);
+	while (sep != NULL) {
+		error = dmu_snapshot_id(zsb->z_os, sep->se_name, &id);
+		if (error)
+			goto out;
+
+		if (id == objsetid)
+			break;
+
+		sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
+	}
+
+	if (sep != NULL) {
+		/*
+		 * Lookup the mounted root rather than the covered mount
+		 * point.  This may fail if the snapshot has just been
+		 * unmounted by an unrelated user space process.  This
+		 * race cannot occur to an expired mount point because
+		 * we hold the zsb->z_ctldir_lock to prevent the race.
+		 */
+		sbp = sget(&zpl_fs_type, zfsctl_test_super,
+		    zfsctl_set_super, &id);
+		if (IS_ERR(sbp)) {
+			error = -PTR_ERR(sbp);
+		} else {
+			*zsbp = sbp->s_fs_info;
+			deactivate_super(sbp);
+		}
+	} else {
+		error = EINVAL;
+	}
+out:
+	mutex_exit(&zsb->z_ctldir_lock);
+	ASSERT3S(error, >=, 0);
+
+	return (error);
+}
+
+/*
+ * Look 'name' up in the shares directory object and return it in '*ipp'.
+ * Returns ENOTSUP when the filesystem has no shares directory.
+ */
+/* ARGSUSED */
+int
+zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
+    int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
+{
+	zfs_sb_t *zsb = ITOZSB(dip);
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zsb);
+
+	if (zsb->z_shares_dir == 0) {
+		ZFS_EXIT(zsb);
+		return (ENOTSUP);
+	}
+
+	error = zfs_zget(zsb, zsb->z_shares_dir, &dzp);
+	if (error == 0) {
+		error = zfs_lookup(ZTOI(dzp), name, ipp, 0, cr, NULL, NULL);
+		iput(ZTOI(dzp));
+	}
+
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+
+/*
+ * Initialize the various pieces we'll need to create and manipulate .zfs
+ * directories.  Currently nothing needs to be set up, so this is a no-op.
+ */
+void
+zfsctl_init(void)
+{
+}
+
+/*
+ * Cleanup the various pieces we needed for .zfs directories.  Currently
+ * there is nothing to release, so this is a no-op.
+ */
+void
+zfsctl_fini(void)
+{
+}
+
+module_param(zfs_expire_snapshot, int, 0644);
+MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index 8f1a0c2cc..6cd9c8508 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -50,6 +50,7 @@
 #include <sys/zap.h>
 #include <sys/dmu.h>
 #include <sys/atomic.h>
+#include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/sa.h>
 #include <sys/zfs_sa.h>
@@ -415,28 +416,24 @@ zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
 
 		/*
 		 * If we are a snapshot mounted under .zfs, return
-		 * the vp for the snapshot directory.
+		 * the inode pointer for the snapshot directory.
 		 */
 		if ((error = sa_lookup(dzp->z_sa_hdl,
 		    SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0)
 			return (error);
-#ifdef HAVE_SNAPSHOT
+
 		if (parent == dzp->z_id && zsb->z_parent != zsb) {
 			error = zfsctl_root_lookup(zsb->z_parent->z_ctldir,
-			    "snapshot", ipp, NULL, 0, NULL, kcred,
-			    NULL, NULL, NULL);
+			    "snapshot", ipp, 0, kcred, NULL, NULL);
 			return (error);
 		}
-#endif /* HAVE_SNAPSHOT */
 		rw_enter(&dzp->z_parent_lock, RW_READER);
 		error = zfs_zget(zsb, parent, &zp);
 		if (error == 0)
 			*ipp = ZTOI(zp);
 		rw_exit(&dzp->z_parent_lock);
-#ifdef HAVE_SNAPSHOT
 	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
 		*ipp = zfsctl_root(dzp);
-#endif /* HAVE_SNAPSHOT */
 	} else {
 		int zf;
 
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 532f17aa1..d2ad1af71 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -58,6 +58,7 @@
 #include <sys/mount.h>
 #include <sys/sdt.h>
 #include <sys/fs/zfs.h>
+#include <sys/zfs_ctldir.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
@@ -2690,33 +2691,6 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc)
 	return (error);
 }
 
-#ifdef HAVE_SNAPSHOT
-/*
- * Search the vfs list for a specified resource.  Returns a pointer to it
- * or NULL if no suitable entry is found. The caller of this routine
- * is responsible for releasing the returned vfs pointer.
- */
-static vfs_t *
-zfs_get_vfs(const char *resource)
-{
-	struct vfs *vfsp;
-	struct vfs *vfs_found = NULL;
-
-	vfs_list_read_lock();
-	vfsp = rootvfs;
-	do {
-		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
-			mntget(vfsp);
-			vfs_found = vfsp;
-			break;
-		}
-		vfsp = vfsp->vfs_next;
-	} while (vfsp != rootvfs);
-	vfs_list_unlock();
-	return (vfs_found);
-}
-#endif /* HAVE_SNAPSHOT */
-
 /* ARGSUSED */
 static void
 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
@@ -3067,38 +3041,52 @@ out:
 	return (error);
 }
 
+/*
+ * inputs:
+ * name		dataset name, or when 'arg == NULL' the full snapshot name
+ * arg		short snapshot name (i.e. part after the '@')
+ */
 int
 zfs_unmount_snap(const char *name, void *arg)
 {
-#ifdef HAVE_SNAPSHOT
-	vfs_t *vfsp = NULL;
+	zfs_sb_t *zsb = NULL;
+	char *dsname;
+	char *snapname;
+	char *fullname;
+	char *ptr;
+	int error;
 
 	if (arg) {
-		char *snapname = arg;
-		char *fullname = kmem_asprintf("%s@%s", name, snapname);
-		vfsp = zfs_get_vfs(fullname);
-		strfree(fullname);
-	} else if (strchr(name, '@')) {
-		vfsp = zfs_get_vfs(name);
+		dsname = strdup(name);
+		snapname = strdup(arg);
+	} else {
+		ptr = strchr(name, '@');
+		if (ptr) {
+			dsname = strdup(name);
+			dsname[ptr - name] = '\0';
+			snapname = strdup(ptr + 1);
+		} else {
+			return (0);
+		}
 	}
 
-	if (vfsp) {
-		/*
-		 * Always force the unmount for snapshots.
-		 */
-		int flag = MS_FORCE;
-		int err;
+	fullname = kmem_asprintf("%s@%s", dsname, snapname);
 
-		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
-			mntput(vfsp);
-			return (err);
-		}
-		mntput(vfsp);
-		if ((err = dounmount(vfsp, flag, kcred)) != 0)
-			return (err);
+	error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE);
+	if (error == 0) {
+		error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE);
+		zfs_sb_rele(zsb, FTAG);
+
+		/* Allow ENOENT for consistency with upstream */
+		if (error == ENOENT)
+			error = 0;
 	}
-#endif /* HAVE_SNAPSHOT */
-	return (0);
+
+	strfree(dsname);
+	strfree(snapname);
+	strfree(fullname);
+
+	return (error);
 }
 
 /*
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index fb319a547..8f1c713c0 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -56,6 +56,7 @@
 #include <sys/modctl.h>
 #include <sys/refstr.h>
 #include <sys/zfs_ioctl.h>
+#include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/bootconf.h>
 #include <sys/sunddi.h>
@@ -710,6 +711,10 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
+	avl_create(&zsb->z_ctldir_snaps, snapentry_compare,
+	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
+	mutex_init(&zsb->z_ctldir_lock, NULL, MUTEX_DEFAULT, NULL);
+
 	*zsbp = zsb;
 	return (0);
 
@@ -819,6 +824,8 @@ zfs_sb_free(zfs_sb_t *zsb)
 	rw_destroy(&zsb->z_fuid_lock);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_destroy(&zsb->z_hold_mtx[i]);
+	mutex_destroy(&zsb->z_ctldir_lock);
+	avl_destroy(&zsb->z_ctldir_snaps);
 	kmem_free(zsb, sizeof (zfs_sb_t));
 }
 EXPORT_SYMBOL(zfs_sb_free);
@@ -1183,9 +1190,6 @@ zfs_domount(struct super_block *sb, void *data, int silent)
 		mutex_exit(&zsb->z_os->os_user_ptr_lock);
 	} else {
 		error = zfs_sb_setup(zsb, B_TRUE);
-#ifdef HAVE_SNAPSHOT
-		(void) zfs_snap_create(zsb);
-#endif /* HAVE_SNAPSHOT */
 	}
 
 	/* Allocate a root inode for the filesystem. */
@@ -1202,6 +1206,9 @@ zfs_domount(struct super_block *sb, void *data, int silent)
 		error = ENOMEM;
 		goto out;
 	}
+
+	if (!zsb->z_issnap)
+		zfsctl_create(zsb);
 out:
 	if (error) {
 		dmu_objset_disown(zsb->z_os, zsb);
@@ -1212,6 +1219,27 @@ out:
 }
 EXPORT_SYMBOL(zfs_domount);
 
+/*
+ * Called when an unmount is requested and certain sanity checks have
+ * already passed; no dentries or inodes have been reclaimed yet.  Drop the
+ * extra reference on the .zfs control directory so everything can be
+ * reclaimed.  All snapshots must already be unmounted at this point.
+ * The caller, zpl_kill_sb(), may run with s_fs_info unset, so check for NULL.
+ */
+void
+zfs_preumount(struct super_block *sb)
+{
+	zfs_sb_t *zsb = sb->s_fs_info;
+
+	if (zsb != NULL && zsb->z_ctldir != NULL)
+		zfsctl_destroy(zsb);
+}
+EXPORT_SYMBOL(zfs_preumount);
+
+/*
+ * Called once all other unmount related tear down has occurred.
+ * It is our responsibility to release any remaining infrastructure.
+ */
 /*ARGSUSED*/
 int
 zfs_umount(struct super_block *sb)
@@ -1288,11 +1316,10 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 
 		ZFS_EXIT(zsb);
 
-#ifdef HAVE_SNAPSHOT
-		err = zfsctl_lookup_objset(vfsp, objsetid, &zsb);
+		err = zfsctl_lookup_objset(sb, objsetid, &zsb);
 		if (err)
 			return (EINVAL);
-#endif /* HAVE_SNAPSHOT */
+
 		ZFS_ENTER(zsb);
 	}
 
@@ -1309,22 +1336,20 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		return (EINVAL);
 	}
 
-#ifdef HAVE_SNAPSHOT
 	/* A zero fid_gen means we are in the .zfs control directories */
 	if (fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
 		*ipp = zsb->z_ctldir;
 		ASSERT(*ipp != NULL);
 		if (object == ZFSCTL_INO_SNAPDIR) {
-			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, NULL,
-			    0, NULL, NULL, NULL, NULL, NULL) == 0);
+			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
+			    0, kcred, NULL, NULL) == 0);
 		} else {
 			igrab(*ipp);
 		}
 		ZFS_EXIT(zsb);
 		return (0);
 	}
-#endif /* HAVE_SNAPSHOT */
 
 	gen_mask = -1ULL >> (64 - 8 * i);
 
@@ -1550,6 +1575,7 @@ EXPORT_SYMBOL(zfs_get_zplprop);
 void
 zfs_init(void)
 {
+	zfsctl_init();
 	zfs_znode_init();
 	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
 	register_filesystem(&zpl_fs_type);
@@ -1561,4 +1587,5 @@ zfs_fini(void)
 {
 	unregister_filesystem(&zpl_fs_type);
 	zfs_znode_fini();
+	zfsctl_fini();
 }
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 74b96b8d7..2da5fec86 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -63,6 +63,7 @@
 #include <sys/sid.h>
 #include <sys/mode.h>
 #include "fs/fs_subr.h"
+#include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_vnops.h>
@@ -2045,7 +2046,7 @@ zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir,
 			dmu_prefetch(os, objnum, 0, 0);
 		}
 
-		if (*pos >= 2) {
+		if (*pos > 2 || (*pos == 2 && !zfs_show_ctldir(zp))) {
 			zap_cursor_advance(&zc);
 			*pos = zap_cursor_serialize(&zc);
 		} else {
@@ -3876,9 +3877,10 @@ zfs_inactive(struct inode *ip)
 	zfs_sb_t *zsb = ITOZSB(ip);
 	int error;
 
-#ifdef HAVE_SNAPSHOT
-	/* Early return for snapshot inode? */
-#endif /* HAVE_SNAPSHOT */
+	if (zfsctl_is_node(ip)) {
+		zfsctl_inode_inactive(ip);
+		return;
+	}
 
 	rw_enter(&zsb->z_teardown_inactive_lock, RW_READER);
 	if (zp->z_sa_hdl == NULL) {
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 709ae74f8..3a6872f3e 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -52,6 +52,7 @@
 #include <sys/zfs_rlock.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
 #include <sys/dnode.h>
 #include <sys/fs/zfs.h>
 #include <sys/kidmap.h>
@@ -267,6 +268,9 @@ zfs_inode_destroy(struct inode *ip)
 	znode_t *zp = ITOZ(ip);
 	zfs_sb_t *zsb = ZTOZSB(zp);
 
+	if (zfsctl_is_node(ip))
+		zfsctl_inode_destroy(ip);
+
 	mutex_enter(&zsb->z_znodes_lock);
 	list_remove(&zsb->z_all_znodes, zp);
 	zsb->z_nr_znodes--;
@@ -353,6 +357,8 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
 
 	zp = ITOZ(ip);
 	ASSERT(zp->z_dirlocks == NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 	zp->z_moved = 0;
 	zp->z_sa_hdl = NULL;
 	zp->z_unlinked = 0;
@@ -362,7 +368,9 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
-	zp->z_is_zvol = 0;
+	zp->z_is_zvol = B_FALSE;
+	zp->z_is_mapped = B_FALSE;
+	zp->z_is_ctldir = B_FALSE;
 
 	zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
 
@@ -434,6 +442,10 @@ zfs_inode_update(znode_t *zp)
 	zsb = ZTOZSB(zp);
 	ip = ZTOI(zp);
 
+	/* Skip .zfs control nodes which do not exist on disk. */
+	if (zfsctl_is_node(ip))
+		return;
+
 	sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
 	sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
 	sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
diff --git a/module/zfs/zpl_ctldir.c b/module/zfs/zpl_ctldir.c
new file mode 100644
index 000000000..6c742c9e8
--- /dev/null
+++ b/module/zfs/zpl_ctldir.c
@@ -0,0 +1,519 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * LLNL-CODE-403049.
+ * Rewritten for Linux by:
+ *   Rohan Puri <[email protected]>
+ *   Brian Behlendorf <[email protected]>
+ */
+
+#include <sys/zfs_vfsops.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_ctldir.h>
+#include <sys/zpl.h>
+
+/*
+ * Open handler shared by the control directory file_operations tables.
+ * Any open requesting write access is refused with -EACCES.
+ */
+/* ARGSUSED */
+static int
+zpl_common_open(struct inode *ip, struct file *filp)
+{
+	if ((filp->f_mode & FMODE_WRITE) != 0)
+		return (-EACCES);
+	return (generic_file_open(ip, filp));
+}
+
+/*
+ * Minimal readdir which emits only "." (offset 0) and ".." (offset 1).
+ * f_pos is bumped after each entry filldir() accepts; the first non-zero
+ * filldir() result is returned, otherwise 0.
+ */
+static int
+zpl_common_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *ip = dentry->d_inode;
+	int error;
+
+	if (filp->f_pos == 0) {
+		error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR);
+		if (error)
+			return (error);
+		filp->f_pos++;
+	}
+
+	if (filp->f_pos == 1) {
+		error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR);
+		if (error)
+			return (error);
+		filp->f_pos++;
+	}
+
+	return (0);
+}
+
+/*
+ * Get root directory contents.  The '.zfs' directory has a fixed layout:
+ * "." and ".." at offsets 0 and 1, followed by ZFS_SNAPDIR_NAME at
+ * offset 2 and ZFS_SHAREDIR_NAME at offset 3.  f_pos only advances past
+ * entries that filldir() accepted.
+ */
+static int
+zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *ip = dentry->d_inode;
+	zfs_sb_t *zsb = ITOZSB(ip);
+	int error = 0;
+
+	ZFS_ENTER(zsb);
+
+	if (filp->f_pos == 0) {
+		error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR);
+		if (error)
+			goto out;
+		filp->f_pos++;
+	}
+
+	if (filp->f_pos == 1) {
+		error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR);
+		if (error)
+			goto out;
+		filp->f_pos++;
+	}
+
+	if (filp->f_pos == 2) {
+		error = filldir(dirent, ZFS_SNAPDIR_NAME,
+		    strlen(ZFS_SNAPDIR_NAME), 2, ZFSCTL_INO_SNAPDIR, DT_DIR);
+		if (error)
+			goto out;
+		filp->f_pos++;
+	}
+
+	if (filp->f_pos == 3) {
+		error = filldir(dirent, ZFS_SHAREDIR_NAME,
+		    strlen(ZFS_SHAREDIR_NAME), 3, ZFSCTL_INO_SHARES, DT_DIR);
+		if (error)
+			goto out;
+		filp->f_pos++;
+	}
+out:
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+/*
+ * Get root directory attributes: the simple_getattr() defaults with the
+ * access time replaced by the current time.
+ */
+/* ARGSUSED */
+static int
+zpl_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
+    struct kstat *stat)
+{
+	int rc = simple_getattr(mnt, dentry, stat);
+
+	/* atime is overwritten whether or not simple_getattr() succeeded. */
+	stat->atime = CURRENT_TIME;
+	return (rc);
+}
+
+static struct dentry *
+zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
+{
+	cred_t *cr = CRED();
+	struct inode *ip;
+	int error;
+
+	/* Hold the caller's credentials for the duration of the lookup. */
+	crhold(cr);
+	error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL);
+	ASSERT3S(error, <=, 0);
+	crfree(cr);
+
+	/* ENOENT is not fatal: splice a NULL inode in for the missing name. */
+	if (error == -ENOENT)
+		return d_splice_alias(NULL, dentry);
+	if (error)
+		return ERR_PTR(error);
+
+	return d_splice_alias(ip, dentry);
+}
+
+/*
+ * The '.zfs' control directory file and inode operations.
+ */
+const struct file_operations zpl_fops_root = {
+	.open		= zpl_common_open,	/* refuses write opens */
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= zpl_root_readdir,	/* fixed four-entry listing */
+};
+
+const struct inode_operations zpl_ops_root = {
+	.lookup		= zpl_root_lookup,
+	.getattr	= zpl_root_getattr,	/* atime is always "now" */
+};
+
+static struct dentry *
+zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
+    struct nameidata *nd)
+{
+	cred_t *cr = CRED();
+	struct inode *ip;
+	int error;
+
+	crhold(cr);
+	error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip,
+	    0, cr, NULL, NULL);
+	ASSERT3S(error, <=, 0);
+	crfree(cr);
+
+	/* ENOENT is not fatal: splice a NULL inode in for the missing name. */
+	if (error == -ENOENT)
+		return d_splice_alias(NULL, dentry);
+	/* Any other failure is handed back to the VFS as an error pointer. */
+	if (error)
+		return ERR_PTR(error);
+
+	/*
+	 * Snapshots are only mounted automatically on 2.6.37 and newer
+	 * kernels.  Older kernels abused the ops->follow_link() callback
+	 * to trigger the mount and then grafted the resulting vfsmount in
+	 * to the name space by hand.  Compatibility code for that scheme
+	 * might be possible but would require considerable care, so none
+	 * is provided here.
+	 */
+#ifdef HAVE_AUTOMOUNT
+	dentry->d_op = &zpl_dops_snapdirs;
+#endif /* HAVE_AUTOMOUNT */
+
+	return d_splice_alias(ip, dentry);
+}
+
+/* ARGSUSED */
+static int
+zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *dip = dentry->d_inode;
+	zfs_sb_t *zsb = ITOZSB(dip);
+	char snapname[MAXNAMELEN];
+	uint64_t id, cookie;
+	boolean_t case_conflict;
+	int error = 0;
+
+	ZFS_ENTER(zsb);
+
+	/* The snapshot cursor starts from the position we were called with. */
+	cookie = filp->f_pos;
+	if (filp->f_pos == 0) {
+		error = filldir(dirent, ".", 1, 0, dip->i_ino, DT_DIR);
+		if (error)
+			goto out;
+		filp->f_pos++;
+	}
+
+	if (filp->f_pos == 1) {
+		error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR);
+		if (error)
+			goto out;
+		filp->f_pos++;
+	}
+
+	/* One entry per snapshot until either call below reports an error. */
+	for (;;) {
+		error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
+		    snapname, &id, &cookie, &case_conflict);
+		if (error)
+			goto out;
+
+		error = filldir(dirent, snapname, strlen(snapname),
+		    filp->f_pos, ZFSCTL_INO_SHARES - id, DT_DIR);
+		if (error)
+			goto out;
+
+		filp->f_pos = cookie;
+	}
+out:
+	ZFS_EXIT(zsb);
+
+	/* -ENOENT (presumably the end of the list) is reported as success. */
+	if (error == -ENOENT)
+		return (0);
+
+	return (error);
+}
+
+int
+zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
+    struct inode *tdip, struct dentry *tdentry)
+{
+	cred_t *cred = CRED();
+	int rc;
+
+	/* Negate the positive errno from the zfsctl layer for the VFS. */
+	crhold(cred);
+	rc = -zfsctl_snapdir_rename(sdip, dname(sdentry),
+	    tdip, dname(tdentry), cred, 0);
+	crfree(cred);
+	ASSERT3S(rc, <=, 0);
+	return (rc);
+}
+
+static int
+zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
+{
+	cred_t *cred = CRED();
+	int rc;
+
+	/* rmdir in '.zfs/snapshot' is forwarded to zfsctl_snapdir_remove(). */
+	crhold(cred);
+	rc = -zfsctl_snapdir_remove(dip, dname(dentry), cred, 0);
+	crfree(cred);
+	ASSERT3S(rc, <=, 0);
+	return (rc);
+}
+
+static int
+zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, int mode)
+{
+	cred_t *cred = CRED();
+	struct inode *ip;
+	vattr_t *vap;
+	int rc;
+
+	crhold(cred);
+	vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
+	zpl_vap_init(vap, dip, dentry, mode | S_IFDIR, cred);
+
+	rc = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cred, 0);
+	if (rc == 0) {
+#ifdef HAVE_AUTOMOUNT
+		dentry->d_op = &zpl_dops_snapdirs;
+#endif /* HAVE_AUTOMOUNT */
+		d_instantiate(dentry, ip);
+	}
+
+	/* The attribute template is scratch space; release it either way. */
+	kmem_free(vap, sizeof(vattr_t));
+	crfree(cred);
+	ASSERT3S(rc, <=, 0);
+	return (rc);
+}
+
+#ifdef HAVE_AUTOMOUNT
+static struct vfsmount *
+zpl_snapdir_automount(struct path *path)
+{
+	struct dentry *snap_dentry = path->dentry;
+	int rc;
+
+	/*
+	 * The user space mount helper performs its own lookup of this
+	 * directory, which would re-enter zpl_snapdir_automount() over and
+	 * over.  Clear DCACHE_NEED_AUTOMOUNT for the duration of the mount
+	 * request to break that cycle, then restore it once the request
+	 * has returned.
+	 */
+	snap_dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
+	rc = -zfsctl_mount_snapshot(path, 0);
+	snap_dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+	if (rc != 0)
+		return ERR_PTR(rc);
+
+	/*
+	 * Report a mount collision (NULL) rather than the new vfsmount.
+	 * The user space mount already went through do_add_mount(), which
+	 * linked the vfsmount into the name space; handing it back here
+	 * would have it added a second time and corrupt the vfsmount
+	 * list.
+	 */
+	return (NULL);
+}
+#endif /* HAVE_AUTOMOUNT */
+
+/*
+ * Get snapshot directory attributes.  Link count and size are both the
+ * number of entries in z_ctldir_snaps plus two.
+ */
+/* ARGSUSED */
+static int
+zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry,
+    struct kstat *stat)
+{
+	zfs_sb_t *zsb = ITOZSB(dentry->d_inode);
+	int rc;
+
+	ZFS_ENTER(zsb);
+	rc = simple_getattr(mnt, dentry, stat);
+	stat->nlink = stat->size = avl_numnodes(&zsb->z_ctldir_snaps) + 2;
+	stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os);
+	stat->atime = CURRENT_TIME;
+	ZFS_EXIT(zsb);
+	return (rc);
+}
+
+/*
+ * The '.zfs/snapshot' directory file operations.  These mainly control
+ * generating the list of available snapshots when doing an 'ls' in the
+ * directory.  See zpl_snapdir_readdir().
+ */
+const struct file_operations zpl_fops_snapdir = {
+	.open		= zpl_common_open,	/* refuses write opens */
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= zpl_snapdir_readdir,
+};
+
+/*
+ * The '.zfs/snapshot' directory inode operations.  These mainly control
+ * creating an inode for a snapshot directory and initializing the needed
+ * infrastructure to automount the snapshot.  See zpl_snapdir_lookup().
+ */
+const struct inode_operations zpl_ops_snapdir = {
+	.lookup		= zpl_snapdir_lookup,
+	.getattr	= zpl_snapdir_getattr,
+	.rename		= zpl_snapdir_rename,	/* zfsctl_snapdir_rename() */
+	.rmdir		= zpl_snapdir_rmdir,	/* zfsctl_snapdir_remove() */
+	.mkdir		= zpl_snapdir_mkdir,	/* zfsctl_snapdir_mkdir() */
+};
+
+#ifdef HAVE_AUTOMOUNT
+const struct dentry_operations zpl_dops_snapdirs = {
+	.d_automount	= zpl_snapdir_automount, /* set on snapshot dentries */
+};
+#endif /* HAVE_AUTOMOUNT */
+
+static struct dentry *
+zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
+    struct nameidata *nd)
+{
+	cred_t *cr = CRED();
+	struct inode *ip = NULL;
+	int error;
+
+	crhold(cr);
+	error = -zfsctl_shares_lookup(dip, dname(dentry), &ip,
+	    0, cr, NULL, NULL);
+	ASSERT3S(error, <=, 0);
+	crfree(cr);
+
+	/* ENOENT is not fatal: splice a NULL inode in for the missing name. */
+	if (error == -ENOENT)
+		return d_splice_alias(NULL, dentry);
+	/* Any other failure is handed back to the VFS as an error pointer. */
+	if (error)
+		return ERR_PTR(error);
+
+	return d_splice_alias(ip, dentry);
+}
+
+/* ARGSUSED */
+static int
+zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	cred_t *cr = CRED();
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *ip = dentry->d_inode;
+	zfs_sb_t *zsb = ITOZSB(ip);
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zsb);
+
+	/* With z_shares_dir unset only "." and ".." are listed. */
+	if (zsb->z_shares_dir == 0) {
+		error = zpl_common_readdir(filp, dirent, filldir);
+		goto out;
+	}
+
+	error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
+	if (error)
+		goto out;
+
+	/* List the z_shares_dir object through the regular zfs_readdir(). */
+	crhold(cr);
+	error = -zfs_readdir(ZTOI(dzp), dirent, filldir, &filp->f_pos, cr);
+	crfree(cr);
+
+	iput(ZTOI(dzp));
+	ASSERT3S(error, <=, 0);
+out:
+	ZFS_EXIT(zsb);
+
+	return (error);
+}
+
+/* ARGSUSED */
+static int
+zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry,
+    struct kstat *stat)
+{
+	struct inode *ip = dentry->d_inode;
+	zfs_sb_t *zsb = ITOZSB(ip);
+	znode_t *dzp;
+	int error;
+
+	ZFS_ENTER(zsb);
+
+	if (zsb->z_shares_dir == 0) {
+		error = simple_getattr(mnt, dentry, stat);
+		stat->nlink = stat->size = 2;
+		stat->atime = CURRENT_TIME;
+		ZFS_EXIT(zsb);
+		return (error);
+	}
+
+	/* dzp is only valid, and only holds a reference, if zget succeeds. */
+	error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
+	if (error == 0) {
+		error = -zfs_getattr_fast(ZTOI(dzp), stat);
+		iput(ZTOI(dzp));
+	}
+	ZFS_EXIT(zsb);
+	ASSERT3S(error, <=, 0);
+	return (error);
+}
+
+/*
+ * The '.zfs/shares' directory file operations.
+ */
+const struct file_operations zpl_fops_shares = {
+	.open		= zpl_common_open,	/* refuses write opens */
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= zpl_shares_readdir,
+};
+
+/*
+ * The '.zfs/shares' directory inode operations.
+ */
+const struct inode_operations zpl_ops_shares = {
+	.lookup		= zpl_shares_lookup,
+	.getattr	= zpl_shares_getattr,
+};
diff --git a/module/zfs/zpl_export.c b/module/zfs/zpl_export.c
index 4fe998437..f82ee3088 100644
--- a/module/zfs/zpl_export.c
+++ b/module/zfs/zpl_export.c
@@ -25,6 +25,7 @@
 
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_ctldir.h>
 #include <sys/zpl.h>
 
 
@@ -42,7 +43,10 @@ zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable)
 
 	fid->fid_len = len_bytes - offsetof(fid_t, fid_data);
 
-	rc = zfs_fid(ip, fid);
+	if (zfsctl_is_node(ip))
+		rc = zfsctl_fid(ip, fid);
+	else
+		rc = zfs_fid(ip, fid);
 
 	len_bytes = offsetof(fid_t, fid_data) + fid->fid_len;
 	*max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32);
diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
index 9b5533755..d9b918b43 100644
--- a/module/zfs/zpl_inode.c
+++ b/module/zfs/zpl_inode.c
@@ -25,6 +25,7 @@
 
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_vnops.h>
+#include <sys/zfs_znode.h>
 #include <sys/vfs.h>
 #include <sys/zpl.h>
 
@@ -51,7 +52,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	return d_splice_alias(ip, dentry);
 }
 
-static void
+void
 zpl_vap_init(vattr_t *vap, struct inode *dir, struct dentry *dentry,
     mode_t mode, cred_t *cr)
 {
@@ -171,8 +172,20 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry)
 static int
 zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
+	boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap;
 	int error;
 
+	/*
+	 * Ensure MNT_SHRINKABLE is set on snapshots to ensure they are
+	 * unmounted automatically with the parent file system.  This
+	 * is done on the first getattr because it's not easy to get the
+	 * vfsmount structure at mount time.  This call path is explicitly
+	 * marked unlikely to avoid any performance impact.  FWIW, ext4
+	 * resorts to a similar trick for sysadmin convenience.
+	 */
+	if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE)))
+		mnt->mnt_flags |= MNT_SHRINKABLE;
+
 	error = -zfs_getattr_fast(dentry->d_inode, stat);
 	ASSERT3S(error, <=, 0);
 
diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c
index 0e6e9360f..98d0a0312 100644
--- a/module/zfs/zpl_super.c
+++ b/module/zfs/zpl_super.c
@@ -26,6 +26,7 @@
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_ctldir.h>
 #include <sys/zpl.h>
 
 
@@ -139,6 +140,20 @@ zpl_remount_fs(struct super_block *sb, int *flags, char *data)
 	return (error);
 }
 
+static void
+zpl_umount_begin(struct super_block *sb)
+{
+	zfs_sb_t *zsb = sb->s_fs_info;
+	int ignored;
+
+	/*
+	 * Snapshots carry MNT_SHRINKABLE so they normally go away on their
+	 * own; this is only a best-effort sweep of .zfs/snapshot/.
+	 */
+	if (zsb->z_ctldir != NULL)
+		(void) zfsctl_unmount_snapshots(zsb, MNT_FORCE, &ignored);
+}
+
 /*
  * The Linux VFS automatically handles the following flags:
  * MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, MNT_NOATIME, MNT_READONLY
@@ -199,13 +214,7 @@ zpl_get_sb(struct file_system_type *fs_type, int flags,
 static void
 zpl_kill_sb(struct super_block *sb)
 {
-#ifdef HAVE_SNAPSHOT
-	zfs_sb_t *zsb = sb->s_fs_info;
-
-	if (zsb && dmu_objset_is_snapshot(zsb->z_os))
-		zfs_snap_destroy(zsb);
-#endif /* HAVE_SNAPSHOT */
-
+	zfs_preumount(sb);
 	kill_anon_super(sb);
 }
 
@@ -306,6 +315,7 @@ const struct super_operations zpl_super_operations = {
 	.sync_fs		= zpl_sync_fs,
 	.statfs			= zpl_statfs,
 	.remount_fs		= zpl_remount_fs,
+	.umount_begin		= zpl_umount_begin,
 	.show_options		= zpl_show_options,
 	.show_stats		= NULL,
 #ifdef HAVE_NR_CACHED_OBJECTS