Fix nfs snapdir automount

The current implementation for allowing nfs to access snapdir is very buggy. It uses a special fh for snapdirs, such that the next time nfsd does fh_to_dentry, it actually returns the root inode inside the snapshot. So nfsd never knows it cross a mountpoint. The problem is that nfsd will not hold a reference on the vfsmount of the snapshot. This cause auto unmounter to unmount the snapshot even though nfs is still holding dentries in it. To fix this, we return the inode for the snapdirs themselves. However, we also trigger automount upon fh_to_dentry, and return ESTALE so nfsd will revalidate and see the mountpoint and do crossmnt. Because nfsd will now be aware that these are different filesystems users must add crossmnt to their export options to access snapshot directories. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Chunwei Chen <[email protected]> Closes #3794 Closes #4716 Closes #5810 Closes #5833
author: Chunwei Chen <[email protected]> 2017-03-08 09:26:33 -0800
committer: Brian Behlendorf <[email protected]> 2017-03-08 09:26:33 -0800
commit: 9b77d1c9585572b7ce3af204d40278752d5f5842 (patch)
tree: f833d83cdede67ba1a43f1729caa64ebf899a00a
parent: 463009865fb4398b32a825d7c00cd8a942637fae (diff)
6 files changed, 91 insertions, 104 deletions
diff --git a/include/sys/zfs_ctldir.h b/include/sys/zfs_ctldir.h
index 26f0122ff..fb85b17f5 100644
--- a/include/sys/zfs_ctldir.h
+++ b/include/sys/zfs_ctldir.h
@@ -78,8 +78,8 @@ extern int zfsctl_snapshot_mount(struct path *path, int flags);
 extern int zfsctl_snapshot_unmount(char *snapname, int flags);
 extern int zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid,
     int delay);
-extern int zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid,
-    zfs_sb_t **zsb);
+extern int zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid,
+    int gen, struct inode **ipp);
 
 /* zfsctl '.zfs/shares' functions */
 extern int zfsctl_shares_lookup(struct inode *dip, char *name,
diff --git a/lib/libshare/nfs.c b/lib/libshare/nfs.c
index 99dd7c1c4..0cfd42c1a 100644
--- a/lib/libshare/nfs.c
+++ b/lib/libshare/nfs.c
@@ -600,7 +600,7 @@ nfs_update_shareopts(sa_share_impl_t impl_share, const char *resource,
 	old_shareopts = FSINFO(impl_share, nfs_fstype)->shareopts;
 
 	if (strcmp(shareopts, "on") == 0)
-		shareopts = "rw";
+		shareopts = "rw,crossmnt";
 
 	if (FSINFO(impl_share, nfs_fstype)->active && old_shareopts != NULL &&
 	    strcmp(old_shareopts, shareopts) != 0) {
diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
index 0f9a631d9..888a5f8eb 100644
--- a/man/man8/zfs.8
+++ b/man/man8/zfs.8
@@ -1340,7 +1340,7 @@ Controls whether the file system is shared via \fBNFS\fR, and what options are u
 .sp
 .in +4
 .nf
-/usr/sbin/exportfs -i -o sec=sys,rw,no_subtree_check,no_root_squash,mountpoint *:<mountpoint of dataset>
+/usr/sbin/exportfs -i -o sec=sys,rw,crossmnt,no_subtree_check,no_root_squash,mountpoint *:<mountpoint of dataset>
 .fi
 .in -4
 .sp
@@ -3746,6 +3746,10 @@ The following commands show how to set \fBsharenfs\fR property options to enable
 .LP
 If you are using \fBDNS\fR for host name resolution, specify the fully qualified hostname.
 
+.sp
+.LP
+If you want to access snapdir through NFS, be sure to add \fBcrossmnt\fR to the options.
+
 .LP
 \fBExample 17 \fRDelegating ZFS Administration Permissions on a ZFS Dataset
 .sp
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
index e340462fb..8847db7f1 100644
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -590,27 +590,40 @@ zfsctl_root(znode_t *zp)
 	igrab(ZTOZSB(zp)->z_ctldir);
 	return (ZTOZSB(zp)->z_ctldir);
 }
+
 /*
- * Generate a long fid which includes the root object and objset of a
- * snapshot but not the generation number.  For the root object the
- * generation number is ignored when zero to avoid needing to open
- * the dataset when generating fids for the snapshot names.
+ * Generate a long fid to indicate a snapdir. We encode whether snapdir is
+ * already monunted in gen field. We do this because nfsd lookup will not
+ * trigger automount. Next time the nfsd does fh_to_dentry, we will notice
+ * this and do automount and return ESTALE to force nfsd revalidate and follow
+ * mount.
  */
 static int
 zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
 {
-	zfs_sb_t *zsb = ITOZSB(ip);
 	zfid_short_t *zfid = (zfid_short_t *)fidp;
 	zfid_long_t *zlfid = (zfid_long_t *)fidp;
 	uint32_t gen = 0;
 	uint64_t object;
 	uint64_t objsetid;
 	int i;
+	struct dentry *dentry;
+
+	if (fidp->fid_len < LONG_FID_LEN) {
+		fidp->fid_len = LONG_FID_LEN;
+		return (SET_ERROR(ENOSPC));
+	}
 
-	object = zsb->z_root;
+	object = ip->i_ino;
 	objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
 	zfid->zf_len = LONG_FID_LEN;
 
+	dentry = d_obtain_alias(igrab(ip));
+	if (!IS_ERR(dentry)) {
+		gen = !!d_mountpoint(dentry);
+		dput(dentry);
+	}
+
 	for (i = 0; i < sizeof (zfid->zf_object); i++)
 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 
@@ -640,15 +653,15 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
 
 	ZFS_ENTER(zsb);
 
-	if (fidp->fid_len < SHORT_FID_LEN) {
-		fidp->fid_len = SHORT_FID_LEN;
+	if (zfsctl_is_snapdir(ip)) {
 		ZFS_EXIT(zsb);
-		return (SET_ERROR(ENOSPC));
+		return (zfsctl_snapdir_fid(ip, fidp));
 	}
 
-	if (zfsctl_is_snapdir(ip)) {
+	if (fidp->fid_len < SHORT_FID_LEN) {
+		fidp->fid_len = SHORT_FID_LEN;
 		ZFS_EXIT(zsb);
-		return (zfsctl_snapdir_fid(ip, fidp));
+		return (SET_ERROR(ENOSPC));
 	}
 
 	zfid = (zfid_short_t *)fidp;
@@ -1145,70 +1158,52 @@ error:
 }
 
 /*
- * Given the objset id of the snapshot return its zfs_sb_t as zsbp.
+ * Get the snapdir inode from fid
  */
 int
-zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
+zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
+    struct inode **ipp)
 {
-	zfs_snapentry_t *se;
 	int error;
-	spa_t *spa = ((zfs_sb_t *)(sb->s_fs_info))->z_os->os_spa;
-
-	/*
-	 * Verify that the snapshot is mounted then lookup the mounted root
-	 * rather than the covered mount point.  This may fail if the
-	 * snapshot has just been unmounted by an unrelated user space
-	 * process.  This race cannot occur to an expired mount point
-	 * because we hold the zfs_snapshot_lock to prevent the race.
-	 */
-	rw_enter(&zfs_snapshot_lock, RW_READER);
-	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
-		zfs_sb_t *zsb;
+	struct path path;
+	char *mnt;
+	struct dentry *dentry;
 
-		zsb = ITOZSB(se->se_root_dentry->d_inode);
-		ASSERT3U(dmu_objset_id(zsb->z_os), ==, objsetid);
+	mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 
-		if (time_after(jiffies, zsb->z_snap_defer_time +
-		    MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
-			zsb->z_snap_defer_time = jiffies;
-			zfsctl_snapshot_unmount_cancel(se);
-			zfsctl_snapshot_unmount_delay_impl(se,
-			    zfs_expire_snapshot);
-		}
+	error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
+	    MAXPATHLEN, mnt);
+	if (error)
+		goto out;
 
-		*zsbp = zsb;
-		zfsctl_snapshot_rele(se);
-		error = SET_ERROR(0);
-	} else {
-		error = SET_ERROR(ENOENT);
-	}
-	rw_exit(&zfs_snapshot_lock);
+	/* Trigger automount */
+	error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
+	if (error)
+		goto out;
 
+	path_put(&path);
 	/*
-	 * Automount the snapshot given the objset id by constructing the
-	 * full mount point and performing a traversal.
+	 * Get the snapdir inode. Note, we don't want to use the above
+	 * path because it contains the root of the snapshot rather
+	 * than the snapdir.
 	 */
-	if (error == ENOENT) {
-		struct path path;
-		char *mnt;
-
-		mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-		error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
-		    MAXPATHLEN, mnt);
-		if (error) {
-			kmem_free(mnt, MAXPATHLEN);
-			return (SET_ERROR(error));
-		}
-
-		error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
-		if (error == 0) {
-			*zsbp = ITOZSB(path.dentry->d_inode);
-			path_put(&path);
-		}
-
-		kmem_free(mnt, MAXPATHLEN);
+	*ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
+	if (*ipp == NULL) {
+		error = SET_ERROR(ENOENT);
+		goto out;
 	}
 
+	/* check gen, see zfsctl_snapdir_fid */
+	dentry = d_obtain_alias(igrab(*ipp));
+	if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
+		iput(*ipp);
+		*ipp = NULL;
+		error = SET_ERROR(ENOENT);
+	}
+	if (!IS_ERR(dentry))
+		dput(dentry);
+out:
+	kmem_free(mnt, MAXPATHLEN);
 	return (error);
 }
 
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 85c275822..3135319cb 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1597,8 +1597,19 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 
 	*ipp = NULL;
 
-	ZFS_ENTER(zsb);
+	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
+		zfid_short_t	*zfid = (zfid_short_t *)fidp;
+
+		for (i = 0; i < sizeof (zfid->zf_object); i++)
+			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
 
+		for (i = 0; i < sizeof (zfid->zf_gen); i++)
+			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
+	} else {
+		return (SET_ERROR(EINVAL));
+	}
+
+	/* LONG_FID_LEN means snapdirs */
 	if (fidp->fid_len == LONG_FID_LEN) {
 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
 		uint64_t	objsetid = 0;
@@ -1610,28 +1621,24 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
 
-		ZFS_EXIT(zsb);
+		if (objsetid != ZFSCTL_INO_SNAPDIRS - object) {
+			dprintf("snapdir fid: objsetid (%llu) != "
+			    "ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n",
+			    objsetid, ZFSCTL_INO_SNAPDIRS, object);
 
-		err = zfsctl_lookup_objset(sb, objsetid, &zsb);
-		if (err)
 			return (SET_ERROR(EINVAL));
+		}
 
-		ZFS_ENTER(zsb);
-	}
-
-	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
-		zfid_short_t	*zfid = (zfid_short_t *)fidp;
-
-		for (i = 0; i < sizeof (zfid->zf_object); i++)
-			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
+		if (fid_gen > 1 || setgen != 0) {
+			dprintf("snapdir fid: fid_gen (%llu) and setgen "
+			    "(%llu)\n", fid_gen, setgen);
+			return (SET_ERROR(EINVAL));
+		}
 
-		for (i = 0; i < sizeof (zfid->zf_gen); i++)
-			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
-	} else {
-		ZFS_EXIT(zsb);
-		return (SET_ERROR(EINVAL));
+		return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
 	}
 
+	ZFS_ENTER(zsb);
 	/* A zero fid_gen means we are in the .zfs control directories */
 	if (fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 41572d34f..2e5099574 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -4714,12 +4714,7 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 
 	gen = (uint32_t)gen64;
 
-	size = (zsb->z_parent != zsb) ? LONG_FID_LEN : SHORT_FID_LEN;
-	if (fidp->fid_len < size) {
-		fidp->fid_len = size;
-		ZFS_EXIT(zsb);
-		return (SET_ERROR(ENOSPC));
-	}
+	size = SHORT_FID_LEN;
 
 	zfid = (zfid_short_t *)fidp;
 
@@ -4734,20 +4729,6 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
 
-	if (size == LONG_FID_LEN) {
-		uint64_t	objsetid = dmu_objset_id(zsb->z_os);
-		zfid_long_t	*zlfid;
-
-		zlfid = (zfid_long_t *)fidp;
-
-		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
-			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
-
-		/* XXX - this should be the generation number for the objset */
-		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
-			zlfid->zf_setgen[i] = 0;
-	}
-
 	ZFS_EXIT(zsb);
 	return (0);
 }
author	Chunwei Chen <[email protected]>	2017-03-08 09:26:33 -0800
committer	Brian Behlendorf <[email protected]>	2017-03-08 09:26:33 -0800
commit	9b77d1c9585572b7ce3af204d40278752d5f5842 (patch)
tree	f833d83cdede67ba1a43f1729caa64ebf899a00a
parent	463009865fb4398b32a825d7c00cd8a942637fae (diff)