Add zfs_file_* interface, remove vnodes

Provide a common zfs_file_* interface which can be implemented on all platforms to perform normal file access from either the kernel module or the libzpool library. This allows all non-portable vnode_t usage in the common code to be replaced by the new portable zfs_file_t. The associated vnode and kobj compatibility functions, types, and macros have been removed from the SPL. Moving forward, vnodes should only be used in platform specific code when provided by the native operating system. Reviewed-by: Sean Eric Fagan <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Igor Kozhukhov <[email protected]> Reviewed-by: Jorgen Lundman <[email protected]> Signed-off-by: Matt Macy <[email protected]> Closes #9556
author: Matthew Macy <[email protected]> 2019-11-21 09:32:57 -0800
committer: Brian Behlendorf <[email protected]> 2019-11-21 09:32:57 -0800
commit: da92d5cbb38cea3a860b8a6bb8ee21f9129e7d7c (patch)
tree: cc2d84b481a30b43d4097603e79a55a1975b0b64 /module/os
parent: 67a6c3bc9ff401fa04bc41354c5172b51aaed1c9 (diff)
18 files changed, 575 insertions, 876 deletions
diff --git a/module/os/linux/spl/Makefile.in b/module/os/linux/spl/Makefile.in
index 94804bfed..b2325f91b 100644
--- a/module/os/linux/spl/Makefile.in
+++ b/module/os/linux/spl/Makefile.in
@@ -5,7 +5,6 @@ $(MODULE)-objs += ../os/linux/spl/spl-err.o
 $(MODULE)-objs += ../os/linux/spl/spl-generic.o
 $(MODULE)-objs += ../os/linux/spl/spl-kmem.o
 $(MODULE)-objs += ../os/linux/spl/spl-kmem-cache.o
-$(MODULE)-objs += ../os/linux/spl/spl-kobj.o
 $(MODULE)-objs += ../os/linux/spl/spl-kstat.o
 $(MODULE)-objs += ../os/linux/spl/spl-proc.o
 $(MODULE)-objs += ../os/linux/spl/spl-procfs-list.o
@@ -14,6 +13,5 @@ $(MODULE)-objs += ../os/linux/spl/spl-thread.o
 $(MODULE)-objs += ../os/linux/spl/spl-trace.o
 $(MODULE)-objs += ../os/linux/spl/spl-tsd.o
 $(MODULE)-objs += ../os/linux/spl/spl-vmem.o
-$(MODULE)-objs += ../os/linux/spl/spl-vnode.o
 $(MODULE)-objs += ../os/linux/spl/spl-xdr.o
 $(MODULE)-objs += ../os/linux/spl/spl-zlib.o
diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index 01c8636e7..aa1051f5d 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -27,7 +27,6 @@
 #include <sys/sysmacros.h>
 #include <sys/systeminfo.h>
 #include <sys/vmsystm.h>
-#include <sys/kobj.h>
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/vmem.h>
@@ -47,6 +46,8 @@
 #include <linux/kmod.h>
 #include "zfs_gitrev.h"
 #include <linux/mod_compat.h>
+#include <sys/cred.h>
+#include <sys/vnode.h>
 
 char spl_gitrev[64] = ZFS_META_GITREV;
 
@@ -520,6 +521,48 @@ ddi_copyout(const void *from, void *to, size_t len, int flags)
 }
 EXPORT_SYMBOL(ddi_copyout);
 
+static ssize_t
+spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
+{
+#if defined(HAVE_KERNEL_READ_PPOS)
+	return (kernel_read(file, buf, count, pos));
+#else
+	mm_segment_t saved_fs;
+	ssize_t ret;
+
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	ret = vfs_read(file, (void __user *)buf, count, pos);
+
+	set_fs(saved_fs);
+
+	return (ret);
+#endif
+}
+
+int
+spl_getattr(struct file *filp, struct kstat *stat)
+{
+	int rc;
+
+	ASSERT(filp);
+	ASSERT(stat);
+
+#if defined(HAVE_4ARGS_VFS_GETATTR)
+	rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS,
+	    AT_STATX_SYNC_AS_STAT);
+#elif defined(HAVE_2ARGS_VFS_GETATTR)
+	rc = vfs_getattr(&filp->f_path, stat);
+#else
+	rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat);
+#endif
+	if (rc)
+		return (-rc);
+
+	return (0);
+}
+
 /*
  * Read the unique system identifier from the /etc/hostid file.
  *
@@ -563,38 +606,42 @@ static int
 hostid_read(uint32_t *hostid)
 {
 	uint64_t size;
-	struct _buf *file;
 	uint32_t value = 0;
 	int error;
+	loff_t off;
+	struct file *filp;
+	struct kstat stat;
+
+	filp = filp_open(spl_hostid_path, 0, 0);
 
-	file = kobj_open_file(spl_hostid_path);
-	if (file == (struct _buf *)-1)
+	if (IS_ERR(filp))
 		return (ENOENT);
 
-	error = kobj_get_filesize(file, &size);
+	error = spl_getattr(filp, &stat);
 	if (error) {
-		kobj_close_file(file);
+		filp_close(filp, 0);
 		return (error);
 	}
-
+	size = stat.size;
 	if (size < sizeof (HW_HOSTID_MASK)) {
-		kobj_close_file(file);
+		filp_close(filp, 0);
 		return (EINVAL);
 	}
 
+	off = 0;
 	/*
 	 * Read directly into the variable like eglibc does.
 	 * Short reads are okay; native behavior is preserved.
 	 */
-	error = kobj_read_file(file, (char *)&value, sizeof (value), 0);
+	error = spl_kernel_read(filp, &value, sizeof (value), &off);
 	if (error < 0) {
-		kobj_close_file(file);
+		filp_close(filp, 0);
 		return (EIO);
 	}
 
 	/* Mask down to 32 bits like coreutils does. */
 	*hostid = (value & HW_HOSTID_MASK);
-	kobj_close_file(file);
+	filp_close(filp, 0);
 
 	return (0);
 }
@@ -704,26 +751,21 @@ spl_init(void)
 	if ((rc = spl_kmem_cache_init()))
 		goto out4;
 
-	if ((rc = spl_vn_init()))
-		goto out5;
-
 	if ((rc = spl_proc_init()))
-		goto out6;
+		goto out5;
 
 	if ((rc = spl_kstat_init()))
-		goto out7;
+		goto out6;
 
 	if ((rc = spl_zlib_init()))
-		goto out8;
+		goto out7;
 
 	return (rc);
 
-out8:
-	spl_kstat_fini();
 out7:
-	spl_proc_fini();
+	spl_kstat_fini();
 out6:
-	spl_vn_fini();
+	spl_proc_fini();
 out5:
 	spl_kmem_cache_fini();
 out4:
@@ -742,7 +784,6 @@ spl_fini(void)
 	spl_zlib_fini();
 	spl_kstat_fini();
 	spl_proc_fini();
-	spl_vn_fini();
 	spl_kmem_cache_fini();
 	spl_taskq_fini();
 	spl_tsd_fini();
diff --git a/module/os/linux/spl/spl-kobj.c b/module/os/linux/spl/spl-kobj.c
deleted file mode 100644
index 7019369bd..000000000
--- a/module/os/linux/spl/spl-kobj.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <[email protected]>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Kobj Implementation.
- */
-
-#include <sys/kobj.h>
-
-struct _buf *
-kobj_open_file(const char *name)
-{
-	struct _buf *file;
-	vnode_t *vp;
-	int rc;
-
-	file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP));
-	if (file == NULL)
-		return ((_buf_t *)-1UL);
-
-	if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) {
-		kfree(file);
-		return ((_buf_t *)-1UL);
-	}
-
-	file->vp = vp;
-
-	return (file);
-} /* kobj_open_file() */
-EXPORT_SYMBOL(kobj_open_file);
-
-void
-kobj_close_file(struct _buf *file)
-{
-	VOP_CLOSE(file->vp, 0, 0, 0, 0, 0);
-	kfree(file);
-} /* kobj_close_file() */
-EXPORT_SYMBOL(kobj_close_file);
-
-int
-kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
-	ssize_t resid;
-
-	if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off,
-	    UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
-		return (-1);
-
-	return (size - resid);
-} /* kobj_read_file() */
-EXPORT_SYMBOL(kobj_read_file);
-
-int
-kobj_get_filesize(struct _buf *file, uint64_t *size)
-{
-	vattr_t vap;
-	int rc;
-
-	rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL);
-	if (rc)
-		return (rc);
-
-	*size = vap.va_size;
-
-	return (rc);
-} /* kobj_get_filesize() */
-EXPORT_SYMBOL(kobj_get_filesize);
diff --git a/module/os/linux/spl/spl-vnode.c b/module/os/linux/spl/spl-vnode.c
deleted file mode 100644
index 5de350f10..000000000
--- a/module/os/linux/spl/spl-vnode.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
- *  Copyright (C) 2007 The Regents of the University of California.
- *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- *  Written by Brian Behlendorf <[email protected]>.
- *  UCRL-CODE-235197
- *
- *  This file is part of the SPL, Solaris Porting Layer.
- *  For details, see <http://zfsonlinux.org/>.
- *
- *  The SPL is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  The SPL is distributed in the hope that it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  Solaris Porting Layer (SPL) Vnode Implementation.
- */
-
-#include <sys/cred.h>
-#include <sys/vnode.h>
-#include <sys/kmem_cache.h>
-#include <linux/falloc.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#ifdef HAVE_FDTABLE_HEADER
-#include <linux/fdtable.h>
-#endif
-
-vnode_t *rootdir = (vnode_t *)0xabcd1234;
-EXPORT_SYMBOL(rootdir);
-
-static spl_kmem_cache_t *vn_cache;
-static spl_kmem_cache_t *vn_file_cache;
-
-static spinlock_t vn_file_lock;
-static LIST_HEAD(vn_file_list);
-
-static int
-spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len)
-{
-	int error = -EOPNOTSUPP;
-
-	if (fp->f_op->fallocate)
-		error = fp->f_op->fallocate(fp, mode, offset, len);
-
-	return (error);
-}
-
-static int
-spl_filp_fsync(struct file *fp, int sync)
-{
-	return (vfs_fsync(fp, sync));
-}
-
-static ssize_t
-spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
-{
-#if defined(HAVE_KERNEL_WRITE_PPOS)
-	return (kernel_write(file, buf, count, pos));
-#else
-	mm_segment_t saved_fs;
-	ssize_t ret;
-
-	saved_fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	ret = vfs_write(file, (__force const char __user *)buf, count, pos);
-
-	set_fs(saved_fs);
-
-	return (ret);
-#endif
-}
-
-static ssize_t
-spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
-{
-#if defined(HAVE_KERNEL_READ_PPOS)
-	return (kernel_read(file, buf, count, pos));
-#else
-	mm_segment_t saved_fs;
-	ssize_t ret;
-
-	saved_fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	ret = vfs_read(file, (void __user *)buf, count, pos);
-
-	set_fs(saved_fs);
-
-	return (ret);
-#endif
-}
-
-vtype_t
-vn_mode_to_vtype(mode_t mode)
-{
-	if (S_ISREG(mode))
-		return (VREG);
-
-	if (S_ISDIR(mode))
-		return (VDIR);
-
-	if (S_ISCHR(mode))
-		return (VCHR);
-
-	if (S_ISBLK(mode))
-		return (VBLK);
-
-	if (S_ISFIFO(mode))
-		return (VFIFO);
-
-	if (S_ISLNK(mode))
-		return (VLNK);
-
-	if (S_ISSOCK(mode))
-		return (VSOCK);
-
-	return (VNON);
-} /* vn_mode_to_vtype() */
-EXPORT_SYMBOL(vn_mode_to_vtype);
-
-mode_t
-vn_vtype_to_mode(vtype_t vtype)
-{
-	if (vtype == VREG)
-		return (S_IFREG);
-
-	if (vtype == VDIR)
-		return (S_IFDIR);
-
-	if (vtype == VCHR)
-		return (S_IFCHR);
-
-	if (vtype == VBLK)
-		return (S_IFBLK);
-
-	if (vtype == VFIFO)
-		return (S_IFIFO);
-
-	if (vtype == VLNK)
-		return (S_IFLNK);
-
-	if (vtype == VSOCK)
-		return (S_IFSOCK);
-
-	return (VNON);
-} /* vn_vtype_to_mode() */
-EXPORT_SYMBOL(vn_vtype_to_mode);
-
-vnode_t *
-vn_alloc(int flag)
-{
-	vnode_t *vp;
-
-	vp = kmem_cache_alloc(vn_cache, flag);
-	if (vp != NULL) {
-		vp->v_file = NULL;
-		vp->v_type = 0;
-	}
-
-	return (vp);
-} /* vn_alloc() */
-EXPORT_SYMBOL(vn_alloc);
-
-void
-vn_free(vnode_t *vp)
-{
-	kmem_cache_free(vn_cache, vp);
-} /* vn_free() */
-EXPORT_SYMBOL(vn_free);
-
-int
-vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp,
-    int x1, void *x2)
-{
-	struct file *fp;
-	struct kstat stat;
-	int rc, saved_umask = 0;
-	gfp_t saved_gfp;
-	vnode_t *vp;
-
-	ASSERT(flags & (FWRITE | FREAD));
-	ASSERT(seg == UIO_SYSSPACE);
-	ASSERT(vpp);
-	*vpp = NULL;
-
-	if (!(flags & FCREAT) && (flags & FWRITE))
-		flags |= FEXCL;
-
-	/*
-	 * Note for filp_open() the two low bits must be remapped to mean:
-	 * 01 - read-only  -> 00 read-only
-	 * 10 - write-only -> 01 write-only
-	 * 11 - read-write -> 10 read-write
-	 */
-	flags--;
-
-	if (flags & FCREAT)
-		saved_umask = xchg(&current->fs->umask, 0);
-
-	fp = filp_open(path, flags, mode);
-
-	if (flags & FCREAT)
-		(void) xchg(&current->fs->umask, saved_umask);
-
-	if (IS_ERR(fp))
-		return (-PTR_ERR(fp));
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat);
-#else
-	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
-#endif
-	if (rc) {
-		filp_close(fp, 0);
-		return (-rc);
-	}
-
-	vp = vn_alloc(KM_SLEEP);
-	if (!vp) {
-		filp_close(fp, 0);
-		return (ENOMEM);
-	}
-
-	saved_gfp = mapping_gfp_mask(fp->f_mapping);
-	mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));
-
-	mutex_enter(&vp->v_lock);
-	vp->v_type = vn_mode_to_vtype(stat.mode);
-	vp->v_file = fp;
-	vp->v_gfp_mask = saved_gfp;
-	*vpp = vp;
-	mutex_exit(&vp->v_lock);
-
-	return (0);
-} /* vn_open() */
-EXPORT_SYMBOL(vn_open);
-
-int
-vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
-    vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
-{
-	char *realpath;
-	int len, rc;
-
-	ASSERT(vp == rootdir);
-
-	len = strlen(path) + 2;
-	realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP));
-	if (!realpath)
-		return (ENOMEM);
-
-	(void) snprintf(realpath, len, "/%s", path);
-	rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
-	kfree(realpath);
-
-	return (rc);
-} /* vn_openat() */
-EXPORT_SYMBOL(vn_openat);
-
-int
-vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
-    uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
-{
-	struct file *fp = vp->v_file;
-	loff_t offset = off;
-	int rc;
-
-	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
-	ASSERT(seg == UIO_SYSSPACE);
-	ASSERT((ioflag & ~FAPPEND) == 0);
-
-	if (ioflag & FAPPEND)
-		offset = fp->f_pos;
-
-	if (uio & UIO_WRITE)
-		rc = spl_kernel_write(fp, addr, len, &offset);
-	else
-		rc = spl_kernel_read(fp, addr, len, &offset);
-
-	fp->f_pos = offset;
-
-	if (rc < 0)
-		return (-rc);
-
-	if (residp) {
-		*residp = len - rc;
-	} else {
-		if (rc != len)
-			return (EIO);
-	}
-
-	return (0);
-} /* vn_rdwr() */
-EXPORT_SYMBOL(vn_rdwr);
-
-int
-vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
-{
-	int rc;
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-
-	mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
-	rc = filp_close(vp->v_file, 0);
-	vn_free(vp);
-
-	return (-rc);
-} /* vn_close() */
-EXPORT_SYMBOL(vn_close);
-
-/*
- * vn_seek() does not actually seek it only performs bounds checking on the
- * proposed seek.  We perform minimal checking and allow vn_rdwr() to catch
- * anything more serious.
- */
-int
-vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
-{
-	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
-}
-EXPORT_SYMBOL(vn_seek);
-
-int
-vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
-{
-	struct file *fp;
-	struct kstat stat;
-	int rc;
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-	ASSERT(vap);
-
-	fp = vp->v_file;
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
-	    AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&fp->f_path, &stat);
-#else
-	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
-#endif
-	if (rc)
-		return (-rc);
-
-	vap->va_type	= vn_mode_to_vtype(stat.mode);
-	vap->va_mode	= stat.mode;
-	vap->va_uid	= KUID_TO_SUID(stat.uid);
-	vap->va_gid	= KGID_TO_SGID(stat.gid);
-	vap->va_fsid	= 0;
-	vap->va_nodeid	= stat.ino;
-	vap->va_nlink	= stat.nlink;
-	vap->va_size	= stat.size;
-	vap->va_blksize	= stat.blksize;
-	vap->va_atime	= stat.atime;
-	vap->va_mtime	= stat.mtime;
-	vap->va_ctime	= stat.ctime;
-	vap->va_rdev	= stat.rdev;
-	vap->va_nblocks	= stat.blocks;
-
-	return (0);
-}
-EXPORT_SYMBOL(vn_getattr);
-
-int
-vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
-{
-	int datasync = 0;
-	int error;
-	int fstrans;
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-
-	if (flags & FDSYNC)
-		datasync = 1;
-
-	/*
-	 * May enter XFS which generates a warning when PF_FSTRANS is set.
-	 * To avoid this the flag is cleared over vfs_sync() and then reset.
-	 */
-	fstrans = __spl_pf_fstrans_check();
-	if (fstrans)
-		current->flags &= ~(__SPL_PF_FSTRANS);
-
-	error = -spl_filp_fsync(vp->v_file, datasync);
-	if (fstrans)
-		current->flags |= __SPL_PF_FSTRANS;
-
-	return (error);
-} /* vn_fsync() */
-EXPORT_SYMBOL(vn_fsync);
-
-int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
-    offset_t offset, void *x6, void *x7)
-{
-	int error = EOPNOTSUPP;
-#ifdef FALLOC_FL_PUNCH_HOLE
-	int fstrans;
-#endif
-
-	if (cmd != F_FREESP || bfp->l_whence != SEEK_SET)
-		return (EOPNOTSUPP);
-
-	ASSERT(vp);
-	ASSERT(vp->v_file);
-	ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
-
-#ifdef FALLOC_FL_PUNCH_HOLE
-	/*
-	 * May enter XFS which generates a warning when PF_FSTRANS is set.
-	 * To avoid this the flag is cleared over vfs_sync() and then reset.
-	 */
-	fstrans = __spl_pf_fstrans_check();
-	if (fstrans)
-		current->flags &= ~(__SPL_PF_FSTRANS);
-
-	/*
-	 * When supported by the underlying file system preferentially
-	 * use the fallocate() callback to preallocate the space.
-	 */
-	error = -spl_filp_fallocate(vp->v_file,
-	    FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
-	    bfp->l_start, bfp->l_len);
-
-	if (fstrans)
-		current->flags |= __SPL_PF_FSTRANS;
-
-	if (error == 0)
-		return (0);
-#endif
-	return (error);
-}
-EXPORT_SYMBOL(vn_space);
-
-/* Function must be called while holding the vn_file_lock */
-static file_t *
-file_find(int fd, struct task_struct *task)
-{
-	file_t *fp;
-
-	list_for_each_entry(fp, &vn_file_list,  f_list) {
-		if (fd == fp->f_fd && fp->f_task == task) {
-			ASSERT(atomic_read(&fp->f_ref) != 0);
-			return (fp);
-		}
-	}
-
-	return (NULL);
-} /* file_find() */
-
-file_t *
-vn_getf(int fd)
-{
-	struct kstat stat;
-	struct file *lfp;
-	file_t *fp;
-	vnode_t *vp;
-	int rc = 0;
-
-	if (fd < 0)
-		return (NULL);
-
-	/* Already open just take an extra reference */
-	spin_lock(&vn_file_lock);
-
-	fp = file_find(fd, current);
-	if (fp) {
-		lfp = fget(fd);
-		fput(fp->f_file);
-		/*
-		 * areleasef() can cause us to see a stale reference when
-		 * userspace has reused a file descriptor before areleasef()
-		 * has run. fput() the stale reference and replace it. We
-		 * retain the original reference count such that the concurrent
-		 * areleasef() will decrement its reference and terminate.
-		 */
-		if (lfp != fp->f_file) {
-			fp->f_file = lfp;
-			fp->f_vnode->v_file = lfp;
-		}
-		atomic_inc(&fp->f_ref);
-		spin_unlock(&vn_file_lock);
-		return (fp);
-	}
-
-	spin_unlock(&vn_file_lock);
-
-	/* File was not yet opened create the object and setup */
-	fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
-	if (fp == NULL)
-		goto out;
-
-	mutex_enter(&fp->f_lock);
-
-	fp->f_fd = fd;
-	fp->f_task = current;
-	fp->f_offset = 0;
-	atomic_inc(&fp->f_ref);
-
-	lfp = fget(fd);
-	if (lfp == NULL)
-		goto out_mutex;
-
-	vp = vn_alloc(KM_SLEEP);
-	if (vp == NULL)
-		goto out_fget;
-
-#if defined(HAVE_4ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE,
-	    AT_STATX_SYNC_AS_STAT);
-#elif defined(HAVE_2ARGS_VFS_GETATTR)
-	rc = vfs_getattr(&lfp->f_path, &stat);
-#else
-	rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
-#endif
-	if (rc)
-		goto out_vnode;
-
-	mutex_enter(&vp->v_lock);
-	vp->v_type = vn_mode_to_vtype(stat.mode);
-	vp->v_file = lfp;
-	mutex_exit(&vp->v_lock);
-
-	fp->f_vnode = vp;
-	fp->f_file = lfp;
-
-	/* Put it on the tracking list */
-	spin_lock(&vn_file_lock);
-	list_add(&fp->f_list, &vn_file_list);
-	spin_unlock(&vn_file_lock);
-
-	mutex_exit(&fp->f_lock);
-	return (fp);
-
-out_vnode:
-	vn_free(vp);
-out_fget:
-	fput(lfp);
-out_mutex:
-	mutex_exit(&fp->f_lock);
-	kmem_cache_free(vn_file_cache, fp);
-out:
-	return (NULL);
-} /* getf() */
-EXPORT_SYMBOL(getf);
-
-static void releasef_locked(file_t *fp)
-{
-	ASSERT(fp->f_file);
-	ASSERT(fp->f_vnode);
-
-	/* Unlinked from list, no refs, safe to free outside mutex */
-	fput(fp->f_file);
-	vn_free(fp->f_vnode);
-
-	kmem_cache_free(vn_file_cache, fp);
-}
-
-void
-vn_releasef(int fd)
-{
-	areleasef(fd, P_FINFO(current));
-}
-EXPORT_SYMBOL(releasef);
-
-void
-vn_areleasef(int fd, uf_info_t *fip)
-{
-	file_t *fp;
-	struct task_struct *task = (struct task_struct *)fip;
-
-	if (fd < 0)
-		return;
-
-	spin_lock(&vn_file_lock);
-	fp = file_find(fd, task);
-	if (fp) {
-		atomic_dec(&fp->f_ref);
-		if (atomic_read(&fp->f_ref) > 0) {
-			spin_unlock(&vn_file_lock);
-			return;
-		}
-
-		list_del(&fp->f_list);
-		releasef_locked(fp);
-	}
-	spin_unlock(&vn_file_lock);
-} /* releasef() */
-EXPORT_SYMBOL(areleasef);
-
-static int
-vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
-	struct vnode *vp = buf;
-
-	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
-
-	return (0);
-} /* vn_cache_constructor() */
-
-static void
-vn_cache_destructor(void *buf, void *cdrarg)
-{
-	struct vnode *vp = buf;
-
-	mutex_destroy(&vp->v_lock);
-} /* vn_cache_destructor() */
-
-static int
-vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
-	file_t *fp = buf;
-
-	atomic_set(&fp->f_ref, 0);
-	mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
-	INIT_LIST_HEAD(&fp->f_list);
-
-	return (0);
-} /* vn_file_cache_constructor() */
-
-static void
-vn_file_cache_destructor(void *buf, void *cdrarg)
-{
-	file_t *fp = buf;
-
-	mutex_destroy(&fp->f_lock);
-} /* vn_file_cache_destructor() */
-
-int
-spl_vn_init(void)
-{
-	spin_lock_init(&vn_file_lock);
-
-	vn_cache = kmem_cache_create("spl_vn_cache",
-	    sizeof (struct vnode), 64, vn_cache_constructor,
-	    vn_cache_destructor, NULL, NULL, NULL, 0);
-
-	vn_file_cache = kmem_cache_create("spl_vn_file_cache",
-	    sizeof (file_t), 64, vn_file_cache_constructor,
-	    vn_file_cache_destructor, NULL, NULL, NULL, 0);
-
-	return (0);
-} /* spl_vn_init() */
-
-void
-spl_vn_fini(void)
-{
-	file_t *fp, *next_fp;
-	int leaked = 0;
-
-	spin_lock(&vn_file_lock);
-
-	list_for_each_entry_safe(fp, next_fp, &vn_file_list,  f_list) {
-		list_del(&fp->f_list);
-		releasef_locked(fp);
-		leaked++;
-	}
-
-	spin_unlock(&vn_file_lock);
-
-	if (leaked > 0)
-		printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);
-
-	kmem_cache_destroy(vn_file_cache);
-	kmem_cache_destroy(vn_cache);
-} /* spl_vn_fini() */
diff --git a/module/os/linux/zfs/Makefile.in b/module/os/linux/zfs/Makefile.in
index 153277378..60d92182f 100644
--- a/module/os/linux/zfs/Makefile.in
+++ b/module/os/linux/zfs/Makefile.in
@@ -26,6 +26,7 @@ $(MODULE)-objs += ../os/linux/zfs/zfs_acl.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_ctldir.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_debug.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_dir.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_onexit_os.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o
diff --git a/module/os/linux/zfs/mmp_os.c b/module/os/linux/zfs/mmp_os.c
index b63f164b6..9c8545f3c 100644
--- a/module/os/linux/zfs/mmp_os.c
+++ b/module/os/linux/zfs/mmp_os.c
@@ -34,7 +34,7 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
 	if (ret < 0)
 		return (ret);
 
-	if (spa_mode_global != 0)
+	if (spa_mode_global != SPA_MODE_UNINIT)
 		mmp_signal_all_threads();
 
 	return (ret);
diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c
index 8cfc6fdc9..552530226 100644
--- a/module/os/linux/zfs/policy.c
+++ b/module/os/linux/zfs/policy.c
@@ -324,7 +324,7 @@ secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
  * Check privileges for setting xvattr attributes
  */
 int
-secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype)
+secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, mode_t type)
 {
 	return (secpolicy_vnode_chown(cr, owner));
 }
diff --git a/module/os/linux/zfs/spa_misc_os.c b/module/os/linux/zfs/spa_misc_os.c
index 5c222d2fb..97d91f982 100644
--- a/module/os/linux/zfs/spa_misc_os.c
+++ b/module/os/linux/zfs/spa_misc_os.c
@@ -52,7 +52,7 @@ param_set_deadman_ziotime(const char *val, zfs_kernel_param_t *kp)
 	if (error < 0)
 		return (SET_ERROR(error));
 
-	if (spa_mode_global != 0) {
+	if (spa_mode_global != SPA_MODE_UNINIT) {
 		mutex_enter(&spa_namespace_lock);
 		while ((spa = spa_next(spa)) != NULL)
 			spa->spa_deadman_ziotime =
@@ -73,7 +73,7 @@ param_set_deadman_synctime(const char *val, zfs_kernel_param_t *kp)
 	if (error < 0)
 		return (SET_ERROR(error));
 
-	if (spa_mode_global != 0) {
+	if (spa_mode_global != SPA_MODE_UNINIT) {
 		mutex_enter(&spa_namespace_lock);
 		while ((spa = spa_next(spa)) != NULL)
 			spa->spa_deadman_synctime =
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 328e47f52..2c7dda686 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -55,16 +55,14 @@ typedef struct dio_request {
 } dio_request_t;
 
 static fmode_t
-vdev_bdev_mode(int smode)
+vdev_bdev_mode(spa_mode_t spa_mode)
 {
 	fmode_t mode = 0;
 
-	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
-
-	if (smode & FREAD)
+	if (spa_mode & SPA_MODE_READ)
 		mode |= FMODE_READ;
 
-	if (smode & FWRITE)
+	if (spa_mode & SPA_MODE_WRITE)
 		mode |= FMODE_WRITE;
 
 	return (mode);
@@ -849,9 +847,6 @@ vdev_disk_hold(vdev_t *vd)
 	if (vd->vdev_tsd != NULL)
 		return;
 
-	/* XXX: Implement me as a vnode lookup for the device */
-	vd->vdev_name_vp = NULL;
-	vd->vdev_devid_vp = NULL;
 }
 
 static void
@@ -874,7 +869,7 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
 	if ((p = strchr(val, '\n')) != NULL)
 		*p = '\0';
 
-	if (spa_mode_global != 0) {
+	if (spa_mode_global != SPA_MODE_UNINIT) {
 		mutex_enter(&spa_namespace_lock);
 		while ((spa = spa_next(spa)) != NULL) {
 			if (spa_state(spa) != POOL_STATE_ACTIVE ||
diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c
index b79017f3a..fba5cdced 100644
--- a/module/os/linux/zfs/vdev_file.c
+++ b/module/os/linux/zfs/vdev_file.c
@@ -35,6 +35,9 @@
 #include <sys/abd.h>
 #include <sys/fcntl.h>
 #include <sys/vnode.h>
+#include <sys/zfs_file.h>
+
+#include <linux/falloc.h>
 
 /*
  * Virtual device vector for files.
@@ -54,13 +57,29 @@ vdev_file_rele(vdev_t *vd)
 	ASSERT(vd->vdev_path != NULL);
 }
 
+static mode_t
+vdev_file_open_mode(spa_mode_t spa_mode)
+{
+	mode_t mode = 0;
+
+	if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) {
+		mode = O_RDWR;
+	} else if (spa_mode & SPA_MODE_READ) {
+		mode = O_RDONLY;
+	} else if (spa_mode & SPA_MODE_WRITE) {
+		mode = O_WRONLY;
+	}
+
+	return (mode | O_LARGEFILE);
+}
+
 static int
 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
     uint64_t *ashift)
 {
 	vdev_file_t *vf;
-	vnode_t *vp;
-	vattr_t vattr;
+	zfs_file_t *fp;
+	zfs_file_attr_t zfa;
 	int error;
 
 	/*
@@ -108,38 +127,38 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	 * to local zone users, so the underlying devices should be as well.
 	 */
 	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
-	error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
-	    spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1);
 
+	error = zfs_file_open(vd->vdev_path,
+	    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
 	if (error) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (error);
 	}
 
-	vf->vf_vnode = vp;
+	vf->vf_file = fp;
 
 #ifdef _KERNEL
 	/*
 	 * Make sure it's a regular file.
 	 */
-	if (vp->v_type != VREG) {
+	if (zfs_file_getattr(fp, &zfa)) {
+		return (SET_ERROR(ENODEV));
+	}
+	if (!S_ISREG(zfa.zfa_mode)) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (SET_ERROR(ENODEV));
 	}
 #endif
 
 skip_open:
-	/*
-	 * Determine the physical size of the file.
-	 */
-	vattr.va_mask = AT_SIZE;
-	error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL);
+
+	error =  zfs_file_getattr(vf->vf_file, &zfa);
 	if (error) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (error);
 	}
 
-	*max_psize = *psize = vattr.va_size;
+	*max_psize = *psize = zfa.zfa_size;
 	*ashift = SPA_MINBLOCKSHIFT;
 
 	return (0);
@@ -153,10 +172,8 @@ vdev_file_close(vdev_t *vd)
 	if (vd->vdev_reopening || vf == NULL)
 		return;
 
-	if (vf->vf_vnode != NULL) {
-		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
-		(void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
-		    kcred, NULL);
+	if (vf->vf_file != NULL) {
+		(void) zfs_file_close(vf->vf_file);
 	}
 
 	vd->vdev_delayed_close = B_FALSE;
@@ -172,21 +189,24 @@ vdev_file_io_strategy(void *arg)
 	vdev_file_t *vf = vd->vdev_tsd;
 	ssize_t resid;
 	void *buf;
+	loff_t off;
+	ssize_t size;
+	int err;
+
+	off = zio->io_offset;
+	size = zio->io_size;
+	resid = 0;
 
-	if (zio->io_type == ZIO_TYPE_READ)
+	if (zio->io_type == ZIO_TYPE_READ) {
 		buf = abd_borrow_buf(zio->io_abd, zio->io_size);
-	else
+		err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
+		abd_return_buf_copy(zio->io_abd, buf, size);
+	} else {
 		buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
-
-	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
-	    UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size,
-	    zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
-
-	if (zio->io_type == ZIO_TYPE_READ)
-		abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
-	else
-		abd_return_buf(zio->io_abd, buf, zio->io_size);
-
+		err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
+		abd_return_buf(zio->io_abd, buf, size);
+	}
+	zio->io_error = err;
 	if (resid != 0 && zio->io_error == 0)
 		zio->io_error = SET_ERROR(ENOSPC);
 
@@ -199,7 +219,7 @@ vdev_file_io_fsync(void *arg)
 	zio_t *zio = (zio_t *)arg;
 	vdev_file_t *vf = zio->io_vd->vdev_tsd;
 
-	zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);
+	zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
 
 	zio_interrupt(zio);
 }
@@ -238,8 +258,8 @@ vdev_file_io_start(zio_t *zio)
 				return;
 			}
 
-			zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
-			    kcred, NULL);
+			zio->io_error = zfs_file_fsync(vf->vf_file,
+			    O_SYNC | O_DSYNC);
 			break;
 		default:
 			zio->io_error = SET_ERROR(ENOTSUP);
@@ -248,18 +268,12 @@ vdev_file_io_start(zio_t *zio)
 		zio_execute(zio);
 		return;
 	} else if (zio->io_type == ZIO_TYPE_TRIM) {
-		struct flock flck;
+		int mode;
 
 		ASSERT3U(zio->io_size, !=, 0);
-		bzero(&flck, sizeof (flck));
-		flck.l_type = F_FREESP;
-		flck.l_start = zio->io_offset;
-		flck.l_len = zio->io_size;
-		flck.l_whence = SEEK_SET;
-
-		zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &flck,
-		    0, 0, kcred, NULL);
-
+		mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+		zio->io_error = zfs_file_fallocate(vf->vf_file,
+		    mode, zio->io_offset, zio->io_size);
 		zio_execute(zio);
 		return;
 	}
diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c
index 4c21350c0..1bb2b892d 100644
--- a/module/os/linux/zfs/zfs_acl.c
+++ b/module/os/linux/zfs/zfs_acl.c
@@ -37,7 +37,6 @@
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/fs/zfs.h>
-#include <sys/mode.h>
 #include <sys/policy.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_fuid.h>
diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c
index 6bdad737c..89704d0e4 100644
--- a/module/os/linux/zfs/zfs_dir.c
+++ b/module/os/linux/zfs/zfs_dir.c
@@ -32,7 +32,6 @@
 #include <sys/vfs.h>
 #include <sys/vnode.h>
 #include <sys/file.h>
-#include <sys/mode.h>
 #include <sys/kmem.h>
 #include <sys/uio.h>
 #include <sys/pathname.h>
diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c
new file mode 100644
index 000000000..1c9b84d66
--- /dev/null
+++ b/module/os/linux/zfs/zfs_file_os.c
@@ -0,0 +1,427 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zfs_file.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#ifdef HAVE_FDTABLE_HEADER
+#include <linux/fdtable.h>
+#endif
+
+/*
+ * Open file
+ *
+ * path - fully qualified path to file
+ * flags - open flags (O_CREAT, O_WRONLY, O_EXCL, ...) given to filp_open()
+ * mode - permission bits for filp_open(); umask is zeroed while creating
+ * fpp - pointer to return file pointer (only written on success)
+ * Returns 0 on success, else the negated PTR_ERR() of the filp_open() result.
+ */
+int
+zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
+{
+	struct file *filp;
+	int saved_umask;
+
+	if (!(flags & O_CREAT) && (flags & O_WRONLY))
+		flags |= O_EXCL;
+
+	if (flags & O_CREAT)
+		saved_umask = xchg(&current->fs->umask, 0);
+
+	filp = filp_open(path, flags, mode);
+
+	if (flags & O_CREAT)
+		(void) xchg(&current->fs->umask, saved_umask);
+
+	if (IS_ERR(filp))
+		return (-PTR_ERR(filp));
+
+	*fpp = filp;
+	return (0);
+}
+
+void	/* hands fp to filp_close(); nothing is reported back to the caller */
+zfs_file_close(zfs_file_t *fp)
+{
+	filp_close(fp, 0);
+}
+
+static ssize_t	/* raw kernel_write()/vfs_write() result, passed through */
+zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *off)
+{
+#if defined(HAVE_KERNEL_WRITE_PPOS)
+	return (kernel_write(fp, buf, count, off));
+#else	/* !HAVE_KERNEL_WRITE_PPOS: go through vfs_write() instead */
+	mm_segment_t saved_fs;
+	ssize_t rc;
+
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);	/* presumably so vfs_write() accepts kernel buf */
+
+	rc = vfs_write(fp, (__force const char __user __user *)buf, count, off);
+
+	set_fs(saved_fs);	/* put back the value read by get_fs() above */
+
+	return (rc);
+#endif	/* HAVE_KERNEL_WRITE_PPOS */
+}
+
+/*
+ * Stateful write - use os internal file pointer to determine where to
+ * write and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * resid - if non-NULL, receives the count of unwritten bytes (short write)
+ *
+ * Returns 0 on success, errno on failure (EIO if short and resid is NULL).
+ */
+int
+zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
+{
+	loff_t off = fp->f_pos;
+	ssize_t rc;
+
+	rc = zfs_file_write_impl(fp, buf, count, &off);
+	if (rc < 0)
+		return (-rc);
+
+	fp->f_pos = off;
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless write - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to write to
+ * buf - buffer to write
+ * count - # of bytes to write
+ * off - file offset to write to (only valid for seekable types)
+ * resid - if non-NULL, receives the count of unwritten bytes (short write)
+ *
+ * Returns 0 on success, errno on failure (EIO if short and resid is NULL).
+ */
+int
+zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc  = zfs_file_write_impl(fp, buf, count, &off);
+	if (rc < 0)
+		return (-rc);
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+static ssize_t	/* raw kernel_read()/vfs_read() result, passed through */
+zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off)
+{
+#if defined(HAVE_KERNEL_READ_PPOS)
+	return (kernel_read(fp, buf, count, off));
+#else	/* !HAVE_KERNEL_READ_PPOS: go through vfs_read() instead */
+	mm_segment_t saved_fs;
+	ssize_t rc;
+
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);	/* presumably so vfs_read() accepts kernel buf */
+
+	rc = vfs_read(fp, (void __user *)buf, count, off);
+	set_fs(saved_fs);	/* put back the value read by get_fs() above */
+
+	return (rc);
+#endif	/* HAVE_KERNEL_READ_PPOS */
+}
+
+/*
+ * Stateful read - use os internal file pointer to determine where to
+ * read and update on successful completion.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * resid - if non-NULL, receives the count of unread bytes (short read)
+ *
+ * Returns 0 on success, errno on failure (EIO if short and resid is NULL).
+ */
+int
+zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
+{
+	loff_t off = fp->f_pos;
+	ssize_t rc;
+
+	rc = zfs_file_read_impl(fp, buf, count, &off);
+	if (rc < 0)
+		return (-rc);
+
+	fp->f_pos = off;
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Stateless read - os internal file pointer is not updated.
+ *
+ * fp -  pointer to file (pipe, socket, etc) to read from
+ * buf - buffer to read into
+ * count - # of bytes to read
+ * off - file offset to read from (only valid for seekable types)
+ * resid - if non-NULL, receives the count of unread bytes (short read)
+ *
+ * Returns 0 on success, errno on failure (EIO if short and resid is NULL).
+ */
+int
+zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
+    ssize_t *resid)
+{
+	ssize_t rc;
+
+	rc = zfs_file_read_impl(fp, buf, count, &off);
+	if (rc < 0)
+		return (-rc);
+
+	if (resid) {
+		*resid = count - rc;
+	} else if (rc != count) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * lseek - set / get file pointer
+ *
+ * fp -  pointer to file (pipe, socket, etc) to reposition
+ * offp - in: offset to seek to; out: offset returned by vfs_llseek()
+ * whence - see man pages for standard lseek whence values
+ * Returns 0 on success, EINVAL if *offp is negative or above MAXOFFSET_T,
+ * otherwise the negated vfs_llseek() error (ESPIPE for non seekable types).
+ */
+int
+zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
+{
+	loff_t rc;
+
+	if (*offp < 0 || *offp > MAXOFFSET_T)
+		return (EINVAL);
+
+	rc = vfs_llseek(fp, *offp, whence);
+	if (rc < 0)
+		return (-rc);
+
+	*offp = rc;
+
+	return (0);
+}
+
+/*
+ * Get file attributes
+ *
+ * filp - file pointer
+ * zfattr - pointer to file attr structure
+ *
+ * Only zfa_size and zfa_mode are filled in (from stat.size and stat.mode).
+ *
+ * Returns 0 on success or the negated vfs_getattr() return value on failure.
+ */
+int
+zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr)
+{
+	struct kstat stat;
+	int rc;
+
+#if defined(HAVE_4ARGS_VFS_GETATTR)
+	rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS,
+	    AT_STATX_SYNC_AS_STAT);
+#elif defined(HAVE_2ARGS_VFS_GETATTR)
+	rc = vfs_getattr(&filp->f_path, &stat);
+#else	/* neither macro defined: three-argument vfs_getattr() */
+	rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat);
+#endif	/* HAVE_4ARGS_VFS_GETATTR */
+	if (rc)
+		return (-rc);
+
+	zfattr->zfa_size = stat.size;
+	zfattr->zfa_mode = stat.mode;
+
+	return (0);
+}
+
+/*
+ * Sync file to disk
+ *
+ * filp - file pointer
+ * flags - O_SYNC and/or O_DSYNC; only the O_DSYNC bit is tested (datasync)
+ *
+ * Returns the negated vfs_fsync() return value, which is 0 on success.
+ */
+int
+zfs_file_fsync(zfs_file_t *filp, int flags)
+{
+	int datasync = 0;
+	int error;
+	int fstrans;
+
+	if (flags & O_DSYNC)
+		datasync = 1;
+
+	/*
+	 * May enter XFS which generates a warning when PF_FSTRANS is set.
+	 * To avoid this the flag is cleared over vfs_fsync() and then reset.
+	 */
+	fstrans = __spl_pf_fstrans_check();
+	if (fstrans)
+		current->flags &= ~(__SPL_PF_FSTRANS);
+
+	error = -vfs_fsync(filp, datasync);
+
+	if (fstrans)
+		current->flags |= __SPL_PF_FSTRANS;
+
+	return (error);
+}
+
+/*
+ * fallocate - allocate or free space on disk
+ *
+ * fp - file pointer
+ * mode (non-standard options for hole punching etc)
+ * offset - offset to start allocating or freeing from
+ * len - length to free / allocate
+ *
+ * Returns 0 or a positive errno. OPTIONAL: EOPNOTSUPP if no f_op->fallocate.
+ */
+int
+zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
+{
+	/*
+	 * May enter XFS which generates a warning when PF_FSTRANS is set.
+	 * To avoid this the flag is cleared over fallocate() and then reset.
+	 */
+	int fstrans = __spl_pf_fstrans_check();
+	if (fstrans)
+		current->flags &= ~(__SPL_PF_FSTRANS);
+
+	/*
+	 * The f_op->fallocate() hook reports failure as a negative errno;
+	 * negate it so callers get a positive errno like the default below.
+	 */
+	int error = EOPNOTSUPP;
+	if (fp->f_op->fallocate)
+		error = -fp->f_op->fallocate(fp, mode, offset, len);
+
+	if (fstrans)
+		current->flags |= __SPL_PF_FSTRANS;
+
+	return (error);
+}
+
+/*
+ * Request current file pointer offset
+ *
+ * fp - pointer to file
+ *
+ * Returns fp->f_pos, the offset the stateful read/write calls advance.
+ */
+loff_t
+zfs_file_off(zfs_file_t *fp)
+{
+	return (fp->f_pos);
+}
+
+/*
+ * unlink file
+ *
+ * path - fully qualified file path (ignored by this implementation)
+ *
+ * Not implemented here: always returns EOPNOTSUPP and removes nothing.
+ *
+ * OPTIONAL
+ */
+int
+zfs_file_unlink(const char *path)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Get reference to file pointer
+ *
+ * fd - input file descriptor
+ * fpp - set to the file returned by fget(fd); left untouched on failure
+ *
+ * Returns 0 on success, EBADF if fget() returns NULL for fd.
+ */
+int
+zfs_file_get(int fd, zfs_file_t **fpp)
+{
+	zfs_file_t *fp;
+
+	fp = fget(fd);
+	if (fp == NULL)
+		return (EBADF);
+
+	*fpp = fp;
+
+	return (0);
+}
+
+/*
+ * Drop reference to file pointer: fget() looks fd up again, the first fput()
+ * undoes that lookup and the second presumably drops zfs_file_get()'s hold.
+ * fd - input file descriptor (nothing happens if fget() returns NULL)
+ */
+void
+zfs_file_put(int fd)
+{
+	struct file *fp;
+
+	if ((fp = fget(fd)) != NULL) {
+		fput(fp);
+		fput(fp);
+	}
+}
diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c
index 4e69eecf3..543748c14 100644
--- a/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/module/os/linux/zfs/zfs_ioctl_os.c
@@ -178,15 +178,15 @@ int
 zfsdev_getminor(int fd, minor_t *minorp)
 {
 	zfsdev_state_t *zs, *fpd;
-	file_t *fp;
+	struct file *fp;
+	int rc;
 
 	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
-	fp = getf(fd);
 
-	if (fp == NULL)
-		return (SET_ERROR(EBADF));
+	if ((rc = zfs_file_get(fd, &fp)))
+		return (rc);
 
-	fpd = fp->f_file->private_data;
+	fpd = fp->private_data;
 	if (fpd == NULL)
 		return (SET_ERROR(EBADF));
 
diff --git a/module/os/linux/zfs/zfs_onexit_os.c b/module/os/linux/zfs/zfs_onexit_os.c
index 95dbe8dbe..879ea28ec 100644
--- a/module/os/linux/zfs/zfs_onexit_os.c
+++ b/module/os/linux/zfs/zfs_onexit_os.c
@@ -60,5 +60,5 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp)
 void
 zfs_onexit_fd_rele(int fd)
 {
-	releasef(fd);
+	zfs_file_put(fd);
 }
diff --git a/module/os/linux/zfs/zfs_vnops.c b/module/os/linux/zfs/zfs_vnops.c
index edbb2fc64..e7d0e8933 100644
--- a/module/os/linux/zfs/zfs_vnops.c
+++ b/module/os/linux/zfs/zfs_vnops.c
@@ -59,7 +59,6 @@
 #include <sys/policy.h>
 #include <sys/sunddi.h>
 #include <sys/sid.h>
-#include <sys/mode.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_sa.h>
@@ -423,7 +422,7 @@ unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
  *	IN:	ip	- inode of file to be read from.
  *		uio	- structure supplying read location, range info,
  *			  and return buffer.
- *		ioflag	- FSYNC flags; used to provide FRSYNC semantics.
+ *		ioflag	- O_SYNC flags; used to provide FRSYNC semantics.
  *			  O_DIRECT flag; used to bypass page cache.
  *		cr	- credentials of caller.
  *
@@ -473,7 +472,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	 * Only do this for non-snapshots.
 	 *
 	 * Some platforms do not support FRSYNC and instead map it
-	 * to FSYNC, which results in unnecessary calls to zil_commit. We
+	 * to O_SYNC, which results in unnecessary calls to zil_commit. We
 	 * only honor FRSYNC requests on platforms which support it.
 	 */
 	frsync = !!(ioflag & FRSYNC);
@@ -570,7 +569,7 @@ out:
  *	IN:	ip	- inode of file to be written to.
  *		uio	- structure supplying write location, range info,
  *			  and data buffer.
- *		ioflag	- FAPPEND flag set if in append mode.
+ *		ioflag	- O_APPEND flag set if in append mode.
  *			  O_DIRECT flag; used to bypass page cache.
  *		cr	- credentials of caller.
  *
@@ -629,7 +628,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	 * If immutable or not appending then return EPERM
 	 */
 	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
-	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
+	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
 	    (uio->uio_loffset < zp->z_size))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
@@ -638,7 +637,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	/*
 	 * Validate file offset
 	 */
-	offset_t woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
+	offset_t woff = ioflag & O_APPEND ? zp->z_size : uio->uio_loffset;
 	if (woff < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
@@ -667,7 +666,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	 * If in append mode, set the io offset pointer to eof.
 	 */
 	zfs_locked_range_t *lr;
-	if (ioflag & FAPPEND) {
+	if (ioflag & O_APPEND) {
 		/*
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics.  We reset the write offset once we have the lock.
@@ -961,7 +960,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		return (error);
 	}
 
-	if (ioflag & (FSYNC | FDSYNC) ||
+	if (ioflag & (O_SYNC | O_DSYNC) ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, zp->z_id);
 
@@ -1486,7 +1485,7 @@ top:
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_commit(tx);
 	} else {
-		int aflags = (flag & FAPPEND) ? V_APPEND : 0;
+		int aflags = (flag & O_APPEND) ? V_APPEND : 0;
 
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
@@ -2486,7 +2485,6 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
 	 */
 
 	mutex_enter(&zp->z_lock);
-	vap->va_type = vn_mode_to_vtype(zp->z_mode);
 	vap->va_mode = zp->z_mode;
 	vap->va_fsid = ZTOI(zp)->i_sb->s_dev;
 	vap->va_nodeid = zp->z_id;
@@ -2497,7 +2495,6 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
 	vap->va_nlink = MIN(links, ZFS_LINK_MAX);
 	vap->va_size = i_size_read(ip);
 	vap->va_rdev = ip->i_rdev;
-	vap->va_seq = ip->i_generation;
 
 	/*
 	 * Add in any requested optional attributes and the create time.
diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c
index c623d61f7..53ba1f63e 100644
--- a/module/os/linux/zfs/zfs_znode.c
+++ b/module/os/linux/zfs/zfs_znode.c
@@ -38,7 +38,6 @@
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/errno.h>
-#include <sys/mode.h>
 #include <sys/atomic.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index b098703c2..54e80e50e 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -190,19 +190,19 @@ zfs_io_flags(struct kiocb *kiocb)
 
 #if defined(IOCB_DSYNC)
 	if (kiocb->ki_flags & IOCB_DSYNC)
-		flags |= FDSYNC;
+		flags |= O_DSYNC;
 #endif
 #if defined(IOCB_SYNC)
 	if (kiocb->ki_flags & IOCB_SYNC)
-		flags |= FSYNC;
+		flags |= O_SYNC;
 #endif
 #if defined(IOCB_APPEND)
 	if (kiocb->ki_flags & IOCB_APPEND)
-		flags |= FAPPEND;
+		flags |= O_APPEND;
 #endif
 #if defined(IOCB_DIRECT)
 	if (kiocb->ki_flags & IOCB_DIRECT)
-		flags |= FDIRECT;
+		flags |= O_DIRECT;
 #endif
 	return (flags);
 }
@@ -728,16 +728,14 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
 static long
 zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 {
-	int error = -EOPNOTSUPP;
-
-#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
 	cred_t *cr = CRED();
 	flock64_t bf;
 	loff_t olen;
 	fstrans_cookie_t cookie;
+	int error;
 
 	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-		return (error);
+		return (-EOPNOTSUPP);
 
 	if (offset < 0 || len <= 0)
 		return (-EINVAL);
@@ -759,14 +757,12 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
+	error = -zfs_space(ip, F_FREESP, &bf, O_RDWR, offset, cr);
 	spl_fstrans_unmark(cookie);
 	spl_inode_unlock(ip);
 
 	crfree(cr);
-#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */
 
-	ASSERT3S(error, <=, 0);
 	return (error);
 }
author	Matthew Macy <[email protected]>	2019-11-21 09:32:57 -0800
committer	Brian Behlendorf <[email protected]>	2019-11-21 09:32:57 -0800
commit	da92d5cbb38cea3a860b8a6bb8ee21f9129e7d7c (patch)
tree	cc2d84b481a30b43d4097603e79a55a1975b0b64 /module/os
parent	67a6c3bc9ff401fa04bc41354c5172b51aaed1c9 (diff)