summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chunwei Chen <[email protected]> 2015-07-30 22:24:36 +0800
committer Brian Behlendorf <[email protected]> 2015-08-24 10:17:06 -0700
commit 5475aada9474464f973788c1b2fc6216486fb303 (patch)
tree f4b02aff6e841b47cad4894b40c7a1092ad0be3b
parent 17888ae30d6111f1fe25087a256724ee9b1a0a84 (diff)
Linux 4.1 compat: loop device on ZFS
Starting with Linux 4.1, an iov_iter containing bio_vec may be passed into iter_read/iter_write. Notably, the loop device will pass bio_vec to the backend filesystem. However, the current ZFS code assumes iovec without any check, so it will always crash when using a loop device. With the restructured uio_t, we can safely pass bio_vec in uio_t with UIO_BVEC set. The uio* functions are modified to handle the bio_vec case separately. The const uio_iov causes some warnings in the xuio-related code, so explicitly convert them to non-const. Signed-off-by: Chunwei Chen <[email protected]> Signed-off-by: Richard Yao <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #3511 Closes #3640
-rw-r--r--module/zcommon/zfs_uio.c205
-rw-r--r--module/zfs/dmu.c5
-rw-r--r--module/zfs/zfs_vnops.c6
-rw-r--r--module/zfs/zpl_file.c81
-rw-r--r--module/zfs/zpl_inode.c2
5 files changed, 162 insertions, 137 deletions
diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c
index 90376f2ac..a5634fca0 100644
--- a/module/zcommon/zfs_uio.c
+++ b/module/zcommon/zfs_uio.c
@@ -35,6 +35,9 @@
* software developed by the University of California, Berkeley, and its
* contributors.
*/
+/*
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ */
/*
* The uio support from OpenSolaris has been added as a short term
@@ -46,6 +49,7 @@
#include <sys/types.h>
#include <sys/uio_impl.h>
+#include <linux/kmap_compat.h>
/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
@@ -53,20 +57,17 @@
* update to reflect the data which was moved. Returns 0 on success or
* a non-zero errno on failure.
*/
-int
-uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
+static int
+uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
- struct iovec *iov;
+ const struct iovec *iov = uio->uio_iov;
+ size_t skip = uio->uio_skip;
ulong_t cnt;
+ ASSERT3U(skip, <, iov->iov_len);
+
while (n && uio->uio_resid) {
- iov = uio->uio_iov;
- cnt = MIN(iov->iov_len, n);
- if (cnt == 0l) {
- uio->uio_iov++;
- uio->uio_iovcnt--;
- continue;
- }
+ cnt = MIN(iov->iov_len - skip, n);
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
@@ -75,22 +76,29 @@ uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
* iov->iov_base = user data pointer
*/
if (rw == UIO_READ) {
- if (copy_to_user(iov->iov_base, p, cnt))
+ if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
- if (copy_from_user(p, iov->iov_base, cnt))
+ if (copy_from_user(p, iov->iov_base+skip, cnt))
return (EFAULT);
}
break;
case UIO_SYSSPACE:
if (rw == UIO_READ)
- bcopy(p, iov->iov_base, cnt);
+ bcopy(p, iov->iov_base + skip, cnt);
else
- bcopy(iov->iov_base, p, cnt);
+ bcopy(iov->iov_base + skip, p, cnt);
break;
+ default:
+ ASSERT(0);
+ }
+ skip += cnt;
+ if (skip == iov->iov_len) {
+ skip = 0;
+ uio->uio_iov = (++iov);
+ uio->uio_iovcnt--;
}
- iov->iov_base += cnt;
- iov->iov_len -= cnt;
+ uio->uio_skip = skip;
uio->uio_resid -= cnt;
uio->uio_loffset += cnt;
p = (caddr_t)p + cnt;
@@ -98,6 +106,50 @@ uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
}
return (0);
}
+
+static int
+uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
+{
+ const struct bio_vec *bv = uio->uio_bvec;
+ size_t skip = uio->uio_skip;
+ ulong_t cnt;
+
+ ASSERT3U(skip, <, bv->bv_len);
+
+ while (n && uio->uio_resid) {
+ void *paddr;
+ cnt = MIN(bv->bv_len - skip, n);
+
+ paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
+ if (rw == UIO_READ)
+ bcopy(p, paddr + bv->bv_offset + skip, cnt);
+ else
+ bcopy(paddr + bv->bv_offset + skip, p, cnt);
+ zfs_kunmap_atomic(paddr, KM_USER1);
+
+ skip += cnt;
+ if (skip == bv->bv_len) {
+ skip = 0;
+ uio->uio_bvec = (++bv);
+ uio->uio_iovcnt--;
+ }
+ uio->uio_skip = skip;
+ uio->uio_resid -= cnt;
+ uio->uio_loffset += cnt;
+ p = (caddr_t)p + cnt;
+ n -= cnt;
+ }
+ return (0);
+}
+
+int
+uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
+{
+ if (uio->uio_segflg != UIO_BVEC)
+ return (uiomove_iov(p, n, rw, uio));
+ else
+ return (uiomove_bvec(p, n, rw, uio));
+}
EXPORT_SYMBOL(uiomove);
#define fuword8(uptr, vptr) get_user((*vptr), (uptr))
@@ -111,39 +163,39 @@ EXPORT_SYMBOL(uiomove);
void
uio_prefaultpages(ssize_t n, struct uio *uio)
{
- struct iovec *iov;
+ const struct iovec *iov;
ulong_t cnt, incr;
caddr_t p;
uint8_t tmp;
int iovcnt;
+ size_t skip = uio->uio_skip;
+
+ /* no need to fault in kernel pages */
+ switch (uio->uio_segflg) {
+ case UIO_SYSSPACE:
+ case UIO_BVEC:
+ return;
+ case UIO_USERSPACE:
+ case UIO_USERISPACE:
+ break;
+ default:
+ ASSERT(0);
+ }
iov = uio->uio_iov;
iovcnt = uio->uio_iovcnt;
+ ASSERT3U(skip, <, iov->iov_len);
while ((n > 0) && (iovcnt > 0)) {
- cnt = MIN(iov->iov_len, n);
- if (cnt == 0) {
- /* empty iov entry */
- iov++;
- iovcnt--;
- continue;
- }
+ cnt = MIN(iov->iov_len - skip, n);
n -= cnt;
/*
* touch each page in this segment.
*/
- p = iov->iov_base;
+ p = iov->iov_base + skip;
while (cnt) {
- switch (uio->uio_segflg) {
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- if (fuword8((uint8_t *) p, &tmp))
- return;
- break;
- case UIO_SYSSPACE:
- bcopy(p, &tmp, 1);
- break;
- }
+ if (fuword8((uint8_t *) p, &tmp))
+ return;
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
@@ -152,18 +204,11 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
* touch the last byte in case it straddles a page.
*/
p--;
- switch (uio->uio_segflg) {
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- if (fuword8((uint8_t *) p, &tmp))
- return;
- break;
- case UIO_SYSSPACE:
- bcopy(p, &tmp, 1);
- break;
- }
+ if (fuword8((uint8_t *) p, &tmp))
+ return;
iov++;
iovcnt--;
+ skip = 0;
}
}
EXPORT_SYMBOL(uio_prefaultpages);
@@ -175,49 +220,13 @@ EXPORT_SYMBOL(uio_prefaultpages);
int
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
{
- struct iovec *iov;
- ulong_t cnt;
- int iovcnt;
-
- iovcnt = uio->uio_iovcnt;
- *cbytes = 0;
-
- for (iov = uio->uio_iov; n && iovcnt; iov++, iovcnt--) {
- cnt = MIN(iov->iov_len, n);
- if (cnt == 0)
- continue;
-
- switch (uio->uio_segflg) {
-
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- /*
- * p = kernel data pointer
- * iov->iov_base = user data pointer
- */
- if (rw == UIO_READ) {
- /* UIO_READ = copy data from kernel to user */
- if (copy_to_user(iov->iov_base, p, cnt))
- return (EFAULT);
- } else {
- /* UIO_WRITE = copy data from user to kernel */
- if (copy_from_user(p, iov->iov_base, cnt))
- return (EFAULT);
- }
- break;
+ struct uio uio_copy;
+ int ret;
- case UIO_SYSSPACE:
- if (rw == UIO_READ)
- bcopy(p, iov->iov_base, cnt);
- else
- bcopy(iov->iov_base, p, cnt);
- break;
- }
- p = (caddr_t)p + cnt;
- n -= cnt;
- *cbytes += cnt;
- }
- return (0);
+ bcopy(uio, &uio_copy, sizeof (struct uio));
+ ret = uiomove(p, n, rw, &uio_copy);
+ *cbytes = uio->uio_resid - uio_copy.uio_resid;
+ return (ret);
}
EXPORT_SYMBOL(uiocopy);
@@ -229,21 +238,23 @@ uioskip(uio_t *uiop, size_t n)
{
if (n > uiop->uio_resid)
return;
- while (n != 0) {
- iovec_t *iovp = uiop->uio_iov;
- size_t niovb = MIN(iovp->iov_len, n);
- if (niovb == 0) {
+ uiop->uio_skip += n;
+ if (uiop->uio_segflg != UIO_BVEC) {
+ while (uiop->uio_skip >= uiop->uio_iov->iov_len) {
+ uiop->uio_skip -= uiop->uio_iov->iov_len;
uiop->uio_iov++;
uiop->uio_iovcnt--;
- continue;
}
- iovp->iov_base += niovb;
- uiop->uio_loffset += niovb;
- iovp->iov_len -= niovb;
- uiop->uio_resid -= niovb;
- n -= niovb;
+ } else {
+ while (uiop->uio_skip >= uiop->uio_bvec->bv_len) {
+ uiop->uio_skip -= uiop->uio_bvec->bv_len;
+ uiop->uio_bvec++;
+ uiop->uio_iovcnt--;
+ }
}
+ uiop->uio_loffset += n;
+ uiop->uio_resid -= n;
}
EXPORT_SYMBOL(uioskip);
#endif /* _KERNEL */
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index eb3bc0ed2..ac7499d01 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -23,6 +23,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/
#include <sys/dmu.h>
@@ -933,7 +934,7 @@ dmu_xuio_init(xuio_t *xuio, int nblk)
priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
priv->cnt = nblk;
priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
- priv->iovp = uio->uio_iov;
+ priv->iovp = (iovec_t *)uio->uio_iov;
XUIO_XUZC_PRIV(xuio) = priv;
if (XUIO_XUZC_RW(xuio) == UIO_READ)
@@ -974,7 +975,7 @@ dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)
ASSERT(i < priv->cnt);
ASSERT(off + n <= arc_buf_size(abuf));
- iov = uio->uio_iov + i;
+ iov = (iovec_t *)uio->uio_iov + i;
iov->iov_base = (char *)abuf->b_data + off;
iov->iov_len = n;
priv->bufs[i] = abuf;
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 1d23d6db3..5e5f3c8db 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -591,10 +592,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
int max_blksz = zsb->z_max_blksz;
int error = 0;
arc_buf_t *abuf;
- iovec_t *aiov = NULL;
+ const iovec_t *aiov = NULL;
xuio_t *xuio = NULL;
int i_iov = 0;
- iovec_t *iovp = uio->uio_iov;
+ const iovec_t *iovp = uio->uio_iov;
int write_eof;
int count = 0;
sa_bulk_attr_t bulk[4];
@@ -714,6 +715,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
if (xuio && abuf == NULL) {
ASSERT(i_iov < iovcnt);
+ ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
aiov = &iovp[i_iov];
abuf = dmu_xuio_arcbuf(xuio, i_iov);
dmu_xuio_clear(xuio, i_iov);
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 547114012..a23bc7d8d 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/
@@ -202,17 +203,18 @@ zpl_aio_fsync(struct kiocb *kiocb, int datasync)
#error "Unsupported fops->fsync() implementation"
#endif
-static inline ssize_t
+static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
- unsigned long nr_segs, loff_t *ppos, uio_seg_t segment,
- int flags, cred_t *cr)
+ unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
+ cred_t *cr, size_t skip)
{
ssize_t read;
uio_t uio;
int error;
fstrans_cookie_t cookie;
- uio.uio_iov = (struct iovec *)iovp;
+ uio.uio_iov = iovp;
+ uio.uio_skip = skip;
uio.uio_resid = count;
uio.uio_iovcnt = nr_segs;
uio.uio_loffset = *ppos;
@@ -242,7 +244,7 @@ zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
iov.iov_len = len;
return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
- flags, cr));
+ flags, cr, 0));
}
static ssize_t
@@ -261,24 +263,17 @@ zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
- unsigned long nr_segs, size_t count)
+ unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
cred_t *cr = CRED();
struct file *filp = kiocb->ki_filp;
ssize_t read;
- size_t alloc_size = sizeof (struct iovec) * nr_segs;
- struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP);
- bcopy(iovp, iov_tmp, alloc_size);
-
- ASSERT(iovp);
crhold(cr);
- read = zpl_read_common_iovec(filp->f_mapping->host, iov_tmp, count,
- nr_segs, &kiocb->ki_pos, UIO_USERSPACE, filp->f_flags, cr);
+ read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
+ nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
crfree(cr);
- kmem_free(iov_tmp, alloc_size);
-
return (read);
}
@@ -286,22 +281,32 @@ zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
- return (zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
- iov_iter_count(to)));
+ ssize_t ret;
+ uio_seg_t seg = UIO_USERSPACE;
+ if (to->type & ITER_KVEC)
+ seg = UIO_SYSSPACE;
+ if (to->type & ITER_BVEC)
+ seg = UIO_BVEC;
+ ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
+ iov_iter_count(to), seg, to->iov_offset);
+ if (ret > 0)
+ iov_iter_advance(to, ret);
+ return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
unsigned long nr_segs, loff_t pos)
{
- return (zpl_iter_read_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes));
+ return (zpl_iter_read_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes,
+ UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */
-static inline ssize_t
+static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
- unsigned long nr_segs, loff_t *ppos, uio_seg_t segment,
- int flags, cred_t *cr)
+ unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
+ cred_t *cr, size_t skip)
{
ssize_t wrote;
uio_t uio;
@@ -311,7 +316,8 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
if (flags & O_APPEND)
*ppos = i_size_read(ip);
- uio.uio_iov = (struct iovec *)iovp;
+ uio.uio_iov = iovp;
+ uio.uio_skip = skip;
uio.uio_resid = count;
uio.uio_iovcnt = nr_segs;
uio.uio_loffset = *ppos;
@@ -340,7 +346,7 @@ zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
iov.iov_len = len;
return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
- flags, cr));
+ flags, cr, 0));
}
static ssize_t
@@ -359,24 +365,17 @@ zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
- unsigned long nr_segs, size_t count)
+ unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
cred_t *cr = CRED();
struct file *filp = kiocb->ki_filp;
ssize_t wrote;
- size_t alloc_size = sizeof (struct iovec) * nr_segs;
- struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP);
- bcopy(iovp, iov_tmp, alloc_size);
-
- ASSERT(iovp);
crhold(cr);
- wrote = zpl_write_common_iovec(filp->f_mapping->host, iov_tmp, count,
- nr_segs, &kiocb->ki_pos, UIO_USERSPACE, filp->f_flags, cr);
+ wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
+ nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
crfree(cr);
- kmem_free(iov_tmp, alloc_size);
-
return (wrote);
}
@@ -384,15 +383,25 @@ zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
- return (zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
- iov_iter_count(from)));
+ ssize_t ret;
+ uio_seg_t seg = UIO_USERSPACE;
+ if (from->type & ITER_KVEC)
+ seg = UIO_SYSSPACE;
+ if (from->type & ITER_BVEC)
+ seg = UIO_BVEC;
+ ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
+ iov_iter_count(from), seg, from->iov_offset);
+ if (ret > 0)
+ iov_iter_advance(from, ret);
+ return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
unsigned long nr_segs, loff_t pos)
{
- return (zpl_iter_write_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes));
+ return (zpl_iter_write_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes,
+ UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */
diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
index 70b5e1239..e81a3cd04 100644
--- a/module/zfs/zpl_inode.c
+++ b/module/zfs/zpl_inode.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/
@@ -371,6 +372,7 @@ zpl_follow_link(struct dentry *dentry, void **symlink_cookie)
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
+ uio.uio_skip = 0;
uio.uio_resid = (MAXPATHLEN - 1);
uio.uio_segflg = UIO_SYSSPACE;