summaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorilbsmart <[email protected]>2018-10-17 02:11:24 +0800
committerMatthew Ahrens <[email protected]>2018-10-16 11:11:24 -0700
commit779a6c0bf6df76e0dd92c1ccf81f48512b835bb0 (patch)
treeafd6c82c91daeb96b9d0a9b6d48e757f4d0e5c81 /module
parentb2030e5d51ec23fced9f54536d97ac09db4e0552 (diff)
deadlock between mm_sem and tx assign in zfs_write() and page fault
The time sequence of the bug: 1. Thread #1, in `zfs_write`, assigns a txg "n". 2. In the same process, thread #2 takes an mmap page fault (which means `mm_sem` is held); `zfs_dirty_inode` fails to open a txg and waits for the previous txg "n" to complete. 3. Thread #1 calls `uiomove` to write, but a page fault occurs in `uiomove`, which means it needs `mm_sem`; however, `mm_sem` is held by thread #2, so thread #1 is stuck and cannot complete, and therefore txg "n" will not complete. As a result, thread #1 and thread #2 are deadlocked. Reviewed-by: Chunwei Chen <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Matthew Ahrens <[email protected]> Signed-off-by: Grady Wong <[email protected]> Closes #7939
Diffstat (limited to 'module')
-rw-r--r--module/zcommon/zfs_uio.c31
-rw-r--r--module/zfs/zfs_vnops.c24
2 files changed, 46 insertions, 9 deletions
diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c
index af9716126..a2c1b5c3a 100644
--- a/module/zcommon/zfs_uio.c
+++ b/module/zcommon/zfs_uio.c
@@ -52,6 +52,7 @@
#include <sys/sysmacros.h>
#include <sys/strings.h>
#include <linux/kmap_compat.h>
+#include <linux/uaccess.h>
/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
@@ -79,8 +80,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
- if (copy_from_user(p, iov->iov_base+skip, cnt))
- return (EFAULT);
+ if (uio->uio_fault_disable) {
+ if (!access_ok(VERIFY_READ,
+ (iov->iov_base + skip), cnt)) {
+ return (EFAULT);
+ }
+
+ pagefault_disable();
+ if (__copy_from_user_inatomic(p,
+ (iov->iov_base + skip), cnt)) {
+ pagefault_enable();
+ return (EFAULT);
+ }
+ pagefault_enable();
+ } else {
+ if (copy_from_user(p,
+ (iov->iov_base + skip), cnt))
+ return (EFAULT);
+ }
}
break;
case UIO_SYSSPACE:
@@ -158,7 +175,7 @@ EXPORT_SYMBOL(uiomove);
* error will terminate the process as this is only a best attempt to get
* the pages resident.
*/
-void
+int
uio_prefaultpages(ssize_t n, struct uio *uio)
{
const struct iovec *iov;
@@ -172,7 +189,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
switch (uio->uio_segflg) {
case UIO_SYSSPACE:
case UIO_BVEC:
- return;
+ return (0);
case UIO_USERSPACE:
case UIO_USERISPACE:
break;
@@ -196,7 +213,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
p = iov->iov_base + skip;
while (cnt) {
if (fuword8((uint8_t *)p, &tmp))
- return;
+ return (EFAULT);
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
@@ -206,8 +223,10 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
*/
p--;
if (fuword8((uint8_t *)p, &tmp))
- return;
+ return (EFAULT);
}
+
+ return (0);
}
EXPORT_SYMBOL(uio_prefaultpages);
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 36f47e77a..f4e650dee 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -650,7 +650,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
xuio = (xuio_t *)uio;
else
#endif
- uio_prefaultpages(MIN(n, max_blksz), uio);
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ ZFS_EXIT(zfsvfs);
+ return (SET_ERROR(EFAULT));
+ }
/*
* If in append mode, set the io offset pointer to eof.
@@ -808,8 +811,19 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ssize_t tx_bytes;
if (abuf == NULL) {
tx_bytes = uio->uio_resid;
+ uio->uio_fault_disable = B_TRUE;
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
+ if (error == EFAULT) {
+ dmu_tx_commit(tx);
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ break;
+ }
+ continue;
+ } else if (error != 0) {
+ dmu_tx_commit(tx);
+ break;
+ }
tx_bytes -= uio->uio_resid;
} else {
tx_bytes = nbytes;
@@ -909,8 +923,12 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ASSERT(tx_bytes == nbytes);
n -= nbytes;
- if (!xuio && n > 0)
- uio_prefaultpages(MIN(n, max_blksz), uio);
+ if (!xuio && n > 0) {
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ error = EFAULT;
+ break;
+ }
+ }
}
zfs_inode_update(zp);