summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/spl/sys/uio.h1
-rw-r--r--include/sys/uio_impl.h2
-rw-r--r--module/zcommon/zfs_uio.c31
-rw-r--r--module/zfs/zfs_vnops.c24
-rw-r--r--tests/zfs-tests/cmd/mmapwrite/mmapwrite.c140
-rwxr-xr-xtests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh8
6 files changed, 152 insertions, 54 deletions
diff --git a/include/spl/sys/uio.h b/include/spl/sys/uio.h
index 64c452b8d..fac26079d 100644
--- a/include/spl/sys/uio.h
+++ b/include/spl/sys/uio.h
@@ -53,6 +53,7 @@ typedef struct uio {
int uio_iovcnt;
offset_t uio_loffset;
uio_seg_t uio_segflg;
+ boolean_t uio_fault_disable;
uint16_t uio_fmode;
uint16_t uio_extflg;
offset_t uio_limit;
diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h
index 37e283da0..cfef0b95d 100644
--- a/include/sys/uio_impl.h
+++ b/include/sys/uio_impl.h
@@ -42,7 +42,7 @@
#include <sys/uio.h>
extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
-extern void uio_prefaultpages(ssize_t, uio_t *);
+extern int uio_prefaultpages(ssize_t, uio_t *);
extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
extern void uioskip(uio_t *, size_t);
diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c
index af9716126..a2c1b5c3a 100644
--- a/module/zcommon/zfs_uio.c
+++ b/module/zcommon/zfs_uio.c
@@ -52,6 +52,7 @@
#include <sys/sysmacros.h>
#include <sys/strings.h>
#include <linux/kmap_compat.h>
+#include <linux/uaccess.h>
/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
@@ -79,8 +80,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
- if (copy_from_user(p, iov->iov_base+skip, cnt))
- return (EFAULT);
+ if (uio->uio_fault_disable) {
+ if (!access_ok(VERIFY_READ,
+ (iov->iov_base + skip), cnt)) {
+ return (EFAULT);
+ }
+
+ pagefault_disable();
+ if (__copy_from_user_inatomic(p,
+ (iov->iov_base + skip), cnt)) {
+ pagefault_enable();
+ return (EFAULT);
+ }
+ pagefault_enable();
+ } else {
+ if (copy_from_user(p,
+ (iov->iov_base + skip), cnt))
+ return (EFAULT);
+ }
}
break;
case UIO_SYSSPACE:
@@ -158,7 +175,7 @@ EXPORT_SYMBOL(uiomove);
* error will terminate the process as this is only a best attempt to get
* the pages resident.
*/
-void
+int
uio_prefaultpages(ssize_t n, struct uio *uio)
{
const struct iovec *iov;
@@ -172,7 +189,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
switch (uio->uio_segflg) {
case UIO_SYSSPACE:
case UIO_BVEC:
- return;
+ return (0);
case UIO_USERSPACE:
case UIO_USERISPACE:
break;
@@ -196,7 +213,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
p = iov->iov_base + skip;
while (cnt) {
if (fuword8((uint8_t *)p, &tmp))
- return;
+ return (EFAULT);
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
@@ -206,8 +223,10 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
*/
p--;
if (fuword8((uint8_t *)p, &tmp))
- return;
+ return (EFAULT);
}
+
+ return (0);
}
EXPORT_SYMBOL(uio_prefaultpages);
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 36f47e77a..f4e650dee 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -650,7 +650,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
xuio = (xuio_t *)uio;
else
#endif
- uio_prefaultpages(MIN(n, max_blksz), uio);
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ ZFS_EXIT(zfsvfs);
+ return (SET_ERROR(EFAULT));
+ }
/*
* If in append mode, set the io offset pointer to eof.
@@ -808,8 +811,19 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ssize_t tx_bytes;
if (abuf == NULL) {
tx_bytes = uio->uio_resid;
+ uio->uio_fault_disable = B_TRUE;
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
+ if (error == EFAULT) {
+ dmu_tx_commit(tx);
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ break;
+ }
+ continue;
+ } else if (error != 0) {
+ dmu_tx_commit(tx);
+ break;
+ }
tx_bytes -= uio->uio_resid;
} else {
tx_bytes = nbytes;
@@ -909,8 +923,12 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ASSERT(tx_bytes == nbytes);
n -= nbytes;
- if (!xuio && n > 0)
- uio_prefaultpages(MIN(n, max_blksz), uio);
+ if (!xuio && n > 0) {
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ error = EFAULT;
+ break;
+ }
+ }
}
zfs_inode_update(zp);
diff --git a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
index 190d31af3..b9915d5d3 100644
--- a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
+++ b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
@@ -31,74 +31,132 @@
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>
+#include <errno.h>
+#include <err.h>
/*
* --------------------------------------------------------------------
- * Bug Id: 5032643
+ * Bug Issue Id: #7512
+ * The bug time sequence:
+ * 1. context #1, zfs_write assign a txg "n".
+ * 2. In the same process, context #2, mmap page fault (which means the mm_sem
+ * is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous
+ * txg "n" completed.
+ * 3. context #1 call uiomove to write, however page fault is occurred in
+ * uiomove, which means it need mm_sem, but mm_sem is hold by
+ * context #2, so it stuck and can't complete, then txg "n" will not
+ * complete.
*
- * Simply writing to a file and mmaping that file at the same time can
- * result in deadlock. Nothing perverse like writing from the file's
- * own mapping is required.
+ * So context #1 and context #2 trap into the "dead lock".
* --------------------------------------------------------------------
*/
+#define NORMAL_WRITE_TH_NUM 2
+
static void *
-mapper(void *fdp)
+normal_writer(void *filename)
{
- void *addr;
- int fd = *(int *)fdp;
+ char *file_path = filename;
+ int fd = -1;
+ ssize_t write_num = 0;
+ int page_size = getpagesize();
- if ((addr =
- mmap(0, 8192, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
- perror("mmap");
- exit(1);
+ fd = open(file_path, O_RDWR | O_CREAT, 0777);
+ if (fd == -1) {
+ err(1, "failed to open %s", file_path);
}
- for (;;) {
- if (mmap(addr, 8192, PROT_READ,
- MAP_SHARED|MAP_FIXED, fd, 0) == MAP_FAILED) {
- perror("mmap");
- exit(1);
+
+ char *buf = malloc(1);
+ while (1) {
+ write_num = write(fd, buf, 1);
+ if (write_num == 0) {
+ err(1, "write failed!");
+ break;
}
+ lseek(fd, page_size, SEEK_CUR);
+ }
+
+ if (buf) {
+ free(buf);
}
- /* NOTREACHED */
- return ((void *)1);
}
-int
-main(int argc, char **argv)
+static void *
+map_writer(void *filename)
{
- int fd;
- char buf[1024];
- pthread_t tid;
+ int fd = -1;
+ int ret = 0;
+ char *buf = NULL;
+ int page_size = getpagesize();
+ int op_errno = 0;
+ char *file_path = filename;
- memset(buf, 'a', sizeof (buf));
+ while (1) {
+ ret = access(file_path, F_OK);
+ if (ret) {
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ fd = open(file_path, O_RDWR | O_CREAT, 0777);
+ if (fd == -1) {
+ err(1, "open file failed");
+ }
- if (argc != 2) {
- (void) printf("usage: %s <file name>\n", argv[0]);
- exit(1);
- }
+ ret = ftruncate(fd, page_size);
+ if (ret == -1) {
+ err(1, "truncate file failed");
+ }
+ } else {
+ err(1, "access file failed!");
+ }
+ } else {
+ fd = open(file_path, O_RDWR, 0777);
+ if (fd == -1) {
+ err(1, "open file failed");
+ }
+ }
- if ((fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666)) == -1) {
- perror("open");
- exit(1);
+ if ((buf = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0)) == MAP_FAILED) {
+ err(1, "map file failed");
+ }
+
+ if (fd != -1)
+ close(fd);
+
+ char s[10] = {0, };
+ memcpy(buf, s, 10);
+ ret = munmap(buf, page_size);
+ if (ret != 0) {
+ err(1, "unmap file failed");
+ }
}
+}
- (void) pthread_setconcurrency(2);
- if (pthread_create(&tid, NULL, mapper, &fd) != 0) {
- perror("pthread_create");
- close(fd);
+int
+main(int argc, char **argv)
+{
+ pthread_t map_write_tid;
+ pthread_t normal_write_tid[NORMAL_WRITE_TH_NUM];
+ int i = 0;
+
+ if (argc != 3) {
+ (void) printf("usage: %s <normal write file name>"
+ "<map write file name>\n", argv[0]);
exit(1);
}
- for (;;) {
- if (write(fd, buf, sizeof (buf)) == -1) {
- perror("write");
- close(fd);
- exit(1);
+
+ for (i = 0; i < NORMAL_WRITE_TH_NUM; i++) {
+ if (pthread_create(&normal_write_tid[i], NULL, normal_writer,
+ argv[1])) {
+ err(1, "pthread_create normal_writer failed.");
}
}
- close(fd);
+ if (pthread_create(&map_write_tid, NULL, map_writer, argv[2])) {
+ err(1, "pthread_create map_writer failed.");
+ }
/* NOTREACHED */
+ pthread_join(map_write_tid, NULL);
return (0);
}
diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
index 1eda97104..24150b827 100755
--- a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
@@ -53,12 +53,14 @@ if ! is_mp; then
fi
log_must chmod 777 $TESTDIR
-mmapwrite $TESTDIR/test-write-file &
+mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file &
PID_MMAPWRITE=$!
-log_note "mmapwrite $TESTDIR/test-write-file pid: $PID_MMAPWRITE"
+log_note "mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file"\
+ "pid: $PID_MMAPWRITE"
log_must sleep 30
log_must kill -9 $PID_MMAPWRITE
-log_must ls -l $TESTDIR/test-write-file
+log_must ls -l $TESTDIR/normal_write_file
+log_must ls -l $TESTDIR/map_write_file
log_pass "write(2) a mmap(2)'ing file succeeded."