-rw-r--r--  include/os/linux/zfs/sys/zfs_vnops_os.h               |   2
-rw-r--r--  module/os/linux/zfs/zfs_vnops_os.c                    | 169
-rw-r--r--  module/os/linux/zfs/zpl_file.c                        |  17
-rw-r--r--  tests/runfiles/common.run                             |   3
-rw-r--r--  tests/zfs-tests/cmd/mmap_sync.c                       |   2
-rw-r--r--  tests/zfs-tests/tests/Makefile.am                     |   1
-rwxr-xr-x  tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh  |  86
7 files changed, 175 insertions(+), 105 deletions(-)
diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h
index 197ea9bec..1caec0ef4 100644
--- a/include/os/linux/zfs/sys/zfs_vnops_os.h
+++ b/include/os/linux/zfs/sys/zfs_vnops_os.h
@@ -72,7 +72,7 @@ extern void zfs_inactive(struct inode *ip);
extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
offset_t offset, cred_t *cr);
extern int zfs_fid(struct inode *ip, fid_t *fidp);
-extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
+extern int zfs_getpage(struct inode *ip, struct page *pp);
extern int zfs_putpage(struct inode *ip, struct page *pp,
struct writeback_control *wbc, boolean_t for_sync);
extern int zfs_dirty_inode(struct inode *ip, int flags);
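
The prototype above narrows zfs_getpage() from a page array plus count to exactly one page; the zpl_readpage_common() hunk further down shows the caller side, which previously had to wrap its single page in a one-element array. A minimal user-space analogue of that narrowing, with plain buffers standing in for struct page (demo_getpage_old, demo_getpage_new, and DEMO_PAGE_SIZE are illustrative names, not ZFS APIs):

/* User-space analogue of the zfs_getpage() signature change. */
#include <string.h>

#define DEMO_PAGE_SIZE 4096

/* Old shape: fill an array of pages, even when there is only one. */
static int
demo_getpage_old(const char *src, char *pl[], int nr_pages)
{
    for (int i = 0; i < nr_pages; i++)
        memcpy(pl[i], src + (size_t)i * DEMO_PAGE_SIZE, DEMO_PAGE_SIZE);
    return (0);
}

/* New shape: fill exactly one page. */
static int
demo_getpage_new(const char *src, char *pp)
{
    memcpy(pp, src, DEMO_PAGE_SIZE);
    return (0);
}

int
main(void)
{
    static char src[DEMO_PAGE_SIZE], page[DEMO_PAGE_SIZE];
    char *pl[1] = { page };

    (void) demo_getpage_old(src, pl, 1);   /* caller wraps the page */
    (void) demo_getpage_new(src, page);    /* caller passes it directly */
    return (0);
}
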
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index 302a88c2d..9904807fa 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -220,43 +220,46 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
}
#if defined(_KERNEL)
+
+static int zfs_fillpage(struct inode *ip, struct page *pp);
+
/*
* When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages. What this means:
- *
- * On Write: If we find a memory mapped page, we write to *both*
- * the page and the dmu buffer.
+ * between the DMU cache and the memory mapped pages. Update all mapped
+ * pages with the contents of the corresponding dmu buffer.
*/
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
- struct inode *ip = ZTOI(zp);
- struct address_space *mp = ip->i_mapping;
- struct page *pp;
- uint64_t nbytes;
- int64_t off;
- void *pb;
+ struct address_space *mp = ZTOI(zp)->i_mapping;
+ int64_t off = start & (PAGE_SIZE - 1);
- off = start & (PAGE_SIZE-1);
for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
- nbytes = MIN(PAGE_SIZE - off, len);
+ uint64_t nbytes = MIN(PAGE_SIZE - off, len);
- pp = find_lock_page(mp, start >> PAGE_SHIFT);
+ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
if (pp) {
if (mapping_writably_mapped(mp))
flush_dcache_page(pp);
- pb = kmap(pp);
- (void) dmu_read(os, zp->z_id, start + off, nbytes,
- pb + off, DMU_READ_PREFETCH);
+ void *pb = kmap(pp);
+ int error = dmu_read(os, zp->z_id, start + off,
+ nbytes, pb + off, DMU_READ_PREFETCH);
kunmap(pp);
- if (mapping_writably_mapped(mp))
- flush_dcache_page(pp);
+ if (error) {
+ SetPageError(pp);
+ ClearPageUptodate(pp);
+ } else {
+ ClearPageError(pp);
+ SetPageUptodate(pp);
+
+ if (mapping_writably_mapped(mp))
+ flush_dcache_page(pp);
+
+ mark_page_accessed(pp);
+ }
- mark_page_accessed(pp);
- SetPageUptodate(pp);
- ClearPageError(pp);
unlock_page(pp);
put_page(pp);
}
@@ -267,38 +270,44 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
}
/*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages. What this means:
- *
- * On Read: We "read" preferentially from memory mapped pages,
- * else we default from the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- * the file is memory mapped.
+ * When a file is memory mapped, we must keep the I/O data synchronized
+ * between the DMU cache and the memory mapped pages. Preferentially read
+ * from memory mapped pages, otherwise fall back to reading through the dmu.
*/
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
struct inode *ip = ZTOI(zp);
struct address_space *mp = ip->i_mapping;
- struct page *pp;
- int64_t start, off;
- uint64_t bytes;
+ int64_t start = uio->uio_loffset;
+ int64_t off = start & (PAGE_SIZE - 1);
int len = nbytes;
int error = 0;
- void *pb;
- start = uio->uio_loffset;
- off = start & (PAGE_SIZE-1);
for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
- bytes = MIN(PAGE_SIZE - off, len);
+ uint64_t bytes = MIN(PAGE_SIZE - off, len);
- pp = find_lock_page(mp, start >> PAGE_SHIFT);
+ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
if (pp) {
- ASSERT(PageUptodate(pp));
+ /*
+ * If filemap_fault() retries, there exists a window
+ * where the page will be unlocked and not up to date.
+ * In this case we must try to fill the page.
+ */
+ if (unlikely(!PageUptodate(pp))) {
+ error = zfs_fillpage(ip, pp);
+ if (error) {
+ unlock_page(pp);
+ put_page(pp);
+ return (error);
+ }
+ }
+
+ ASSERT(PageUptodate(pp) || PageDirty(pp));
+
unlock_page(pp);
- pb = kmap(pp);
+ void *pb = kmap(pp);
error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
kunmap(pp);
@@ -314,9 +323,11 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
len -= bytes;
off = 0;
+
if (error)
break;
}
+
return (error);
}
#endif /* _KERNEL */
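
Both update_pages() and mappedread() above walk a byte range one page at a time with the same arithmetic: off is the intra-page offset of the first byte, start is rounded down to a page boundary, and each step handles MIN(PAGE_SIZE - off, len) bytes before resetting off to zero. A self-contained user-space sketch of that loop, also mirroring mappedread()'s preference for a mapped page over the backing store (assumes 4 KiB pages; mapped_pages, backing, demo_mappedread, FILE_PAGES, and the DEMO_* macros are illustrative, not ZFS code):

/*
 * User-space analogue of the per-page loop shared by update_pages()
 * and mappedread() above.  Plain buffers stand in for struct page and
 * the DMU; all names here are illustrative only.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  ((int64_t)1 << DEMO_PAGE_SHIFT)
#define DEMO_PAGE_MASK  (~(DEMO_PAGE_SIZE - 1))
#define MIN(a, b)       ((a) < (b) ? (a) : (b))

#define FILE_PAGES      4

/* Pages that are "mapped" have a non-NULL entry, like find_lock_page(). */
static char *mapped_pages[FILE_PAGES];
static char backing[FILE_PAGES * DEMO_PAGE_SIZE];   /* stands in for the DMU */

static void
demo_mappedread(char *dst, int64_t start, int len)
{
    int64_t off = start & (DEMO_PAGE_SIZE - 1);

    for (start &= DEMO_PAGE_MASK; len > 0; start += DEMO_PAGE_SIZE) {
        int64_t bytes = MIN(DEMO_PAGE_SIZE - off, (int64_t)len);
        char *pp = mapped_pages[start >> DEMO_PAGE_SHIFT];

        if (pp != NULL) {
            /* Preferentially read from the mapped page. */
            memcpy(dst, pp + off, bytes);
        } else {
            /* Otherwise fall back to the backing store (the DMU). */
            memcpy(dst, backing + start + off, bytes);
        }

        dst += bytes;
        len -= bytes;
        off = 0;    /* only the first page can start mid-page */
    }
}

int
main(void)
{
    static char page1[DEMO_PAGE_SIZE];
    char out[32];

    memset(backing, 'd', sizeof (backing));   /* older copy via the DMU */
    memset(page1, 'm', sizeof (page1));       /* newer copy in a mapped page */
    mapped_pages[1] = page1;

    /* Read 32 bytes straddling the page 0 -> page 1 boundary. */
    demo_mappedread(out, DEMO_PAGE_SIZE - 16, sizeof (out));
    printf("%.*s\n", (int)sizeof (out), out); /* 16 x 'd' then 16 x 'm' */
    return (0);
}

Reading 32 bytes across the first page boundary picks up 16 bytes from the backing copy and 16 bytes from the mapped page, matching the "read preferentially from mapped pages" behaviour described in the comment above.
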
@@ -3959,55 +3970,41 @@ zfs_inactive(struct inode *ip)
* Fill pages with data from the disk.
*/
static int
-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_fillpage(struct inode *ip, struct page *pp)
{
- znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
- objset_t *os;
- struct page *cur_pp;
- u_offset_t io_off, total;
- size_t io_len;
- loff_t i_size;
- unsigned page_idx;
- int err;
-
- os = zfsvfs->z_os;
- io_len = nr_pages << PAGE_SHIFT;
- i_size = i_size_read(ip);
- io_off = page_offset(pl[0]);
+ loff_t i_size = i_size_read(ip);
+ u_offset_t io_off = page_offset(pp);
+ size_t io_len = PAGE_SIZE;
if (io_off + io_len > i_size)
io_len = i_size - io_off;
- /*
- * Iterate over list of pages and read each page individually.
- */
- page_idx = 0;
- for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
- caddr_t va;
-
- cur_pp = pl[page_idx++];
- va = kmap(cur_pp);
- err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
- DMU_READ_PREFETCH);
- kunmap(cur_pp);
- if (err) {
- /* convert checksum errors into IO errors */
- if (err == ECKSUM)
- err = SET_ERROR(EIO);
- return (err);
- }
+ void *va = kmap(pp);
+ int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
+ PAGE_SIZE, va, DMU_READ_PREFETCH);
+ kunmap(pp);
+
+ if (error) {
+ /* convert checksum errors into IO errors */
+ if (error == ECKSUM)
+ error = SET_ERROR(EIO);
+
+ SetPageError(pp);
+ ClearPageUptodate(pp);
+ } else {
+ ClearPageError(pp);
+ SetPageUptodate(pp);
}
- return (0);
+ return (error);
}
/*
- * Uses zfs_fillpage to read data from the file and fill the pages.
+ * Uses zfs_fillpage to read data from the file and fill the page.
*
* IN: ip - inode of file to get data from.
- * pl - list of pages to read
- * nr_pages - number of pages to read
+ * pp - page to read
*
* RETURN: 0 on success, error code on failure.
*
@@ -4015,24 +4012,22 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
* vp - atime updated
*/
int
-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_getpage(struct inode *ip, struct page *pp)
{
- znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
- int err;
-
- if (pl == NULL)
- return (0);
-
- if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
- return (err);
+ znode_t *zp = ITOZ(ip);
+ int error;
- err = zfs_fillpage(ip, pl, nr_pages);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
- dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE);
+ error = zfs_fillpage(ip, pp);
+ if (error == 0)
+ dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
zfs_exit(zfsvfs, FTAG);
- return (err);
+
+ return (error);
}
/*
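
One detail worth noting in zfs_fillpage() above is the clamp on io_len: the final page of a file may be partial, so the read length is reduced to i_size - io_off rather than a full page. A small user-space sketch of just that clamp, assuming 4 KiB pages and a made-up 10000-byte file (fill_len and DEMO_PAGE_SIZE are illustrative names, not ZFS code):

/* User-space sketch of the tail-page clamp in zfs_fillpage() above. */
#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096

/* Returns how many bytes to read for the page starting at io_off. */
static uint64_t
fill_len(uint64_t io_off, uint64_t i_size)
{
    uint64_t io_len = DEMO_PAGE_SIZE;

    if (io_off + io_len > i_size)
        io_len = i_size - io_off;    /* partial tail page */

    return (io_len);
}

int
main(void)
{
    uint64_t i_size = 10000;         /* hypothetical file size */

    for (uint64_t off = 0; off < i_size; off += DEMO_PAGE_SIZE)
        printf("page at %5llu: read %llu bytes\n",
            (unsigned long long)off,
            (unsigned long long)fill_len(off, i_size));
    return (0);
}

For the 10000-byte example this prints full 4096-byte reads for the first two pages and 1808 bytes for the tail page.
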
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index e42b15042..0a50f80ea 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -657,29 +657,16 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
static inline int
zpl_readpage_common(struct page *pp)
{
- struct inode *ip;
- struct page *pl[1];
- int error = 0;
fstrans_cookie_t cookie;
ASSERT(PageLocked(pp));
- ip = pp->mapping->host;
- pl[0] = pp;
cookie = spl_fstrans_mark();
- error = -zfs_getpage(ip, pl, 1);
+ int error = -zfs_getpage(pp->mapping->host, pp);
spl_fstrans_unmark(cookie);
- if (error) {
- SetPageError(pp);
- ClearPageUptodate(pp);
- } else {
- ClearPageError(pp);
- SetPageUptodate(pp);
- flush_dcache_page(pp);
- }
-
unlock_page(pp);
+
return (error);
}
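
With this hunk the PageUptodate/PageError bookkeeping lives entirely in zfs_fillpage(), and zpl_readpage_common() only negates the return value and unlocks the page. A user-space analogue of that ownership split is sketched below; demo_page, demo_fillpage, demo_readpage, and the fail parameter are illustrative stand-ins, not kernel or ZFS APIs:

/*
 * User-space analogue of the flag-ownership split above: the fill
 * routine owns the uptodate/error state, the readpage wrapper only
 * propagates the result.  All names here are illustrative.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>

#define DEMO_PAGE_SIZE 4096

struct demo_page {
    int uptodate;
    int error;
    char data[DEMO_PAGE_SIZE];
};

/* Stands in for zfs_fillpage(): reads the backing copy and sets flags. */
static int
demo_fillpage(struct demo_page *pp, const char *backing, int fail)
{
    int error = fail ? EIO : 0;      /* pretend the backing read failed */

    if (error) {
        pp->error = 1;
        pp->uptodate = 0;
    } else {
        memcpy(pp->data, backing, DEMO_PAGE_SIZE);
        pp->error = 0;
        pp->uptodate = 1;
    }

    return (error);
}

/* Stands in for zpl_readpage_common(): no flag handling, just propagate. */
static int
demo_readpage(struct demo_page *pp, const char *backing, int fail)
{
    return (-demo_fillpage(pp, backing, fail));
}

int
main(void)
{
    static char backing[DEMO_PAGE_SIZE] = "hello";
    struct demo_page pg = { 0 };
    int ret;

    ret = demo_readpage(&pg, backing, 0);
    printf("ok:   ret=%d uptodate=%d error=%d\n", ret, pg.uptodate, pg.error);

    ret = demo_readpage(&pg, backing, 1);
    printf("fail: ret=%d uptodate=%d error=%d\n", ret, pg.uptodate, pg.error);
    return (0);
}

Keeping the flag updates next to the read that produces them is what lets both the readpage path and the new mappedread() retry path in this patch share a single place for marking a page up to date or in error.
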
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 7a7cf927c..ed6ec4205 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -687,7 +687,8 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
tags = ['functional', 'migration']
[tests/functional/mmap]
-tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos', 'mmap_sync_001_pos']
+tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos',
+ 'mmap_sync_001_pos', 'mmap_write_001_pos']
tags = ['functional', 'mmap']
[tests/functional/mount]
diff --git a/tests/zfs-tests/cmd/mmap_sync.c b/tests/zfs-tests/cmd/mmap_sync.c
index e4d190aef..226e71be2 100644
--- a/tests/zfs-tests/cmd/mmap_sync.c
+++ b/tests/zfs-tests/cmd/mmap_sync.c
@@ -59,7 +59,7 @@ main(int argc, char *argv[])
return (1);
}
- int run_time_mins = 5;
+ int run_time_mins = 1;
if (argc >= 2) {
run_time_mins = atoi(argv[1]);
}
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index ad2ec4670..1a039742a 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1491,6 +1491,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/migration/setup.ksh \
functional/mmap/cleanup.ksh \
functional/mmap/mmap_libaio_001_pos.ksh \
+ functional/mmap/mmap_mixed.ksh \
functional/mmap/mmap_read_001_pos.ksh \
functional/mmap/mmap_seek_001_pos.ksh \
functional/mmap/mmap_sync_001_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
new file mode 100755
index 000000000..6c8246d48
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
@@ -0,0 +1,86 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/mmap/mmap.cfg
+
+#
+# DESCRIPTION:
+# Verify mixed buffered and mmap IO.
+#
+# STRATEGY:
+# 1. Create an empty file.
+# 2. Start a background buffered read/write fio to the file.
+# 3. Start a background mmap read/write fio to the file.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ log_must rm -f "$tmp_file"
+}
+
+log_assert "Verify mixed buffered and mmap IO"
+
+log_onexit cleanup
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+tmp_file=$mntpnt/file
+bs=$((128 * 1024))
+blocks=64
+size=$((bs * blocks))
+runtime=60
+
+log_must dd if=/dev/zero of=$tmp_file bs=$bs count=$blocks
+
+# Buffered IO writes
+log_must eval "fio --filename=$tmp_file --name=buffer-write \
+ --rw=randwrite --size=$size --bs=$bs --direct=0 --numjobs=1 \
+ --ioengine=sync --fallocate=none --group_reporting --minimal \
+ --runtime=$runtime --time_based --norandommap &"
+
+# Buffered IO reads
+log_must eval "fio --filename=$tmp_file --name=buffer-read \
+ --rw=randread --size=$size --bs=$bs --direct=0 --numjobs=1 \
+ --ioengine=sync --fallocate=none --group_reporting --minimal \
+ --runtime=$runtime --time_based --norandommap &"
+
+# mmap IO writes
+log_must eval "fio --filename=$tmp_file --name=mmap-write \
+ --rw=randwrite --size=$size --bs=$bs --numjobs=1 \
+ --ioengine=mmap --fallocate=none --group_reporting --minimal \
+ --runtime=$runtime --time_based --norandommap &"
+
+# mmap IO reads
+log_must eval "fio --filename=$tmp_file --name=mmap-read \
+ --rw=randread --size=$size --bs=$bs --numjobs=1 \
+ --ioengine=mmap --fallocate=none --group_reporting --minimal \
+ --runtime=$runtime --time_based --norandommap &"
+
+log_must wait
+
+log_pass "Verfied mixed buffered and mmap IO"