aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2011-08-04 16:25:43 -0700
committerBrian Behlendorf <[email protected]>2011-08-08 13:24:52 -0700
commitf3ab88d6461dec46dea240763843f66300facfab (patch)
treeff5e53dfd289253d96b38347158047b1e26a3f51
parent76659dc110ef2ada13bcb8e4e2ec60d8216c6836 (diff)
Correctly lock pages for .readpages()
Unlike the .readpage() callback, which is passed a single locked page to be populated, the .readpages() callback is passed a list of unlocked pages which are all marked for read-ahead (PG_readahead set). It is the responsibility of .readpages() to ensure the pages are properly locked before being populated. Prior to this change the requested read-ahead pages would be updated outside of the page lock, which is unsafe. The unlocked pages would then be unlocked again, which is harmless but should have been immediately detected as a bug. Unfortunately, newer kernels failed to detect this issue because the check is done with a VM_BUG_ON, which is disabled by default. Luckily, the old Debian Lenny 2.6.26 kernel caught this because it simply uses a BUG_ON. The straightforward fix for this is to update the .readpages() callback to use the read_cache_pages() helper function. The helper function will ensure that each page in the list is properly locked before it is passed to the .readpage() callback. In addition to resolving the bug, this results in a nice simplification of the existing code. The downside to this change is that instead of passing one large read request to the DMU, multiple smaller ones are submitted. All of these requests, however, are marked for read-ahead, so the lower layers should issue a large I/O regardless. Thus most of the requests should hit the ARC cache. Further optimization of this code can be done in the future if a performance analysis determines it to be worthwhile. But for the moment, it is preferable that the code be correct and understandable. Signed-off-by: Brian Behlendorf <[email protected]> Closes #355
-rw-r--r--module/zfs/zpl_file.c67
1 files changed, 13 insertions, 54 deletions
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 7eaf65c6e..53921d1a7 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -260,60 +260,6 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
return (error);
}
-static struct page **
-pages_vector_from_list(struct list_head *pages, unsigned nr_pages)
-{
- struct page **pl;
- struct page *t;
- unsigned page_idx;
-
- pl = kmalloc(sizeof(*pl) * nr_pages, GFP_NOFS);
- if (!pl)
- return ERR_PTR(-ENOMEM);
-
- page_idx = 0;
- list_for_each_entry_reverse(t, pages, lru) {
- pl[page_idx] = t;
- page_idx++;
- }
-
- return pl;
-}
-
-static int
-zpl_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
-{
- struct inode *ip;
- struct page **pl;
- struct page *p, *n;
- int error;
-
- ip = mapping->host;
-
- pl = pages_vector_from_list(pages, nr_pages);
- if (IS_ERR(pl))
- return PTR_ERR(pl);
-
- error = -zfs_getpage(ip, pl, nr_pages);
- if (error)
- goto error;
-
- list_for_each_entry_safe_reverse(p, n, pages, lru) {
-
- list_del(&p->lru);
-
- flush_dcache_page(p);
- SetPageUptodate(p);
- unlock_page(p);
- page_cache_release(p);
- }
-
-error:
- kfree(pl);
- return error;
-}
-
/*
* Populate a page with data for the Linux page cache. This function is
* only used to support mmap(2). There will be an identical copy of the
@@ -349,6 +295,19 @@ zpl_readpage(struct file *filp, struct page *pp)
return error;
}
+/*
+ * Populate a set of pages with data for the Linux page cache. This
+ * function will only be called for read ahead and never for demand
+ * paging. For simplicity, the code relies on read_cache_pages() to
+ * correctly lock each page for IO and call zpl_readpage().
+ */
+static int
+zpl_readpages(struct file *filp, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ return (read_cache_pages(mapping, pages, zpl_readpage, filp));
+}
+
int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{