diff options
author | Rob Norris <[email protected]> | 2023-12-11 16:05:54 +1100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2024-03-28 13:29:46 -0700 |
commit | 52a2af6fd16470dd4ba8f7a1def459b43081ae5b (patch) | |
tree | 0297e82d9575101d462bee46fbf92984c4d034e2 /module/os/linux/zfs | |
parent | 220bb7341eb4b41f017cec8f492e0daf18660d4e (diff) |
abd: add page iterator
The regular ABD iterators yield data buffers, so they have to map and
unmap pages into kernel memory. If the caller only wants to count
chunks, or can use page pointers directly, then the map/unmap is just
unnecessary overhead.
This adds adb_iterate_page_func, which yields unmapped struct page
instead.
Reviewed-by: Alexander Motin <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Rob Norris <[email protected]>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit 390b448726c580999dd337be7a40b0e95cf1d50b)
Diffstat (limited to 'module/os/linux/zfs')
-rw-r--r-- | module/os/linux/zfs/abd_os.c | 104 |
1 files changed, 96 insertions, 8 deletions
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 24390fbbf..dae128012 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2014 by Chunwei Chen. All rights reserved. * Copyright (c) 2019 by Delphix. All rights reserved. + * Copyright (c) 2023, 2024, Klara Inc. */ /* @@ -59,6 +60,7 @@ #include <sys/zfs_znode.h> #ifdef _KERNEL #include <linux/kmap_compat.h> +#include <linux/mm_compat.h> #include <linux/scatterlist.h> #endif @@ -895,14 +897,9 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd) { ASSERT(!abd_is_gang(abd)); abd_verify(abd); + memset(aiter, 0, sizeof (struct abd_iter)); aiter->iter_abd = abd; - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; - aiter->iter_pos = 0; - if (abd_is_linear(abd)) { - aiter->iter_offset = 0; - aiter->iter_sg = NULL; - } else { + if (!abd_is_linear(abd)) { aiter->iter_offset = ABD_SCATTER(abd).abd_offset; aiter->iter_sg = ABD_SCATTER(abd).abd_sgl; } @@ -915,6 +912,7 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd) boolean_t abd_iter_at_end(struct abd_iter *aiter) { + ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size); return (aiter->iter_pos == aiter->iter_abd->abd_size); } @@ -926,8 +924,15 @@ abd_iter_at_end(struct abd_iter *aiter) void abd_iter_advance(struct abd_iter *aiter, size_t amount) { + /* + * Ensure that last chunk is not in use. abd_iterate_*() must clear + * this state (directly or abd_iter_unmap()) before advancing. + */ ASSERT3P(aiter->iter_mapaddr, ==, NULL); ASSERT0(aiter->iter_mapsize); + ASSERT3P(aiter->iter_page, ==, NULL); + ASSERT0(aiter->iter_page_doff); + ASSERT0(aiter->iter_page_dsize); /* There's nothing left to advance to, so do nothing */ if (abd_iter_at_end(aiter)) @@ -1010,6 +1015,88 @@ abd_cache_reap_now(void) #if defined(_KERNEL) /* + * Yield the next page struct and data offset and size within it, without + * mapping it into the address space. + */ +void +abd_iter_page(struct abd_iter *aiter) +{ + if (abd_iter_at_end(aiter)) { + aiter->iter_page = NULL; + aiter->iter_page_doff = 0; + aiter->iter_page_dsize = 0; + return; + } + + struct page *page; + size_t doff, dsize; + + if (abd_is_linear(aiter->iter_abd)) { + ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset); + + /* memory address at iter_pos */ + void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos; + + /* struct page for address */ + page = is_vmalloc_addr(paddr) ? + vmalloc_to_page(paddr) : virt_to_page(paddr); + + /* offset of address within the page */ + doff = offset_in_page(paddr); + + /* total data remaining in abd from this position */ + dsize = aiter->iter_abd->abd_size - aiter->iter_offset; + } else { + ASSERT(!abd_is_gang(aiter->iter_abd)); + + /* current scatter page */ + page = sg_page(aiter->iter_sg); + + /* position within page */ + doff = aiter->iter_offset; + + /* remaining data in scatterlist */ + dsize = MIN(aiter->iter_sg->length - aiter->iter_offset, + aiter->iter_abd->abd_size - aiter->iter_pos); + } + ASSERT(page); + + if (PageTail(page)) { + /* + * This page is part of a "compound page", which is a group of + * pages that can be referenced from a single struct page *. + * Its organised as a "head" page, followed by a series of + * "tail" pages. + * + * In OpenZFS, compound pages are allocated using the + * __GFP_COMP flag, which we get from scatter ABDs and SPL + * vmalloc slabs (ie >16K allocations). So a great many of the + * IO buffers we get are going to be of this type. + * + * The tail pages are just regular PAGE_SIZE pages, and can be + * safely used as-is. However, the head page has length + * covering itself and all the tail pages. If this ABD chunk + * spans multiple pages, then we can use the head page and a + * >PAGE_SIZE length, which is far more efficient. + * + * To do this, we need to adjust the offset to be counted from + * the head page. struct page for compound pages are stored + * contiguously, so we can just adjust by a simple offset. + */ + struct page *head = compound_head(page); + doff += ((page - head) * PAGESIZE); + page = head; + } + + /* final page and position within it */ + aiter->iter_page = page; + aiter->iter_page_doff = doff; + + /* amount of data in the chunk, up to the end of the page */ + aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff); +} + +/* * bio_nr_pages for ABD. * @off is the offset in @abd */ @@ -1163,4 +1250,5 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size, module_param(zfs_abd_scatter_max_order, uint, 0644); MODULE_PARM_DESC(zfs_abd_scatter_max_order, "Maximum order allocation used for a scatter ABD."); -#endif + +#endif /* _KERNEL */ |