summaryrefslogtreecommitdiffstats
path: root/module/zfs/zap.c
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2019-06-12 13:13:09 -0700
committerBrian Behlendorf <[email protected]>2019-06-12 13:13:09 -0700
commitd9b4bf0665a0b1a7af2bbb34c60bca612956022d (patch)
treec90c5694edb1dd655ecbe46caac64c2be50229d7 /module/zfs/zap.c
parentd9cd66e45f285356624d26eb92e10e2baf2738ee (diff)
fat zap should prefetch when iterating
When iterating over a ZAP object, we're almost always certain to iterate over the entire object. If there are multiple leaf blocks, we can realize a performance win by issuing reads for all the leaf blocks in parallel when the iteration begins. For example, if we have 10,000 snapshots, "zfs destroy -nv pool/fs@1%9999" can take 30 minutes when the cache is cold. This change provides a >3x performance improvement, by issuing the reads for all ~64 blocks of each ZAP object in parallel. Reviewed-by: Andreas Dilger <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Matthew Ahrens <[email protected]> External-issue: DLPX-58347 Closes #8862
Diffstat (limited to 'module/zfs/zap.c')
-rw-r--r--module/zfs/zap.c56
1 files changed, 55 insertions, 1 deletions
diff --git a/module/zfs/zap.c b/module/zfs/zap.c
index 6d8c49804..30f62ac43 100644
--- a/module/zfs/zap.c
+++ b/module/zfs/zap.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -49,6 +49,36 @@
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
+/*
+ * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object
+ * (all leaf blocks) when we start iterating over it.
+ *
+ * For zap_cursor_init(), the callers all intend to iterate through all the
+ * entries. There are a few cases where an error (typically i/o error) could
+ * cause it to bail out early.
+ *
+ * For zap_cursor_init_serialized(), there are callers that do the iteration
+ * outside of ZFS. Typically they would iterate over everything, but we
+ * don't have control of that. E.g. zfs_ioc_snapshot_list_next(),
+ * zcp_snapshots_iter(), and other iterators over things in the MOS - these
+ * are called by /sbin/zfs and channel programs. The other example is
+ * zfs_readdir() which iterates over directory entries for the getdents()
+ * syscall. /sbin/ls iterates to the end (unless it receives a signal), but
+ * userland doesn't have to.
+ *
+ * Given that the ZAP entries aren't returned in a specific order, the only
+ * legitimate use cases for partial iteration would be:
+ *
+ * 1. Pagination: e.g. you only want to display 100 entries at a time, so you
+ * get the first 100 and then wait for the user to hit "next page", which
+ * they may never do).
+ *
+ * 2. You want to know if there are more than X entries, without relying on
+ * the zfs-specific implementation of the directory's st_size (which is
+ * the number of entries).
+ */
+int zap_iterate_prefetch = B_TRUE;
+
int fzap_default_block_shift = 14; /* 16k blocksize */
extern inline zap_phys_t *zap_f_phys(zap_t *zap);
@@ -1189,6 +1219,21 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
/* retrieve the next entry at or after zc_hash/zc_cd */
/* if no entry, return ENOENT */
+ /*
+ * If we are reading from the beginning, we're almost certain to
+ * iterate over the entire ZAP object. If there are multiple leaf
+ * blocks (freeblk > 2), prefetch the whole object (up to
+ * dmu_prefetch_max bytes), so that we read the leaf blocks
+ * concurrently. (Unless noprefetch was requested via
+ * zap_cursor_init_noprefetch()).
+ */
+ if (zc->zc_hash == 0 && zap_iterate_prefetch &&
+ zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
+ dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0,
+ zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+
if (zc->zc_leaf &&
(ZAP_HASH_IDX(zc->zc_hash,
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=
@@ -1333,3 +1378,12 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
}
}
}
+
+#if defined(_KERNEL)
+/* BEGIN CSTYLED */
+module_param(zap_iterate_prefetch, int, 0644);
+MODULE_PARM_DESC(zap_iterate_prefetch,
+ "When iterating ZAP object, prefetch it");
+
+/* END CSTYLED */
+#endif