summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2015-06-18 09:21:19 -0700
committerBrian Behlendorf <[email protected]>2015-06-22 10:22:49 -0700
commit218b4e0a7608f7ef37ec72042a68c45e539a5d1c (patch)
tree749c64001d694546ef2104418ad05249c8b35912
parent4c6a70091030847ca5d6933916255194483ce91f (diff)
Add zfs_sb_prune_aliases() function
For kernels which do not implement a per-superblock shrinker, those older than Linux 3.1, the shrink_dcache_parent() function was used to attempt to reclaim dentries. This was found not to be entirely reliable and could lead to performance issues on older kernels running meta-data heavy workloads. To address this issue a zfs_sb_prune_aliases() function has been added to implement this functionality. It relies on traversing the list of znodes for a filesystem and adding them to a private list with a reference held. The private list can then be safely walked outside the z_znodes_lock to prune dentries and drop the last reference so the inode can be freed. This provides the same synchronous behavior as the per-filesystem shrinker and has the advantage of depending on only long standing interfaces. Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Tim Chase <[email protected]> Closes #3501
-rw-r--r--config/kernel-d-prune-aliases.m419
-rw-r--r--config/kernel.m41
-rw-r--r--module/zfs/zfs_vfsops.c75
3 files changed, 84 insertions, 11 deletions
diff --git a/config/kernel-d-prune-aliases.m4 b/config/kernel-d-prune-aliases.m4
new file mode 100644
index 000000000..d9c521b1d
--- /dev/null
+++ b/config/kernel-d-prune-aliases.m4
@@ -0,0 +1,19 @@
+dnl #
+dnl # 2.6.12 API change
+dnl # d_prune_aliases() helper function available.
+dnl #
+dnl # The check test-compiles a call to d_prune_aliases() on a struct inode
+dnl # pointer and passes the d_prune_aliases symbol name together with
+dnl # fs/dcache.c to ZFS_LINUX_TRY_COMPILE_SYMBOL.  When the check succeeds
+dnl # HAVE_D_PRUNE_ALIASES is defined to 1; when it fails only "no" is
+dnl # reported and nothing is defined.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES],
+ [AC_MSG_CHECKING([whether d_prune_aliases() is available])
+ ZFS_LINUX_TRY_COMPILE_SYMBOL([
+ #include <linux/dcache.h>
+ ], [
+ struct inode *ip = NULL;
+ d_prune_aliases(ip);
+ ], [d_prune_aliases], [fs/dcache.c], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
+ [d_prune_aliases() is available])
+ ], [
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index fe42e171e..51f8a2bf7 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_D_MAKE_ROOT
ZFS_AC_KERNEL_D_OBTAIN_ALIAS
+ ZFS_AC_KERNEL_D_PRUNE_ALIASES
ZFS_AC_KERNEL_D_SET_D_OP
ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 88f655a8c..ae1bc324b 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1072,6 +1072,67 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
}
EXPORT_SYMBOL(zfs_root);
+#if !defined(HAVE_SPLIT_SHRINKER_CALLBACK) && !defined(HAVE_SHRINK) && \
+    defined(HAVE_D_PRUNE_ALIASES)
+/*
+ * Linux kernels older than 3.1 do not support a per-filesystem shrinker.
+ * To accommodate this we must improvise and manually walk the list of znodes
+ * attempting to prune dentries in order to be able to drop the inodes.
+ *
+ * To avoid scanning the same znodes multiple times they are always rotated
+ * to the end of the z_all_znodes list.  New znodes are inserted at the
+ * end of the list so we're always scanning the oldest znodes first.
+ *
+ * The walk happens in two phases.  First, under z_znodes_lock, at most
+ * nr_to_scan znodes are examined and those which can be referenced are
+ * stored in a private array (capped at PAGE_SIZE * 8 bytes of pointers).
+ * Second, with the lock dropped, the dentry aliases of each collected
+ * inode are pruned and the reference taken in the first phase is released.
+ *
+ * zsb        - filesystem whose z_all_znodes list is scanned
+ * nr_to_scan - upper bound on the number of znodes examined by this call
+ *
+ * Returns the number of collected inodes whose reference count was exactly
+ * one after pruning, i.e. the reference held here was the only one left.
+ */
+static int
+zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
+{
+	znode_t **zp_array, *zp;
+	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
+	int objects = 0;
+	int i = 0, j = 0;
+
+	/* Nothing was requested; avoid a zero-length allocation. */
+	if (nr_to_scan == 0)
+		return (0);
+
+	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+
+	mutex_enter(&zsb->z_znodes_lock);
+	while ((zp = list_head(&zsb->z_all_znodes)) != NULL) {
+
+		/*
+		 * Stop once nr_to_scan znodes have been examined or the
+		 * private array is full.  The test is >= so that exactly
+		 * nr_to_scan znodes are examined, not nr_to_scan + 1.
+		 */
+		if ((i++ >= nr_to_scan) || (j >= max_array))
+			break;
+
+		/* Rotate the znode to the tail so it is not rescanned. */
+		ASSERT(list_link_active(&zp->z_link_node));
+		list_remove(&zsb->z_all_znodes, zp);
+		list_insert_tail(&zsb->z_all_znodes, zp);
+
+		/* Skip active znodes and .zfs entries */
+		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
+			continue;
+
+		/* Hold a reference so the inode outlives the locked walk. */
+		if (igrab(ZTOI(zp)) == NULL)
+			continue;
+
+		zp_array[j] = zp;
+		j++;
+	}
+	mutex_exit(&zsb->z_znodes_lock);
+
+	/* Prune outside of z_znodes_lock using the private array. */
+	for (i = 0; i < j; i++) {
+		zp = zp_array[i];
+
+		ASSERT3P(zp, !=, NULL);
+		d_prune_aliases(ZTOI(zp));
+
+		/* Only our reference remains; count it as reclaimable. */
+		if (atomic_read(&ZTOI(zp)->i_count) == 1)
+			objects++;
+
+		iput(ZTOI(zp));
+	}
+
+	kmem_free(zp_array, max_array * sizeof (znode_t *));
+
+	return (objects);
+}
+#endif /* !HAVE_SPLIT_SHRINKER_CALLBACK && !HAVE_SHRINK && HAVE_D_PRUNE_ALIASES */
+
/*
* The ARC has requested that the filesystem drop entries from the dentry
* and inode caches. This can occur when the ARC needs to free meta data
@@ -1106,18 +1167,10 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
*objects = (*shrinker->scan_objects)(shrinker, &sc);
#elif defined(HAVE_SHRINK)
*objects = (*shrinker->shrink)(shrinker, &sc);
+#elif defined(HAVE_D_PRUNE_ALIASES)
+ *objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
#else
- /*
- * Linux kernels older than 3.1 do not support a per-filesystem
- * shrinker. Therefore, we must fall back to the only available
- * interface which is to discard all unused dentries and inodes.
- * This behavior clearly isn't ideal but it's required so the ARC
- * may free memory. The performance impact is mitigated by the
- * fact that the frequently accessed dentry and inode buffers will
- * still be in the ARC making them relatively cheap to recreate.
- */
- *objects = 0;
- shrink_dcache_parent(sb->s_root);
+#error "No available dentry and inode cache pruning mechanism."
#endif
ZFS_EXIT(zsb);