-rw-r--r--  include/sys/zfs_vfsops.h          |  1
-rw-r--r--  include/sys/zpl.h                 |  2
-rw-r--r--  man/man5/zfs-module-parameters.5  | 15
-rw-r--r--  module/zfs/arc.c                  |  6
-rw-r--r--  module/zfs/zfs_vfsops.c           | 37
-rw-r--r--  module/zfs/zpl_super.c            | 41
6 files changed, 59 insertions(+), 43 deletions(-)
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index eeeffbe4c..4b88260de 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -73,6 +73,7 @@ typedef struct zfs_sb {
uint64_t z_nr_znodes; /* number of znodes in the fs */
unsigned long z_rollback_time; /* last online rollback time */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
+ arc_prune_t *z_arc_prune; /* called by ARC to prune caches */
struct inode *z_ctldir; /* .zfs directory inode */
avl_tree_t z_ctldir_snaps; /* .zfs/snapshot entries */
kmutex_t z_ctldir_lock; /* .zfs ctldir lock */
diff --git a/include/sys/zpl.h b/include/sys/zpl.h
index 3fc5d979f..c7701aae5 100644
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@@ -63,7 +63,7 @@ extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;
/* zpl_super.c */
-extern void zpl_prune_sbs(int64_t bytes_to_scan, void *private);
+extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
typedef struct zpl_mount_data {
const char *z_osname; /* Dataset name */
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 321b6285c..4b3dc3666 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -386,7 +386,11 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable.
\fBzfs_arc_meta_limit\fR (ulong)
.ad
.RS 12n
-Meta limit for arc size
+The maximum size in bytes that meta data buffers are allowed to
+consume in the ARC. When this limit is reached, meta data buffers
+will be reclaimed even if the overall arc_c_max has not been reached.
+This value defaults to 0, which indicates that 3/4 of the ARC may be
+used for meta data.
.sp
Default value: \fB0\fR.
.RE
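A minimal userspace sketch (illustrative only, not part of this diff) of how a zfs_arc_meta_limit of 0 can translate into the "3/4 of the ARC" default described above; the arc_c_max figure and the exact expression are assumptions for demonstration, not the arc.c implementation.

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t arc_c_max = 4ULL << 30;     /* assume a 4 GiB ARC cap */
        uint64_t zfs_arc_meta_limit = 0;     /* tunable; 0 selects the default */

        /* A value of 0 is taken to mean 3/4 of the ARC for meta data. */
        uint64_t arc_meta_limit = zfs_arc_meta_limit ?
            zfs_arc_meta_limit : (3 * arc_c_max) / 4;

        printf("arc_meta_limit = %llu bytes\n",
            (unsigned long long)arc_meta_limit);
        return (0);
    }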
@@ -397,9 +401,14 @@ Default value: \fB0\fR.
\fBzfs_arc_meta_prune\fR (int)
.ad
.RS 12n
-Bytes of meta data to prune
+The number of dentries and inodes to be scanned looking for entries
+which can be dropped. This may be required when the ARC reaches the
+\fBzfs_arc_meta_limit\fR because dentries and inodes can pin buffers
+in the ARC. Increasing this value will cause the dentry and inode caches
+to be pruned more aggressively. Setting this value to 0 will disable
+pruning the inode and dentry caches.
.sp
-Default value: \fB1,048,576\fR.
+Default value: \fB10,000\fR.
.RE
.sp
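A simplified userspace model (illustrative only, not the actual arc.c code) of the prune flow this patch establishes: each mounted filesystem registers a callback, and when the ARC hits its meta data limit every callback is handed zfs_arc_meta_prune as the number of objects to scan. The registry helpers and names below are hypothetical stand-ins.

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_PRUNE_CALLBACKS 8

    typedef void (arc_prune_func_t)(int64_t nr_to_scan, void *arg);

    /* Toy registry standing in for the kernel module's arc_prune list. */
    static struct {
        arc_prune_func_t *func;
        void *arg;
    } prune_list[MAX_PRUNE_CALLBACKS];
    static int prune_count;

    static int zfs_arc_meta_prune = 10000;    /* module parameter default */

    static void
    register_prune_callback(arc_prune_func_t *func, void *arg)
    {
        prune_list[prune_count].func = func;
        prune_list[prune_count].arg = arg;
        prune_count++;
    }

    /* Invoked when arc_meta_used exceeds arc_meta_limit. */
    static void
    prune_all_filesystems(void)
    {
        for (int i = 0; i < prune_count; i++)
            prune_list[i].func(zfs_arc_meta_prune, prune_list[i].arg);
    }

    static void
    fake_zpl_prune_sb(int64_t nr_to_scan, void *arg)
    {
        printf("prune %s: scan up to %lld objects\n",
            (const char *)arg, (long long)nr_to_scan);
    }

    int
    main(void)
    {
        register_prune_callback(fake_zpl_prune_sb, "tank/fs1");
        register_prune_callback(fake_zpl_prune_sb, "tank/fs2");
        prune_all_filesystems();
        return (0);
    }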
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 188086767..f9f0008c0 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -158,8 +158,8 @@ static kmutex_t arc_reclaim_thr_lock;
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
static uint8_t arc_thread_exit;
-/* number of bytes to prune from caches when at arc_meta_limit is reached */
-int zfs_arc_meta_prune = 1048576;
+/* number of objects to prune from caches when arc_meta_limit is reached */
+int zfs_arc_meta_prune = 10000;
typedef enum arc_reclaim_strategy {
ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */
@@ -5607,7 +5607,7 @@ module_param(zfs_arc_meta_limit, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
module_param(zfs_arc_meta_prune, int, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune");
+MODULE_PARM_DESC(zfs_arc_meta_prune, "Meta objects to scan for prune");
module_param(zfs_arc_grow_retry, int, 0644);
MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size");
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 4df324a68..e98f4bf6a 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1068,29 +1068,52 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
}
EXPORT_SYMBOL(zfs_root);
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+/*
+ * The ARC has requested that the filesystem drop entries from the dentry
+ * and inode caches. This can occur when the ARC needs to free meta data
+ * blocks but can't because they are all pinned by entries in these caches.
+ */
int
zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
zfs_sb_t *zsb = sb->s_fs_info;
+ int error = 0;
+#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
struct shrinker *shrinker = &sb->s_shrink;
struct shrink_control sc = {
.nr_to_scan = nr_to_scan,
.gfp_mask = GFP_KERNEL,
};
+#endif
ZFS_ENTER(zsb);
-#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
+
+#if defined(HAVE_SPLIT_SHRINKER_CALLBACK)
*objects = (*shrinker->scan_objects)(shrinker, &sc);
-#else
+#elif defined(HAVE_SHRINK)
*objects = (*shrinker->shrink)(shrinker, &sc);
+#else
+ /*
+ * Linux kernels older than 3.1 do not support a per-filesystem
+ * shrinker. Therefore, we must fall back to the only available
+ * interface which is to discard all unused dentries and inodes.
+ * This behavior clearly isn't ideal but it's required so the ARC
+ * may free memory. The performance impact is mitigated by the
+ * fact that the frequently accessed dentry and inode buffers will
+ * still be in the ARC making them relatively cheap to recreate.
+ */
+ *objects = 0;
+ shrink_dcache_parent(sb->s_root);
#endif
ZFS_EXIT(zsb);
- return (0);
+ dprintf_ds(zsb->z_os->os_dsl_dataset,
+ "pruning, nr_to_scan=%lu objects=%d error=%d\n",
+ nr_to_scan, *objects, error);
+
+ return (error);
}
EXPORT_SYMBOL(zfs_sb_prune);
-#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */
/*
* Teardown the zfs_sb_t.
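A userspace model (illustrative only; the struct and helpers below are stand-ins, not kernel APIs) of the contract zfs_sb_prune() fulfills: with a per-superblock shrinker it scans up to nr_to_scan objects and reports how many were handled, while on pre-3.1 kernels it can only drop all unused dentries and report 0.

    #include <stdio.h>

    /* Stand-in for the kernel's struct shrink_control. */
    struct fake_shrink_control {
        unsigned long nr_to_scan;
    };

    /* Pretend per-superblock shrinker: frees up to nr_to_scan cached objects. */
    static long
    fake_scan_objects(struct fake_shrink_control *sc)
    {
        unsigned long unused = 25000;   /* made-up count of unused dentries/inodes */
        return (sc->nr_to_scan < unused ? sc->nr_to_scan : unused);
    }

    static int
    fake_sb_prune(unsigned long nr_to_scan, int have_sb_shrinker, int *objects)
    {
        if (have_sb_shrinker) {
            struct fake_shrink_control sc = { .nr_to_scan = nr_to_scan };
            *objects = (int)fake_scan_objects(&sc);
        } else {
            /* Pre-3.1 fallback: all unused entries dropped, count unknown. */
            *objects = 0;
        }
        return (0);
    }

    int
    main(void)
    {
        int objects;
        fake_sb_prune(10000, 1, &objects);
        printf("per-sb shrinker path handled %d objects\n", objects);
        fake_sb_prune(10000, 0, &objects);
        printf("fallback path reports %d objects\n", objects);
        return (0);
    }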
@@ -1286,6 +1309,8 @@ zfs_domount(struct super_block *sb, void *data, int silent)
if (!zsb->z_issnap)
zfsctl_create(zsb);
+
+ zsb->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
out:
if (error) {
dmu_objset_disown(zsb->z_os, zsb);
@@ -1324,6 +1349,7 @@ zfs_umount(struct super_block *sb)
zfs_sb_t *zsb = sb->s_fs_info;
objset_t *os;
+ arc_remove_prune_callback(zsb->z_arc_prune);
VERIFY(zfs_sb_teardown(zsb, B_TRUE) == 0);
os = zsb->z_os;
bdi_destroy(sb->s_bdi);
@@ -1682,7 +1708,6 @@ zfs_init(void)
zfs_znode_init();
dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
register_filesystem(&zpl_fs_type);
- (void) arc_add_prune_callback(zpl_prune_sbs, NULL);
}
void
diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c
index 47cc2fcf4..ef0f9d311 100644
--- a/module/zfs/zpl_super.c
+++ b/module/zfs/zpl_super.c
@@ -110,6 +110,12 @@ zpl_evict_inode(struct inode *ip)
#else
static void
+zpl_drop_inode(struct inode *ip)
+{
+ generic_delete_inode(ip);
+}
+
+static void
zpl_clear_inode(struct inode *ip)
{
fstrans_cookie_t cookie;
@@ -125,7 +131,6 @@ zpl_inode_delete(struct inode *ip)
truncate_setsize(ip, 0);
clear_inode(ip);
}
-
#endif /* HAVE_EVICT_INODE */
static void
@@ -276,37 +281,13 @@ zpl_kill_sb(struct super_block *sb)
#endif /* HAVE_S_INSTANCES_LIST_HEAD */
}
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-/*
- * Linux 3.1 - 3.x API
- *
- * The Linux 3.1 API introduced per-sb cache shrinkers to replace the
- * global ones. This allows us a mechanism to cleanly target a specific
- * zfs file system when the dnode and inode caches grow too large.
- *
- * In addition, the 3.0 kernel added the iterate_supers_type() helper
- * function which is used to safely walk all of the zfs file systems.
- */
-static void
-zpl_prune_sb(struct super_block *sb, void *arg)
-{
- int objects = 0;
- int error;
-
- error = -zfs_sb_prune(sb, *(unsigned long *)arg, &objects);
- ASSERT3S(error, <=, 0);
-}
-#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */
-
void
-zpl_prune_sbs(int64_t bytes_to_scan, void *private)
+zpl_prune_sb(int64_t nr_to_scan, void *arg)
{
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
- unsigned long nr_to_scan = (bytes_to_scan / sizeof (znode_t));
+ struct super_block *sb = (struct super_block *)arg;
+ int objects = 0;
- iterate_supers_type(&zpl_fs_type, zpl_prune_sb, &nr_to_scan);
- kmem_reap();
-#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */
+ (void) -zfs_sb_prune(sb, nr_to_scan, &objects);
}
#ifdef HAVE_NR_CACHED_OBJECTS
@@ -343,10 +324,10 @@ const struct super_operations zpl_super_operations = {
.destroy_inode = zpl_inode_destroy,
.dirty_inode = zpl_dirty_inode,
.write_inode = NULL,
- .drop_inode = NULL,
#ifdef HAVE_EVICT_INODE
.evict_inode = zpl_evict_inode,
#else
+ .drop_inode = zpl_drop_inode,
.clear_inode = zpl_clear_inode,
.delete_inode = zpl_inode_delete,
#endif /* HAVE_EVICT_INODE */