author     Alek P <[email protected]>              2019-02-12 10:41:15 -0800
committer  Brian Behlendorf <[email protected]>  2019-02-12 10:41:15 -0800
commit     dcec0a12c8e480262288ac76f6350652079ea173 (patch)
tree       1f23e1c575dc61d00484b627f61e1b5e9ba6bac0
parent     425d3237ee88abc53d8522a7139c926d278b4b7f (diff)
port async unlinked drain from illumos-nexenta
This patch is an async implementation of the existing sync
zfs_unlinked_drain() function. This function is called at mount time and
is responsible for freeing znodes that we didn't get to free before. We
don't have to hold up mounting of the dataset until the unlinked list is
fully drained, as is done now. Since we can process the unlinked set
asynchronously, this results in a better user experience when mounting a
dataset with entries in the unlinked set.

Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Tom Caputi <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Paul Dagnelie <[email protected]>
Signed-off-by: Alek Pinchuk <[email protected]>
Closes #8142
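The mount-time flow this patch introduces is: mark the filesystem as
draining, try to dispatch the drain to a dedicated taskq, and fall back
to the old synchronous path if dispatch fails. The kernel taskq API is
not usable outside the module, so the following is only a minimal
userspace sketch of that dispatch-with-fallback shape, with pthreads
standing in for the taskq; the names drain_task and the "tank/fs"
argument are illustrative stand-ins, not symbols from the patch.

/*
 * Userspace sketch (assumed names) of the dispatch-with-fallback
 * pattern used by zfs_unlinked_drain(): try async, run sync if
 * dispatch fails. pthreads stand in for the kernel taskq.
 */
#include <pthread.h>
#include <stdio.h>

/* Stand-in for zfs_unlinked_drain_task(): process the backlog. */
static void *
drain_task(void *arg)
{
	printf("draining unlinked set for %s\n", (const char *)arg);
	return (NULL);
}

int
main(void)
{
	pthread_t tid;
	int async = 1;

	if (pthread_create(&tid, NULL, drain_task, "tank/fs") != 0) {
		/* Dispatch failed: fall back to the old inline drain. */
		drain_task("tank/fs");
		async = 0;
	}
	/* A real mount would return here without waiting; the unmount
	 * path later stops and waits, cf. zfs_unlinked_drain_stop_wait(). */
	if (async)
		pthread_join(tid, NULL);
	return (0);
}

In the real patch the taskq is created per pool (z_unlinked_drain in
dsl_pool_open_impl() below), so drains from many datasets in the same
pool share one set of workers.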
-rw-r--r--  include/sys/dataset_kstats.h                                16
-rw-r--r--  include/sys/dsl_pool.h                                       2
-rw-r--r--  include/sys/zfs_dir.h                                        1
-rw-r--r--  include/sys/zfs_vfsops.h                                     3
-rw-r--r--  man/man5/zfs-module-parameters.5                            15
-rw-r--r--  module/zfs/dataset_kstats.c                                 29
-rw-r--r--  module/zfs/dsl_pool.c                                       10
-rw-r--r--  module/zfs/zfs_dir.c                                        65
-rw-r--r--  module/zfs/zfs_vfsops.c                                     36
-rw-r--r--  module/zfs/zfs_znode.c                                      11
-rw-r--r--  tests/runfiles/linux.run                                     2
-rw-r--r--  tests/zfs-tests/tests/functional/mount/Makefile.am           1
-rwxr-xr-x  tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh  119
13 files changed, 300 insertions(+), 10 deletions(-)
diff --git a/include/sys/dataset_kstats.h b/include/sys/dataset_kstats.h
index 5dd9a8e61..667d1b85f 100644
--- a/include/sys/dataset_kstats.h
+++ b/include/sys/dataset_kstats.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
*/
#ifndef _SYS_DATASET_KSTATS_H
@@ -35,6 +36,8 @@ typedef struct dataset_aggsum_stats_t {
aggsum_t das_nwritten;
aggsum_t das_reads;
aggsum_t das_nread;
+ aggsum_t das_nunlinks;
+ aggsum_t das_nunlinked;
} dataset_aggsum_stats_t;
typedef struct dataset_kstat_values {
@@ -43,6 +46,16 @@ typedef struct dataset_kstat_values {
kstat_named_t dkv_nwritten;
kstat_named_t dkv_reads;
kstat_named_t dkv_nread;
+ /*
+ * nunlinks is initialized to the unlinked set size on mount and
+ * is incremented whenever a new entry is added to the unlinked set
+ */
+ kstat_named_t dkv_nunlinks;
+ /*
+ * nunlinked is initialized to zero on mount and is incremented when an
+ * entry is removed from the unlinked set
+ */
+ kstat_named_t dkv_nunlinked;
} dataset_kstat_values_t;
typedef struct dataset_kstats {
@@ -56,4 +69,7 @@ void dataset_kstats_destroy(dataset_kstats_t *);
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
+void dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *, int64_t);
+void dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *, int64_t);
+
#endif /* _SYS_DATASET_KSTATS_H */
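Given the semantics documented above, the current backlog of the
unlinked set can be derived as (nunlinks - nunlinked). Below is a
hedged userspace sketch of a reader; it assumes the Linux procfs kstat
layout of one "name type value" line per stat, and takes the objset
kstat path as an argument, e.g. one of the
/proc/spl/kstat/zfs/<pool>/objset-0x* files the test script at the
bottom of this change greps.

/*
 * Hedged userspace sketch: derive the unlinked-set backlog as
 * (nunlinks - nunlinked) from an objset kstat file. Assumes the
 * Linux procfs layout of one "name type value" line per stat;
 * pass a path such as /proc/spl/kstat/zfs/<pool>/objset-0x<id>.
 */
#include <stdio.h>
#include <string.h>

int
main(int argc, char **argv)
{
	char name[64], line[256];
	unsigned long long nunlinks = 0, nunlinked = 0, v;
	FILE *fp;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <objset kstat file>\n", argv[0]);
		return (1);
	}
	if ((fp = fopen(argv[1], "r")) == NULL) {
		perror("fopen");
		return (1);
	}
	while (fgets(line, sizeof (line), fp) != NULL) {
		/* Each stat line is "name type value"; skip headers. */
		if (sscanf(line, "%63s %*u %llu", name, &v) != 2)
			continue;
		if (strcmp(name, "nunlinks") == 0)
			nunlinks = v;
		else if (strcmp(name, "nunlinked") == 0)
			nunlinked = v;
	}
	fclose(fp);
	printf("unlinked set backlog: %llu\n", nunlinks - nunlinked);
	return (0);
}

The test script's unlinked_size_is() computes the same difference with
grep and awk.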
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index 56317cf73..63ba3509a 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -96,6 +96,7 @@ typedef struct dsl_pool {
struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj;
struct taskq *dp_iput_taskq;
+ struct taskq *dp_unlinked_drain_taskq;
/* No lock needed - sync context only */
blkptr_t dp_meta_rootbp;
@@ -176,6 +177,7 @@ boolean_t dsl_pool_config_held(dsl_pool_t *dp);
boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
+taskq_t *dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp);
int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t now, dmu_tx_t *tx);
diff --git a/include/sys/zfs_dir.h b/include/sys/zfs_dir.h
index 9ce3accfc..bcd4ec2c1 100644
--- a/include/sys/zfs_dir.h
+++ b/include/sys/zfs_dir.h
@@ -64,6 +64,7 @@ extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
+extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs);
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index 0a4f52f2f..cad0aaece 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -117,6 +117,8 @@ struct zfsvfs {
boolean_t z_replay; /* set during ZIL replay */
boolean_t z_use_sa; /* version allow system attributes */
boolean_t z_xattr_sa; /* allow xattrs to be stored as SA */
+ boolean_t z_draining; /* is true when drain is active */
+ boolean_t z_drain_cancel; /* signal the unlinked drain to stop */
uint64_t z_version; /* ZPL version */
uint64_t z_shares_dir; /* hidden shares dir */
dataset_kstats_t z_kstat; /* fs kstats */
@@ -132,6 +134,7 @@ struct zfsvfs {
uint64_t z_hold_size; /* znode hold array size */
avl_tree_t *z_hold_trees; /* znode hold trees */
kmutex_t *z_hold_locks; /* znode hold locks */
+ taskqid_t z_drain_task; /* task id for the unlink drain task */
};
#define ZSB_XATTR 0x0001 /* Enable user xattrs */
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 7dd333f04..f6b043183 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1152,6 +1152,21 @@ Default value: 20
.sp
.ne 2
.na
+\fBzfs_unlink_suspend_progress\fR (uint)
+.ad
+.RS 12n
+When enabled, files will not be asynchronously removed from the list of pending
+unlinks and the space they consume will be leaked. Once this option has been
+disabled and the dataset is remounted, the pending unlinks will be processed
+and the freed space returned to the pool.
+This option is used by the test suite to facilitate testing.
+.sp
+Use \fB0\fR (default) to allow progress and \fB1\fR to pause progress.
+.RE
+
+.sp
+.ne 2
+.na
\fBzfs_delete_blocks\fR (ulong)
.ad
.RS 12n
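As a usage note: on Linux this tunable is exposed at
/sys/module/zfs/parameters/zfs_unlink_suspend_progress (the same path
the test script below keeps in UNLINK_SP_PARAM). A minimal sketch of
flipping it from C, equivalent to echo 1 > <param>; it requires root
and assumes the zfs module is loaded.

/*
 * Sketch: pause unlinked-set processing by writing "1" to the Linux
 * module parameter (same path the test below keeps in UNLINK_SP_PARAM).
 * Requires root; write "0" and remount to drain the accumulated set.
 */
#include <stdio.h>

#define	UNLINK_SP_PARAM \
	"/sys/module/zfs/parameters/zfs_unlink_suspend_progress"

int
main(void)
{
	FILE *fp = fopen(UNLINK_SP_PARAM, "w");

	if (fp == NULL) {
		perror("fopen " UNLINK_SP_PARAM);
		return (1);
	}
	fputs("1\n", fp);	/* 1 = pause progress, 0 = allow (default) */
	fclose(fp);
	return (0);
}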
diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c
index ac0ad84ed..522825c42 100644
--- a/module/zfs/dataset_kstats.c
+++ b/module/zfs/dataset_kstats.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
*/
#include <sys/dataset_kstats.h>
@@ -34,6 +35,8 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "nwritten", KSTAT_DATA_UINT64 },
{ "reads", KSTAT_DATA_UINT64 },
{ "nread", KSTAT_DATA_UINT64 },
+ { "nunlinks", KSTAT_DATA_UINT64 },
+ { "nunlinked", KSTAT_DATA_UINT64 },
};
static int
@@ -54,6 +57,10 @@ dataset_kstats_update(kstat_t *ksp, int rw)
aggsum_value(&dk->dk_aggsums.das_reads);
dkv->dkv_nread.value.ui64 =
aggsum_value(&dk->dk_aggsums.das_nread);
+ dkv->dkv_nunlinks.value.ui64 =
+ aggsum_value(&dk->dk_aggsums.das_nunlinks);
+ dkv->dkv_nunlinked.value.ui64 =
+ aggsum_value(&dk->dk_aggsums.das_nunlinked);
return (0);
}
@@ -136,6 +143,8 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
aggsum_init(&dk->dk_aggsums.das_reads, 0);
aggsum_init(&dk->dk_aggsums.das_nread, 0);
+ aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
+ aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
}
void
@@ -156,6 +165,8 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
aggsum_fini(&dk->dk_aggsums.das_nwritten);
aggsum_fini(&dk->dk_aggsums.das_reads);
aggsum_fini(&dk->dk_aggsums.das_nread);
+ aggsum_fini(&dk->dk_aggsums.das_nunlinks);
+ aggsum_fini(&dk->dk_aggsums.das_nunlinked);
}
void
@@ -183,3 +194,21 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk,
aggsum_add(&dk->dk_aggsums.das_reads, 1);
aggsum_add(&dk->dk_aggsums.das_nread, nread);
}
+
+void
+dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+ if (dk->dk_kstats == NULL)
+ return;
+
+ aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
+}
+
+void
+dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+ if (dk->dk_kstats == NULL)
+ return;
+
+ aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
+}
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 78e782c81..10e967ab9 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -223,6 +223,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+ dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
+ max_ncpus, defclsyspri, max_ncpus, INT_MAX,
+ TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
return (dp);
}
@@ -413,6 +416,7 @@ dsl_pool_close(dsl_pool_t *dp)
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
cv_destroy(&dp->dp_spaceavail_cv);
+ taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_iput_taskq);
if (dp->dp_blkstats != NULL) {
mutex_destroy(&dp->dp_blkstats->zab_lock);
@@ -1097,6 +1101,12 @@ dsl_pool_iput_taskq(dsl_pool_t *dp)
return (dp->dp_iput_taskq);
}
+taskq_t *
+dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp)
+{
+ return (dp->dp_unlinked_drain_taskq);
+}
+
/*
* Walk through the pool-wide zap object of temporary snapshot user holds
* and release them.
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index bd173e7c3..63ac97754 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -458,26 +458,31 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
VERIFY3U(0, ==,
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+
+ dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
}
/*
* Clean up any znodes that had no links when we either crashed or
* (force) umounted the file system.
*/
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+static void
+zfs_unlinked_drain_task(void *arg)
{
+ zfsvfs_t *zfsvfs = arg;
zap_cursor_t zc;
zap_attribute_t zap;
dmu_object_info_t doi;
znode_t *zp;
int error;
+ ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
+
/*
* Iterate over the contents of the unlinked set.
*/
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
- zap_cursor_retrieve(&zc, &zap) == 0;
+ zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
zap_cursor_advance(&zc)) {
/*
@@ -507,9 +512,61 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
continue;
zp->z_unlinked = B_TRUE;
+
+ /*
+ * iput() is Linux's equivalent to illumos' VN_RELE(). It will
+ * decrement the inode's ref count and may cause the inode to be
+ * synchronously freed. We interrupt freeing of this inode by
+ * checking the return value of dmu_objset_zfs_unmounting() in
+ * dmu_free_long_range(), when an unmount is requested.
+ */
iput(ZTOI(zp));
+ ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
}
zap_cursor_fini(&zc);
+
+ zfsvfs->z_draining = B_FALSE;
+ zfsvfs->z_drain_task = TASKQID_INVALID;
+}
+
+/*
+ * Sets z_draining, then tries to dispatch the async unlinked drain.
+ * If the dispatch fails, falls back to a synchronous drain.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+ ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+ ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
+
+ zfsvfs->z_draining = B_TRUE;
+ zfsvfs->z_drain_cancel = B_FALSE;
+
+ zfsvfs->z_drain_task = taskq_dispatch(
+ dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+ zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
+ if (zfsvfs->z_drain_task == TASKQID_INVALID) {
+ zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
+ zfs_unlinked_drain_task(zfsvfs);
+ }
+}
+
+/*
+ * Wait for the unlinked drain taskq task to stop. This will interrupt the
+ * unlinked set processing if it is in progress.
+ */
+void
+zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
+{
+ ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+
+ if (zfsvfs->z_draining) {
+ zfsvfs->z_drain_cancel = B_TRUE;
+ taskq_cancel_id(dsl_pool_unlinked_drain_taskq(
+ dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task);
+ zfsvfs->z_drain_task = TASKQID_INVALID;
+ zfsvfs->z_draining = B_FALSE;
+ }
}
/*
@@ -684,6 +741,8 @@ zfs_rmnode(znode_t *zp)
VERIFY3U(0, ==,
zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+ dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
zfs_znode_delete(zp, tx);
dmu_tx_commit(tx);
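The drain above stops cooperatively: zfs_unlinked_drain_task() checks
z_drain_cancel on every zap-cursor step, and
zfs_unlinked_drain_stop_wait() sets the flag and then cancels the task
id. Here is a minimal userspace analogue of that flag-then-wait shape,
with pthreads standing in for the taskq; all names are illustrative.

/*
 * Userspace analogue (assumed names) of the cooperative cancellation
 * above: the worker polls a cancel flag each iteration, and teardown
 * sets the flag and then waits, like zfs_unlinked_drain_stop_wait().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool drain_cancel = false;

static void *
drain_loop(void *arg)
{
	(void) arg;
	/* Mirrors the zap-cursor loop: bail out once cancel is seen. */
	for (long entry = 0; entry < 100000000L; entry++) {
		if (atomic_load(&drain_cancel))
			break;
		/* ... free one unlinked znode here ... */
	}
	return (NULL);
}

int
main(void)
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, drain_loop, NULL) != 0)
		return (1);
	usleep(1000);				/* let the drain run a bit */
	atomic_store(&drain_cancel, true);	/* request cancellation */
	pthread_join(tid, NULL);		/* wait for the worker */
	printf("drain stopped\n");
	return (0);
}

Setting the flag first makes an already-running drain exit promptly;
cancelling the task id covers a dispatch that has not started yet.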
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 766cbab74..cdc1bc707 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1178,6 +1178,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
return (error);
}
+ zfsvfs->z_drain_task = TASKQID_INVALID;
+ zfsvfs->z_draining = B_FALSE;
+ zfsvfs->z_drain_cancel = B_TRUE;
+
*zfvp = zfsvfs;
return (0);
}
@@ -1200,14 +1204,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
* operations out since we closed the ZIL.
*/
if (mounting) {
+ ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+ dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
/*
* During replay we remove the read only flag to
* allow replays to succeed.
*/
- if (readonly != 0)
+ if (readonly != 0) {
readonly_changed_cb(zfsvfs, B_FALSE);
- else
+ } else {
+ zap_stats_t zs;
+ if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+ &zs) == 0) {
+ dataset_kstats_update_nunlinks_kstat(
+ &zfsvfs->z_kstat, zs.zs_num_entries);
+ dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+ "num_entries in unlinked set: %llu",
+ zs.zs_num_entries);
+ }
zfs_unlinked_drain(zfsvfs);
+ }
/*
* Parse and replay the intent log.
@@ -1250,9 +1267,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/* restore readonly bit */
if (readonly != 0)
readonly_changed_cb(zfsvfs, B_TRUE);
-
- ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
- dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
}
/*
@@ -1633,6 +1647,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
znode_t *zp;
+ zfs_unlinked_drain_stop_wait(zfsvfs);
+
/*
* If someone has not already unmounted this file system,
* drain the iput_taskq to ensure all active references to the
@@ -1884,6 +1900,7 @@ zfs_preumount(struct super_block *sb)
/* zfsvfs is NULL when zfs_domount fails during mount */
if (zfsvfs) {
+ zfs_unlinked_drain_stop_wait(zfsvfs);
zfsctl_destroy(sb->s_fs_info);
/*
* Wait for iput_async before entering evict_inodes in
@@ -2159,6 +2176,15 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
}
mutex_exit(&zfsvfs->z_znodes_lock);
+ if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
+ /*
+ * zfs_suspend_fs() could have interrupted freeing
+ * of dnodes. We need to restart this freeing so
+ * that we don't "leak" the space.
+ */
+ zfs_unlinked_drain(zfsvfs);
+ }
+
bail:
/* release the VFS ops */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 7b13927be..761fbcb33 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -92,6 +92,12 @@ static kmem_cache_t *znode_hold_cache = NULL;
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
/*
+ * This is used by the test suite so that it can delay the freeing of
+ * znodes in order to inspect the unlinked set.
+ */
+int zfs_unlink_suspend_progress = 0;
+
+/*
* This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
* z_rangelock. It will modify the offset and length of the lock to reflect
* znode-specific information, and convert RL_APPEND to RL_WRITER. This is
@@ -1339,7 +1345,7 @@ zfs_zinactive(znode_t *zp)
*/
if (zp->z_unlinked) {
ASSERT(!zfsvfs->z_issnap);
- if (!zfs_is_readonly(zfsvfs)) {
+ if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
mutex_exit(&zp->z_lock);
zfs_znode_hold_exit(zfsvfs, zh);
zfs_rmnode(zp);
@@ -2214,4 +2220,7 @@ EXPORT_SYMBOL(zfs_obj_to_path);
/* CSTYLED */
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
+module_param(zfs_unlink_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
+"(debug - leaks space into the unlinked set)");
#endif
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 8ab5e7033..8663c24f9 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -644,7 +644,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
tags = ['functional', 'mmp']
[tests/functional/mount]
-tests = ['umount_001', 'umountall_001']
+tests = ['umount_001', 'umount_unlinked_drain', 'umountall_001']
tags = ['functional', 'mount']
[tests/functional/mv_files]
diff --git a/tests/zfs-tests/tests/functional/mount/Makefile.am b/tests/zfs-tests/tests/functional/mount/Makefile.am
index 9898e0510..bdafa69ba 100644
--- a/tests/zfs-tests/tests/functional/mount/Makefile.am
+++ b/tests/zfs-tests/tests/functional/mount/Makefile.am
@@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
umount_001.ksh \
+ umount_unlinked_drain.ksh \
umountall_001.ksh
diff --git a/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh b/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
new file mode 100755
index 000000000..0d2628079
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
@@ -0,0 +1,119 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Datto Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test the async unlinked drain to ensure mounting is not held up when there
+# are entries in the unlinked set. We also verify that the list can be filled
+# up and drained at the same time.
+#
+# STRATEGY:
+# 1. Use zfs_unlink_suspend_progress tunable to disable freeing to build up
+# the unlinked set
+# 2. Make sure mount happens even when there are entries in the unlinked set
+# 3. Drain and build up the unlinked list at the same time to test for races
+#
+
+function cleanup
+{
+ log_must set_tunable32 zfs_unlink_suspend_progress $default_unlink_sp
+ for fs in $(seq 1 3); do
+ mounted $TESTDIR.$fs || zfs mount $TESTPOOL/$TESTFS.$fs
+ rm -f $TESTDIR.$fs/file-*
+ zfs set xattr=on $TESTPOOL/$TESTFS.$fs
+ done
+}
+
+function unlinked_size_is
+{
+ MAX_ITERS=5 # iterations to do before we consider the reported number stable
+ iters=0
+ last_usize=0
+ while [[ $iters -le $MAX_ITERS ]]; do
+ kstat_file=$(grep -nrwl /proc/spl/kstat/zfs/$2/objset-0x* -e $3)
+ nunlinks=`cat $kstat_file | grep nunlinks | awk '{print $3}'`
+ nunlinked=`cat $kstat_file | grep nunlinked | awk '{print $3}'`
+ usize=$(($nunlinks - $nunlinked))
+ if [[ $iters == $MAX_ITERS && $usize == $1 ]]; then
+ return 0
+ fi
+ if [[ $usize == $last_usize ]]; then
+ (( iters++ ))
+ else
+ iters=0
+ fi
+ last_usize=$usize
+ done
+
+ log_note "Unexpected unlinked set size: $last_usize, expected $1"
+ return 1
+}
+
+
+UNLINK_SP_PARAM=/sys/module/zfs/parameters/zfs_unlink_suspend_progress
+default_unlink_sp=$(get_tunable zfs_unlink_suspend_progress)
+
+log_onexit cleanup
+
+log_assert "Unlinked list drain does not hold up mounting of fs"
+
+for fs in 1 2 3; do
+ set -A xattrs on sa off
+ for xa in ${xattrs[@]}; do
+ # set up fs and ensure all deleted files got into the unlinked set
+ log_must mounted $TESTDIR.$fs
+
+ log_must zfs set xattr=$xa $TESTPOOL/$TESTFS.$fs
+
+ if [[ $xa == off ]]; then
+ for fn in $(seq 1 175); do
+ log_must mkfile 128k $TESTDIR.$fs/file-$fn
+ done
+ else
+ log_must xattrtest -f 175 -x 3 -r -k -p $TESTDIR.$fs
+ fi
+
+ log_must set_tunable32 zfs_unlink_suspend_progress 1
+ log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+ # build up unlinked set
+ for fn in $(seq 1 100); do
+ log_must eval "rm $TESTDIR.$fs/file-$fn &"
+ done
+ log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+ # test that we can mount fs without emptying the unlinked list
+ log_must zfs umount $TESTPOOL/$TESTFS.$fs
+ log_must unmounted $TESTDIR.$fs
+ log_must zfs mount $TESTPOOL/$TESTFS.$fs
+ log_must mounted $TESTDIR.$fs
+ log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+ # confirm we can drain and add to unlinked set at the same time
+ log_must set_tunable32 zfs_unlink_suspend_progress 0
+ log_must zfs umount $TESTPOOL/$TESTFS.$fs
+ log_must zfs mount $TESTPOOL/$TESTFS.$fs
+ for fn in $(seq 101 175); do
+ log_must eval "rm $TESTDIR.$fs/file-$fn &"
+ done
+ log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+ done
+done
+
+log_pass "Confirmed unlinked list drain does not hold up mounting of fs"