about | summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/dataset_kstats.c | 29
-rw-r--r-- module/zfs/dsl_pool.c | 10
-rw-r--r-- module/zfs/zfs_dir.c | 65
-rw-r--r-- module/zfs/zfs_vfsops.c | 36
-rw-r--r-- module/zfs/zfs_znode.c | 11
5 files changed, 142 insertions, 9 deletions
diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c
index ac0ad84ed..522825c42 100644
--- a/module/zfs/dataset_kstats.c
+++ b/module/zfs/dataset_kstats.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
*/
#include <sys/dataset_kstats.h>
@@ -34,6 +35,8 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "nwritten", KSTAT_DATA_UINT64 },
{ "reads", KSTAT_DATA_UINT64 },
{ "nread", KSTAT_DATA_UINT64 },
+ { "nunlinks", KSTAT_DATA_UINT64 },
+ { "nunlinked", KSTAT_DATA_UINT64 },
};
static int
@@ -54,6 +57,10 @@ dataset_kstats_update(kstat_t *ksp, int rw)
aggsum_value(&dk->dk_aggsums.das_reads);
dkv->dkv_nread.value.ui64 =
aggsum_value(&dk->dk_aggsums.das_nread);
+ dkv->dkv_nunlinks.value.ui64 =
+ aggsum_value(&dk->dk_aggsums.das_nunlinks);
+ dkv->dkv_nunlinked.value.ui64 =
+ aggsum_value(&dk->dk_aggsums.das_nunlinked);
return (0);
}
@@ -136,6 +143,8 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
aggsum_init(&dk->dk_aggsums.das_reads, 0);
aggsum_init(&dk->dk_aggsums.das_nread, 0);
+ aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
+ aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
}
void
@@ -156,6 +165,8 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
aggsum_fini(&dk->dk_aggsums.das_nwritten);
aggsum_fini(&dk->dk_aggsums.das_reads);
aggsum_fini(&dk->dk_aggsums.das_nread);
+ aggsum_fini(&dk->dk_aggsums.das_nunlinks);
+ aggsum_fini(&dk->dk_aggsums.das_nunlinked);
}
void
@@ -183,3 +194,21 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk,
aggsum_add(&dk->dk_aggsums.das_reads, 1);
aggsum_add(&dk->dk_aggsums.das_nread, nread);
}
+
+void
+dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+	/* Add delta to das_nunlinks, but only when dk_kstats is set. */
+	if (dk->dk_kstats != NULL) {
+		aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
+	}
+}
+
+void
+dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+	/* Add delta to das_nunlinked, but only when dk_kstats is set. */
+	if (dk->dk_kstats != NULL) {
+		aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
+	}
+}
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 78e782c81..10e967ab9 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -223,6 +223,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+ dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
+ max_ncpus, defclsyspri, max_ncpus, INT_MAX,
+ TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
return (dp);
}
@@ -413,6 +416,7 @@ dsl_pool_close(dsl_pool_t *dp)
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
cv_destroy(&dp->dp_spaceavail_cv);
+ taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_iput_taskq);
if (dp->dp_blkstats != NULL) {
mutex_destroy(&dp->dp_blkstats->zab_lock);
@@ -1097,6 +1101,12 @@ dsl_pool_iput_taskq(dsl_pool_t *dp)
return (dp->dp_iput_taskq);
}
+taskq_t *
+dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp)
+{
+ return (dp->dp_unlinked_drain_taskq);	/* see dsl_pool_open_impl() */
+}
+
/*
* Walk through the pool-wide zap object of temporary snapshot user holds
* and release them.
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index bd173e7c3..63ac97754 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -458,26 +458,31 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
VERIFY3U(0, ==,
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+
+ dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
}
/*
* Clean up any znodes that had no links when we either crashed or
* (force) umounted the file system.
*/
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+static void
+zfs_unlinked_drain_task(void *arg)
{
+ zfsvfs_t *zfsvfs = arg;
zap_cursor_t zc;
zap_attribute_t zap;
dmu_object_info_t doi;
znode_t *zp;
int error;
+ ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
+
/*
* Iterate over the contents of the unlinked set.
*/
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
- zap_cursor_retrieve(&zc, &zap) == 0;
+ zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
zap_cursor_advance(&zc)) {
/*
@@ -507,9 +512,61 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
continue;
zp->z_unlinked = B_TRUE;
+
+ /*
+ * iput() is Linux's equivalent to illumos' VN_RELE(). It will
+ * decrement the inode's ref count and may cause the inode to be
+ * synchronously freed. We interrupt freeing of this inode, by
+ * checking the return value of dmu_objset_zfs_unmounting() in
+ * dmu_free_long_range(), when an unmount is requested.
+ */
iput(ZTOI(zp));
+ ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
}
zap_cursor_fini(&zc);
+
+ zfsvfs->z_draining = B_FALSE;
+ zfsvfs->z_drain_task = TASKQID_INVALID;
+}
+
+/*
+ * Sets z_draining, then dispatches an async unlinked drain (sync on failure).
+ * The local id is tested because the task resets z_drain_task when done.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+	taskqid_t id;
+
+	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+	ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
+	zfsvfs->z_draining = B_TRUE;
+	zfsvfs->z_drain_cancel = B_FALSE;
+	id = zfsvfs->z_drain_task = taskq_dispatch(
+	    dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+	    zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
+	if (id == TASKQID_INVALID) {
+		zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
+		zfs_unlinked_drain_task(zfsvfs);
+	}
+}
+
+/*
+ * Stop the unlinked drain taskq task and wait for it. Unlinked set
+ * processing that is already in progress is interrupted.
+ */
+void
+zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
+{
+	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+	if (!zfsvfs->z_draining)
+		return;
+	zfsvfs->z_drain_cancel = B_TRUE;
+	taskq_cancel_id(
+	    dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+	    zfsvfs->z_drain_task);
+	zfsvfs->z_drain_task = TASKQID_INVALID;
+	zfsvfs->z_draining = B_FALSE;
}
/*
@@ -684,6 +741,8 @@ zfs_rmnode(znode_t *zp)
VERIFY3U(0, ==,
zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+ dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
zfs_znode_delete(zp, tx);
dmu_tx_commit(tx);
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 766cbab74..cdc1bc707 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1178,6 +1178,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
return (error);
}
+ zfsvfs->z_drain_task = TASKQID_INVALID;
+ zfsvfs->z_draining = B_FALSE;
+ zfsvfs->z_drain_cancel = B_TRUE;
+
*zfvp = zfsvfs;
return (0);
}
@@ -1200,14 +1204,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
* operations out since we closed the ZIL.
*/
if (mounting) {
+ ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+ dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
/*
* During replay we remove the read only flag to
* allow replays to succeed.
*/
- if (readonly != 0)
+ if (readonly != 0) {
readonly_changed_cb(zfsvfs, B_FALSE);
- else
+ } else {
+ zap_stats_t zs;
+ if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+ &zs) == 0) { /* zs is only trusted on success */
+ dataset_kstats_update_nunlinks_kstat(
+ &zfsvfs->z_kstat, zs.zs_num_entries);
+ dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+ "num_entries in unlinked set: %llu",
+ zs.zs_num_entries);
+ }
zfs_unlinked_drain(zfsvfs);
+ }
/*
* Parse and replay the intent log.
@@ -1250,9 +1267,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
/* restore readonly bit */
if (readonly != 0)
readonly_changed_cb(zfsvfs, B_TRUE);
-
- ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
- dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
}
/*
@@ -1633,6 +1647,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
znode_t *zp;
+ zfs_unlinked_drain_stop_wait(zfsvfs);
+
/*
* If someone has not already unmounted this file system,
* drain the iput_taskq to ensure all active references to the
@@ -1884,6 +1900,7 @@ zfs_preumount(struct super_block *sb)
/* zfsvfs is NULL when zfs_domount fails during mount */
if (zfsvfs) {
+ zfs_unlinked_drain_stop_wait(zfsvfs);
zfsctl_destroy(sb->s_fs_info);
/*
* Wait for iput_async before entering evict_inodes in
@@ -2159,6 +2176,15 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
}
mutex_exit(&zfsvfs->z_znodes_lock);
+ if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
+ /*
+ * zfs_suspend_fs() could have interrupted freeing
+ * of dnodes. We need to restart this freeing so
+ * that we don't "leak" the space.
+ */
+ zfs_unlinked_drain(zfsvfs);
+ }
+
bail:
/* release the VFS ops */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 7b13927be..761fbcb33 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -92,6 +92,12 @@ static kmem_cache_t *znode_hold_cache = NULL;
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
/*
+ * This is used by the test suite so that it can delay znodes from being
+ * freed in order to inspect the unlinked set.
+ */
+int zfs_unlink_suspend_progress = 0;
+
+/*
* This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
* z_rangelock. It will modify the offset and length of the lock to reflect
* znode-specific information, and convert RL_APPEND to RL_WRITER. This is
@@ -1339,7 +1345,7 @@ zfs_zinactive(znode_t *zp)
*/
if (zp->z_unlinked) {
ASSERT(!zfsvfs->z_issnap);
- if (!zfs_is_readonly(zfsvfs)) {
+ if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
mutex_exit(&zp->z_lock);
zfs_znode_hold_exit(zfsvfs, zh);
zfs_rmnode(zp);
@@ -2214,4 +2220,7 @@ EXPORT_SYMBOL(zfs_obj_to_path);
/* CSTYLED */
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
+module_param(zfs_unlink_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
+"(debug - leaks space into the unlinked set)");
#endif