Diffstat (limited to 'module')
 -rw-r--r--  module/zfs/dataset_kstats.c | 29
 -rw-r--r--  module/zfs/dsl_pool.c       | 10
 -rw-r--r--  module/zfs/zfs_dir.c        | 65
 -rw-r--r--  module/zfs/zfs_vfsops.c     | 36
 -rw-r--r--  module/zfs/zfs_znode.c      | 11
 5 files changed, 142 insertions(+), 9 deletions(-)
diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c
index ac0ad84ed..522825c42 100644
--- a/module/zfs/dataset_kstats.c
+++ b/module/zfs/dataset_kstats.c
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
  */
 
 #include <sys/dataset_kstats.h>
@@ -34,6 +35,8 @@ static dataset_kstat_values_t empty_dataset_kstats = {
 	{ "nwritten",	KSTAT_DATA_UINT64 },
 	{ "reads",	KSTAT_DATA_UINT64 },
 	{ "nread",	KSTAT_DATA_UINT64 },
+	{ "nunlinks",	KSTAT_DATA_UINT64 },
+	{ "nunlinked",	KSTAT_DATA_UINT64 },
 };
 
 static int
@@ -54,6 +57,10 @@ dataset_kstats_update(kstat_t *ksp, int rw)
 	    aggsum_value(&dk->dk_aggsums.das_reads);
 	dkv->dkv_nread.value.ui64 =
 	    aggsum_value(&dk->dk_aggsums.das_nread);
+	dkv->dkv_nunlinks.value.ui64 =
+	    aggsum_value(&dk->dk_aggsums.das_nunlinks);
+	dkv->dkv_nunlinked.value.ui64 =
+	    aggsum_value(&dk->dk_aggsums.das_nunlinked);
 
 	return (0);
 }
@@ -136,6 +143,8 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
 	aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
 	aggsum_init(&dk->dk_aggsums.das_reads, 0);
 	aggsum_init(&dk->dk_aggsums.das_nread, 0);
+	aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
+	aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
 }
 
 void
@@ -156,6 +165,8 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
 	aggsum_fini(&dk->dk_aggsums.das_nwritten);
 	aggsum_fini(&dk->dk_aggsums.das_reads);
 	aggsum_fini(&dk->dk_aggsums.das_nread);
+	aggsum_fini(&dk->dk_aggsums.das_nunlinks);
+	aggsum_fini(&dk->dk_aggsums.das_nunlinked);
 }
 
 void
@@ -183,3 +194,21 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk,
 	aggsum_add(&dk->dk_aggsums.das_reads, 1);
 	aggsum_add(&dk->dk_aggsums.das_nread, nread);
 }
+
+void
+dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+	if (dk->dk_kstats == NULL)
+		return;
+
+	aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
+}
+
+void
+dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+	if (dk->dk_kstats == NULL)
+		return;
+
+	aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
+}
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 78e782c81..10e967ab9 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -223,6 +223,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
 
 	dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
 	    max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+	dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
+	    max_ncpus, defclsyspri, max_ncpus, INT_MAX,
+	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
 	return (dp);
 }
@@ -413,6 +416,7 @@ dsl_pool_close(dsl_pool_t *dp)
 	rrw_destroy(&dp->dp_config_rwlock);
 	mutex_destroy(&dp->dp_lock);
 	cv_destroy(&dp->dp_spaceavail_cv);
+	taskq_destroy(dp->dp_unlinked_drain_taskq);
 	taskq_destroy(dp->dp_iput_taskq);
 	if (dp->dp_blkstats != NULL) {
 		mutex_destroy(&dp->dp_blkstats->zab_lock);
@@ -1097,6 +1101,12 @@ dsl_pool_iput_taskq(dsl_pool_t *dp)
 	return (dp->dp_iput_taskq);
 }
 
+taskq_t *
+dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp)
+{
+	return (dp->dp_unlinked_drain_taskq);
+}
+
 /*
  * Walk through the pool-wide zap object of temporary snapshot user holds
  * and release them.
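
Note: the two counters above are exported through the existing per-dataset
kstat rows; as the zfs_dir.c hunks below show, "nunlinks" is bumped when an
entry is added to the unlinked set and "nunlinked" when the object is finally
reclaimed, so the difference between the two approximates the current backlog.
As a hedged illustration only (not part of this diff), a userspace reader
could scan the objset kstat file for the new rows. The /proc path is an
assumption based on how the Linux SPL typically exposes dataset kstats
(/proc/spl/kstat/zfs/<pool>/objset-0x<id>); pass the right file for your pool.

/* hypothetical reader for the new nunlinks/nunlinked kstat rows */
#include <stdio.h>
#include <string.h>

int
main(int argc, char **argv)
{
	char line[256];
	FILE *fp;

	if (argc != 2) {
		(void) fprintf(stderr, "usage: %s <objset kstat file>\n",
		    argv[0]);
		return (1);
	}
	if ((fp = fopen(argv[1], "r")) == NULL) {
		perror("fopen");
		return (1);
	}
	/* each data row is "<name> <type> <value>" */
	while (fgets(line, sizeof (line), fp) != NULL) {
		if (strncmp(line, "nunlinks", 8) == 0 ||
		    strncmp(line, "nunlinked", 9) == 0)
			(void) fputs(line, stdout);
	}
	(void) fclose(fp);
	return (0);
}
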
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index bd173e7c3..63ac97754 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -458,26 +458,31 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
 
 	VERIFY3U(0, ==, zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
 	    zp->z_id, tx));
+
+	dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
 }
 
 /*
  * Clean up any znodes that had no links when we either crashed or
  * (force) umounted the file system.
  */
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+static void
+zfs_unlinked_drain_task(void *arg)
 {
+	zfsvfs_t *zfsvfs = arg;
 	zap_cursor_t	zc;
 	zap_attribute_t zap;
 	dmu_object_info_t doi;
 	znode_t		*zp;
 	int		error;
 
+	ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
+
 	/*
 	 * Iterate over the contents of the unlinked set.
 	 */
 	for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
-	    zap_cursor_retrieve(&zc, &zap) == 0;
+	    zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
 	    zap_cursor_advance(&zc)) {
 
 		/*
@@ -507,9 +512,61 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
 			continue;
 
 		zp->z_unlinked = B_TRUE;
+
+		/*
+		 * iput() is Linux's equivalent to illumos' VN_RELE(). It will
+		 * decrement the inode's ref count and may cause the inode to be
+		 * synchronously freed. We interrupt freeing of this inode, by
+		 * checking the return value of dmu_objset_zfs_unmounting() in
+		 * dmu_free_long_range(), when an unmount is requested.
+		 */
 		iput(ZTOI(zp));
+		ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
 	}
 	zap_cursor_fini(&zc);
+
+	zfsvfs->z_draining = B_FALSE;
+	zfsvfs->z_drain_task = TASKQID_INVALID;
+}
+
+/*
+ * Sets z_draining then tries to dispatch async unlinked drain.
+ * If that fails executes synchronous unlinked drain.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+	ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
+
+	zfsvfs->z_draining = B_TRUE;
+	zfsvfs->z_drain_cancel = B_FALSE;
+
+	zfsvfs->z_drain_task = taskq_dispatch(
+	    dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+	    zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
+	if (zfsvfs->z_drain_task == TASKQID_INVALID) {
+		zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
+		zfs_unlinked_drain_task(zfsvfs);
+	}
+}
+
+/*
+ * Wait for the unlinked drain taskq task to stop. This will interrupt the
+ * unlinked set processing if it is in progress.
+ */
+void
+zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
+{
+	ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+
+	if (zfsvfs->z_draining) {
+		zfsvfs->z_drain_cancel = B_TRUE;
+		taskq_cancel_id(dsl_pool_unlinked_drain_taskq(
+		    dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task);
+		zfsvfs->z_drain_task = TASKQID_INVALID;
+		zfsvfs->z_draining = B_FALSE;
+	}
 }
 
 /*
@@ -684,6 +741,8 @@ zfs_rmnode(znode_t *zp)
 	VERIFY3U(0, ==, zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
 	    zp->z_id, tx));
 
+	dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
 	zfs_znode_delete(zp, tx);
 
 	dmu_tx_commit(tx);
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 766cbab74..cdc1bc707 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1178,6 +1178,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
 		return (error);
 	}
 
+	zfsvfs->z_drain_task = TASKQID_INVALID;
+	zfsvfs->z_draining = B_FALSE;
+	zfsvfs->z_drain_cancel = B_TRUE;
+
 	*zfvp = zfsvfs;
 	return (0);
 }
@@ -1200,14 +1204,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 	 * operations out since we closed the ZIL.
 	 */
 	if (mounting) {
+		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
 		/*
 		 * During replay we remove the read only flag to
 		 * allow replays to succeed.
 		 */
-		if (readonly != 0)
+		if (readonly != 0) {
 			readonly_changed_cb(zfsvfs, B_FALSE);
-		else
+		} else {
+			zap_stats_t zs;
+			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+			    &zs) == 0) {
+				dataset_kstats_update_nunlinks_kstat(
+				    &zfsvfs->z_kstat, zs.zs_num_entries);
+				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+				    "num_entries in unlinked set: %llu",
+				    zs.zs_num_entries);
+			}
 			zfs_unlinked_drain(zfsvfs);
+		}
 
 		/*
 		 * Parse and replay the intent log.
@@ -1250,9 +1267,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 		/* restore readonly bit */
 		if (readonly != 0)
 			readonly_changed_cb(zfsvfs, B_TRUE);
-
-		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
-		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
 	}
 
 	/*
@@ -1633,6 +1647,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 {
 	znode_t	*zp;
 
+	zfs_unlinked_drain_stop_wait(zfsvfs);
+
 	/*
 	 * If someone has not already unmounted this file system,
 	 * drain the iput_taskq to ensure all active references to the
@@ -1884,6 +1900,7 @@ zfs_preumount(struct super_block *sb)
 
 	/* zfsvfs is NULL when zfs_domount fails during mount */
 	if (zfsvfs) {
+		zfs_unlinked_drain_stop_wait(zfsvfs);
 		zfsctl_destroy(sb->s_fs_info);
 		/*
 		 * Wait for iput_async before entering evict_inodes in
@@ -2159,6 +2176,15 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
+	if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
+		/*
+		 * zfs_suspend_fs() could have interrupted freeing
+		 * of dnodes. We need to restart this freeing so
+		 * that we don't "leak" the space.
+		 */
+		zfs_unlinked_drain(zfsvfs);
+	}
+
 bail:
 	/* release the VFS ops */
 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 7b13927be..761fbcb33 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -92,6 +92,12 @@ static kmem_cache_t *znode_hold_cache = NULL;
 unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
 
 /*
+ * This is used by the test suite so that it can delay znodes from being
+ * freed in order to inspect the unlinked set.
+ */
+int zfs_unlink_suspend_progress = 0;
+
+/*
  * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
  * z_rangelock. It will modify the offset and length of the lock to reflect
  * znode-specific information, and convert RL_APPEND to RL_WRITER. This is
@@ -1339,7 +1345,7 @@ zfs_zinactive(znode_t *zp)
 	 */
 	if (zp->z_unlinked) {
 		ASSERT(!zfsvfs->z_issnap);
-		if (!zfs_is_readonly(zfsvfs)) {
+		if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
 			mutex_exit(&zp->z_lock);
 			zfs_znode_hold_exit(zfsvfs, zh);
 			zfs_rmnode(zp);
@@ -2214,4 +2220,7 @@ EXPORT_SYMBOL(zfs_obj_to_path);
 /* CSTYLED */
 module_param(zfs_object_mutex_size, uint, 0644);
 MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
+module_param(zfs_unlink_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
+"(debug - leaks space into the unlinked set)");
 #endif
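
Note: the shutdown path in this change relies on a cooperative cancel:
zfs_unlinked_drain_task() re-checks z_drain_cancel on every ZAP cursor step,
while zfs_unlinked_drain_stop_wait() sets the flag and then cancels/waits on
the taskq entry, so unmount never blocks behind a long unlinked-set walk, and
zfs_unlinked_drain() falls back to a synchronous drain if dispatch fails. A
minimal standalone analogue of that handshake, using pthreads and C11 atomics
in place of the kernel taskq (all names here are illustrative, not from the
ZFS code; build with cc -std=c11 -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool drain_cancel;

/* stands in for zfs_unlinked_drain_task(): poll the flag between items */
static void *
drain_task(void *arg)
{
	int *nitems = arg;

	for (int i = 0; i < *nitems && !atomic_load(&drain_cancel); i++)
		usleep(1000);	/* one zap-cursor step + iput() stand-in */
	return (NULL);
}

int
main(void)
{
	pthread_t tid;
	int nitems = 100000;

	atomic_store(&drain_cancel, false);

	/* "zfs_unlinked_drain": dispatch async, fall back to synchronous */
	if (pthread_create(&tid, NULL, drain_task, &nitems) != 0) {
		drain_task(&nitems);
		return (0);
	}

	usleep(10000);	/* let a few entries drain */

	/* "zfs_unlinked_drain_stop_wait": request cancel, then wait */
	atomic_store(&drain_cancel, true);
	(void) pthread_join(tid, NULL);
	(void) printf("drain stopped without visiting all %d items\n",
	    nitems);
	return (0);
}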