aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorPaul Dagnelie <[email protected]>2020-04-01 10:02:06 -0700
committerGitHub <[email protected]>2020-04-01 10:02:06 -0700
commit5a42ef04fd390dc96fbbf31bc9f3d05695998211 (patch)
treeee4aec968084618faa92988b08a3c41c9b904327 /module
parentc9e3efdb3a6111b9795becc6594b3c52ba004522 (diff)
Add 'zfs wait' command
Add a mechanism to wait for delete queue to drain. When doing redacted send/recv, many workflows involve deleting files that contain sensitive data. Because of the way zfs handles file deletions, snapshots taken quickly after a rm operation can sometimes still contain the file in question, especially if the file is very large. This can result in issues for redacted send/recv users who expect the deleted files to be redacted in the send streams, and not appear in their clones. This change duplicates much of the zpool wait related logic into a zfs wait command, which can be used to wait until the internal deleteq has been drained. Additional wait activities may be added in the future. Reviewed-by: Matthew Ahrens <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: John Gallagher <[email protected]> Signed-off-by: Paul Dagnelie <[email protected]> Closes #9707
Diffstat (limited to 'module')
-rw-r--r--module/os/linux/zfs/zfs_dir.c11
-rw-r--r--module/os/linux/zfs/zfs_vfsops.c6
-rw-r--r--module/zfs/dsl_dataset.c28
-rw-r--r--module/zfs/dsl_destroy.c4
-rw-r--r--module/zfs/dsl_dir.c113
-rw-r--r--module/zfs/zfs_ioctl.c82
6 files changed, 234 insertions, 10 deletions
diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c
index 7ebf38ddb..591e35fd1 100644
--- a/module/os/linux/zfs/zfs_dir.c
+++ b/module/os/linux/zfs/zfs_dir.c
@@ -52,6 +52,8 @@
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
/*
* zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
@@ -739,6 +741,8 @@ zfs_rmnode(znode_t *zp)
zfs_unlinked_add(xzp, tx);
}
+ mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
/*
* Remove this znode from the unlinked set. If a rollback has
* occurred while a file is open and unlinked, then when the file
@@ -749,6 +753,13 @@ zfs_rmnode(znode_t *zp)
zp->z_id, tx);
VERIFY(error == 0 || error == ENOENT);
+ uint64_t count;
+ if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
+ cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
+ }
+
+ mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
zfs_znode_delete(zp, tx);
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 478e07862..b6757d1bc 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -55,6 +55,7 @@
#include <sys/zfs_quota.h>
#include <sys/sunddi.h>
#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
#include <sys/spa_boot.h>
#include <sys/objlist.h>
#include <sys/zpl.h>
@@ -872,6 +873,8 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
"num_entries in unlinked set: %llu",
zs.zs_num_entries);
zfs_unlinked_drain(zfsvfs);
+ dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
+ dd->dd_activity_cancelled = B_FALSE;
}
/*
@@ -1423,6 +1426,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
}
dmu_objset_evict_dbufs(zfsvfs->z_os);
+ dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
+ dsl_dir_cancel_waiters(dd);
return (0);
}
@@ -1813,6 +1818,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
if (err != 0)
goto bail;
+ ds->ds_dir->dd_activity_cancelled = B_FALSE;
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
zfs_set_fuid_feature(zfsvfs);
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 3e5a67bdb..2d6e95e31 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname,
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
- boolean_t held;
+ boolean_t held = B_FALSE;
if (!dmu_tx_is_syncing(tx))
return (0);
- if (owner != NULL) {
- VERIFY3P(ds->ds_owner, ==, owner);
- dsl_dataset_long_rele(ds, owner);
- }
-
- held = dsl_dataset_long_held(ds);
-
- if (owner != NULL)
- dsl_dataset_long_hold(ds, owner);
+ dsl_dir_t *dd = ds->ds_dir;
+ mutex_enter(&dd->dd_activity_lock);
+ uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
+ (owner != NULL ? 1 : 0);
+ /*
+ * The value of dd_activity_waiters can change as soon as we drop the
+ * lock, but we're fine with that; new waiters coming in or old
+ * waiters leaving doesn't cause problems, since we're going to cancel
+ * waiters later anyway. The goal of this check is to verify that no
+ * non-waiters have long-holds, and all new long-holds will be
+ * prevented because we're holding the pool config as writer.
+ */
+ if (holds != dd->dd_activity_waiters)
+ held = B_TRUE;
+ mutex_exit(&dd->dd_activity_lock);
if (held)
return (SET_ERROR(EBUSY));
@@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
DMU_MAX_ACCESS * spa_asize_inflation);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
+ dsl_dir_cancel_waiters(origin_head->ds_dir);
+
/*
* Swap per-dataset feature flags.
*/
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 01b5f080d..883928f0e 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
return (SET_ERROR(EBUSY));
+ ASSERT0(ds->ds_dir->dd_activity_waiters);
+
mos = ds->ds_dir->dd_pool->dp_meta_objset;
/*
@@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, " ");
+ dsl_dir_cancel_waiters(ds->ds_dir);
+
rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) &&
dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 172ebc72c..63ecb1d39 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -51,6 +51,9 @@
#include <sys/zthr.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
/*
* Filesystem and Snapshot Limits
@@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu)
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
}
@@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
}
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL);
dsl_prop_init(dd);
dsl_dir_snap_cmtime_update(dd);
@@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@@ -310,6 +319,8 @@ errout:
if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
}
}
+/*
+ * Report, via *in_progress, whether the given wait activity is still
+ * outstanding for dataset ds. The caller must hold dd->dd_activity_lock.
+ *
+ * For ZFS_WAIT_DELETEQ (kernel builds only) the activity is reported as not
+ * in progress when the objset is not of type DMU_OST_ZFS, has no user
+ * pointer set, is flagged unmounted, is read-only according to
+ * zfs_get_temporary_prop(), or lives in a pool that is not writeable.
+ * Otherwise it is in progress exactly when the unlinked-set ZAP object has
+ * a non-zero entry count.
+ *
+ * Returns 0 on success. On failure the error from the failing lookup is
+ * returned and *in_progress is left untouched. An unrecognized activity
+ * value panics.
+ *
+ * This function takes no hold on ds and therefore must not release one on
+ * any path; the hold belongs to the caller.
+ */
+static int
+dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds,
+    zfs_wait_activity_t activity, boolean_t *in_progress)
+{
+	int error = 0;
+
+	ASSERT(MUTEX_HELD(&dd->dd_activity_lock));
+
+	switch (activity) {
+	case ZFS_WAIT_DELETEQ: {
+#ifdef _KERNEL
+		objset_t *os;
+		error = dmu_objset_from_ds(ds, &os);
+		if (error != 0)
+			break;
+
+		/* The user pointer is read under its own lock. */
+		mutex_enter(&os->os_user_ptr_lock);
+		void *user = dmu_objset_get_user(os);
+		mutex_exit(&os->os_user_ptr_lock);
+		if (dmu_objset_type(os) != DMU_OST_ZFS ||
+		    user == NULL || zfs_get_vfs_flag_unmounted(os)) {
+			*in_progress = B_FALSE;
+			return (0);
+		}
+
+		uint64_t readonly = B_FALSE;
+		error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly,
+		    NULL);
+
+		if (error != 0)
+			break;
+
+		if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) {
+			*in_progress = B_FALSE;
+			return (0);
+		}
+
+		uint64_t count, unlinked_obj;
+		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+		    &unlinked_obj);
+		/*
+		 * Just propagate the error. The dataset hold is owned by the
+		 * caller, which releases it itself; dropping it here as well
+		 * would release the same hold twice.
+		 */
+		if (error != 0)
+			break;
+
+		error = zap_count(os, unlinked_obj, &count);
+
+		if (error == 0)
+			*in_progress = (count != 0);
+		break;
+#else
+		/*
+		 * The delete queue is ZPL specific, and libzpool doesn't have
+		 * it. It doesn't make sense to wait for it.
+		 */
+		*in_progress = B_FALSE;
+		break;
+#endif
+	}
+	default:
+		panic("unrecognized value for activity %d", activity);
+	}
+
+	return (error);
+}
+
+/*
+ * Sleep until the given activity is no longer in progress on ds.
+ *
+ * dd_activity_lock is handed to cv_wait_sig() below, so the caller is
+ * expected to enter this function with that lock held.
+ *
+ * *waited is set to B_TRUE only if the thread actually had to sleep at
+ * least once; it is not written otherwise.
+ *
+ * Returns 0 once the activity has finished, the error from
+ * dsl_dir_activity_in_progress() if the check fails, or EINTR if
+ * cv_wait_sig() returns 0 or the waiters on this dsl_dir have been
+ * cancelled.
+ */
+int
+dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
+    boolean_t *waited)
+{
+	dsl_pool_t *pool = dd->dd_pool;
+	boolean_t busy;
+	int err;
+
+	do {
+		/* Sample the activity state with the pool config entered. */
+		dsl_pool_config_enter(pool, FTAG);
+		err = dsl_dir_activity_in_progress(dd, ds, activity, &busy);
+		dsl_pool_config_exit(pool, FTAG);
+
+		/* Done: either the check failed or nothing is pending. */
+		if (err != 0 || !busy)
+			return (err);
+
+		*waited = B_TRUE;
+
+		/* A zero return from cv_wait_sig() ends the wait. */
+		if (cv_wait_sig(&dd->dd_activity_cv,
+		    &dd->dd_activity_lock) == 0)
+			break;
+	} while (!dd->dd_activity_cancelled);
+
+	/* Reached only when interrupted or cancelled. */
+	return (SET_ERROR(EINTR));
+}
+
+/*
+ * Cancel all threads waiting on this dsl_dir's activity condition variable
+ * and do not return until dd_activity_waiters has dropped to zero.
+ */
+void
+dsl_dir_cancel_waiters(dsl_dir_t *dd)
+{
+	mutex_enter(&dd->dd_activity_lock);
+
+	/* Flag the cancellation first, then wake every sleeper to see it. */
+	dd->dd_activity_cancelled = B_TRUE;
+	cv_broadcast(&dd->dd_activity_cv);
+
+	/* Sleep on the same cv until no waiters remain. */
+	for (;;) {
+		if (!(dd->dd_activity_waiters > 0))
+			break;
+		cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock);
+	}
+
+	mutex_exit(&dd->dd_activity_lock);
+}
+
#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index d57aef509..fb9435341 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -4073,6 +4073,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
}
/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, the
+ * ioctl blocks until all activity of that type is complete, and then returns
+ * with "waited" set to true.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ * "wait_activity" -> int32_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_fs_wait[] = {	/* innvl keys for the "wait_fs" ioctl */
+ {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},	/* activity to wait for; NOTE(review): trailing 0 presumably means "no flags" - confirm */
+};
+
+/*
+ * Handler for the "wait_fs" ioctl: wait for the activity named by the
+ * ZFS_WAIT_ACTIVITY key of innvl to finish on dataset `name`, and report in
+ * outnvl (ZFS_WAIT_WAITED) whether the call had to wait.
+ *
+ * Returns EINVAL if the activity key is missing or out of range, the error
+ * from dsl_pool_hold() / dsl_dataset_hold() if the dataset cannot be held,
+ * or whatever dsl_dir_wait() returns. outnvl is only filled in on success.
+ */
+static int
+zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	dsl_pool_t *pool;
+	dsl_dataset_t *dataset;
+	dsl_dir_t *dir;
+	boolean_t did_wait = B_FALSE;
+	int32_t which;
+	int err;
+
+	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &which) != 0)
+		return (SET_ERROR(EINVAL));
+
+	if (which < 0 || which >= ZFS_WAIT_NUM_ACTIVITIES)
+		return (SET_ERROR(EINVAL));
+
+	err = dsl_pool_hold(name, FTAG, &pool);
+	if (err != 0)
+		return (err);
+
+	err = dsl_dataset_hold(pool, name, FTAG, &dataset);
+	if (err != 0) {
+		dsl_pool_rele(pool, FTAG);
+		return (err);
+	}
+
+	/* Register as a waiter; the lock stays held across dsl_dir_wait(). */
+	dir = dataset->ds_dir;
+	mutex_enter(&dir->dd_activity_lock);
+	dir->dd_activity_waiters++;
+
+	/*
+	 * Take a long-hold so the dsl_dataset_t and dsl_dir_t cannot be
+	 * evicted while we sleep. Holding the pool would normally guarantee
+	 * that, but the pool hold has to be dropped before waiting because
+	 * it would keep TXGs from syncing. The other effects of a long-hold
+	 * (such as blocking deletion) are not needed here, because waiters
+	 * are cancelled before a deletion proceeds; a dedicated mechanism
+	 * could replace this, but the long-hold is the simpler option.
+	 */
+	dsl_dataset_long_hold(dataset, FTAG);
+	dsl_pool_rele(pool, FTAG);
+
+	err = dsl_dir_wait(dir, dataset, which, &did_wait);
+
+	/* Unregister, and wake a thread sleeping on the cv if we were last. */
+	dsl_dataset_long_rele(dataset, FTAG);
+	if (--dir->dd_activity_waiters == 0)
+		cv_signal(&dir->dd_activity_cv);
+	mutex_exit(&dir->dd_activity_lock);
+
+	dsl_dataset_rele(dataset, FTAG);
+
+	if (err != 0)
+		return (err);
+
+	fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, did_wait);
+	return (0);
+}
+
+/*
* fsname is name of dataset to rollback (to most recent snapshot)
*
* innvl may contain name of expected target snapshot
@@ -6915,6 +6992,11 @@ zfs_ioctl_init(void)
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
+ zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
+ zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+ zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,