aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/dsl_dataset.c28
-rw-r--r--module/zfs/dsl_destroy.c4
-rw-r--r--module/zfs/dsl_dir.c113
-rw-r--r--module/zfs/zfs_ioctl.c82
4 files changed, 217 insertions, 10 deletions
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 3e5a67bdb..2d6e95e31 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname,
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
- boolean_t held;
+ boolean_t held = B_FALSE;
if (!dmu_tx_is_syncing(tx))
return (0);
- if (owner != NULL) {
- VERIFY3P(ds->ds_owner, ==, owner);
- dsl_dataset_long_rele(ds, owner);
- }
-
- held = dsl_dataset_long_held(ds);
-
- if (owner != NULL)
- dsl_dataset_long_hold(ds, owner);
+ dsl_dir_t *dd = ds->ds_dir;
+ mutex_enter(&dd->dd_activity_lock);
+ uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
+ (owner != NULL ? 1 : 0);
+ /*
+ * The value of dd_activity_waiters can change as soon as we drop the
+ * lock, but we're fine with that; new waiters coming in or old
+ * waiters leaving doesn't cause problems, since we're going to cancel
+ * waiters later anyway. The goal of this check is to verify that no
+ * non-waiters have long-holds, and all new long-holds will be
+ * prevented because we're holding the pool config as writer.
+ */
+ if (holds != dd->dd_activity_waiters)
+ held = B_TRUE;
+ mutex_exit(&dd->dd_activity_lock);
if (held)
return (SET_ERROR(EBUSY));
@@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
DMU_MAX_ACCESS * spa_asize_inflation);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
+ dsl_dir_cancel_waiters(origin_head->ds_dir);
+
/*
* Swap per-dataset feature flags.
*/
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 01b5f080d..883928f0e 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
return (SET_ERROR(EBUSY));
+ ASSERT0(ds->ds_dir->dd_activity_waiters);
+
mos = ds->ds_dir->dd_pool->dp_meta_objset;
/*
@@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, " ");
+ dsl_dir_cancel_waiters(ds->ds_dir);
+
rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) &&
dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 172ebc72c..63ecb1d39 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -51,6 +51,9 @@
#include <sys/zthr.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
/*
* Filesystem and Snapshot Limits
@@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu)
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
}
@@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
}
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL);
dsl_prop_init(dd);
dsl_dir_snap_cmtime_update(dd);
@@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@@ -310,6 +319,8 @@ errout:
if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
}
}
+static int
+dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds,
+ zfs_wait_activity_t activity, boolean_t *in_progress)
+{
+ int error = 0;
+
+ ASSERT(MUTEX_HELD(&dd->dd_activity_lock));
+
+ switch (activity) {
+ case ZFS_WAIT_DELETEQ: {
+#ifdef _KERNEL
+ objset_t *os;
+ error = dmu_objset_from_ds(ds, &os);
+ if (error != 0)
+ break;
+
+ mutex_enter(&os->os_user_ptr_lock);
+ void *user = dmu_objset_get_user(os);
+ mutex_exit(&os->os_user_ptr_lock);
+ if (dmu_objset_type(os) != DMU_OST_ZFS ||
+ user == NULL || zfs_get_vfs_flag_unmounted(os)) {
+ *in_progress = B_FALSE;
+ return (0);
+ }
+
+ uint64_t readonly = B_FALSE;
+ error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly,
+ NULL);
+
+ if (error != 0)
+ break;
+
+ if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) {
+ *in_progress = B_FALSE;
+ return (0);
+ }
+
+ uint64_t count, unlinked_obj;
+ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+ &unlinked_obj);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ break;
+ }
+ error = zap_count(os, unlinked_obj, &count);
+
+ if (error == 0)
+ *in_progress = (count != 0);
+ break;
+#else
+ /*
+ * The delete queue is ZPL specific, and libzpool doesn't have
+ * it. It doesn't make sense to wait for it.
+ */
+ *in_progress = B_FALSE;
+ break;
+#endif
+ }
+ default:
+ panic("unrecognized value for activity %d", activity);
+ }
+
+ return (error);
+}
+
+int
+dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
+ boolean_t *waited)
+{
+ int error = 0;
+ boolean_t in_progress;
+ dsl_pool_t *dp = dd->dd_pool;
+ for (;;) {
+ dsl_pool_config_enter(dp, FTAG);
+ error = dsl_dir_activity_in_progress(dd, ds, activity,
+ &in_progress);
+ dsl_pool_config_exit(dp, FTAG);
+ if (error != 0 || !in_progress)
+ break;
+
+ *waited = B_TRUE;
+
+ if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) ==
+ 0 || dd->dd_activity_cancelled) {
+ error = SET_ERROR(EINTR);
+ break;
+ }
+ }
+ return (error);
+}
+
+void
+dsl_dir_cancel_waiters(dsl_dir_t *dd)
+{
+ mutex_enter(&dd->dd_activity_lock);
+ dd->dd_activity_cancelled = B_TRUE;
+ cv_broadcast(&dd->dd_activity_cv);
+ while (dd->dd_activity_waiters > 0)
+ cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock);
+ mutex_exit(&dd->dd_activity_lock);
+}
+
#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index d57aef509..fb9435341 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -4073,6 +4073,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
}
/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, and no
+ * tag is passed in, the ioctl blocks until all activity of that type is
+ * complete, and then returns with "waited" set to true.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ * "wait_activity" -> int32_t
+ * }
+ *
+ * outnvl: "waited" -> boolean_t
+ */
+static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
+ {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
+};
+
+static int
+zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ int32_t activity;
+ boolean_t waited = B_FALSE;
+ int error;
+ dsl_pool_t *dp;
+ dsl_dir_t *dd;
+ dsl_dataset_t *ds;
+
+ if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
+ return (SET_ERROR(EINVAL));
+
+ if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
+ return (SET_ERROR(EINVAL));
+
+ if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
+ return (error);
+
+ if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+
+ dd = ds->ds_dir;
+ mutex_enter(&dd->dd_activity_lock);
+ dd->dd_activity_waiters++;
+
+ /*
+ * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
+ * aren't evicted while we're waiting. Normally this is prevented by
+ * holding the pool, but we can't do that while we're waiting since
+ * that would prevent TXGs from syncing out. Some of the functionality
+ * of long-holds (e.g. preventing deletion) is unnecessary for this
+ * case, since we would cancel the waiters before proceeding with a
+ * deletion. An alternative mechanism for keeping the dataset around
+ * could be developed but this is simpler.
+ */
+ dsl_dataset_long_hold(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+
+ error = dsl_dir_wait(dd, ds, activity, &waited);
+
+ dsl_dataset_long_rele(ds, FTAG);
+ dd->dd_activity_waiters--;
+ if (dd->dd_activity_waiters == 0)
+ cv_signal(&dd->dd_activity_cv);
+ mutex_exit(&dd->dd_activity_lock);
+
+ dsl_dataset_rele(ds, FTAG);
+
+ if (error == 0)
+ fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
+
+ return (error);
+}
+
+/*
* fsname is name of dataset to rollback (to most recent snapshot)
*
* innvl may contain name of expected target snapshot
@@ -6915,6 +6992,11 @@ zfs_ioctl_init(void)
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
+ zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
+ zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+ zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,