diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/dsl_dataset.c | 28 | ||||
-rw-r--r-- | module/zfs/dsl_destroy.c | 4 | ||||
-rw-r--r-- | module/zfs/dsl_dir.c | 113 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 82 |
4 files changed, 217 insertions, 10 deletions
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 3e5a67bdb..2d6e95e31 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname, static int dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) { - boolean_t held; + boolean_t held = B_FALSE; if (!dmu_tx_is_syncing(tx)) return (0); - if (owner != NULL) { - VERIFY3P(ds->ds_owner, ==, owner); - dsl_dataset_long_rele(ds, owner); - } - - held = dsl_dataset_long_held(ds); - - if (owner != NULL) - dsl_dataset_long_hold(ds, owner); + dsl_dir_t *dd = ds->ds_dir; + mutex_enter(&dd->dd_activity_lock); + uint64_t holds = zfs_refcount_count(&ds->ds_longholds) - + (owner != NULL ? 1 : 0); + /* + * The value of dd_activity_waiters can change as soon as we drop the + * lock, but we're fine with that; new waiters coming in or old + * waiters leaving doesn't cause problems, since we're going to cancel + * waiters later anyway. The goal of this check is to verify that no + * non-waiters have long-holds, and all new long-holds will be + * prevented because we're holding the pool config as writer. + */ + if (holds != dd->dd_activity_waiters) + held = B_TRUE; + mutex_exit(&dd->dd_activity_lock); if (held) return (SET_ERROR(EBUSY)); @@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, DMU_MAX_ACCESS * spa_asize_inflation); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); + dsl_dir_cancel_waiters(origin_head->ds_dir); + /* * Swap per-dataset feature flags. 
*/ diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 01b5f080d..883928f0e 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) if (zfs_refcount_count(&ds->ds_longholds) != expected_holds) return (SET_ERROR(EBUSY)); + ASSERT0(ds->ds_dir->dd_activity_waiters); + mos = ds->ds_dir->dd_pool->dp_meta_objset; /* @@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) /* We need to log before removing it from the namespace. */ spa_history_log_internal_ds(ds, "destroy", tx, " "); + dsl_dir_cancel_waiters(ds->ds_dir); + rmorigin = (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 172ebc72c..63ecb1d39 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -51,6 +51,9 @@ #include <sys/zthr.h> #include "zfs_namecheck.h" #include "zfs_prop.h" +#ifdef _KERNEL +#include <sys/zfs_vfsops.h> +#endif /* * Filesystem and Snapshot Limits @@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + cv_destroy(&dd->dd_activity_cv); + mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); } @@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, } mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL); dsl_prop_init(dd); dsl_dir_snap_cmtime_update(dd); @@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + cv_destroy(&dd->dd_activity_cv); + mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; @@ -310,6 +319,8 @@ errout: 
if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); + cv_destroy(&dd->dd_activity_cv); + mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); @@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total) } } +static int +dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds, + zfs_wait_activity_t activity, boolean_t *in_progress) +{ + int error = 0; + + ASSERT(MUTEX_HELD(&dd->dd_activity_lock)); + + switch (activity) { + case ZFS_WAIT_DELETEQ: { +#ifdef _KERNEL + objset_t *os; + error = dmu_objset_from_ds(ds, &os); + if (error != 0) + break; + + mutex_enter(&os->os_user_ptr_lock); + void *user = dmu_objset_get_user(os); + mutex_exit(&os->os_user_ptr_lock); + if (dmu_objset_type(os) != DMU_OST_ZFS || + user == NULL || zfs_get_vfs_flag_unmounted(os)) { + *in_progress = B_FALSE; + return (0); + } + + uint64_t readonly = B_FALSE; + error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly, + NULL); + + if (error != 0) + break; + + if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) { + *in_progress = B_FALSE; + return (0); + } + + uint64_t count, unlinked_obj; + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, + &unlinked_obj); + if (error != 0) { + /* No hold on ds is taken here, so none may be released. */ + break; + } + error = zap_count(os, unlinked_obj, &count); + + if (error == 0) + *in_progress = (count != 0); + break; +#else + /* + * The delete queue is ZPL specific, and libzpool doesn't have + * it. It doesn't make sense to wait for it. 
+ */ + *in_progress = B_FALSE; + break; +#endif + } + default: + panic("unrecognized value for activity %d", activity); + } + + return (error); +} + +int +dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, + boolean_t *waited) +{ + int error = 0; + boolean_t in_progress; + dsl_pool_t *dp = dd->dd_pool; + for (;;) { + dsl_pool_config_enter(dp, FTAG); + error = dsl_dir_activity_in_progress(dd, ds, activity, + &in_progress); + dsl_pool_config_exit(dp, FTAG); + if (error != 0 || !in_progress) + break; + + *waited = B_TRUE; + + if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) == + 0 || dd->dd_activity_cancelled) { + error = SET_ERROR(EINTR); + break; + } + } + return (error); +} + +void +dsl_dir_cancel_waiters(dsl_dir_t *dd) +{ + mutex_enter(&dd->dd_activity_lock); + dd->dd_activity_cancelled = B_TRUE; + cv_broadcast(&dd->dd_activity_cv); + while (dd->dd_activity_waiters > 0) + cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock); + mutex_exit(&dd->dd_activity_lock); +} + #if defined(_KERNEL) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index d57aef509..fb9435341 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -4073,6 +4073,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl) } /* + * This ioctl waits for activity of a particular type to complete. If there is + * no activity of that type in progress, it returns immediately, and the + * returned value "waited" is false. If there is activity in progress, the + * ioctl blocks until all activity of that type is + * complete, and then returns with "waited" set to true. + * + * If a thread waiting in the ioctl receives a signal, the call will return + * immediately, and the return value will be EINTR. 
+ * + * innvl: { + * "wait_activity" -> int32_t + * } + * + * outnvl: "waited" -> boolean_t + */ +static const zfs_ioc_key_t zfs_keys_fs_wait[] = { + {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0}, +}; + +static int +zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + int32_t activity; + boolean_t waited = B_FALSE; + int error; + dsl_pool_t *dp; + dsl_dir_t *dd; + dsl_dataset_t *ds; + + if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0) + return (SET_ERROR(EINVAL)); + + if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0) + return (SET_ERROR(EINVAL)); + + if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0) + return (error); + + if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + + dd = ds->ds_dir; + mutex_enter(&dd->dd_activity_lock); + dd->dd_activity_waiters++; + + /* + * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t + * aren't evicted while we're waiting. Normally this is prevented by + * holding the pool, but we can't do that while we're waiting since + * that would prevent TXGs from syncing out. Some of the functionality + * of long-holds (e.g. preventing deletion) is unnecessary for this + * case, since we would cancel the waiters before proceeding with a + * deletion. An alternative mechanism for keeping the dataset around + * could be developed but this is simpler. 
+ */ + dsl_dataset_long_hold(ds, FTAG); + dsl_pool_rele(dp, FTAG); + + error = dsl_dir_wait(dd, ds, activity, &waited); + + dsl_dataset_long_rele(ds, FTAG); + dd->dd_activity_waiters--; + if (dd->dd_activity_waiters == 0) + cv_signal(&dd->dd_activity_cv); + mutex_exit(&dd->dd_activity_lock); + + dsl_dataset_rele(ds, FTAG); + + if (error == 0) + fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited); + + return (error); +} + +/* * fsname is name of dataset to rollback (to most recent snapshot) * * innvl may contain name of expected target snapshot @@ -6915,6 +6992,11 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait)); + zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS, + zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, + zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, |