summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cmd/zfs/zfs_main.c91
-rw-r--r--configure.ac1
-rw-r--r--include/libzfs.h3
-rw-r--r--include/libzfs_core.h1
-rw-r--r--include/sys/dsl_dir.h8
-rw-r--r--include/sys/fs/zfs.h12
-rw-r--r--lib/libzfs/libzfs_dataset.c28
-rw-r--r--lib/libzfs_core/libzfs_core.c20
-rw-r--r--man/man8/Makefile.am1
-rw-r--r--man/man8/zfs-wait.871
-rw-r--r--man/man8/zfs.85
-rw-r--r--module/os/linux/zfs/zfs_dir.c11
-rw-r--r--module/os/linux/zfs/zfs_vfsops.c6
-rw-r--r--module/zfs/dsl_dataset.c28
-rw-r--r--module/zfs/dsl_destroy.c4
-rw-r--r--module/zfs/dsl_dir.c113
-rw-r--r--module/zfs/zfs_ioctl.c82
-rw-r--r--tests/runfiles/common.run4
-rw-r--r--tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c14
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/Makefile.am1
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am8
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh20
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh21
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib80
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh57
25 files changed, 679 insertions, 11 deletions
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 5e5bbc972..ae71cdc88 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -122,6 +122,7 @@ static int zfs_do_change_key(int argc, char **argv);
static int zfs_do_project(int argc, char **argv);
static int zfs_do_version(int argc, char **argv);
static int zfs_do_redact(int argc, char **argv);
+static int zfs_do_wait(int argc, char **argv);
#ifdef __FreeBSD__
static int zfs_do_jail(int argc, char **argv);
@@ -183,7 +184,8 @@ typedef enum {
HELP_VERSION,
HELP_REDACT,
HELP_JAIL,
- HELP_UNJAIL
+ HELP_UNJAIL,
+ HELP_WAIT,
} zfs_help_t;
typedef struct zfs_command {
@@ -248,6 +250,7 @@ static zfs_command_t command_table[] = {
{ "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
{ "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
{ "redact", zfs_do_redact, HELP_REDACT },
+ { "wait", zfs_do_wait, HELP_WAIT },
#ifdef __FreeBSD__
{ "jail", zfs_do_jail, HELP_JAIL },
@@ -410,6 +413,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tjail <jailid|jailname> <filesystem>\n"));
case HELP_UNJAIL:
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
+ case HELP_WAIT:
+ return (gettext("\twait [-t <activity>] <filesystem>\n"));
}
abort();
@@ -8317,6 +8322,90 @@ zfs_do_project(int argc, char **argv)
return (ret);
}
+/*
+ * zfs wait [-t <activity>] <filesystem>
+ *
+ * Block until the selected kinds of background activity have stopped in the
+ * given filesystem. With no -t option every activity type is waited for;
+ * -t takes a comma-separated list of activity names that replaces that
+ * default set.
+ *
+ * Returns 0 once nothing is left to wait for (or the filesystem has gone
+ * away), 1 if the filesystem cannot be opened, and otherwise the error
+ * reported by zfs_wait_status().
+ */
+static int
+zfs_do_wait(int argc, char **argv)
+{
+	boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES];
+	/*
+	 * Start from "no error": if -t was given an empty list, no activity
+	 * is enabled and the wait loop below never assigns 'error'.
+	 */
+	int error = 0;
+	/* getopt() returns int; a plain char cannot reliably hold -1. */
+	int c;
+
+	/* By default, wait for all types of activity. */
+	for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++)
+		enabled[i] = B_TRUE;
+
+	while ((c = getopt(argc, argv, "t:")) != -1) {
+		switch (c) {
+		case 't':
+		{
+			/*
+			 * The index of each name is used directly as the
+			 * zfs_wait_activity_t value, so the order here has to
+			 * follow that enum.
+			 */
+			static char *col_subopts[] = { "deleteq", NULL };
+			char *value;
+
+			/* Reset activities array */
+			bzero(&enabled, sizeof (enabled));
+			while (*optarg != '\0') {
+				int activity = getsubopt(&optarg, col_subopts,
+				    &value);
+
+				if (activity < 0) {
+					(void) fprintf(stderr,
+					    gettext("invalid activity '%s'\n"),
+					    value);
+					usage(B_FALSE);
+				}
+
+				enabled[activity] = B_TRUE;
+			}
+			break;
+		}
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing 'filesystem' "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 1) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM);
+	if (zhp == NULL)
+		return (1);
+
+	/*
+	 * Keep making passes over the enabled activities until one complete
+	 * pass had to wait for nothing. An activity that was idle early in a
+	 * pass may have started again while a later one was being waited on.
+	 */
+	for (;;) {
+		boolean_t missing = B_FALSE;
+		boolean_t any_waited = B_FALSE;
+
+		for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) {
+			boolean_t waited;
+
+			if (!enabled[i])
+				continue;
+
+			error = zfs_wait_status(zhp, i, &missing, &waited);
+			if (error != 0 || missing)
+				break;
+
+			any_waited = (any_waited || waited);
+		}
+
+		if (error != 0 || missing || !any_waited)
+			break;
+	}
+
+	zfs_close(zhp);
+
+	return (error);
+}
+
/*
* Display version message
*/
diff --git a/configure.ac b/configure.ac
index eeb0a3843..370a1970f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -264,6 +264,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
+ tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile
diff --git a/include/libzfs.h b/include/libzfs.h
index 236a73130..7633579d4 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -507,6 +507,9 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
+extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t,
+ boolean_t *, boolean_t *);
+
/*
* zfs encryption management
*/
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index c4b4f8e71..18ce6994a 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -133,6 +133,7 @@ int lzc_pool_checkpoint_discard(const char *);
int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
+int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);
#ifdef __cplusplus
}
diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index bb6921027..88fd61035 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -121,6 +121,11 @@ struct dsl_dir {
bplist_t dd_pending_frees;
bplist_t dd_pending_allocs;
+ kmutex_t dd_activity_lock;
+ kcondvar_t dd_activity_cv;
+ boolean_t dd_activity_cancelled;
+ uint64_t dd_activity_waiters;
+
/* protected by dd_lock; keep at end of struct for better locality */
char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
};
@@ -192,6 +197,9 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
void dsl_dir_livelist_close(dsl_dir_t *dd);
void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total);
+int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
+ boolean_t *waited);
+void dsl_dir_cancel_waiters(dsl_dir_t *dd);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 3484b13e3..477356aa7 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1282,6 +1282,7 @@ typedef enum zfs_ioc {
ZFS_IOC_REDACT, /* 0x5a51 */
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
ZFS_IOC_WAIT, /* 0x5a53 */
+ ZFS_IOC_WAIT_FS, /* 0x5a54 */
/*
* Per-platform (Optional) - 6/128 numbers reserved.
@@ -1358,6 +1359,11 @@ typedef enum {
ZPOOL_WAIT_NUM_ACTIVITIES
} zpool_wait_activity_t;
+/* Kinds of per-filesystem activity that ZFS_IOC_WAIT_FS can wait for. */
+typedef enum {
+ /* The filesystem's delete queue (set of unlinked objects) to empty. */
+ ZFS_WAIT_DELETEQ,
+ /* Number of activities above; not an activity itself. */
+ ZFS_WAIT_NUM_ACTIVITIES
+} zfs_wait_activity_t;
+
/*
* Bookmark name values.
*/
@@ -1416,6 +1422,12 @@ typedef enum {
#define ZPOOL_WAIT_WAITED "wait_waited"
/*
+ * The following are names used when invoking ZFS_IOC_WAIT_FS.
+ */
+#define ZFS_WAIT_ACTIVITY "wait_activity"
+#define ZFS_WAIT_WAITED "wait_waited"
+
+/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
#define ZFS_ONLINE_CHECKREMOVE 0x1
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index 48d656323..45e7a79fb 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -5599,3 +5599,31 @@ zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize,
volsize += numdb;
return (volsize);
}
+
+/*
+ * Wait for 'activity' to finish on the dataset behind 'zhp'. On success,
+ * '*waited' tells the caller whether any waiting actually took place.
+ *
+ * A dataset that no longer exists is not treated as a failure: '*missing' is
+ * set, nothing is printed and 0 is returned. Callers that poll in a loop for
+ * a long time (the 'zfs wait' command does) regard a filesystem disappearing
+ * as an ordinary way for the wait to end. Any other failure is reported via
+ * zfs_standard_error_fmt() and its error code is returned.
+ */
+int
+zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity,
+    boolean_t *missing, boolean_t *waited)
+{
+	int rc = lzc_wait_fs(zhp->zfs_name, activity, waited);
+
+	if (rc == ENOENT) {
+		/* The dataset is gone: report it quietly. */
+		*missing = B_TRUE;
+		return (0);
+	}
+	*missing = B_FALSE;
+
+	if (rc == 0)
+		return (0);
+
+	(void) zfs_standard_error_fmt(zhp->zfs_hdl, rc,
+	    dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"),
+	    zhp->zfs_name);
+
+	return (rc);
+}
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index f65db4ff4..18143d364 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -1621,3 +1621,23 @@ lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
{
return (wait_common(pool, activity, B_TRUE, tag, waited));
}
+
+/*
+ * Issue ZFS_IOC_WAIT_FS for dataset 'fs', asking the kernel to wait for
+ * 'activity'. When the ioctl succeeds and 'waited' is non-NULL, '*waited'
+ * receives the ZFS_WAIT_WAITED value from the reply. Returns the ioctl's
+ * error code.
+ */
+int
+lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
+{
+	nvlist_t *innvl = fnvlist_alloc();
+	nvlist_t *outnvl = NULL;
+	int err;
+
+	fnvlist_add_int32(innvl, ZFS_WAIT_ACTIVITY, activity);
+	err = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, innvl, &outnvl);
+	fnvlist_free(innvl);
+
+	if (err == 0 && waited != NULL) {
+		*waited = fnvlist_lookup_boolean_value(outnvl,
+		    ZFS_WAIT_WAITED);
+	}
+	fnvlist_free(outnvl);
+
+	return (err);
+}
diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am
index f81a1f672..8239c2157 100644
--- a/man/man8/Makefile.am
+++ b/man/man8/Makefile.am
@@ -41,6 +41,7 @@ dist_man_MANS = \
zfs-unmount.8 \
zfs-upgrade.8 \
zfs-userspace.8 \
+ zfs-wait.8 \
zgenhostid.8 \
zinject.8 \
zpool.8 \
diff --git a/man/man8/zfs-wait.8 b/man/man8/zfs-wait.8
new file mode 100644
index 000000000..dcc679bb0
--- /dev/null
+++ b/man/man8/zfs-wait.8
@@ -0,0 +1,71 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License (the "License").
+.\" You may not use this file except in compliance with the License.
+.\"
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+.\" or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions
+.\" and limitations under the License.
+.\"
+.\" When distributing Covered Code, include this CDDL HEADER in each
+.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+.\" If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying
+.\" information: Portions Copyright [yyyy] [name of copyright owner]
+.\"
+.\" CDDL HEADER END
+.\"
+.\"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
+.\" Copyright (c) 2017 Datto Inc.
+.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
+.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\"
+.Dd August 9, 2019
+.Dt ZFS-WAIT 8
+.Os Linux
+.Sh NAME
+.Nm zfs Ns Pf - Cm wait
+.Nd Wait for background activity to stop in a ZFS filesystem
+.Sh SYNOPSIS
+.Nm
+.Cm wait
+.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
+.Ar fs
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Xo
+.Nm
+.Cm wait
+.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
+.Ar fs
+.Xc
+Waits until all background activity of the given types has ceased in the given
+filesystem.
+The activity could cease because it has completed or because the filesystem has
+been destroyed or unmounted.
+If no activities are specified, the command waits until background activity of
+every type listed below has ceased.
+If there is no activity of the given types in progress, the command returns
+immediately.
+.Pp
+These are the possible values for
+.Ar activity ,
+along with what each one waits for:
+.Bd -literal
+ deleteq The filesystem's internal delete queue to empty
+.Ed
+.Pp
+Note that the internal delete queue does not finish draining until
+all large files have had time to be fully destroyed and all open file
+handles to unlinked files are closed.
+.El
+.Sh SEE ALSO
+.Xr lsof 8
diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
index eb6e0e33e..587f16c4e 100644
--- a/man/man8/zfs.8
+++ b/man/man8/zfs.8
@@ -281,6 +281,11 @@ Attaches a filesystem to a jail.
.It Xr zfs-unjail 8
Detaches a filesystem from a jail.
.El
+.Ss Waiting
+.Bl -tag -width ""
+.It Xr zfs-wait 8
+Wait for background activity in a filesystem to complete.
+.El
.Sh EXIT STATUS
The
.Nm
diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c
index 7ebf38ddb..591e35fd1 100644
--- a/module/os/linux/zfs/zfs_dir.c
+++ b/module/os/linux/zfs/zfs_dir.c
@@ -52,6 +52,8 @@
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
/*
* zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
@@ -739,6 +741,8 @@ zfs_rmnode(znode_t *zp)
zfs_unlinked_add(xzp, tx);
}
+ mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
/*
* Remove this znode from the unlinked set. If a rollback has
* occurred while a file is open and unlinked, then when the file
@@ -749,6 +753,13 @@ zfs_rmnode(znode_t *zp)
zp->z_id, tx);
VERIFY(error == 0 || error == ENOENT);
+ uint64_t count;
+ if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
+ cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
+ }
+
+ mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
+
dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
zfs_znode_delete(zp, tx);
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 478e07862..b6757d1bc 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -55,6 +55,7 @@
#include <sys/zfs_quota.h>
#include <sys/sunddi.h>
#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
#include <sys/spa_boot.h>
#include <sys/objlist.h>
#include <sys/zpl.h>
@@ -872,6 +873,8 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
"num_entries in unlinked set: %llu",
zs.zs_num_entries);
zfs_unlinked_drain(zfsvfs);
+ dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
+ dd->dd_activity_cancelled = B_FALSE;
}
/*
@@ -1423,6 +1426,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
}
dmu_objset_evict_dbufs(zfsvfs->z_os);
+ dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
+ dsl_dir_cancel_waiters(dd);
return (0);
}
@@ -1813,6 +1818,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
if (err != 0)
goto bail;
+ ds->ds_dir->dd_activity_cancelled = B_FALSE;
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
zfs_set_fuid_feature(zfsvfs);
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 3e5a67bdb..2d6e95e31 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname,
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
- boolean_t held;
+ boolean_t held = B_FALSE;
if (!dmu_tx_is_syncing(tx))
return (0);
- if (owner != NULL) {
- VERIFY3P(ds->ds_owner, ==, owner);
- dsl_dataset_long_rele(ds, owner);
- }
-
- held = dsl_dataset_long_held(ds);
-
- if (owner != NULL)
- dsl_dataset_long_hold(ds, owner);
+ dsl_dir_t *dd = ds->ds_dir;
+ mutex_enter(&dd->dd_activity_lock);
+ uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
+ (owner != NULL ? 1 : 0);
+ /*
+ * The value of dd_activity_waiters can change as soon as we drop the
+ * lock, but we're fine with that; new waiters coming in or old
+ * waiters leaving doesn't cause problems, since we're going to cancel
+ * waiters later anyway. The goal of this check is to verify that no
+ * non-waiters have long-holds, and all new long-holds will be
+ * prevented because we're holding the pool config as writer.
+ */
+ if (holds != dd->dd_activity_waiters)
+ held = B_TRUE;
+ mutex_exit(&dd->dd_activity_lock);
if (held)
return (SET_ERROR(EBUSY));
@@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
DMU_MAX_ACCESS * spa_asize_inflation);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
+ dsl_dir_cancel_waiters(origin_head->ds_dir);
+
/*
* Swap per-dataset feature flags.
*/
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 01b5f080d..883928f0e 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
return (SET_ERROR(EBUSY));
+ ASSERT0(ds->ds_dir->dd_activity_waiters);
+
mos = ds->ds_dir->dd_pool->dp_meta_objset;
/*
@@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, " ");
+ dsl_dir_cancel_waiters(ds->ds_dir);
+
rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) &&
dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 172ebc72c..63ecb1d39 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -51,6 +51,9 @@
#include <sys/zthr.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
/*
* Filesystem and Snapshot Limits
@@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu)
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
}
@@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
}
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL);
dsl_prop_init(dd);
dsl_dir_snap_cmtime_update(dd);
@@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@@ -310,6 +319,8 @@ errout:
if (dsl_deadlist_is_open(&dd->dd_livelist))
dsl_dir_livelist_close(dd);
dsl_prop_fini(dd);
+ cv_destroy(&dd->dd_activity_cv);
+ mutex_destroy(&dd->dd_activity_lock);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
}
}
+/*
+ * Report through 'in_progress' whether 'activity' is currently ongoing for
+ * dataset 'ds' of dsl_dir 'dd'. The caller must hold dd_activity_lock.
+ *
+ * Returns 0 on success. On failure the error from the failing lookup is
+ * returned and '*in_progress' is left unwritten.
+ *
+ * This function only inspects state. The hold on 'ds' belongs to the caller,
+ * so no path here may release it.
+ */
+static int
+dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds,
+    zfs_wait_activity_t activity, boolean_t *in_progress)
+{
+	int error = 0;
+
+	ASSERT(MUTEX_HELD(&dd->dd_activity_lock));
+
+	switch (activity) {
+	case ZFS_WAIT_DELETEQ: {
+#ifdef _KERNEL
+		objset_t *os;
+		error = dmu_objset_from_ds(ds, &os);
+		if (error != 0)
+			break;
+
+		/*
+		 * Nothing to wait for unless this is a ZPL objset that has a
+		 * user pointer registered and is not flagged as unmounted.
+		 */
+		mutex_enter(&os->os_user_ptr_lock);
+		void *user = dmu_objset_get_user(os);
+		mutex_exit(&os->os_user_ptr_lock);
+		if (dmu_objset_type(os) != DMU_OST_ZFS ||
+		    user == NULL || zfs_get_vfs_flag_unmounted(os)) {
+			*in_progress = B_FALSE;
+			return (0);
+		}
+
+		uint64_t readonly = B_FALSE;
+		error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly,
+		    NULL);
+
+		if (error != 0)
+			break;
+
+		/* A read-only filesystem or pool is reported as idle. */
+		if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) {
+			*in_progress = B_FALSE;
+			return (0);
+		}
+
+		/* The activity is in progress while the unlinked set has entries. */
+		uint64_t count, unlinked_obj;
+		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+		    &unlinked_obj);
+		if (error != 0)
+			break;
+
+		error = zap_count(os, unlinked_obj, &count);
+
+		if (error == 0)
+			*in_progress = (count != 0);
+		break;
+#else
+		/*
+		 * The delete queue is ZPL specific, and libzpool doesn't have
+		 * it. It doesn't make sense to wait for it.
+		 */
+		*in_progress = B_FALSE;
+		break;
+#endif
+	}
+	default:
+		panic("unrecognized value for activity %d", activity);
+	}
+
+	return (error);
+}
+
+/*
+ * Sleep until 'activity' is no longer in progress for dataset 'ds' of
+ * dsl_dir 'dd'.
+ *
+ * The caller must hold dd_activity_lock: it is the mutex handed to
+ * cv_wait_sig() below and is asserted held by
+ * dsl_dir_activity_in_progress().
+ *
+ * '*waited' is set to B_TRUE if the activity was ever found in progress; it
+ * is never cleared here, so the caller must initialise it.
+ *
+ * Returns 0 when the activity is not (or no longer) in progress, the error
+ * from dsl_dir_activity_in_progress() if the check fails, or EINTR if
+ * cv_wait_sig() returns 0 or dd_activity_cancelled is set after waking.
+ */
+int
+dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
+ boolean_t *waited)
+{
+ int error = 0;
+ boolean_t in_progress;
+ dsl_pool_t *dp = dd->dd_pool;
+ for (;;) {
+ /* The pool config is held only for the check, not while asleep. */
+ dsl_pool_config_enter(dp, FTAG);
+ error = dsl_dir_activity_in_progress(dd, ds, activity,
+ &in_progress);
+ dsl_pool_config_exit(dp, FTAG);
+ if (error != 0 || !in_progress)
+ break;
+
+ *waited = B_TRUE;
+
+ /* Re-check the activity after every wakeup unless told to give up. */
+ if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) ==
+ 0 || dd->dd_activity_cancelled) {
+ error = SET_ERROR(EINTR);
+ break;
+ }
+ }
+ return (error);
+}
+
+/*
+ * Mark 'dd' as cancelled, wake every thread sleeping on dd_activity_cv, and
+ * do not return until dd_activity_waiters has dropped to zero.
+ */
+void
+dsl_dir_cancel_waiters(dsl_dir_t *dd)
+{
+	kmutex_t *lock = &dd->dd_activity_lock;
+	kcondvar_t *cv = &dd->dd_activity_cv;
+
+	mutex_enter(lock);
+	dd->dd_activity_cancelled = B_TRUE;
+	cv_broadcast(cv);
+	for (;;) {
+		if (dd->dd_activity_waiters == 0)
+			break;
+		/* Waiters signal this cv when the count reaches zero. */
+		cv_wait(cv, lock);
+	}
+	mutex_exit(lock);
+}
+
#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index d57aef509..fb9435341 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -4073,6 +4073,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
}
/*
+ * This ioctl waits for activity of a particular type to complete. If there is
+ * no activity of that type in progress, it returns immediately, and the
+ * returned value "waited" is false. If there is activity in progress, the
+ * ioctl blocks until all activity of that type is complete, and then returns
+ * with "waited" set to true.
+ *
+ * If a thread waiting in the ioctl receives a signal, the call will return
+ * immediately, and the return value will be EINTR.
+ *
+ * innvl: {
+ * "wait_activity" -> int32_t
+ * }
+ *
+ * outnvl: "wait_waited" -> boolean_t
+ */
+/* Keys accepted in innvl for ZFS_IOC_WAIT_FS: a single int32 activity. */
+static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
+ {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
+};
+
+/*
+ * Handler for ZFS_IOC_WAIT_FS. 'name' is the dataset to wait on, 'innvl'
+ * carries the activity, and on success ZFS_WAIT_WAITED is added to 'outnvl'.
+ * Returns EINVAL for a missing or out-of-range activity, the error from
+ * taking the pool or dataset hold, or the result of dsl_dir_wait().
+ */
+static int
+zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ int32_t activity;
+ boolean_t waited = B_FALSE;
+ int error;
+ dsl_pool_t *dp;
+ dsl_dir_t *dd;
+ dsl_dataset_t *ds;
+
+ if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
+ return (SET_ERROR(EINVAL));
+
+ /* Reject values outside zfs_wait_activity_t before using them. */
+ if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
+ return (SET_ERROR(EINVAL));
+
+ if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
+ return (error);
+
+ if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+
+ /*
+ * Register as a waiter under dd_activity_lock. The lock stays held
+ * across dsl_dir_wait(), which passes it to cv_wait_sig().
+ */
+ dd = ds->ds_dir;
+ mutex_enter(&dd->dd_activity_lock);
+ dd->dd_activity_waiters++;
+
+ /*
+ * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
+ * aren't evicted while we're waiting. Normally this is prevented by
+ * holding the pool, but we can't do that while we're waiting since
+ * that would prevent TXGs from syncing out. Some of the functionality
+ * of long-holds (e.g. preventing deletion) is unnecessary for this
+ * case, since we would cancel the waiters before proceeding with a
+ * deletion. An alternative mechanism for keeping the dataset around
+ * could be developed but this is simpler.
+ */
+ dsl_dataset_long_hold(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+
+ error = dsl_dir_wait(dd, ds, activity, &waited);
+
+ dsl_dataset_long_rele(ds, FTAG);
+ dd->dd_activity_waiters--;
+ /* dsl_dir_cancel_waiters() sleeps on this cv until the count is zero. */
+ if (dd->dd_activity_waiters == 0)
+ cv_signal(&dd->dd_activity_cv);
+ mutex_exit(&dd->dd_activity_lock);
+
+ dsl_dataset_rele(ds, FTAG);
+
+ if (error == 0)
+ fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
+
+ return (error);
+}
+
+/*
* fsname is name of dataset to rollback (to most recent snapshot)
*
* innvl may contain name of expected target snapshot
@@ -6915,6 +6992,11 @@ zfs_ioctl_init(void)
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
+ zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
+ zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
+ zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 84ea70f07..af720ad9b 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -288,6 +288,10 @@ tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos',
'zfs_upgrade_007_neg']
tags = ['functional', 'cli_root', 'zfs_upgrade']
+[tests/functional/cli_root/zfs_wait]
+tests = ['zfs_wait_deleteq']
+tags = ['functional', 'cli_root', 'zfs_wait']
+
[tests/functional/cli_root/zpool]
tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors']
tags = ['functional', 'cli_root', 'zpool']
diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
index 47e8ff5e2..3f6147509 100644
--- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
+++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
@@ -740,6 +740,18 @@ test_wait(const char *pool)
}
static void
+test_wait_fs(const char *dataset)
+{
+ nvlist_t *required = fnvlist_alloc();
+
+ /*
+ * 2 is not a valid zfs_wait_activity_t (only ZFS_WAIT_DELETEQ is
+ * defined), which zfs_ioc_wait_fs() rejects with EINVAL.
+ * NOTE(review): presumably that is why EINVAL is the expected result
+ * passed to IOC_INPUT_TEST -- confirm against the macro.
+ */
+ fnvlist_add_int32(required, "wait_activity", 2);
+
+ IOC_INPUT_TEST(ZFS_IOC_WAIT_FS, dataset, required, NULL, EINVAL);
+
+ nvlist_free(required);
+}
+
+static void
zfs_ioc_input_tests(const char *pool)
{
char filepath[] = "/tmp/ioc_test_file_XXXXXX";
@@ -826,6 +838,7 @@ zfs_ioc_input_tests(const char *pool)
test_vdev_trim(pool);
test_wait(pool);
+ test_wait_fs(dataset);
/*
* cleanup
@@ -980,6 +993,7 @@ validate_ioc_values(void)
CHECK(ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT);
CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS);
CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT);
+ CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS);
CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK);
diff --git a/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/Makefile.am
index 01af9d6b9..8d99df09f 100644
--- a/tests/zfs-tests/tests/functional/cli_root/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/Makefile.am
@@ -32,6 +32,7 @@ SUBDIRS = \
zfs_unmount \
zfs_unshare \
zfs_upgrade \
+ zfs_wait \
zpool \
zpool_add \
zpool_attach \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am
new file mode 100644
index 000000000..d401fe68b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile.am
@@ -0,0 +1,8 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_wait
+dist_pkgdata_SCRIPTS = \
+ setup.ksh \
+ cleanup.ksh \
+ zfs_wait_deleteq.ksh
+
+dist_pkgdata_DATA = \
+ zfs_wait.kshlib
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh
new file mode 100755
index 000000000..456d2d0c2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh
@@ -0,0 +1,20 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh
new file mode 100755
index 000000000..cca05fee7
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh
@@ -0,0 +1,21 @@
+#!/bin/ksh -p
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+DISK=${DISKS%% *}
+
+default_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
new file mode 100644
index 000000000..9f62a7c92
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
@@ -0,0 +1,80 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+#
+
+typeset -a disk_array=($(find_disks $DISKS))
+
+typeset -r DISK1=${disk_array[0]}
+typeset -r DISK2=${disk_array[1]}
+typeset -r DISK3=${disk_array[2]}
+
+#
+# When the condition it is waiting for becomes true, 'zfs wait' should return
+# promptly. We want to enforce this, but any check will be racey because it will
+# take some small but indeterminate amount of time for the waiting thread to be
+# woken up and for the process to exit.
+#
+# To deal with this, we provide a grace period after the condition becomes true
+# during which 'zfs wait' can exit. If it hasn't exited by the time the grace
+# period expires we assume something is wrong and fail the test. While there is
+# no value that can really be correct, the idea is we choose something large
+# enough that it shouldn't cause issues in practice.
+#
+typeset -r WAIT_EXIT_GRACE=2.0
+
+# Exit status is that of 'ps -p <pid>': zero when ps finds the process.
+function proc_exists # pid
+{
+ ps -p $1 >/dev/null
+}
+
+# Fail the test if the process with the given pid is no longer running.
+# This library drives 'zfs wait', so the message names zfs rather than zpool.
+function proc_must_exist # pid
+{
+	proc_exists $1 || log_fail "zfs process exited too soon"
+}
+
+# Fail the test if the process with the given pid is still running.
+# This library drives 'zfs wait', so the message names zfs rather than zpool.
+function proc_must_not_exist # pid
+{
+	proc_exists $1 && log_fail "zfs process took too long to exit"
+}
+
+# Print the current time as HH:MM:SS; used to timestamp log notes.
+function get_time
+{
+ date +'%H:%M:%S'
+}
+
+# Send SIGTERM to the given pid, but only if a pid was supplied and the
+# process still exists. The exit status is that of the && chain.
+function kill_if_running
+{
+ typeset pid=$1
+ [[ $pid ]] && proc_exists $pid && log_must kill -s TERM $pid
+}
+
+# Log a command and then start it running in the background.
+# The command is the function's last job, so the caller can read its pid
+# from $! right after the call.
+function log_bkgrnd
+{
+ log_note "$(get_time) Starting cmd in background '$@'"
+ "$@" &
+}
+
+# Check that a background process has completed and exited with a status of 0.
+# The process is given WAIT_EXIT_GRACE seconds to exit before it is checked.
+function bkgrnd_proc_succeeded
+{
+ typeset pid=$1
+
+ log_must sleep $WAIT_EXIT_GRACE
+
+ proc_must_not_exist $pid
+ # 'wait' yields the exit status of the already-finished background job.
+ wait $pid || log_fail "process exited with status $?"
+ log_note "$(get_time) wait completed successfully"
+}
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh
new file mode 100755
index 000000000..00c5a109c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh
@@ -0,0 +1,57 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs wait' works when waiting for the delete queue to empty.
+#
+# STRATEGY:
+# 1. Create a file
+# 2. Open a file descriptor pointing to that file.
+# 3. Delete the file.
+# 4. Start a background process waiting for the delete queue to empty.
+# 5. Verify that the command doesn't return immediately.
+# 6. Close the open file descriptor.
+# 7. Verify that the command returns soon after the descriptor is closed.
+#
+
+# Registered with log_onexit: stop a 'zfs wait' that is still running and
+# close file descriptor 3, which the test opens on the unlinked file.
+function cleanup
+{
+ kill_if_running $pid
+ exec 3<&-
+}
+
+
+typeset -r TESTFILE="/$TESTPOOL/testfile"
+typeset pid
+
+log_onexit cleanup
+
+# Steps 1-3: create a file, keep it open on fd 3, then unlink it. While the
+# descriptor stays open the delete queue cannot finish draining.
+log_must touch $TESTFILE
+exec 3<> $TESTFILE
+log_must rm $TESTFILE
+
+# Steps 4-5: the wait must still be running while the descriptor is open.
+log_bkgrnd zfs wait -t deleteq $TESTPOOL
+pid=$!
+proc_must_exist $pid
+
+# Steps 6-7: closing the descriptor should let the wait finish promptly.
+exec 3<&-
+log_must sleep 0.5
+bkgrnd_proc_succeeded $pid
+
+# The activity exercised here is 'deleteq'; there is no 'discard' activity.
+log_pass "'zfs wait -t deleteq' works."