aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/dsl_synctask.c
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2019-06-22 16:51:46 -0700
committerBrian Behlendorf <[email protected]>2019-06-22 16:51:46 -0700
commit186898bbb580a830c02d994e961d717f7cf5dcca (patch)
tree3af5af5af4d7bed1bafb671c86f3876f01e0dc57 /module/zfs/dsl_synctask.c
parentcb9e5b7e84654a8c7dba0f9a0d1227f3c8fa1012 (diff)
OpenZFS 9425 - channel programs can be interrupted
Problem Statement
=================
ZFS channel program scripts currently require a timeout, so that hung or long-running scripts return a timeout error instead of causing ZFS to get wedged. This limit can currently be set up to 100 million Lua instructions. Even with a limit in place, it would be desirable to have a sys admin (support engineer) be able to cancel a script that is taking a long time.

Proposed Solution
=================
Make it possible to abort a channel program by sending an interrupt signal. In the underlying txg_wait_synced function, switch the cv_wait to a cv_wait_sig to catch the signal. Once a signal is encountered, the dsl_sync_task function can install a Lua hook that will get called before the Lua interpreter executes a new line of code. The dsl_sync_task can resume with a standard txg_wait_synced call and wait for the txg to complete. Meanwhile, the hook will abort the script and indicate that the channel program was canceled. The kernel returns an EINTR to indicate that the channel program run was canceled.

Porting notes: Added missing return value from cv_wait_sig()

Authored by: Don Brady <[email protected]>
Reviewed by: Sebastien Roy <[email protected]>
Reviewed by: Serapheim Dimitropoulos <[email protected]>
Reviewed by: Matt Ahrens <[email protected]>
Reviewed by: Sara Hartse <[email protected]>
Reviewed by: Brian Behlendorf <[email protected]>
Approved by: Robert Mustacchi <[email protected]>
Ported-by: Don Brady <[email protected]>
Signed-off-by: Don Brady <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/9425
OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/d0cb1fb926
Closes #8904
Diffstat (limited to 'module/zfs/dsl_synctask.c')
-rw-r--r--module/zfs/dsl_synctask.c24
1 files changed, 21 insertions, 3 deletions
diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c
index b63ce5cad..b225eed37 100644
--- a/module/zfs/dsl_synctask.c
+++ b/module/zfs/dsl_synctask.c
@@ -41,7 +41,7 @@ dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
static int
dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc,
- dsl_syncfunc_t *syncfunc, void *arg,
+ dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
int blocks_modified, zfs_space_check_t space_check, boolean_t early)
{
spa_t *spa;
@@ -85,6 +85,11 @@ top:
dmu_tx_commit(tx);
+ if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) {
+ /* current contract is to call func once */
+ sigfunc(arg, tx);
+ sigfunc = NULL; /* in case we're performing an EAGAIN retry */
+ }
txg_wait_synced(dp, dst.dst_txg);
if (dst.dst_error == EAGAIN) {
@@ -124,7 +129,7 @@ dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg,
int blocks_modified, zfs_space_check_t space_check)
{
- return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg,
+ return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
blocks_modified, space_check, B_FALSE));
}
@@ -146,10 +151,23 @@ dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg,
int blocks_modified, zfs_space_check_t space_check)
{
- return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg,
+ return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
blocks_modified, space_check, B_TRUE));
}
+/*
+ * A standard synctask that can be interrupted from a signal. The sigfunc
+ * is called once if a signal occurred while waiting for the task to sync.
+ */
+int
+dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc,
+ dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
+ int blocks_modified, zfs_space_check_t space_check)
+{
+ return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg,
+ blocks_modified, space_check, B_FALSE));
+}
+
static void
dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx,