aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
author: Don Brady <[email protected]> 2019-06-22 16:51:46 -0700
committer: Brian Behlendorf <[email protected]> 2019-06-22 16:51:46 -0700
commit: 186898bbb580a830c02d994e961d717f7cf5dcca (patch)
tree: 3af5af5af4d7bed1bafb671c86f3876f01e0dc57 /include/sys
parent: cb9e5b7e84654a8c7dba0f9a0d1227f3c8fa1012 (diff)
OpenZFS 9425 - channel programs can be interrupted
Problem Statement ================= ZFS Channel program scripts currently require a timeout, so that hung or long-running scripts return a timeout error instead of causing ZFS to get wedged. This limit can currently be set up to 100 million Lua instructions. Even with a limit in place, it would be desirable to have a sys admin (support engineer) be able to cancel a script that is taking a long time. Proposed Solution ================= Make it possible to abort a channel program by sending an interrupt signal. In the underlying txg_wait_synced function, switch the cv_wait to a cv_wait_sig to catch the signal. Once a signal is encountered, the dsl_sync_task function can install a Lua hook that will get called before the Lua interpreter executes a new line of code. The dsl_sync_task can resume with a standard txg_wait_synced call and wait for the txg to complete. Meanwhile, the hook will abort the script and indicate that the channel program was canceled. The kernel returns an EINTR to indicate that the channel program run was canceled. Porting notes: Added missing return value from cv_wait_sig() Authored by: Don Brady <[email protected]> Reviewed by: Sebastien Roy <[email protected]> Reviewed by: Serapheim Dimitropoulos <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Sara Hartse <[email protected]> Reviewed by: Brian Behlendorf <[email protected]> Approved by: Robert Mustacchi <[email protected]> Ported-by: Don Brady <[email protected]> Signed-off-by: Don Brady <[email protected]> OpenZFS-issue: https://www.illumos.org/issues/9425 OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/d0cb1fb926 Closes #8904
Diffstat (limited to 'include/sys')
-rw-r--r-- include/sys/dsl_synctask.h | 3
-rw-r--r-- include/sys/txg.h | 5
-rw-r--r-- include/sys/zcp.h | 31
-rw-r--r-- include/sys/zfs_context.h | 2
4 files changed, 40 insertions, 1 deletions
diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h
index da6c7a40d..957963ffe 100644
--- a/include/sys/dsl_synctask.h
+++ b/include/sys/dsl_synctask.h
@@ -37,6 +37,7 @@ struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *);
+typedef void (dsl_sigfunc_t)(void *, dmu_tx_t *);
typedef enum zfs_space_check {
/*
@@ -116,6 +117,8 @@ int dsl_early_sync_task(const char *, dsl_checkfunc_t *,
dsl_syncfunc_t *, void *, int, zfs_space_check_t);
void dsl_early_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *,
void *, int, zfs_space_check_t, dmu_tx_t *);
+int dsl_sync_task_sig(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *,
+ dsl_sigfunc_t *, void *, int, zfs_space_check_t);
#ifdef __cplusplus
}
diff --git a/include/sys/txg.h b/include/sys/txg.h
index 760d5208b..260a3b43c 100644
--- a/include/sys/txg.h
+++ b/include/sys/txg.h
@@ -88,6 +88,11 @@ extern void txg_kick(struct dsl_pool *dp);
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
/*
+ * Wait as above. Returns true if the thread was signaled while waiting.
+ */
+extern boolean_t txg_wait_synced_sig(struct dsl_pool *dp, uint64_t txg);
+
+/*
* Wait until the given transaction group, or one after it, is
* the open transaction group. Try to make this happen as soon
* as possible (eg. kick off any necessary syncs immediately) when
diff --git a/include/sys/zcp.h b/include/sys/zcp.h
index b9c8ef006..b720d8637 100644
--- a/include/sys/zcp.h
+++ b/include/sys/zcp.h
@@ -52,6 +52,12 @@ typedef struct zcp_cleanup_handler {
list_node_t zch_node;
} zcp_cleanup_handler_t;
+typedef struct zcp_alloc_arg {
+ boolean_t aa_must_succeed;
+ int64_t aa_alloc_remaining;
+ int64_t aa_alloc_limit;
+} zcp_alloc_arg_t;
+
typedef struct zcp_run_info {
dsl_pool_t *zri_pool;
@@ -94,6 +100,11 @@ typedef struct zcp_run_info {
boolean_t zri_timed_out;
/*
+ * Channel program was canceled by user
+ */
+ boolean_t zri_canceled;
+
+ /*
* Boolean indicating whether or not we are running in syncing
* context.
*/
@@ -104,6 +115,26 @@ typedef struct zcp_run_info {
* triggered in the event of a fatal error.
*/
list_t zri_cleanup_handlers;
+
+ /*
+ * The Lua state context of our channel program.
+ */
+ lua_State *zri_state;
+
+ /*
+ * Lua memory allocator arguments.
+ */
+ zcp_alloc_arg_t *zri_allocargs;
+
+ /*
+ * Contains output values from zcp script or error string.
+ */
+ nvlist_t *zri_outnvl;
+
+ /*
+ * The errno number returned to caller of zcp_eval().
+ */
+ int zri_result;
} zcp_run_info_t;
zcp_run_info_t *zcp_run_info(lua_State *);
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 530c8c291..224f5cb83 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -315,8 +315,8 @@ extern void cv_broadcast(kcondvar_t *cv);
#define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at)
#define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at)
-#define cv_wait_sig(cv, mp) cv_wait(cv, mp)
#define cv_wait_io(cv, mp) cv_wait(cv, mp)
+#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp)
#define cv_timedwait_sig_hires(cv, mp, t, r, f) \
cv_timedwait_hires(cv, mp, t, r, f)