aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorlidongyang <[email protected]>2017-12-23 05:19:51 +1100
committerBrian Behlendorf <[email protected]>2017-12-22 10:19:51 -0800
commit823d48bfb182137c53b9432498f1f0564eaa8bfc (patch)
tree35f32420b30cdaad0fea38ec0ccc2dd0abbe52a4 /include
parent44b61ea506212c287333e03d2cf8933216810800 (diff)
Call commit callbacks from the tail of the list
Our zfs backed Lustre MDT had soft lockups while under heavy metadata workloads while handling transaction callbacks from osd_zfs. The problem is zfs is not taking advantage of the fast path in Lustre's trans callback handling, where Lustre will skip the calls to ptlrpc_commit_replies() when it already saw a higher transaction number. This patch corrects this, it also has a positive impact on metadata performance on Lustre with osd_zfs, plus some cleanup in the headers. A similar issue for ext4/ldiskfs is described on: https://jira.hpdd.intel.com/browse/LU-6527 Reviewed-by: Olaf Faaland <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Li Dongyang <[email protected]> Closes #6986
Diffstat (limited to 'include')
-rw-r--r--include/sys/dmu.h5
-rw-r--r--include/sys/dmu_tx.h4
2 files changed, 5 insertions, 4 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index f252a9cb6..61c02e8a7 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -748,11 +748,16 @@ void dmu_tx_mark_netfree(dmu_tx_t *tx);
* to stable storage and will also be called if the dmu_tx is aborted.
* If there is any error which prevents the transaction from being committed to
* disk, the callback will be called with a value of error != 0.
+ *
+ * When multiple callbacks are registered to the transaction, the callbacks
+ * will be called in reverse order to let Lustre, the only user of commit
+ * callback currently, take the fast path of its commit callback handling.
*/
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
void *dcb_data);
+void dmu_tx_do_callbacks(list_t *cb_list, int error);
/*
* Free up the data blocks for a defined range of a file. If size is
diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
index f16e1e858..d82a79310 100644
--- a/include/sys/dmu_tx.h
+++ b/include/sys/dmu_tx.h
@@ -145,10 +145,6 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
-void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
- void *dcb_data);
-void dmu_tx_do_callbacks(list_t *cb_list, int error);
-
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
*/