summaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
author: lidongyang <[email protected]> 2017-12-23 05:19:51 +1100
committer: Tony Hutter <[email protected]> 2018-01-30 10:27:31 -0600
commit: 8d82a19def540bba43c8c7597142ff53f7a0b7e5 (patch)
tree: 7e8c14ea864a460cb3ac84e3639b332b5d124120 /include/sys
parent: c2aacf20872856559972a27f7c3f9b4a6fe10cd2 (diff)
Call commit callbacks from the tail of the list
Our ZFS-backed Lustre MDT had soft lockups under heavy metadata workloads while handling transaction callbacks from osd_zfs. The problem is that ZFS is not taking advantage of the fast path in Lustre's trans callback handling, where Lustre will skip the calls to ptlrpc_commit_replies() when it has already seen a higher transaction number. This patch corrects this; it also has a positive impact on metadata performance on Lustre with osd_zfs, plus some cleanup in the headers. A similar issue for ext4/ldiskfs is described at: https://jira.hpdd.intel.com/browse/LU-6527 Reviewed-by: Olaf Faaland <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Li Dongyang <[email protected]> Closes #6986
Diffstat (limited to 'include/sys')
-rw-r--r-- include/sys/dmu.h 5
-rw-r--r-- include/sys/dmu_tx.h 4
2 files changed, 5 insertions, 4 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index d24615262..bcdf7d646 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -713,11 +713,16 @@ void dmu_tx_mark_netfree(dmu_tx_t *tx);
* to stable storage and will also be called if the dmu_tx is aborted.
* If there is any error which prevents the transaction from being committed to
* disk, the callback will be called with a value of error != 0.
+ *
+ * When multiple callbacks are registered to the transaction, the callbacks
+ * will be called in reverse order to let Lustre, the only user of commit
+ * callback currently, take the fast path of its commit callback handling.
*/
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
void *dcb_data);
+void dmu_tx_do_callbacks(list_t *cb_list, int error);
/*
* Free up the data blocks for a defined range of a file. If size is
diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
index f16e1e858..d82a79310 100644
--- a/include/sys/dmu_tx.h
+++ b/include/sys/dmu_tx.h
@@ -145,10 +145,6 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
-void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
- void *dcb_data);
-void dmu_tx_do_callbacks(list_t *cb_list, int error);
-
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
*/