aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/zil.c
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs/zil.c')
-rw-r--r--module/zfs/zil.c91
1 files changed, 40 insertions, 51 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 5642f082b..8742fc662 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -91,15 +91,7 @@
* committed to stable storage. Please refer to the zil_commit_waiter()
* function (and the comments within it) for more details.
*/
-static uint_t zfs_commit_timeout_pct = 5;
-
-/*
- * Minimal time we care to delay commit waiting for more ZIL records.
- * At least FreeBSD kernel can't sleep for less than 2us at its best.
- * So requests to sleep for less then 5us is a waste of CPU time with
- * a risk of significant log latency increase due to oversleep.
- */
-static uint64_t zil_min_commit_timeout = 5000;
+static uint_t zfs_commit_timeout_pct = 10;
/*
* See zil.h for more information about these fields.
@@ -2732,6 +2724,19 @@ zil_commit_writer_stall(zilog_t *zilog)
ASSERT(list_is_empty(&zilog->zl_lwb_list));
}
+static void
+zil_burst_done(zilog_t *zilog)
+{
+ if (!list_is_empty(&zilog->zl_itx_commit_list) ||
+ zilog->zl_cur_used == 0)
+ return;
+
+ if (zilog->zl_parallel)
+ zilog->zl_parallel--;
+
+ zilog->zl_cur_used = 0;
+}
+
/*
* This function will traverse the commit list, creating new lwbs as
* needed, and committing the itxs from the commit list to these newly
@@ -2746,7 +2751,6 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
list_t nolwb_waiters;
lwb_t *lwb, *plwb;
itx_t *itx;
- boolean_t first = B_TRUE;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
@@ -2772,9 +2776,22 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
zil_commit_activate_saxattr_feature(zilog);
ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
lwb->lwb_state == LWB_STATE_OPENED);
- first = (lwb->lwb_state == LWB_STATE_NEW) &&
- ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) == NULL ||
- plwb->lwb_state == LWB_STATE_FLUSH_DONE);
+
+ /*
+ * If the lwb is still opened, it means the workload is really
+ * multi-threaded and we won the chance of write aggregation.
+ * If it is not opened yet, but previous lwb is still not
+ * flushed, it still means the workload is multi-threaded, but
+ * there was too much time between the commits to aggregate, so
+ * we try aggregation next times, but without too much hopes.
+ */
+ if (lwb->lwb_state == LWB_STATE_OPENED) {
+ zilog->zl_parallel = ZIL_BURSTS;
+ } else if ((plwb = list_prev(&zilog->zl_lwb_list, lwb))
+ != NULL && plwb->lwb_state != LWB_STATE_FLUSH_DONE) {
+ zilog->zl_parallel = MAX(zilog->zl_parallel,
+ ZIL_BURSTS / 2);
+ }
}
while ((itx = list_remove_head(&zilog->zl_itx_commit_list)) != NULL) {
@@ -2849,7 +2866,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* Our lwb is done, leave the rest of
* itx list to somebody else who care.
*/
- first = B_FALSE;
+ zilog->zl_parallel = ZIL_BURSTS;
break;
}
} else {
@@ -2941,28 +2958,15 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* try and pack as many itxs into as few lwbs as
* possible, without significantly impacting the latency
* of each individual itx.
- *
- * If we had no already running or open LWBs, it can be
- * the workload is single-threaded. And if the ZIL write
- * latency is very small or if the LWB is almost full, it
- * may be cheaper to bypass the delay.
*/
- if (lwb->lwb_state == LWB_STATE_OPENED && first) {
- hrtime_t sleep = zilog->zl_last_lwb_latency *
- zfs_commit_timeout_pct / 100;
- if (sleep < zil_min_commit_timeout ||
- lwb->lwb_nmax - lwb->lwb_nused <
- lwb->lwb_nmax / 8) {
- list_insert_tail(ilwbs, lwb);
- lwb = zil_lwb_write_close(zilog, lwb,
- LWB_STATE_NEW);
- zilog->zl_cur_used = 0;
- if (lwb == NULL) {
- while ((lwb = list_remove_head(ilwbs))
- != NULL)
- zil_lwb_write_issue(zilog, lwb);
- zil_commit_writer_stall(zilog);
- }
+ if (lwb->lwb_state == LWB_STATE_OPENED && !zilog->zl_parallel) {
+ list_insert_tail(ilwbs, lwb);
+ lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
+ zil_burst_done(zilog);
+ if (lwb == NULL) {
+ while ((lwb = list_remove_head(ilwbs)) != NULL)
+ zil_lwb_write_issue(zilog, lwb);
+ zil_commit_writer_stall(zilog);
}
}
}
@@ -3120,19 +3124,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED);
- /*
- * Since the lwb's zio hadn't been issued by the time this thread
- * reached its timeout, we reset the zilog's "zl_cur_used" field
- * to influence the zil block size selection algorithm.
- *
- * By having to issue the lwb's zio here, it means the size of the
- * lwb was too large, given the incoming throughput of itxs. By
- * setting "zl_cur_used" to zero, we communicate this fact to the
- * block size selection algorithm, so it can take this information
- * into account, and potentially select a smaller size for the
- * next lwb block that is allocated.
- */
- zilog->zl_cur_used = 0;
+ zil_burst_done(zilog);
if (nlwb == NULL) {
/*
@@ -4250,9 +4242,6 @@ EXPORT_SYMBOL(zil_kstat_values_update);
ZFS_MODULE_PARAM(zfs, zfs_, commit_timeout_pct, UINT, ZMOD_RW,
"ZIL block open timeout percentage");
-ZFS_MODULE_PARAM(zfs_zil, zil_, min_commit_timeout, U64, ZMOD_RW,
- "Minimum delay we care for ZIL block commit");
-
ZFS_MODULE_PARAM(zfs_zil, zil_, replay_disable, INT, ZMOD_RW,
"Disable intent logging replay");