aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
author Matthew Ahrens <[email protected]> 2019-06-10 11:48:42 -0700
committer Brian Behlendorf <[email protected]> 2019-06-10 11:48:42 -0700
commit b8738257c2607c73c731ce8e0fd73282b266d6ef (patch)
tree d73e90809b9f413b8894d8ee7fc6ad8a11bff7fe /include/sys
parent 5a902f5aaa1fbf6f7e459ec29f6d1d988ec78b0a (diff)
make zil max block size tunable
We've observed that on some highly fragmented pools, most metaslab allocations are small (~2-8KB), but there are some large, 128K allocations. The large allocations are for ZIL blocks. If there is a lot of fragmentation, the large allocations can be hard to satisfy.

The most common impact of this is that we need to check (and thus load) lots of metaslabs from the ZIL allocation code path, causing sync writes to wait for metaslabs to load, which can take a second or more. In the worst case, we may not be able to satisfy the allocation, in which case the ZIL will resort to txg_wait_synced() to ensure the change is on disk.

To provide a workaround for this, this change adds a tunable that can reduce the size of ZIL blocks.

External-issue: DLPX-61719
Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Paul Dagnelie <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matthew Ahrens <[email protected]>
Closes #8865
Diffstat (limited to 'include/sys')
-rw-r--r-- include/sys/zil.h 5
-rw-r--r-- include/sys/zil_impl.h 29
2 files changed, 12 insertions, 22 deletions
diff --git a/include/sys/zil.h b/include/sys/zil.h
index fb7b38a06..cfa5e3995 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -515,6 +515,9 @@ extern void zil_set_sync(zilog_t *zilog, uint64_t syncval);
extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval);
+extern uint64_t zil_max_copied_data(zilog_t *zilog);
+extern uint64_t zil_max_log_data(zilog_t *zilog);
+
extern int zil_replay_disable;
#ifdef __cplusplus
diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h
index 174fef334..d2f401865 100644
--- a/include/sys/zil_impl.h
+++ b/include/sys/zil_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -209,6 +209,13 @@ struct zilog {
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */
+ /*
+ * Max block size for this ZIL. Note that this can not be changed
+ * while the ZIL is in use because consumers (ZPL/zvol) need to take
+ * this into account when deciding between WR_COPIED and WR_NEED_COPY
+ * (see zil_max_copied_data()).
+ */
+ uint64_t zl_max_block_size;
};
typedef struct zil_bp_node {
@@ -216,26 +223,6 @@ typedef struct zil_bp_node {
avl_node_t zn_node;
} zil_bp_node_t;
-/*
- * Maximum amount of write data that can be put into single log block.
- */
-#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
- sizeof (lr_write_t))
-
-/*
- * Maximum amount of log space we agree to waste to reduce number of
- * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%).
- */
-#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8)
-
-/*
- * Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY
- * as more space efficient if we can't fit at least two log records into
- * maximum sized log block.
- */
-#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \
- sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t))
-
#ifdef __cplusplus
}
#endif