aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2017-09-26 19:45:19 -0600
committerBrian Behlendorf <[email protected]>2018-07-30 11:30:41 -0700
commitdae3e9ea21a73b1ca940abd4aee1993ea4be8028 (patch)
treea7d9398eac1e73fdf13ae8c73fc77c051894d086 /module/zfs
parent6b64382b17ea420b1265237ab52657a2d0a94824 (diff)
OpenZFS 9465 - ARC check for 'anon_size > arc_c/2' can stall the system
In the case of one pool being built on another pool, we want to make sure we don't end up throttling the lower (backing) pool when the upper pool is the majority contributor to dirty data. To insure we make forward progress during throttling, we also check the current pool's net dirty data and only throttle if it exceeds zfs_arc_pool_dirty_percent of the anonymous dirty data in the cache. Authored by: Don Brady <[email protected]> Reviewed by: Sebastien Roy <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Prashanth Sreenivasa <[email protected]> Approved by: Robert Mustacchi <[email protected]> Ported-by: Brian Behlendorf <[email protected]> Porting Notes: * The new global variables zfs_arc_dirty_limit_percent, zfs_arc_anon_limit_percent, and zfs_arc_pool_dirty_percent were intentially not added as tunable module parameters. OpenZFS-issue: https://illumos.org/issues/9465 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/d6a4c3ef Closes #7749
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/arc.c51
-rw-r--r--module/zfs/dsl_dir.c2
-rw-r--r--module/zfs/spa_misc.c6
3 files changed, 43 insertions, 16 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index e49f8a547..7f2929c17 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -392,6 +392,16 @@ int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+/*
+ * ARC dirty data constraints for arc_tempreserve_space() throttle.
+ */
+unsigned long zfs_arc_dirty_limit_percent = 50; /* total dirty data limit */
+unsigned long zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */
+unsigned long zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */
+
+/*
+ * Enable or disable compressed arc buffers.
+ */
int zfs_compressed_arc_enabled = B_TRUE;
/*
@@ -7182,12 +7192,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
}
static int
-arc_memory_throttle(uint64_t reserve, uint64_t txg)
+arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
{
#ifdef _KERNEL
uint64_t available_memory = arc_free_memory();
- static uint64_t page_load = 0;
- static uint64_t last_txg = 0;
#if defined(_ILP32)
available_memory =
@@ -7197,9 +7205,9 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
if (available_memory > arc_all_memory() * arc_lotsfree_percent / 100)
return (0);
- if (txg > last_txg) {
- last_txg = txg;
- page_load = 0;
+ if (txg > spa->spa_lowmem_last_txg) {
+ spa->spa_lowmem_last_txg = txg;
+ spa->spa_lowmem_page_load = 0;
}
/*
* If we are in pageout, we know that memory is already tight,
@@ -7207,21 +7215,22 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
* continue to let page writes occur as quickly as possible.
*/
if (current_is_kswapd()) {
- if (page_load > MAX(arc_sys_free / 4, available_memory) / 4) {
+ if (spa->spa_lowmem_page_load >
+ MAX(arc_sys_free / 4, available_memory) / 4) {
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
return (SET_ERROR(ERESTART));
}
/* Note: reserve is inflated, so we deflate */
- page_load += reserve / 8;
+ atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8);
return (0);
- } else if (page_load > 0 && arc_reclaim_needed()) {
+ } else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) {
/* memory is low, delay before restarting */
ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
return (SET_ERROR(EAGAIN));
}
- page_load = 0;
-#endif
+ spa->spa_lowmem_page_load = 0;
+#endif /* _KERNEL */
return (0);
}
@@ -7233,7 +7242,7 @@ arc_tempreserve_clear(uint64_t reserve)
}
int
-arc_tempreserve_space(uint64_t reserve, uint64_t txg)
+arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg)
{
int error;
uint64_t anon_size;
@@ -7269,7 +7278,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* in order to compress/encrypt/etc the data. We therefore need to
* make sure that there is sufficient available memory for this.
*/
- error = arc_memory_throttle(reserve, txg);
+ error = arc_memory_throttle(spa, reserve, txg);
if (error != 0)
return (error);
@@ -7277,12 +7286,24 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* Throttle writes when the amount of dirty data in the cache
* gets too large. We try to keep the cache less than half full
* of dirty blocks so that our sync times don't grow too large.
+ *
+ * In the case of one pool being built on another pool, we want
+ * to make sure we don't end up throttling the lower (backing)
+ * pool when the upper pool is the majority contributor to dirty
+ * data. To insure we make forward progress during throttling, we
+ * also check the current pool's net dirty data and only throttle
+ * if it exceeds zfs_arc_pool_dirty_percent of the anonymous dirty
+ * data in the cache.
+ *
* Note: if two requests come in concurrently, we might let them
* both succeed, when one of them should fail. Not a huge deal.
*/
+ uint64_t total_dirty = reserve + arc_tempreserve + anon_size;
+ uint64_t spa_dirty_anon = spa_dirty_data(spa);
- if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
- anon_size > arc_c / 4) {
+ if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 &&
+ anon_size > arc_c * zfs_arc_anon_limit_percent / 100 &&
+ spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) {
#ifdef ZFS_DEBUG
uint64_t meta_esize =
refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 75c40c68c..9a43691e5 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -1416,7 +1416,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
offsetof(struct tempreserve, tr_node));
ASSERT3S(asize, >, 0);
- err = arc_tempreserve_space(lsize, tx->tx_txg);
+ err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg);
if (err == 0) {
struct tempreserve *tr;
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 9410fab07..f43a38ef1 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1917,6 +1917,12 @@ bp_get_dsize(spa_t *spa, const blkptr_t *bp)
return (dsize);
}
+uint64_t
+spa_dirty_data(spa_t *spa)
+{
+ return (spa->spa_dsl_pool->dp_dirty_total);
+}
+
/*
* ==========================================================================
* Initialization and Termination