Diffstat (limited to 'module')
-rw-r--r--  module/zfs/dmu.c  41  +++++++++++++++++++++++++++++++----------
1 file changed, 31 insertions, 10 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 231ed3053..36fc8815c 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -25,6 +25,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
*/
#include <sys/dmu.h>
@@ -61,12 +62,12 @@
int zfs_nopwrite_enabled = 1;
/*
- * Tunable to control percentage of dirtied blocks from frees in one TXG.
- * After this threshold is crossed, additional dirty blocks from frees
- * wait until the next TXG.
+ * Tunable to control percentage of dirtied L1 blocks from frees allowed into
+ * one TXG. After this threshold is crossed, additional dirty blocks from frees
+ * will wait until the next TXG.
* A value of zero will disable this throttle.
*/
-unsigned long zfs_per_txg_dirty_frees_percent = 30;
+unsigned long zfs_per_txg_dirty_frees_percent = 5;
/*
* Enable/disable forcing txg sync when dirty in dmu_offset_next.
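The tunable above feeds the per-TXG budget computed later in this diff (dirty_frees_threshold = zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100), and the fallback default changes from zfs_dirty_data_max / 4 to zfs_dirty_data_max / 20 to match the new 5%. A minimal standalone sketch of that arithmetic, assuming a hypothetical 4 GiB zfs_dirty_data_max (not a value taken from this commit):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t zfs_dirty_data_max = 4ULL << 30;               /* assumed: 4 GiB */
        uint64_t zfs_per_txg_dirty_frees_percent = 5;           /* new default */

        /* Per-TXG budget for dirty blocks generated by frees. */
        uint64_t dirty_frees_threshold =
            zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;

        /* Prints 214748364 (~205 MiB) for the values above. */
        printf("%llu\n", (unsigned long long)dirty_frees_threshold);
        return (0);
}

With the previous 30% default, the same computation allows roughly 1.2 GiB of free-related dirty data per TXG.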
@@ -709,11 +710,13 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
*
* On input, *start should be the first offset that does not need to be
* freed (e.g. "offset + length"). On return, *start will be the first
- * offset that should be freed.
+ * offset that should be freed and l1blks is set to the number of level 1
+ * indirect blocks found within the chunk.
*/
static int
-get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
+get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks)
{
+ uint64_t blks;
uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
/* bytes of data covered by a level-1 indirect block */
uint64_t iblkrange =
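A self-contained model (not code from this commit) of the contract described in the comment above: the caller seeds *start with offset + length, and each call moves it backwards to the first offset of one chunk while reporting roughly how many L1 indirect blocks that chunk spans. The fixed chunk size, the L1 coverage and the rounding used for the count below are simplifying assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

#define MODEL_IBLKRANGE (128ULL * 1024 * 1024)  /* assumed bytes per L1 */
#define MODEL_MAX_CHUNK (4ULL * MODEL_IBLKRANGE)

static int
model_get_next_chunk(uint64_t *start, uint64_t minimum, uint64_t *l1blks)
{
        /* Walk backwards at most one chunk, but never past minimum. */
        uint64_t begin = (*start > minimum + MODEL_MAX_CHUNK) ?
            *start - MODEL_MAX_CHUNK : minimum;

        /* Approximate the L1 blocks spanned by [begin, *start). */
        *l1blks = (*start - begin + MODEL_IBLKRANGE - 1) / MODEL_IBLKRANGE;
        *start = begin;
        return (0);
}

int
main(void)
{
        uint64_t offset = 0, length = 1ULL << 30;       /* free 1 GiB */
        uint64_t l1blks;

        while (length != 0) {
                uint64_t chunk_end = offset + length;
                uint64_t chunk_begin = chunk_end;

                (void) model_get_next_chunk(&chunk_begin, offset, &l1blks);
                printf("free [%llu, %llu), ~%llu L1 blocks\n",
                    (unsigned long long)chunk_begin,
                    (unsigned long long)chunk_end,
                    (unsigned long long)l1blks);
                length -= chunk_end - chunk_begin;
        }
        return (0);
}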
@@ -723,11 +726,16 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
if (*start - minimum <= iblkrange * maxblks) {
*start = minimum;
+ /*
+ * Assume full L1 blocks and 128k recordsize to approximate the
+ * expected number of L1 blocks in this chunk
+ */
+ *l1blks = minimum / (1024 * 128 * 1024);
return (0);
}
ASSERT(ISP2(iblkrange));
- for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
+ for (blks = 0; *start > minimum && blks < maxblks; blks++) {
int err;
/*
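A standalone check of the divisor used in the approximation above, assuming 128 KiB indirect blocks and ZFS's 128-byte on-disk block pointers: a full L1 block then holds 1024 block pointers and maps 1024 * 128 KiB = 128 MiB of file data, which is exactly the 1024 * 128 * 1024 bytes divided by here.

#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uint64_t blkptrs_per_l1 = (128 * 1024) / 128;           /* 1024 */
        uint64_t data_per_l1 = blkptrs_per_l1 * 128 * 1024;     /* bytes */

        assert(data_per_l1 == 1024ULL * 128 * 1024);    /* divisor above */
        assert(data_per_l1 == 128ULL << 20);            /* 128 MiB */
        return (0);
}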
@@ -745,6 +753,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
*start = minimum;
break;
} else if (err != 0) {
+ *l1blks = blks;
return (err);
}
@@ -753,6 +762,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
}
if (*start < minimum)
*start = minimum;
+ *l1blks = blks;
return (0);
}
@@ -792,7 +802,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
dirty_frees_threshold =
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
else
- dirty_frees_threshold = zfs_dirty_data_max / 4;
+ dirty_frees_threshold = zfs_dirty_data_max / 20;
if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;
@@ -800,6 +810,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
while (length != 0) {
uint64_t chunk_end, chunk_begin, chunk_len;
uint64_t long_free_dirty_all_txgs = 0;
+ uint64_t l1blks;
dmu_tx_t *tx;
if (dmu_objset_zfs_unmounting(dn->dn_objset))
@@ -808,7 +819,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
chunk_end = chunk_begin = offset + length;
/* move chunk_begin backwards to the beginning of this chunk */
- err = get_next_chunk(dn, &chunk_begin, offset);
+ err = get_next_chunk(dn, &chunk_begin, offset, &l1blks);
if (err)
return (err);
ASSERT3U(chunk_begin, >=, offset);
@@ -849,9 +860,19 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
return (err);
}
+ /*
+ * In order to prevent unnecessary write throttling, for each
+ * TXG, we track the cumulative size of L1 blocks being dirtied
+ * in dnode_free_range() below. We compare this number to a
+ * tunable threshold, past which we prevent new L1 dirty freeing
+ * blocks from being added into the open TXG. See
+ * dmu_free_long_range_impl() for details. The threshold
+ * prevents write throttle activation due to dirty freeing L1
+ * blocks taking up a large percentage of zfs_dirty_data_max.
+ */
mutex_enter(&dp->dp_lock);
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
- chunk_len;
+ l1blks << dn->dn_indblkshift;
mutex_exit(&dp->dp_lock);
DTRACE_PROBE3(free__long__range,
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
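A self-contained model (not code from this commit) of the accounting the comment above describes: each chunk charges l1blks << indblkshift bytes of dirtied L1 indirect blocks to the open TXG, and once the running total across TXGs reaches dirty_frees_threshold, further freeing is expected to wait for the next TXG. The TXG ring size, the indirect block shift of 17 (128 KiB) and the byte values below are assumptions for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_TXG_SIZE  4
#define MODEL_TXG_MASK  (MODEL_TXG_SIZE - 1)

static uint64_t model_long_free_dirty_pertxg[MODEL_TXG_SIZE];

/*
 * Returns true if the chunk should wait for the next TXG instead of
 * being charged to the current one.
 */
static bool
model_try_charge(uint64_t txg, uint64_t l1blks, int indblkshift,
    uint64_t threshold)
{
        uint64_t total = 0;

        for (int t = 0; t < MODEL_TXG_SIZE; t++)
                total += model_long_free_dirty_pertxg[t];

        if (threshold != 0 && total >= threshold)
                return (true);

        /* Charge the dirtied L1 bytes for this chunk to the open TXG. */
        model_long_free_dirty_pertxg[txg & MODEL_TXG_MASK] +=
            l1blks << indblkshift;
        return (false);
}

int
main(void)
{
        uint64_t threshold = 5 * (4ULL << 30) / 100;    /* 5% of 4 GiB */
        uint64_t txg = 100;

        /* Each chunk dirties 1000 L1 blocks of 128 KiB each (125 MiB). */
        for (int chunk = 0; chunk < 4; chunk++) {
                if (model_try_charge(txg, 1000, 17, threshold)) {
                        printf("chunk %d: over threshold, wait for "
                            "next TXG\n", chunk);
                        break;
                }
                printf("chunk %d: charged to txg %llu\n",
                    chunk, (unsigned long long)txg);
        }
        return (0);
}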