Diffstat (limited to 'module')
-rw-r--r--  module/zfs/dmu.c | 41
1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 231ed3053..36fc8815c 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -25,6 +25,7 @@
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2019 Datto Inc.
  */
 
 #include <sys/dmu.h>
@@ -61,12 +62,12 @@
 int zfs_nopwrite_enabled = 1;
 
 /*
- * Tunable to control percentage of dirtied blocks from frees in one TXG.
- * After this threshold is crossed, additional dirty blocks from frees
- * wait until the next TXG.
+ * Tunable to control percentage of dirtied L1 blocks from frees allowed into
+ * one TXG. After this threshold is crossed, additional dirty blocks from frees
+ * will wait until the next TXG.
  * A value of zero will disable this throttle.
  */
-unsigned long zfs_per_txg_dirty_frees_percent = 30;
+unsigned long zfs_per_txg_dirty_frees_percent = 5;
 
 /*
  * Enable/disable forcing txg sync when dirty in dmu_offset_next.
@@ -709,11 +710,13 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
  *
  * On input, *start should be the first offset that does not need to be
  * freed (e.g. "offset + length"). On return, *start will be the first
- * offset that should be freed.
+ * offset that should be freed and l1blks is set to the number of level 1
+ * indirect blocks found within the chunk.
  */
 static int
-get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
+get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum, uint64_t *l1blks)
 {
+        uint64_t blks;
         uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
         /* bytes of data covered by a level-1 indirect block */
         uint64_t iblkrange =
@@ -723,11 +726,16 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
 
         if (*start - minimum <= iblkrange * maxblks) {
                 *start = minimum;
+                /*
+                 * Assume full L1 blocks and 128k recordsize to approximate the
+                 * expected number of L1 blocks in this chunk
+                 */
+                *l1blks = minimum / (1024 * 128 * 1024);
                 return (0);
         }
         ASSERT(ISP2(iblkrange));
 
-        for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
+        for (blks = 0; *start > minimum && blks < maxblks; blks++) {
                 int err;
 
                 /*
@@ -745,6 +753,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
                         *start = minimum;
                         break;
                 } else if (err != 0) {
+                        *l1blks = blks;
                         return (err);
                 }
 
@@ -753,6 +762,7 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
         }
         if (*start < minimum)
                 *start = minimum;
+        *l1blks = blks;
         return (0);
 }
 
@@ -792,7 +802,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
                 dirty_frees_threshold =
                     zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
         else
-                dirty_frees_threshold = zfs_dirty_data_max / 4;
+                dirty_frees_threshold = zfs_dirty_data_max / 20;
 
         if (length == DMU_OBJECT_END || offset + length > object_size)
                 length = object_size - offset;
@@ -800,6 +810,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
         while (length != 0) {
                 uint64_t chunk_end, chunk_begin, chunk_len;
                 uint64_t long_free_dirty_all_txgs = 0;
+                uint64_t l1blks;
                 dmu_tx_t *tx;
 
                 if (dmu_objset_zfs_unmounting(dn->dn_objset))
@@ -808,7 +819,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
                 chunk_end = chunk_begin = offset + length;
 
                 /* move chunk_begin backwards to the beginning of this chunk */
-                err = get_next_chunk(dn, &chunk_begin, offset);
+                err = get_next_chunk(dn, &chunk_begin, offset, &l1blks);
                 if (err)
                         return (err);
                 ASSERT3U(chunk_begin, >=, offset);
@@ -849,9 +860,19 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
                         return (err);
                 }
 
+                /*
+                 * In order to prevent unnecessary write throttling, for each
+                 * TXG, we track the cumulative size of L1 blocks being dirtied
+                 * in dnode_free_range() below. We compare this number to a
+                 * tunable threshold, past which we prevent new L1 dirty freeing
+                 * blocks from being added into the open TXG. See
+                 * dmu_free_long_range_impl() for details. The threshold
+                 * prevents write throttle activation due to dirty freeing L1
+                 * blocks taking up a large percentage of zfs_dirty_data_max.
+                 */
                 mutex_enter(&dp->dp_lock);
                 dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
-                    chunk_len;
+                    l1blks << dn->dn_indblkshift;
                 mutex_exit(&dp->dp_lock);
                 DTRACE_PROBE3(free__long__range,
                     uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
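For context, the accounting change can be illustrated with a small standalone sketch (plain C, not ZFS code; the 4 GiB zfs_dirty_data_max, the 128 KiB indirect block size implied by indblkshift = 17, and the 64 GiB chunk are illustrative assumptions). It computes the per-TXG threshold the same way dmu_free_long_range_impl() does, approximates one L1 block per 128 MiB of 128 KiB records as the patch's comment describes, and compares the old charge (the full chunk length) with the new charge (only the dirtied L1 indirect block bytes, l1blks << indblkshift):

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-ins for the module tunables (not necessarily the values
 * on any given system): assume zfs_dirty_data_max = 4 GiB and the patched
 * default of zfs_per_txg_dirty_frees_percent = 5.
 */
static const uint64_t dirty_data_max = 4ULL << 30;
static const uint64_t dirty_frees_percent = 5;

int
main(void)
{
        /* Per-TXG threshold, computed as in dmu_free_long_range_impl(). */
        uint64_t threshold = dirty_frees_percent * dirty_data_max / 100;

        /*
         * Example chunk: 64 GiB of 128 KiB records. One L1 indirect block
         * holds 1024 block pointers and so covers 128 MiB, giving 512 L1
         * blocks; a 128 KiB indirect block corresponds to indblkshift = 17.
         */
        uint64_t chunk_len = 64ULL << 30;
        int indblkshift = 17;
        uint64_t l1blks = chunk_len / (1024ULL * 128 * 1024);

        uint64_t old_charge = chunk_len;              /* pre-patch: whole chunk */
        uint64_t new_charge = l1blks << indblkshift;  /* patched: L1 bytes only */

        printf("threshold:  %llu bytes\n", (unsigned long long)threshold);
        printf("old charge: %llu bytes\n", (unsigned long long)old_charge);
        printf("new charge: %llu bytes\n", (unsigned long long)new_charge);
        return (0);
}

With these example numbers the threshold is about 205 MiB; the old accounting would charge the entire 64 GiB chunk against it, while the new accounting charges only 64 MiB of L1 indirect blocks, which is much closer to the dirty data the free actually generates.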