author    | Brian Behlendorf <[email protected]> | 2017-03-07 09:51:59 -0800
committer | GitHub <[email protected]> | 2017-03-07 09:51:59 -0800
commit    | 3ec3bc2167352df525c10c99cf24cb24952c2786 (patch)
tree      | d38388851ea8b16bc4b5bc29839282a15491e139 /module/zfs/dsl_dir.c
parent    | e2fcb562759f32d3ca6f3764914222132ce2cfd9 (diff)
OpenZFS 7793 - ztest fails assertion in dmu_tx_willuse_space
Reviewed by: Steve Gonczi <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Pavel Zakharov <[email protected]>
Ported-by: Brian Behlendorf <[email protected]>
Background information: This assertion about tx_space_* verifies that we
are not dirtying more stuff than we thought we would. We “need” to know
how much we will dirty so that we can check if we should fail this
transaction with ENOSPC/EDQUOT, in dmu_tx_assign(). While the
transaction is open (i.e. between dmu_tx_assign() and dmu_tx_commit() —
typically less than a millisecond), we call dbuf_dirty() on the exact
blocks that will be modified. Once this happens, the temporary
accounting in tx_space_* is unnecessary, because we know exactly what
blocks are newly dirtied; we call dnode_willuse_space() to track this
more exact accounting.
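For orientation, a minimal sketch of the consumer-side pattern described above, using the standard DMU interfaces (dmu_tx_create(), dmu_tx_hold_write(), dmu_tx_assign(), dmu_write(), dmu_tx_commit()); the wrapper name and the trimmed error handling are illustrative only, not part of this change:

/*
 * Sketch of the window described above: between dmu_tx_assign() and
 * dmu_tx_commit() the held blocks are dirtied via dbuf_dirty().
 */
#include <sys/dmu.h>

static int
write_one_block(objset_t *os, uint64_t object, uint64_t off, int len,
    const void *buf)
{
        dmu_tx_t *tx = dmu_tx_create(os);
        int err;

        /* Predict what will be dirtied (the tx_space_* style accounting). */
        dmu_tx_hold_write(tx, object, off, len);

        /* ENOSPC/EDQUOT is decided here, based on that prediction. */
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_tx_abort(tx);
                return (err);
        }

        /* dbuf_dirty() runs under the hood; exact accounting takes over. */
        dmu_write(os, object, off, len, buf, tx);

        dmu_tx_commit(tx);
        return (0);
}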
The fundamental problem causing this bug is that dmu_tx_hold_*() relies
on the current state in the DMU (e.g. dn_nlevels) to predict how much
will be dirtied by this transaction, but this state can change before we
actually perform the transaction (i.e. call dbuf_dirty()).
This bug will be fixed by removing the assertion that the tx_space_*
accounting is perfectly accurate (i.e. we never dirty more than was
predicted by dmu_tx_hold_*()). By removing the requirement that this
accounting be perfectly accurate, we can also vastly simplify it, e.g.
removing most of the logic in dmu_tx_count_*().
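A toy userspace model (not ZFS code; all names here are made up) of what this simplification means in practice: the hold-time estimate only feeds the quota decision, and nothing asserts that the amount actually dirtied stays within it:

#include <stdint.h>
#include <stdio.h>

/* Toy model: hold-time prediction vs. what actually gets dirtied. */
typedef struct tx_model {
        uint64_t predicted;     /* charged by the dmu_tx_hold_*() analogue */
        uint64_t dirtied;       /* accumulated as buffers are dirtied */
} tx_model_t;

/* EDQUOT-style check at assign time, based only on the estimate. */
static int
model_assign(const tx_model_t *tx, uint64_t used, uint64_t quota)
{
        return (used + tx->predicted > quota ? -1 : 0);
}

int
main(void)
{
        tx_model_t tx = { .predicted = 128 * 1024, .dirtied = 0 };

        if (model_assign(&tx, 800 * 1024, 1024 * 1024) != 0)
                return (1);

        /*
         * The DMU state can change before the buffers are dirtied, so the
         * real amount may exceed the prediction; the removed assertion
         * would have tripped here.
         */
        tx.dirtied = 160 * 1024;
        printf("predicted=%llu dirtied=%llu\n",
            (unsigned long long)tx.predicted,
            (unsigned long long)tx.dirtied);
        return (0);
}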
The new tx space accounting will be very approximate, and may be more or
less than what is actually dirtied. It will still be used to determine
if this transaction will put us over quota. Transactions that are marked
by dmu_tx_mark_netfree() will be excepted from this check. We won’t make
an attempt to determine how much space will be freed by the transaction
— this was rarely accurate enough to determine if a transaction should
be permitted when we are over quota, which is why dmu_tx_mark_netfree()
was introduced in 2014.
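An illustrative sketch of how a free-only transaction opts out of that check; the wrapper is hypothetical, but dmu_tx_hold_free(), dmu_tx_mark_netfree() and dmu_free_range() are the existing DMU calls:

#include <sys/dmu.h>

static int
free_range_even_if_over_quota(objset_t *os, uint64_t object,
    uint64_t off, uint64_t len)
{
        dmu_tx_t *tx = dmu_tx_create(os);
        int err;

        dmu_tx_hold_free(tx, object, off, len);

        /* Declare that this tx frees more space than it consumes. */
        dmu_tx_mark_netfree(tx);

        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_tx_abort(tx);
                return (err);
        }

        err = dmu_free_range(os, object, off, len, tx);
        dmu_tx_commit(tx);
        return (err);
}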
We also won’t attempt to give “credit” when overwriting existing blocks,
if those blocks may be freed. This allows us to remove the
do_free_accounting logic in dbuf_dirty(), and associated routines. This
logic attempted to predict what will be on disk when this txg syncs, to
know if the overwritten block will be freed (i.e. exists, and has no
snapshots).
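That prediction reduces, roughly, to the following standalone model (names invented here; this is not the removed code): an overwritten block can only be freed if it exists and is newer than the most recent snapshot.

#include <stdint.h>

/*
 * Simplified model of the prediction the removed do_free_accounting
 * logic relied on: an overwrite frees space only if the old block
 * exists (nonzero birth txg) and is not held by any snapshot, i.e. it
 * was born after the newest snapshot of the dataset.
 */
static int
overwrite_would_free(uint64_t blk_birth_txg, uint64_t newest_snap_txg)
{
        return (blk_birth_txg != 0 && blk_birth_txg > newest_snap_txg);
}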
OpenZFS-issue: https://www.illumos.org/issues/7793
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/3704e0a
Upstream bugs: DLPX-32883a
Closes #5804
Porting notes:
- DNODE_SIZE replaced with DNODE_MIN_SIZE in dmu_tx_count_dnode(); using
  the default dnode size would be slightly better (see the sketch after
  these notes).
- DEBUG_DMU_TX wrappers and configure option removed.
- Resolved _by_dnode() conflicts; these changes have not yet been
  applied to OpenZFS.
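As a rough standalone model of the first note (not the patched dmu_tx_count_dnode()): the port charges the minimum dnode size as a flat per-dnode estimate, which under-counts when a dataset uses larger dnodes.

#include <stdint.h>

#define MODEL_DNODE_MIN_SIZE    512     /* matches DNODE_MIN_SIZE (1 << 9) */

/*
 * Toy model: charge a flat DNODE_MIN_SIZE per held dnode.  Charging the
 * dataset's actual (possibly larger) dnode size would be a slightly
 * better estimate.  Not the in-tree code.
 */
static uint64_t
model_count_dnode(uint64_t space_towrite)
{
        return (space_towrite + MODEL_DNODE_MIN_SIZE);
}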
Diffstat (limited to 'module/zfs/dsl_dir.c')
-rw-r--r-- | module/zfs/dsl_dir.c | 40
1 file changed, 18 insertions, 22 deletions
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 305a87ed9..98aeff5dc 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -1031,13 +1031,12 @@ static uint64_t
 dsl_dir_space_towrite(dsl_dir_t *dd)
 {
         uint64_t space = 0;
-        int i;
 
         ASSERT(MUTEX_HELD(&dd->dd_lock));
 
-        for (i = 0; i < TXG_SIZE; i++) {
-                space += dd->dd_space_towrite[i&TXG_MASK];
-                ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
+        for (int i = 0; i < TXG_SIZE; i++) {
+                space += dd->dd_space_towrite[i & TXG_MASK];
+                ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0);
         }
         return (space);
 }
@@ -1117,16 +1116,13 @@ struct tempreserve {
 
 static int
 dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
-    boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
+    boolean_t ignorequota, list_t *tr_list,
     dmu_tx_t *tx, boolean_t first)
 {
         uint64_t txg = tx->tx_txg;
-        uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
-        uint64_t deferred = 0;
+        uint64_t quota;
         struct tempreserve *tr;
         int retval = EDQUOT;
-        int txgidx = txg & TXG_MASK;
-        int i;
         uint64_t ref_rsrv = 0;
 
         ASSERT3U(txg, !=, 0);
@@ -1138,10 +1134,10 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
          * Check against the dsl_dir's quota.  We don't add in the delta
          * when checking for over-quota because they get one free hit.
          */
-        est_inflight = dsl_dir_space_towrite(dd);
-        for (i = 0; i < TXG_SIZE; i++)
+        uint64_t est_inflight = dsl_dir_space_towrite(dd);
+        for (int i = 0; i < TXG_SIZE; i++)
                 est_inflight += dd->dd_tempreserved[i];
-        used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;
+        uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;
 
         /*
          * On the first iteration, fetch the dataset's used-on-disk and
@@ -1152,9 +1148,9 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
                 int error;
                 dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
-                error = dsl_dataset_check_quota(ds, checkrefquota,
+                error = dsl_dataset_check_quota(ds, !netfree,
                     asize, est_inflight, &used_on_disk, &ref_rsrv);
-                if (error) {
+                if (error != 0) {
                         mutex_exit(&dd->dd_lock);
                         DMU_TX_STAT_BUMP(dmu_tx_quota);
                         return (error);
                 }
@@ -1180,6 +1176,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
          * we're very close to full, this will allow a steady trickle of
          * removes to get through.
          */
+        uint64_t deferred = 0;
         if (dd->dd_parent == NULL) {
                 spa_t *spa = dd->dd_pool->dp_spa;
                 uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
@@ -1210,9 +1207,9 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
         }
 
         /* We need to up our estimated delta before dropping dd_lock */
-        dd->dd_tempreserved[txgidx] += asize;
+        dd->dd_tempreserved[txg & TXG_MASK] += asize;
 
-        parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
+        uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
             asize - ref_rsrv);
         mutex_exit(&dd->dd_lock);
 
@@ -1222,11 +1219,11 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
         list_insert_tail(tr_list, tr);
 
         /* see if it's OK with our parent */
-        if (dd->dd_parent && parent_rsrv) {
+        if (dd->dd_parent != NULL && parent_rsrv != 0) {
                 boolean_t ismos = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
 
                 return (dsl_dir_tempreserve_impl(dd->dd_parent,
-                    parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
+                    parent_rsrv, netfree, ismos, tr_list, tx, B_FALSE));
         } else {
                 return (0);
         }
@@ -1240,7 +1237,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
  */
 int
 dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
-    uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
+    boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx)
 {
         int err;
         list_t *tr_list;
@@ -1254,7 +1251,6 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
         list_create(tr_list, sizeof (struct tempreserve),
             offsetof(struct tempreserve, tr_node));
         ASSERT3S(asize, >, 0);
-        ASSERT3S(fsize, >=, 0);
 
         err = arc_tempreserve_space(lsize, tx->tx_txg);
         if (err == 0) {
@@ -1281,8 +1277,8 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
         }
 
         if (err == 0) {
-                err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
-                    FALSE, asize > usize, tr_list, tx, TRUE);
+                err = dsl_dir_tempreserve_impl(dd, asize, netfree,
+                    B_FALSE, tr_list, tx, B_TRUE);
         }
 
         if (err != 0)