aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys/dmu_tx.h
diff options
context:
space:
mode:
author: Brian Behlendorf <[email protected]> 2017-03-07 09:51:59 -0800
committer: GitHub <[email protected]> 2017-03-07 09:51:59 -0800
commit: 3ec3bc2167352df525c10c99cf24cb24952c2786 (patch)
tree: d38388851ea8b16bc4b5bc29839282a15491e139 /include/sys/dmu_tx.h
parent: e2fcb562759f32d3ca6f3764914222132ce2cfd9 (diff)
OpenZFS 7793 - ztest fails assertion in dmu_tx_willuse_space
Reviewed by: Steve Gonczi <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Pavel Zakharov <[email protected]> Ported-by: Brian Behlendorf <[email protected]> Background information: This assertion about tx_space_* verifies that we are not dirtying more stuff than we thought we would. We “need” to know how much we will dirty so that we can check if we should fail this transaction with ENOSPC/EDQUOT, in dmu_tx_assign(). While the transaction is open (i.e. between dmu_tx_assign() and dmu_tx_commit() — typically less than a millisecond), we call dbuf_dirty() on the exact blocks that will be modified. Once this happens, the temporary accounting in tx_space_* is unnecessary, because we know exactly what blocks are newly dirtied; we call dnode_willuse_space() to track this more exact accounting. The fundamental problem causing this bug is that dmu_tx_hold_*() relies on the current state in the DMU (e.g. dn_nlevels) to predict how much will be dirtied by this transaction, but this state can change before we actually perform the transaction (i.e. call dbuf_dirty()). This bug will be fixed by removing the assertion that the tx_space_* accounting is perfectly accurate (i.e. we never dirty more than was predicted by dmu_tx_hold_*()). By removing the requirement that this accounting be perfectly accurate, we can also vastly simplify it, e.g. removing most of the logic in dmu_tx_count_*(). The new tx space accounting will be very approximate, and may be more or less than what is actually dirtied. It will still be used to determine if this transaction will put us over quota. Transactions that are marked by dmu_tx_mark_netfree() will be excepted from this check. We won’t make an attempt to determine how much space will be freed by the transaction — this was rarely accurate enough to determine if a transaction should be permitted when we are over quota, which is why dmu_tx_mark_netfree() was introduced in 2014. 
We also won’t attempt to give “credit” when overwriting existing blocks, if those blocks may be freed. This allows us to remove the do_free_accounting logic in dbuf_dirty(), and associated routines. This logic attempted to predict what will be on disk when this txg syncs, to know if the overwritten block will be freed (i.e. exists, and has no snapshots). OpenZFS-issue: https://www.illumos.org/issues/7793 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/3704e0a Upstream bugs: DLPX-32883a Closes #5804 Porting notes: - DNODE_SIZE replaced with DNODE_MIN_SIZE in dmu_tx_count_dnode(). Using the default dnode size would be slightly better. - DEBUG_DMU_TX wrappers and configure option removed. - Resolved _by_dnode() conflicts; these changes have not yet been applied to OpenZFS.
Diffstat (limited to 'include/sys/dmu_tx.h')
-rw-r--r-- include/sys/dmu_tx.h 23
1 files changed, 5 insertions, 18 deletions
diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
index 1ee513fdc..f16e1e858 100644
--- a/include/sys/dmu_tx.h
+++ b/include/sys/dmu_tx.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TX_H
@@ -70,6 +70,9 @@ struct dmu_tx {
/* has this transaction already been delayed? */
boolean_t tx_waited;
+ /* transaction is marked as being a "net free" of space */
+ boolean_t tx_netfree;
+
/* time this transaction was created */
hrtime_t tx_start;
@@ -77,14 +80,6 @@ struct dmu_tx {
boolean_t tx_wait_dirty;
int tx_err;
-#ifdef DEBUG_DMU_TX
- uint64_t tx_space_towrite;
- uint64_t tx_space_tofree;
- uint64_t tx_space_tooverwrite;
- uint64_t tx_space_tounref;
- refcount_t tx_space_written;
- refcount_t tx_space_freed;
-#endif
};
enum dmu_tx_hold_type {
@@ -103,16 +98,10 @@ typedef struct dmu_tx_hold {
list_node_t txh_node;
struct dnode *txh_dnode;
refcount_t txh_space_towrite;
- refcount_t txh_space_tofree;
- refcount_t txh_space_tooverwrite;
- refcount_t txh_space_tounref;
refcount_t txh_memory_tohold;
- refcount_t txh_fudge;
-#ifdef DEBUG_DMU_TX
enum dmu_tx_hold_type txh_type;
uint64_t txh_arg1;
uint64_t txh_arg2;
-#endif
} dmu_tx_hold_t;
typedef struct dmu_tx_callback {
@@ -172,12 +161,10 @@ dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
int dmu_tx_is_syncing(dmu_tx_t *tx);
int dmu_tx_private_ok(dmu_tx_t *tx);
void dmu_tx_add_new_object(dmu_tx_t *tx, dnode_t *dn);
-void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
-int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
-#ifdef DEBUG_DMU_TX
+#ifdef ZFS_DEBUG
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
#else
#define DMU_TX_DIRTY_BUF(tx, db)