aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/zfs_vnops.c
diff options
context:
space:
mode:
authorPrakash Surya <[email protected]>2018-01-08 13:45:53 -0800
committerBrian Behlendorf <[email protected]>2018-01-26 20:19:46 -0800
commit0735ecb33485e91a78357a274e47c2782858d8b9 (patch)
treeeecf628f46b2e009dba2ec542be7a624c7a6b7e8 /module/zfs/zfs_vnops.c
parent522db29275b81c18c2bf53a95efa1aedeb13b428 (diff)
OpenZFS 8997 - ztest assertion failure in zil_lwb_write_issue
PROBLEM ======= When `dmu_tx_assign` is called from `zil_lwb_write_issue`, it's possible for either `ERESTART` or `EIO` to be returned. If `ERESTART` is returned, this will cause an assertion to fail directly in `zil_lwb_write_issue`, where the code assumes the return value is `EIO` if `dmu_tx_assign` returns a non-zero value. This can occur if the SPA is suspended when `dmu_tx_assign` is called, and most often occurs when running `zloop`. If `EIO` is returned, this can cause assertions to fail elsewhere in the ZIL code. For example, `zil_commit_waiter_timeout` contains the following logic: lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED); In this case, if `dmu_tx_assign` returned `EIO` from within `zil_lwb_write_issue`, the `lwb` variable passed in will not be issued to disk. Thus, it's `lwb_state` field will remain `LWB_STATE_OPENED` and this assertion will fail. `zil_commit_waiter_timeout` assumes that after it calls `zil_lwb_write_issue`, the `lwb` will be issued to disk, and doesn't handle the case where this is not true; i.e. it doesn't handle the case where `dmu_tx_assign` returns `EIO`. SOLUTION ======== This change modifies the `dmu_tx_assign` function such that `txg_how` is a bitmask, rather than of the `txg_how_t` enum type. Now, the previous `TXG_WAITED` semantics can be used via `TXG_NOTHROTTLE`, along with specifying either `TXG_NOWAIT` or `TXG_WAIT` semantics. Previously, when `TXG_WAITED` was specified, `TXG_NOWAIT` semantics was automatically invoked. This was not ideal when using `TXG_WAITED` within `zil_lwb_write_issued`, leading the problem described above. Rather, we want to achieve the semantics of `TXG_WAIT`, while also preventing the `tx` from being penalized via the dirty delay throttling. With this change, `zil_lwb_write_issued` can acheive the semtantics that it requires by passing in the value `TXG_WAIT | TXG_NOTHROTTLE` to `dmu_tx_assign`. Further, consumers of `dmu_tx_assign` wishing to achieve the old `TXG_WAITED` semantics can pass in the value `TXG_NOWAIT | TXG_NOTHROTTLE`. Authored by: Prakash Surya <[email protected]> Approved by: Robert Mustacchi <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Andriy Gapon <[email protected]> Ported-by: Brian Behlendorf <[email protected]> Porting Notes: - Additionally updated `zfs_tmpfile` to use `TXG_NOTHROTTLE` OpenZFS-issue: https://www.illumos.org/issues/8997 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/19ea6cb0f9 Closes #7084
Diffstat (limited to 'module/zfs/zfs_vnops.c')
-rw-r--r--module/zfs/zfs_vnops.c21
1 files changed, 11 insertions, 10 deletions
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 977035fd9..5e34cb2ff 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -130,7 +130,7 @@
*
* If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
* then drop all locks, call dmu_tx_wait(), and try again. On subsequent
- * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
+ * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
* to indicate that this operation has already called dmu_tx_wait().
* This will ensure that we don't retry forever, waiting a short bit
* each time.
@@ -155,7 +155,7 @@
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
- * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
@@ -1429,7 +1429,8 @@ top:
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx,
+ (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
@@ -1604,7 +1605,7 @@ top:
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
if (error == ERESTART) {
waited = B_TRUE;
@@ -1777,7 +1778,7 @@ top:
*/
dmu_tx_mark_netfree(tx);
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
@@ -2019,7 +2020,7 @@ top:
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
@@ -2158,7 +2159,7 @@ top:
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
dmu_tx_mark_netfree(tx);
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
rw_exit(&zp->z_name_lock);
@@ -3625,7 +3626,7 @@ top:
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
if (zl != NULL)
zfs_rename_unlock(&zl);
@@ -3817,7 +3818,7 @@ top:
}
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
@@ -4043,7 +4044,7 @@ top:
zfs_sa_upgrade_txholds(tx, szp);
zfs_sa_upgrade_txholds(tx, dzp);
- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {