summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- module/zfs/dbuf.c 34
-rw-r--r-- module/zfs/dmu.c 3
-rw-r--r-- module/zfs/dmu_objset.c 17
-rw-r--r-- module/zfs/dsl_pool.c 24
4 files changed, 60 insertions, 18 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index f8f96c142..ace862637 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1890,9 +1890,11 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
db->db.db_size = size;
if (db->db_level == 0) {
- ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
db->db_last_dirty->dt.dl.dr_data = buf;
}
+ ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
+ ASSERT3U(db->db_last_dirty->dr_accounted, ==, osize);
+ db->db_last_dirty->dr_accounted = size;
mutex_exit(&db->db_mtx);
dmu_objset_willuse_space(dn->dn_objset, size - osize, tx);
@@ -2105,7 +2107,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
sizeof (dbuf_dirty_record_t),
offsetof(dbuf_dirty_record_t, dr_dirty_node));
}
- if (db->db_blkid != DMU_BONUS_BLKID && os->os_dsl_dataset != NULL)
+ if (db->db_blkid != DMU_BONUS_BLKID)
dr->dr_accounted = db->db.db_size;
dr->dr_dbuf = db;
dr->dr_txg = tx->tx_txg;
@@ -4356,8 +4358,7 @@ dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg)
/*
* The callback will be called io_phys_children times. Retire one
* portion of our dirty space each time we are called. Any rounding
- * error will be cleaned up by dsl_pool_sync()'s call to
- * dsl_pool_undirty_space().
+ * error will be cleaned up by dbuf_write_done().
*/
delta = dr->dr_accounted / zio->io_phys_children;
dsl_pool_undirty_space(dp, delta, zio->io_txg);
@@ -4440,13 +4441,36 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
mutex_destroy(&dr->dt.di.dr_mtx);
list_destroy(&dr->dt.di.dr_children);
}
- kmem_free(dr, sizeof (dbuf_dirty_record_t));
cv_broadcast(&db->db_changed);
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+
+ /*
+ * If we didn't do a physical write in this ZIO and we
+ * still ended up here, it means that the space of the
+ * dbuf that we just released (and undirtied) above hasn't
+ * been marked as undirtied in the pool's accounting.
+ *
+ * Thus, we undirty that space in the pool's view of the
+ * world here. For physical writes this type of update
+ * happens in dbuf_write_physdone().
+ *
+ * If we did a physical write, clean up any rounding errors
+ * that came up due to writing multiple copies of a block
+ * on disk [see dbuf_write_physdone()].
+ */
+ if (zio->io_phys_children == 0) {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted, zio->io_txg);
+ } else {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+ }
+
+ kmem_free(dr, sizeof (dbuf_dirty_record_t));
}
static void
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 955588fb7..aa3ef6458 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1090,6 +1090,9 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
+/*
+ * Note: Lustre is an external consumer of this interface.
+ */
void
dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx)
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 7a540bdfa..3afafd182 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -2908,9 +2908,17 @@ dmu_fsname(const char *snapname, char *buf)
}
/*
- * Call when we think we're going to write/free space in open context to track
- * the amount of dirty data in the open txg, which is also the amount
- * of memory that can not be evicted until this txg syncs.
+ * Call when we think we're going to write/free space in open context
+ * to track the amount of dirty data in the open txg, which is also the
+ * amount of memory that cannot be evicted until this txg syncs.
+ *
+ * Note that there are two conditions where this can be called from
+ * syncing context:
+ *
+ * [1] When we just created the dataset, in which case we go on with
+ * updating any accounting of dirty data as usual.
+ * [2] When we are dirtying MOS data, in which case we only update the
+ * pool's accounting of dirty data.
*/
void
dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx)
@@ -2920,8 +2928,9 @@ dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx)
if (ds != NULL) {
dsl_dir_willuse_space(ds->ds_dir, aspace, tx);
- dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
}
+
+ dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
}
#if defined(_KERNEL)
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 9fb3a061d..1f1fd6462 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -659,15 +659,6 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
VERIFY0(zio_wait(zio));
/*
- * We have written all of the accounted dirty data, so our
- * dp_space_towrite should now be zero. However, some seldom-used
- * code paths do not adhere to this (e.g. dbuf_undirty(), also
- * rounding error in dbuf_write_physdone).
- * Shore up the accounting of any dirtied space now.
- */
- dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
-
- /*
* Update the long range free counter after
* we're done syncing user data
*/
@@ -763,6 +754,21 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
}
/*
+ * We have written all of the accounted dirty data, so our
+ * dp_space_towrite should now be zero. However, some seldom-used
+ * code paths do not adhere to this (e.g. dbuf_undirty()). Shore up
+ * the accounting of any dirtied space now.
+ *
+ * Note that, besides any dirty data from datasets, the amount of
+ * dirty data in the MOS is also accounted by the pool. Therefore,
+ * we want to do this cleanup after dsl_pool_sync_mos() so we don't
+ * attempt to update the accounting for the same dirty data twice.
+ * (i.e. at this point we only update the accounting for the space
+ * that we know that we "leaked").
+ */
+ dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
+
+ /*
* If we modify a dataset in the same txg that we want to destroy it,
* its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it.
* dsl_dir_destroy_check() will fail if there are unexpected holds.