summaryrefslogtreecommitdiffstats
path: root/module/zfs/dmu_tx.c
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs/dmu_tx.c')
-rw-r--r--module/zfs/dmu_tx.c179
1 files changed, 136 insertions, 43 deletions
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index bf560e565..af2b049a7 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -160,6 +160,41 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
return (err);
}
+static void
+dmu_tx_count_indirects(dmu_tx_hold_t *txh, dmu_buf_impl_t *db,
+ boolean_t freeable, dmu_buf_impl_t **history)
+{
+ int i = db->db_level + 1;
+ dnode_t *dn = db->db_dnode;
+
+ if (i >= dn->dn_nlevels)
+ return;
+
+ db = db->db_parent;
+ if (db == NULL) {
+ uint64_t lvls = dn->dn_nlevels - i;
+
+ txh->txh_space_towrite += lvls << dn->dn_indblkshift;
+ return;
+ }
+
+ if (db != history[i]) {
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ uint64_t space = 1ULL << dn->dn_indblkshift;
+
+ freeable = (db->db_blkptr && (freeable ||
+ dsl_dataset_block_freeable(ds, db->db_blkptr->blk_birth)));
+ if (freeable)
+ txh->txh_space_tooverwrite += space;
+ else
+ txh->txh_space_towrite += space;
+ if (db->db_blkptr)
+ txh->txh_space_tounref += space;
+ history[i] = db;
+ dmu_tx_count_indirects(txh, db, freeable, history);
+ }
+}
+
/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
@@ -177,18 +212,24 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
min_ibs = DN_MIN_INDBLKSHIFT;
max_ibs = DN_MAX_INDBLKSHIFT;
- /*
- * For i/o error checking, read the first and last level-0
- * blocks (if they are not aligned), and all the level-1 blocks.
- */
-
if (dn) {
+ dmu_buf_impl_t *last[DN_MAX_LEVELS];
+ int nlvls = dn->dn_nlevels;
+ int delta;
+
+ /*
+ * For i/o error checking, read the first and last level-0
+ * blocks (if they are not aligned), and all the level-1 blocks.
+ */
if (dn->dn_maxblkid == 0) {
- if ((off > 0 || len < dn->dn_datablksz) &&
- off < dn->dn_datablksz) {
+ delta = dn->dn_datablksz;
+ start = (off < dn->dn_datablksz) ? 0 : 1;
+ end = (off+len <= dn->dn_datablksz) ? 0 : 1;
+ if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
if (err)
goto out;
+ delta -= off;
}
} else {
zio_t *zio = zio_root(dn->dn_objset->os_spa,
@@ -213,10 +254,9 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
}
/* level-1 blocks */
- if (dn->dn_nlevels > 1) {
- start >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (i = start+1; i < end; i++) {
+ if (nlvls > 1) {
+ int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+ for (i = (start>>shft)+1; i < end>>shft; i++) {
err = dmu_tx_check_ioerr(zio, dn, 1, i);
if (err)
goto out;
@@ -226,20 +266,70 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
err = zio_wait(zio);
if (err)
goto out;
+ delta = P2NPHASE(off, dn->dn_datablksz);
}
- }
- /*
- * If there's more than one block, the blocksize can't change,
- * so we can make a more precise estimate. Alternatively,
- * if the dnode's ibs is larger than max_ibs, always use that.
- * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
- * the code will still work correctly on existing pools.
- */
- if (dn && (dn->dn_maxblkid != 0 || dn->dn_indblkshift > max_ibs)) {
- min_ibs = max_ibs = dn->dn_indblkshift;
- if (dn->dn_datablkshift != 0)
+ if (dn->dn_maxblkid > 0) {
+ /*
+ * The blocksize can't change,
+ * so we can make a more precise estimate.
+ */
+ ASSERT(dn->dn_datablkshift != 0);
min_bs = max_bs = dn->dn_datablkshift;
+ min_ibs = max_ibs = dn->dn_indblkshift;
+ } else if (dn->dn_indblkshift > max_ibs) {
+ /*
+ * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
+ * the code will still work correctly on older pools.
+ */
+ min_ibs = max_ibs = dn->dn_indblkshift;
+ }
+
+ /*
+ * If this write is not off the end of the file
+ * we need to account for overwrites/unref.
+ */
+ if (start <= dn->dn_maxblkid)
+ bzero(last, sizeof (dmu_buf_impl_t *) * DN_MAX_LEVELS);
+ while (start <= dn->dn_maxblkid) {
+ spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ dmu_buf_impl_t *db;
+
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ db = dbuf_hold_level(dn, 0, start, FTAG);
+ rw_exit(&dn->dn_struct_rwlock);
+ if (db->db_blkptr && dsl_dataset_block_freeable(ds,
+ db->db_blkptr->blk_birth)) {
+ dprintf_bp(db->db_blkptr, "can free old%s", "");
+ txh->txh_space_tooverwrite += dn->dn_datablksz;
+ txh->txh_space_tounref += dn->dn_datablksz;
+ dmu_tx_count_indirects(txh, db, TRUE, last);
+ } else {
+ txh->txh_space_towrite += dn->dn_datablksz;
+ if (db->db_blkptr)
+ txh->txh_space_tounref +=
+ bp_get_dasize(spa, db->db_blkptr);
+ dmu_tx_count_indirects(txh, db, FALSE, last);
+ }
+ dbuf_rele(db, FTAG);
+ if (++start > end) {
+ /*
+ * Account for new indirects appearing
+ * before this IO gets assigned into a txg.
+ */
+ bits = 64 - min_bs;
+ epbs = min_ibs - SPA_BLKPTRSHIFT;
+ for (bits -= epbs * (nlvls - 1);
+ bits >= 0; bits -= epbs)
+ txh->txh_fudge += 1ULL << max_ibs;
+ goto out;
+ }
+ off += delta;
+ if (len >= delta)
+ len -= delta;
+ delta = dn->dn_datablksz;
+ }
}
/*
@@ -262,20 +352,22 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
start >>= epbs;
end >>= epbs;
- /*
- * If we increase the number of levels of indirection,
- * we'll need new blkid=0 indirect blocks. If start == 0,
- * we're already accounting for that blocks; and if end == 0,
- * we can't increase the number of levels beyond that.
- */
- if (start != 0 && end != 0)
- txh->txh_space_towrite += 1ULL << max_ibs;
+ ASSERT3U(end, >=, start);
txh->txh_space_towrite += (end - start + 1) << max_ibs;
+ if (start != 0) {
+ /*
+ * We also need a new blkid=0 indirect block
+ * to reference any existing file data.
+ */
+ txh->txh_space_towrite += 1ULL << max_ibs;
+ }
}
- ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);
-
out:
+ if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
+ 2 * DMU_MAX_ACCESS)
+ err = EFBIG;
+
if (err)
txh->txh_tx->tx_err = err;
}
@@ -292,6 +384,7 @@ dmu_tx_count_dnode(dmu_tx_hold_t *txh)
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
dn->dn_dbuf->db_blkptr->blk_birth)) {
txh->txh_space_tooverwrite += space;
+ txh->txh_space_tounref += space;
} else {
txh->txh_space_towrite += space;
if (dn && dn->dn_dbuf->db_blkptr)
@@ -535,7 +628,7 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
}
void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
+dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
{
dmu_tx_hold_t *txh;
dnode_t *dn;
@@ -584,9 +677,9 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
} else {
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- txh->txh_space_tounref +=
- BP_GET_ASIZE(dn->dn_phys->dn_blkptr);
}
+ if (dn->dn_phys->dn_blkptr[0].blk_birth)
+ txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
return;
}
@@ -603,12 +696,9 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
}
}
- /*
- * 3 blocks overwritten: target leaf, ptrtbl block, header block
- * 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
- */
- dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
- (3 + (add ? 3 : 0)) << dn->dn_datablkshift);
+ err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
+ &txh->txh_space_towrite, &txh->txh_space_tooverwrite,
+ txh->txh_dnode->dn_datablkshift);
/*
* If the modified blocks are scattered to the four winds,
@@ -616,7 +706,10 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
*/
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
- txh->txh_space_towrite += 3 << dn->dn_indblkshift;
+ if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
+ txh->txh_space_towrite += 3 << dn->dn_indblkshift;
+ else
+ txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
}
void