Diffstat (limited to 'module/zfs/dmu_tx.c')
-rw-r--r--  module/zfs/dmu_tx.c  105
1 file changed, 105 insertions, 0 deletions
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index 1c5608c45..c4e274bd4 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -295,6 +295,53 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
}
static void
+dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+{
+	dnode_t *dn = txh->txh_dnode;
+	int err = 0;
+
+	if (len == 0)
+		return;
+
+	(void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
+
+	if (dn == NULL)
+		return;
+
+	/*
+	 * For i/o error checking, read the blocks that will be needed
+	 * to perform the append: the first level-0 block, but only if
+	 * the write is not block-aligned (i.e. a partial-block write);
+	 * no additional blocks are read.
+	 */
+	if (dn->dn_maxblkid == 0) {
+		if (off < dn->dn_datablksz &&
+		    (off > 0 || len < dn->dn_datablksz)) {
+			err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
+			if (err != 0) {
+				txh->txh_tx->tx_err = err;
+			}
+		}
+	} else {
+		zio_t *zio = zio_root(dn->dn_objset->os_spa,
+		    NULL, NULL, ZIO_FLAG_CANFAIL);
+
+		/* first level-0 block */
+		uint64_t start = off >> dn->dn_datablkshift;
+		if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
+			err = dmu_tx_check_ioerr(zio, dn, 0, start);
+			if (err != 0) {
+				txh->txh_tx->tx_err = err;
+			}
+		}
+
+		err = zio_wait(zio);
+		if (err != 0) {
+			txh->txh_tx->tx_err = err;
+		}
+	}
+}
+
+static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
(void) zfs_refcount_add_many(&txh->txh_space_towrite,
@@ -335,6 +382,42 @@ dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
}
/*
+ * Should be used when appending to an object and the exact offset is unknown.
+ * The write must occur at or beyond the specified offset. Only the L0 block
+ * at the provided offset will be prefetched.
+ */
+void
+dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
+{
+	dmu_tx_hold_t *txh;
+
+	ASSERT0(tx->tx_txg);
+	ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    object, THT_APPEND, off, DMU_OBJECT_END);
+	if (txh != NULL) {
+		dmu_tx_count_append(txh, off, len);
+		dmu_tx_count_dnode(txh);
+	}
+}
+
+void
+dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
+{
+	dmu_tx_hold_t *txh;
+
+	ASSERT0(tx->tx_txg);
+	ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+	txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END);
+	if (txh != NULL) {
+		dmu_tx_count_append(txh, off, len);
+		dmu_tx_count_dnode(txh);
+	}
+}
+
+/*
* This function marks the transaction as being a "net free". The end
* result is that refquotas will be disabled for this transaction, and
* this transaction will be able to use half of the pool space overhead
@@ -668,6 +751,26 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
			if (blkid == 0)
				match_offset = TRUE;
			break;
+		case THT_APPEND:
+			if (blkid >= beginblk && (blkid <= endblk ||
+			    txh->txh_arg2 == DMU_OBJECT_END))
+				match_offset = TRUE;
+
+			/*
+			 * THT_WRITE used for bonus and spill blocks.
+			 */
+			ASSERT(blkid != DMU_BONUS_BLKID &&
+			    blkid != DMU_SPILL_BLKID);
+
+			/*
+			 * They might have to increase nlevels,
+			 * thus dirtying the new TLIBs.  Or they
+			 * might have to change the block size,
+			 * thus dirtying the new lvl=0 blk=0.
+			 */
+			if (blkid == 0)
+				match_offset = TRUE;
+			break;
case THT_FREE:
/*
* We will dirty all the level 1 blocks in
@@ -1454,6 +1557,8 @@ dmu_tx_fini(void)
EXPORT_SYMBOL(dmu_tx_create);
EXPORT_SYMBOL(dmu_tx_hold_write);
EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
+EXPORT_SYMBOL(dmu_tx_hold_append);
+EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_free);
EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_zap);
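
For context, here is a minimal usage sketch (not part of this commit) of how a DMU consumer might take the new append hold when it only knows a lower bound on the write offset. The helper name append_record and its parameters are illustrative assumptions; dmu_tx_create, dmu_tx_assign, dmu_write, dmu_tx_abort and dmu_tx_commit are the standard DMU transaction calls.

#include <sys/dmu.h>
#include <sys/dmu_tx.h>

/*
 * Illustrative sketch only: append `len` bytes that are known to land at or
 * beyond `approx_off`.  The hold covers approx_off .. DMU_OBJECT_END, and
 * only the L0 block at approx_off is read for i/o error checking.
 */
static int
append_record(objset_t *os, uint64_t object, uint64_t approx_off,
    const void *buf, int len)
{
	dmu_tx_t *tx = dmu_tx_create(os);
	int err;

	/* Hold an open-ended append range instead of an exact write range. */
	dmu_tx_hold_append(tx, object, approx_off, len);

	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		return (err);
	}

	/*
	 * For simplicity this sketch writes at approx_off; a real caller may
	 * compute a larger final offset, which is still covered by the hold.
	 */
	dmu_write(os, object, approx_off, (uint64_t)len, buf, tx);

	dmu_tx_commit(tx);
	return (0);
}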