diff options
Diffstat (limited to 'module/zfs/dbuf.c')
-rw-r--r-- | module/zfs/dbuf.c | 126 |
1 files changed, 87 insertions, 39 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 191e5e043..94c2ae9d7 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -26,6 +26,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2019, Klara Inc. * Copyright (c) 2019, Allan Jude + * Copyright (c) 2021, 2022 by Pawel Jakub Dawidek */ #include <sys/zfs_context.h> @@ -49,6 +50,7 @@ #include <sys/trace_zfs.h> #include <sys/callb.h> #include <sys/abd.h> +#include <sys/brt.h> #include <sys/vdev.h> #include <cityhash.h> #include <sys/spa_impl.h> @@ -1427,7 +1429,7 @@ dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags) } static void -dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn) +dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp) { blkptr_t *bps = db->db.db_data; uint32_t indbs = 1ULL << dn->dn_indblkshift; @@ -1436,12 +1438,12 @@ dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn) for (int i = 0; i < n_bps; i++) { blkptr_t *bp = &bps[i]; - ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, indbs); - BP_SET_LSIZE(bp, BP_GET_LEVEL(db->db_blkptr) == 1 ? - dn->dn_datablksz : BP_GET_LSIZE(db->db_blkptr)); - BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr)); - BP_SET_LEVEL(bp, BP_GET_LEVEL(db->db_blkptr) - 1); - BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0); + ASSERT3U(BP_GET_LSIZE(dbbp), ==, indbs); + BP_SET_LSIZE(bp, BP_GET_LEVEL(dbbp) == 1 ? + dn->dn_datablksz : BP_GET_LSIZE(dbbp)); + BP_SET_TYPE(bp, BP_GET_TYPE(dbbp)); + BP_SET_LEVEL(bp, BP_GET_LEVEL(dbbp) - 1); + BP_SET_BIRTH(bp, dbbp->blk_birth, 0); } } @@ -1451,30 +1453,27 @@ dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn) * was taken, ENOENT if no action was taken. */ static int -dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn) +dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp) { ASSERT(MUTEX_HELD(&db->db_mtx)); - int is_hole = db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr); + int is_hole = bp == NULL || BP_IS_HOLE(bp); /* * For level 0 blocks only, if the above check fails: * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() * processes the delete record and clears the bp while we are waiting * for the dn_mtx (resulting in a "no" from block_freed). */ - if (!is_hole && db->db_level == 0) { - is_hole = dnode_block_freed(dn, db->db_blkid) || - BP_IS_HOLE(db->db_blkptr); - } + if (!is_hole && db->db_level == 0) + is_hole = dnode_block_freed(dn, db->db_blkid) || BP_IS_HOLE(bp); if (is_hole) { dbuf_set_data(db, dbuf_alloc_arcbuf(db)); memset(db->db.db_data, 0, db->db.db_size); - if (db->db_blkptr != NULL && db->db_level > 0 && - BP_IS_HOLE(db->db_blkptr) && - db->db_blkptr->blk_birth != 0) { - dbuf_handle_indirect_hole(db, dn); + if (bp != NULL && db->db_level > 0 && BP_IS_HOLE(bp) && + bp->blk_birth != 0) { + dbuf_handle_indirect_hole(db, dn, bp); } db->db_state = DB_CACHED; DTRACE_SET_STATE(db, "hole read satisfied"); @@ -1551,12 +1550,13 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, zbookmark_phys_t zb; uint32_t aflags = ARC_FLAG_NOWAIT; int err, zio_flags; + blkptr_t bp, *bpp; DB_DNODE_ENTER(db); dn = DB_DNODE(db); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(MUTEX_HELD(&db->db_mtx)); - ASSERT(db->db_state == DB_UNCACHED); + ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); ASSERT(db->db_buf == NULL); ASSERT(db->db_parent == NULL || RW_LOCK_HELD(&db->db_parent->db_rwlock)); @@ -1566,16 +1566,46 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, goto early_unlock; } - err = dbuf_read_hole(db, dn); + if (db->db_state == DB_UNCACHED) { + if (db->db_blkptr == NULL) { + bpp = NULL; + } else { + bp = *db->db_blkptr; + bpp = &bp; + } + } else { + struct dirty_leaf *dl; + dbuf_dirty_record_t *dr; + + ASSERT3S(db->db_state, ==, DB_NOFILL); + + dr = list_head(&db->db_dirty_records); + if (dr == NULL) { + err = EIO; + goto early_unlock; + } else { + dl = &dr->dt.dl; + if (!dl->dr_brtwrite) { + err = EIO; + goto early_unlock; + } + bp = dl->dr_overridden_by; + bpp = &bp; + } + } + + err = dbuf_read_hole(db, dn, bpp); if (err == 0) goto early_unlock; + ASSERT(bpp != NULL); + /* * Any attempt to read a redacted block should result in an error. This * will never happen under normal conditions, but can be useful for * debugging purposes. */ - if (BP_IS_REDACTED(db->db_blkptr)) { + if (BP_IS_REDACTED(bpp)) { ASSERT(dsl_dataset_feature_is_active( db->db_objset->os_dsl_dataset, SPA_FEATURE_REDACTED_DATASETS)); @@ -1590,7 +1620,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, * All bps of an encrypted os should have the encryption bit set. * If this is not true it indicates tampering and we report an error. */ - if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) { + if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { spa_log_error(db->db_objset->os_spa, &zb); zfs_panic_recover("unencrypted block in encrypted " "object set %llu", dmu_objset_id(db->db_objset)); @@ -1621,15 +1651,14 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) zio_flags |= ZIO_FLAG_RAW; /* - * The zio layer will copy the provided blkptr later, but we need to - * do this now so that we can release the parent's rwlock. We have to - * do that now so that if dbuf_read_done is called synchronously (on + * The zio layer will copy the provided blkptr later, but we have our + * own copy so that we can release the parent's rwlock. We have to + * do that so that if dbuf_read_done is called synchronously (on * an l1 cache hit) we don't acquire the db_mtx while holding the * parent's rwlock, which would be a lock ordering violation. */ - blkptr_t bp = *db->db_blkptr; dmu_buf_unlock_parent(db, dblt, tag); - (void) arc_read(zio, db->db_objset->os_spa, &bp, + (void) arc_read(zio, db->db_objset->os_spa, bpp, dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); return (err); @@ -1731,9 +1760,6 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) */ ASSERT(!zfs_refcount_is_zero(&db->db_holds)); - if (db->db_state == DB_NOFILL) - return (SET_ERROR(EIO)); - DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -1780,13 +1806,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) } DB_DNODE_EXIT(db); DBUF_STAT_BUMP(hash_hits); - } else if (db->db_state == DB_UNCACHED) { + } else if (db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL) { boolean_t need_wait = B_FALSE; db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); - if (zio == NULL && - db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) { + if (zio == NULL && (db->db_state == DB_NOFILL || + (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) { spa_t *spa = dn->dn_objset->os_spa; zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); need_wait = B_TRUE; @@ -1913,7 +1939,8 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - arc_release(dr->dt.dl.dr_data, db); + if (!dr->dt.dl.dr_brtwrite) + arc_release(dr->dt.dl.dr_data, db); } /* @@ -1996,6 +2023,11 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, db->db_blkid > dn->dn_maxblkid) dn->dn_maxblkid = db->db_blkid; dbuf_unoverride(dr); + if (dr->dt.dl.dr_brtwrite) { + ASSERT(db->db.db_data == NULL); + mutex_exit(&db->db_mtx); + continue; + } } else { /* * This dbuf is not dirty in the open context. @@ -2285,7 +2317,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); - if (db->db_blkid != DMU_BONUS_BLKID) { + if (db->db_blkid != DMU_BONUS_BLKID && db->db_state != DB_NOFILL) { dmu_objset_willuse_space(os, db->db.db_size, tx); } @@ -2328,8 +2360,9 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); } - if (db->db_blkid != DMU_BONUS_BLKID) + if (db->db_blkid != DMU_BONUS_BLKID && db->db_state != DB_NOFILL) { dr->dr_accounted = db->db.db_size; + } dr->dr_dbuf = db; dr->dr_txg = tx->tx_txg; list_insert_before(&db->db_dirty_records, dr_next, dr); @@ -2489,6 +2522,7 @@ static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { uint64_t txg = tx->tx_txg; + boolean_t brtwrite; ASSERT(txg != 0); @@ -2513,6 +2547,16 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (B_FALSE); ASSERT(dr->dr_dbuf == db); + brtwrite = dr->dt.dl.dr_brtwrite; + if (brtwrite) { + /* + * We are freeing a block that we cloned in the same + * transaction group. + */ + brt_pending_remove(dmu_objset_spa(db->db_objset), + &dr->dt.dl.dr_overridden_by, tx); + } + dnode_t *dn = dr->dr_dnode; dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); @@ -2542,7 +2586,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) mutex_exit(&dn->dn_mtx); } - if (db->db_state != DB_NOFILL) { + if (db->db_state != DB_NOFILL && !brtwrite) { dbuf_unoverride(dr); ASSERT(db->db_buf != NULL); @@ -2557,7 +2601,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_dirtycnt -= 1; if (zfs_refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { - ASSERT(db->db_state == DB_NOFILL || arc_released(db->db_buf)); + ASSERT(db->db_state == DB_NOFILL || brtwrite || + arc_released(db->db_buf)); dbuf_destroy(db); return (B_TRUE); } @@ -4748,8 +4793,10 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); if (db->db_state != DB_NOFILL) { - if (dr->dt.dl.dr_data != db->db_buf) + if (dr->dt.dl.dr_data != NULL && + dr->dt.dl.dr_data != db->db_buf) { arc_buf_destroy(dr->dt.dl.dr_data, db); + } } } else { ASSERT(list_head(&dr->dt.di.dr_children) == NULL); @@ -5046,7 +5093,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) mutex_enter(&db->db_mtx); dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by, - dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite); + dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite, + dr->dt.dl.dr_brtwrite); mutex_exit(&db->db_mtx); } else if (db->db_state == DB_NOFILL) { ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF || |