diff options
author | Pawel Jakub Dawidek <[email protected]> | 2023-04-30 02:47:09 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2023-05-11 16:06:36 -0700 |
commit | 555ef90c5c1db5dcd1b47c02134c85b5a03dc6bc (patch) | |
tree | 033bbadea252e0f5fa8a934d863a2e6520b5a5db /module | |
parent | 469019fb0b2b7ca4bd6c3de5c2f1056a4446f0e3 (diff) |
Additional block cloning fixes.
Reimplement some of the block cloning vs dbuf logic, mostly to fix
situation where we clone a block and in the same transaction group
we want to partially overwrite the clone.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Pawel Jakub Dawidek <[email protected]>
Closes #14825
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/dbuf.c | 103 | ||||
-rw-r--r-- | module/zfs/dmu.c | 14 |
2 files changed, 82 insertions, 35 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 6a50f1927..049a62c1c 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1573,24 +1573,22 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, bpp = &bp; } } else { - struct dirty_leaf *dl; dbuf_dirty_record_t *dr; ASSERT3S(db->db_state, ==, DB_NOFILL); + /* + * Block cloning: If we have a pending block clone, + * we don't want to read the underlying block, but the content + * of the block being cloned, so we have the most recent data. + */ dr = list_head(&db->db_dirty_records); - if (dr == NULL) { + if (dr == NULL || !dr->dt.dl.dr_brtwrite) { err = EIO; goto early_unlock; - } else { - dl = &dr->dt.dl; - if (!dl->dr_brtwrite) { - err = EIO; - goto early_unlock; - } - bp = dl->dr_overridden_by; - bpp = &bp; } + bp = dr->dt.dl.dr_overridden_by; + bpp = &bp; } err = dbuf_read_hole(db, dn, bpp); @@ -1906,6 +1904,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dmu_buf_impl_t *db = dr->dr_dbuf; blkptr_t *bp = &dr->dt.dl.dr_overridden_by; uint64_t txg = dr->dr_txg; + boolean_t release; ASSERT(MUTEX_HELD(&db->db_mtx)); /* @@ -1926,8 +1925,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); + release = !dr->dt.dl.dr_brtwrite; dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; + dr->dt.dl.dr_brtwrite = B_FALSE; dr->dt.dl.dr_has_raw_params = B_FALSE; /* @@ -1938,7 +1939,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - if (!dr->dt.dl.dr_brtwrite) + if (release) arc_release(dr->dt.dl.dr_data, db); } @@ -2022,11 +2023,6 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, db->db_blkid > dn->dn_maxblkid) dn->dn_maxblkid = db->db_blkid; dbuf_unoverride(dr); - if (dr->dt.dl.dr_brtwrite) { - ASSERT(db->db.db_data == NULL); - mutex_exit(&db->db_mtx); - continue; - } } else { /* * This dbuf is not dirty in the open context. @@ -2613,6 +2609,7 @@ static void dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + boolean_t undirty = B_FALSE; ASSERT(tx->tx_txg != 0); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); @@ -2625,7 +2622,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) */ mutex_enter(&db->db_mtx); - if (db->db_state == DB_CACHED) { + if (db->db_state == DB_CACHED || db->db_state == DB_NOFILL) { dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg); /* * It's possible that it is already dirty but not cached, @@ -2633,10 +2630,21 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) * go through dmu_buf_will_dirty(). */ if (dr != NULL) { - /* This dbuf is already dirty and cached. */ - dbuf_redirty(dr); - mutex_exit(&db->db_mtx); - return; + if (dr->dt.dl.dr_brtwrite) { + /* + * Block cloning: If we are dirtying a cloned + * block, we cannot simply redirty it, because + * this dr has no data associated with it. + * We will go through a full undirtying below, + * before dirtying it again. + */ + undirty = B_TRUE; + } else { + /* This dbuf is already dirty and cached. */ + dbuf_redirty(dr); + mutex_exit(&db->db_mtx); + return; + } } } mutex_exit(&db->db_mtx); @@ -2645,7 +2653,20 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) flags |= DB_RF_HAVESTRUCT; DB_DNODE_EXIT(db); + + /* + * Block cloning: Do the dbuf_read() before undirtying the dbuf, as we + * want to make sure dbuf_read() will read the pending cloned block and + * not the uderlying block that is being replaced. dbuf_undirty() will + * do dbuf_unoverride(), so we will end up with cloned block content, + * without overridden BP. + */ (void) dbuf_read(db, NULL, flags); + if (undirty) { + mutex_enter(&db->db_mtx); + VERIFY(!dbuf_undirty(db, tx)); + mutex_exit(&db->db_mtx); + } (void) dbuf_dirty(db, tx); } @@ -2669,13 +2690,37 @@ dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) } void +dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + /* + * Block cloning: We are going to clone into this block, so undirty + * modifications done to this block so far in this txg. This includes + * writes and clones into this block. + */ + mutex_enter(&db->db_mtx); + VERIFY(!dbuf_undirty(db, tx)); + ASSERT(list_head(&db->db_dirty_records) == NULL); + if (db->db_buf != NULL) { + arc_buf_destroy(db->db_buf, db); + db->db_buf = NULL; + } + mutex_exit(&db->db_mtx); + + dmu_buf_will_not_fill(db_fake, tx); +} + +void dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; db->db_state = DB_NOFILL; DTRACE_SET_STATE(db, "allocating NOFILL buffer"); - dmu_buf_will_fill(db_fake, tx); + + dbuf_noread(db); + (void) dbuf_dirty(db, tx); } void @@ -2691,6 +2736,19 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); + if (db->db_state == DB_NOFILL) { + /* + * Block cloning: We will be completely overwriting a block + * cloned in this transaction group, so let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + mutex_enter(&db->db_mtx); + VERIFY(!dbuf_undirty(db, tx)); + mutex_exit(&db->db_mtx); + db->db_state = DB_UNCACHED; + } + dbuf_noread(db); (void) dbuf_dirty(db, tx); } @@ -5155,6 +5213,7 @@ EXPORT_SYMBOL(dbuf_dirty); EXPORT_SYMBOL(dmu_buf_set_crypt_params); EXPORT_SYMBOL(dmu_buf_will_dirty); EXPORT_SYMBOL(dmu_buf_is_dirty); +EXPORT_SYMBOL(dmu_buf_will_clone); EXPORT_SYMBOL(dmu_buf_will_not_fill); EXPORT_SYMBOL(dmu_buf_will_fill); EXPORT_SYMBOL(dmu_buf_fill_done); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index cda1472a7..f8accafd6 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2284,18 +2284,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - mutex_enter(&db->db_mtx); - - VERIFY(!dbuf_undirty(db, tx)); - ASSERT(list_head(&db->db_dirty_records) == NULL); - if (db->db_buf != NULL) { - arc_buf_destroy(db->db_buf, db); - db->db_buf = NULL; - } - - mutex_exit(&db->db_mtx); - - dmu_buf_will_not_fill(dbuf, tx); + dmu_buf_will_clone(dbuf, tx); mutex_enter(&db->db_mtx); @@ -2305,7 +2294,6 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, dl = &dr->dt.dl; dl->dr_overridden_by = *bp; dl->dr_brtwrite = B_TRUE; - dl->dr_override_state = DR_OVERRIDDEN; if (BP_IS_HOLE(bp)) { dl->dr_overridden_by.blk_birth = 0; |