diff options
-rw-r--r-- | include/sys/dmu.h | 10 | ||||
-rw-r--r-- | include/sys/zap.h | 9 | ||||
-rw-r--r-- | module/zfs/dmu.c | 31 | ||||
-rw-r--r-- | module/zfs/dmu_object.c | 54 | ||||
-rw-r--r-- | module/zfs/dmu_recv.c | 7 | ||||
-rw-r--r-- | module/zfs/zap_micro.c | 94 |
6 files changed, 149 insertions, 56 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h index f8b5f096a..542eff95f 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -407,6 +407,10 @@ uint64_t dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize, uint64_t dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, int dnodesize, dmu_tx_t *tx); +uint64_t dmu_object_alloc_hold(objset_t *os, dmu_object_type_t ot, + int blocksize, int indirect_blockshift, dmu_object_type_t bonustype, + int bonuslen, int dnodesize, dnode_t **allocated_dnode, void *tag, + dmu_tx_t *tx); int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, @@ -521,9 +525,9 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, * * Returns ENOENT, EIO, or 0. */ -int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, - uint32_t flags, dmu_buf_t **dbp); -int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); +int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp); +int dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp, + uint32_t flags); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *); diff --git a/include/sys/zap.h b/include/sys/zap.h index 7acc3becb..ab13652d8 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -131,6 +131,11 @@ uint64_t zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx); +uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, + 
dmu_object_type_t bonustype, int bonuslen, int dnodesize, + dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx); + uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj, const char *name, dmu_tx_t *tx); uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, @@ -139,8 +144,8 @@ uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, /* * Initialize an already-allocated object. */ -void mzap_create_impl(objset_t *os, uint64_t obj, int normflags, - zap_flags_t flags, dmu_tx_t *tx); +void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, + dmu_tx_t *tx); /* * Create a new zapobj with no attributes from the given (unallocated) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index e8d0ce3be..5b79eb907 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -330,13 +330,13 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) } /* - * returns ENOENT, EIO, or 0. + * Lookup and hold the bonus buffer for the provided dnode. If the dnode + * has not yet been allocated a bonus dbuf, a new one will be allocated. + * Returns ENOENT, EIO, or 0. 
*/ -int -dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, - dmu_buf_t **dbp) +int dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp, + uint32_t flags) { - dnode_t *dn; dmu_buf_impl_t *db; int error; uint32_t db_flags = DB_RF_MUST_SUCCEED; @@ -346,10 +346,6 @@ dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, if (flags & DMU_READ_NO_DECRYPT) db_flags |= DB_RF_NO_DECRYPT; - error = dnode_hold(os, object, FTAG, &dn); - if (error) - return (error); - rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_bonus == NULL) { rw_exit(&dn->dn_struct_rwlock); @@ -372,8 +368,6 @@ dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, */ rw_exit(&dn->dn_struct_rwlock); - dnode_rele(dn, FTAG); - error = dbuf_read(db, NULL, db_flags); if (error) { dnode_evict_bonus(dn); @@ -387,9 +381,19 @@ dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, } int -dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp) +dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) { - return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp)); + dnode_t *dn; + int error; + + error = dnode_hold(os, object, FTAG, &dn); + if (error) + return (error); + + error = dmu_bonus_hold_by_dnode(dn, tag, dbp, DMU_READ_NO_PREFETCH); + dnode_rele(dn, FTAG); + + return (error); } /* @@ -2547,6 +2551,7 @@ dmu_fini(void) #if defined(_KERNEL) EXPORT_SYMBOL(dmu_bonus_hold); +EXPORT_SYMBOL(dmu_bonus_hold_by_dnode); EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus); EXPORT_SYMBOL(dmu_buf_rele_array); EXPORT_SYMBOL(dmu_prefetch); diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index 9b5cf125f..e77ebeca5 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -44,7 +44,7 @@ int dmu_object_alloc_chunk_shift = 7; static uint64_t dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, int indirect_blockshift, dmu_object_type_t 
bonustype, int bonuslen, - int dnodesize, dmu_tx_t *tx) + int dnodesize, dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx) { uint64_t object; uint64_t L1_dnode_count = DNODES_PER_BLOCK << @@ -80,6 +80,19 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, if (dnodes_per_chunk > L1_dnode_count) dnodes_per_chunk = L1_dnode_count; + /* + * The caller requested the dnode be returned as a performance + * optimization in order to avoid releasing the hold only to + * immediately reacquire it. Since the caller is responsible + * for releasing the hold, they must provide the tag. + */ + if (allocated_dnode != NULL) { + ASSERT3P(tag, !=, NULL); + } else { + ASSERT3P(tag, ==, NULL); + tag = FTAG; + } + object = *cpuobj; for (;;) { /* @@ -167,7 +180,7 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, * to do so. */ error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, - dn_slots, FTAG, &dn); + dn_slots, tag, &dn); if (error == 0) { rw_enter(&dn->dn_struct_rwlock, RW_WRITER); /* @@ -180,11 +193,20 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, bonuslen, dn_slots, tx); rw_exit(&dn->dn_struct_rwlock); dmu_tx_add_new_object(tx, dn); - dnode_rele(dn, FTAG); + + /* + * Caller requested the allocated dnode be + * returned and is responsible for the hold. 
+ */ + if (allocated_dnode != NULL) + *allocated_dnode = dn; + else + dnode_rele(dn, tag); + return (object); } rw_exit(&dn->dn_struct_rwlock); - dnode_rele(dn, FTAG); + dnode_rele(dn, tag); DNODE_STAT_BUMP(dnode_alloc_race); } @@ -205,7 +227,7 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype, - bonuslen, 0, tx); + bonuslen, 0, NULL, NULL, tx); } uint64_t @@ -214,7 +236,7 @@ dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_tx_t *tx) { return dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift, - bonustype, bonuslen, 0, tx); + bonustype, bonuslen, 0, NULL, NULL, tx); } uint64_t @@ -222,7 +244,21 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype, - bonuslen, dnodesize, tx)); + bonuslen, dnodesize, NULL, NULL, tx)); +} + +/* + * Allocate a new object and return a pointer to the newly allocated dnode + * via the allocated_dnode argument. The returned dnode will be held and + * the caller is responsible for releasing the hold by calling dnode_rele(). + */ +uint64_t +dmu_object_alloc_hold(objset_t *os, dmu_object_type_t ot, int blocksize, + int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, + int dnodesize, dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx) +{ + return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift, + bonustype, bonuslen, dnodesize, allocated_dnode, tag, tx)); } int @@ -414,14 +450,13 @@ dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type, * so that concurrent calls to *_is_zapified() can determine if * the object has been completely zapified by checking the type. 
*/ - mzap_create_impl(mos, object, 0, 0, tx); + mzap_create_impl(dn, 0, 0, tx); dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type = DMU_OTN_ZAP_METADATA; dnode_setdirty(dn, tx); dnode_rele(dn, FTAG); - spa_feature_incr(dmu_objset_spa(mos), SPA_FEATURE_EXTENSIBLE_DATASET, tx); } @@ -449,6 +484,7 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx) EXPORT_SYMBOL(dmu_object_alloc); EXPORT_SYMBOL(dmu_object_alloc_ibs); EXPORT_SYMBOL(dmu_object_alloc_dnsize); +EXPORT_SYMBOL(dmu_object_alloc_hold); EXPORT_SYMBOL(dmu_object_claim); EXPORT_SYMBOL(dmu_object_claim_dnsize); EXPORT_SYMBOL(dmu_object_reclaim); diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 990f79025..a448bc148 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -1323,13 +1323,15 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, if (data != NULL) { dmu_buf_t *db; + dnode_t *dn; uint32_t flags = DMU_READ_NO_PREFETCH; if (rwa->raw) flags |= DMU_READ_NO_DECRYPT; - VERIFY0(dmu_bonus_hold_impl(rwa->os, drro->drr_object, - FTAG, flags, &db)); + VERIFY0(dnode_hold(rwa->os, drro->drr_object, FTAG, &dn)); + VERIFY0(dmu_bonus_hold_by_dnode(dn, FTAG, &db, flags)); + dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); @@ -1346,6 +1348,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, DRR_OBJECT_PAYLOAD_SIZE(drro)); } dmu_buf_rele(db, FTAG); + dnode_rele(dn, FTAG); } dmu_tx_commit(tx); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 8b4fd0652..fa369f797 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -699,17 +699,17 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags) * of them may be supplied. 
*/ void -mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, - dmu_tx_t *tx) +mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx) { dmu_buf_t *db; - VERIFY0(dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); + VERIFY0(dmu_buf_hold_by_dnode(dn, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); dmu_buf_will_dirty(db, tx); mzap_phys_t *zp = db->db_data; zp->mz_block_type = ZBT_MICRO; - zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; + zp->mz_salt = + ((uintptr_t)db ^ (uintptr_t)tx ^ (dn->dn_object << 1)) | 1ULL; zp->mz_normflags = normflags; if (flags != 0) { @@ -724,6 +724,33 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, } } +static uint64_t +zap_create_impl(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, + dmu_object_type_t bonustype, int bonuslen, int dnodesize, + dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx) +{ + uint64_t obj; + + ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); + + if (allocated_dnode == NULL) { + dnode_t *dn; + obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift, + indirect_blockshift, bonustype, bonuslen, dnodesize, + &dn, FTAG, tx); + mzap_create_impl(dn, normflags, flags, tx); + dnode_rele(dn, FTAG); + } else { + obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift, + indirect_blockshift, bonustype, bonuslen, dnodesize, + allocated_dnode, tag, tx); + mzap_create_impl(*allocated_dnode, normflags, flags, tx); + } + + return (obj); +} + int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) @@ -754,12 +781,23 @@ zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { + dnode_t *dn; + int error; + ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); - int err = 
dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, + error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, dnodesize, tx); - if (err != 0) - return (err); - mzap_create_impl(os, obj, normflags, 0, tx); + if (error != 0) + return (error); + + error = dnode_hold(os, obj, FTAG, &dn); + if (error != 0) + return (error); + + mzap_create_impl(dn, normflags, 0, tx); + + dnode_rele(dn, FTAG); + return (0); } @@ -790,12 +828,8 @@ uint64_t zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { - ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); - uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, - dnodesize, tx); - - mzap_create_impl(os, obj, normflags, 0, tx); - return (obj); + return (zap_create_impl(os, normflags, 0, ot, 0, 0, + bonustype, bonuslen, dnodesize, NULL, NULL, tx)); } uint64_t @@ -812,20 +846,25 @@ zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) { - ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); - uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, - dnodesize, tx); - - ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && - leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT && - indirect_blockshift >= SPA_MINBLOCKSHIFT && - indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT); - - VERIFY(dmu_object_set_blocksize(os, obj, - 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); + return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift, + indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL, + tx)); +} - mzap_create_impl(os, obj, normflags, flags, tx); - return (obj); +/* + * Create a zap object and return a pointer to the newly allocated dnode via + * the allocated_dnode argument. 
The returned dnode will be held and the + * caller is responsible for releasing the hold by calling dnode_rele(). + */ +uint64_t +zap_create_hold(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, + dmu_object_type_t bonustype, int bonuslen, int dnodesize, + dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx) +{ + return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift, + indirect_blockshift, bonustype, bonuslen, dnodesize, + allocated_dnode, tag, tx)); } int @@ -1596,6 +1635,7 @@ EXPORT_SYMBOL(zap_create_flags_dnsize); EXPORT_SYMBOL(zap_create_claim); EXPORT_SYMBOL(zap_create_claim_norm); EXPORT_SYMBOL(zap_create_claim_norm_dnsize); +EXPORT_SYMBOL(zap_create_hold); EXPORT_SYMBOL(zap_destroy); EXPORT_SYMBOL(zap_lookup); EXPORT_SYMBOL(zap_lookup_by_dnode); |