diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/dbuf.c | 2
-rw-r--r-- | module/zfs/dmu_object.c | 31
-rw-r--r-- | module/zfs/dmu_recv.c | 67
-rw-r--r-- | module/zfs/dmu_send.c | 52
-rw-r--r-- | module/zfs/dnode.c | 5
5 files changed, 142 insertions, 15 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index d52a520fa..07e616f6f 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) ASSERT(db->db_level == 0); ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf)); ASSERT(buf != NULL); - ASSERT(arc_buf_lsize(buf) == db->db.db_size); + ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size); ASSERT(tx->tx_txg != 0); arc_return_buf(buf, db); diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index e77ebeca5..ec78ebbdc 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -24,6 +24,7 @@ * Copyright 2014 HybridCluster. All rights reserved. */ +#include <sys/dbuf.h> #include <sys/dmu.h> #include <sys/dmu_objset.h> #include <sys/dmu_tx.h> @@ -304,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype, - bonuslen, DNODE_MIN_SIZE, tx)); + bonuslen, DNODE_MIN_SIZE, B_FALSE, tx)); } int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize, - dmu_tx_t *tx) + boolean_t keep_spill, dmu_tx_t *tx) { dnode_t *dn; int dn_slots = dnodesize >> DNODE_SHIFT; @@ -327,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, if (err) return (err); - dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx); + dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, + keep_spill, tx); + + dnode_rele(dn, FTAG); + return (err); +} + +int +dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0, + FTAG, &dn); + if (err) + return (err); + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + if 
(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + dbuf_rm_spill(dn, tx); + dnode_rm_spill(dn, tx); + } + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); @@ -489,6 +513,7 @@ EXPORT_SYMBOL(dmu_object_claim); EXPORT_SYMBOL(dmu_object_claim_dnsize); EXPORT_SYMBOL(dmu_object_reclaim); EXPORT_SYMBOL(dmu_object_reclaim_dnsize); +EXPORT_SYMBOL(dmu_object_rm_spill); EXPORT_SYMBOL(dmu_object_free); EXPORT_SYMBOL(dmu_object_next); EXPORT_SYMBOL(dmu_object_zapify); diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index fc5d47f5f..976b1bd46 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -274,6 +274,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* embedded data is incompatible with encryption and raw recv */ if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) return (SET_ERROR(EINVAL)); + + /* raw receives require spill block allocation flag */ + if (!(flags & DRR_FLAG_SPILL_BLOCK)) + return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING)); } else { dsflags |= DS_HOLD_FLAG_DECRYPT; } @@ -615,8 +619,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + /* raw receives require spill block allocation flag */ + if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)) + return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING)); + } else { dsflags |= DS_HOLD_FLAG_DECRYPT; + } if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ @@ -764,6 +773,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, return (SET_ERROR(EINVAL)); } + if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK) + drc->drc_spill = B_TRUE; + drba.drba_origin = origin; drba.drba_cookie = drc; drba.drba_cred = CRED(); @@ -835,7 +847,8 @@ struct receive_writer_arg { /* A map from guid to dataset to help handle dedup'd 
streams. */ avl_tree_t *guid_to_ds_map; boolean_t resumable; - boolean_t raw; + boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */ + boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */ uint64_t last_object; uint64_t last_offset; uint64_t max_object; /* highest object ID referenced in stream */ @@ -1151,10 +1164,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_raw_bonuslen) return (SET_ERROR(EINVAL)); } else { - if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 || - drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 || - drro->drr_nblkptr != 0) + /* + * The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN + * record indicates this by setting DRR_FLAG_SPILL_BLOCK. + */ + if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) || + (!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) { + return (SET_ERROR(EINVAL)); + } + + if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 || + drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) { return (SET_ERROR(EINVAL)); + } } err = dmu_object_info(rwa->os, drro->drr_object, &doi); @@ -1312,7 +1334,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, } if (object == DMU_NEW_OBJECT) { - /* currently free, want to be allocated */ + /* Currently free, wants to be allocated */ err = dmu_object_claim_dnsize(rwa->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, @@ -1321,11 +1343,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_blksz != doi.doi_data_block_size || drro->drr_bonustype != doi.doi_bonus_type || drro->drr_bonuslen != doi.doi_bonus_size) { - /* currently allocated, but with different properties */ + /* Currently allocated, but with different properties */ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, - dn_slots << DNODE_SHIFT, tx); + dn_slots << DNODE_SHIFT, rwa->spill ? 
+ DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx); + } else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) { + /* + * Currently allocated, the existing version of this object + * may reference a spill block that is no longer allocated + * at the source and needs to be freed. + */ + err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx); } if (err != 0) { @@ -1665,6 +1695,17 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os))) return (SET_ERROR(EINVAL)); + /* + * This is an unmodified spill block which was added to the stream + * to resolve an issue with incorrectly removing spill blocks. It + * should be ignored by current versions of the code which support + * the DRR_FLAG_SPILL_BLOCK flag. + */ + if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) { + dmu_return_arcbuf(abuf); + return (0); + } + if (rwa->raw) { if (!DMU_OT_IS_VALID(drrs->drr_type) || drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS || @@ -1699,9 +1740,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, return (err); } - if (db_spill->db_size < drrs->drr_length) + /* + * Spill blocks may both grow and shrink. When a change in size + * occurs any existing dbuf must be updated to match the logical + * size of the provided arc_buf_t. 
+ */ + if (db_spill->db_size != drrs->drr_length) { + dmu_buf_will_fill(db_spill, tx); VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); + } if (rwa->byteswap && !arc_is_encrypted(abuf) && arc_get_compression(abuf) == ZIO_COMPRESS_OFF) { @@ -2575,6 +2623,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, rwa->byteswap = drc->drc_byteswap; rwa->resumable = drc->drc_resumable; rwa->raw = drc->drc_raw; + rwa->spill = drc->drc_spill; rwa->os->os_raw_receive = drc->drc_raw; (void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc, diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index ad64d666b..a6ff5ce3e 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -64,6 +64,8 @@ int zfs_send_corrupt_data = B_FALSE; int zfs_send_queue_length = SPA_MAXBLOCKSIZE; /* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */ int zfs_send_set_freerecords_bit = B_TRUE; +/* Set this tunable to FALSE to disable sending unmodified spill blocks. */ +int zfs_send_unmodified_spill_blocks = B_TRUE; /* * Use this to override the recordsize calculation for fast zfs send estimates. */ @@ -99,6 +101,8 @@ typedef struct dump_bytes_io { int dbi_len; } dump_bytes_io_t; +static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data); + static void dump_bytes_cb(void *arg) { @@ -436,6 +440,12 @@ dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data) drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; + /* See comment in dump_dnode() for full details */ + if (zfs_send_unmodified_spill_blocks && + (bp->blk_birth <= dsp->dsa_fromtxg)) { + drrs->drr_flags |= DRR_SPILL_UNMODIFIED; + } + /* handle raw send fields */ if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { ASSERT(BP_IS_PROTECTED(bp)); @@ -587,6 +597,14 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, } } + /* + * DRR_OBJECT_SPILL is set for every dnode which references a + * spill block. 
This allows the receiving pool to definitively + * determine when a spill block should be kept or freed. + */ + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) + drro->drr_flags |= DRR_OBJECT_SPILL; + if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0) return (SET_ERROR(EINTR)); @@ -594,8 +612,34 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0) return (SET_ERROR(EINTR)); + + /* + * Send DRR_SPILL records for unmodified spill blocks. This is useful + * because changing certain attributes of the object (e.g. blocksize) + * can cause old versions of ZFS to incorrectly remove a spill block. + * Including these records in the stream forces an up to date version + * to always be written ensuring they're never lost. Current versions + * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can + * ignore these unmodified spill blocks. + */ + if (zfs_send_unmodified_spill_blocks && + (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && + (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) { + struct send_block_record record; + + bzero(&record, sizeof (struct send_block_record)); + record.eos_marker = B_FALSE; + record.bp = *DN_SPILL_BLKPTR(dnp); + SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os), + object, 0, DMU_SPILL_BLKID); + + if (do_dump(dsp, &record) != 0) + return (SET_ERROR(EINTR)); + } + if (dsp->dsa_err != 0) return (SET_ERROR(EINTR)); + return (0); } @@ -1036,6 +1080,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, /* raw send implies compressok */ if (compressok || rawok) featureflags |= DMU_BACKUP_FEATURE_COMPRESSED; + if (rawok && os->os_encrypted) featureflags |= DMU_BACKUP_FEATURE_RAW; @@ -1064,6 +1109,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if (zfs_send_set_freerecords_bit) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; + drr->drr_u.drr_begin.drr_flags |= 
DRR_FLAG_SPILL_BLOCK; + if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = ancestor_zb->zbm_guid; @@ -1084,6 +1131,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; + dsp->dsa_fromtxg = fromtxg; dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_featureflags = featureflags; dsp->dsa_resume_object = resumeobj; @@ -1552,4 +1600,8 @@ MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data"); module_param(zfs_send_queue_length, int, 0644); MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length"); + +module_param(zfs_send_unmodified_spill_blocks, int, 0644); +MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks, + "Send unmodified spill blocks"); #endif diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 78a90f68f..38ec646ba 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -660,7 +660,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx) + dmu_object_type_t bonustype, int bonuslen, int dn_slots, + boolean_t keep_spill, dmu_tx_t *tx) { int nblkptr; @@ -710,7 +711,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = bonustype; if (dn->dn_nblkptr != nblkptr) dn->dn_next_nblkptr[tx->tx_txg & TXG_MASK] = nblkptr; - if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) { dbuf_rm_spill(dn, tx); dnode_rm_spill(dn, tx); } |