Diffstat (limited to 'module')
-rw-r--r--   module/zfs/dbuf.c         2
-rw-r--r--   module/zfs/dmu_object.c  31
-rw-r--r--   module/zfs/dmu_recv.c    67
-rw-r--r--   module/zfs/dmu_send.c    52
-rw-r--r--   module/zfs/dnode.c        5
5 files changed, 142 insertions, 15 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index d52a520fa..07e616f6f 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(db->db_level == 0);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
- ASSERT(arc_buf_lsize(buf) == db->db.db_size);
+ ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
ASSERT(tx->tx_txg != 0);
arc_return_buf(buf, db);
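The dbuf.c hunk above is a diagnostics-only change: ASSERT3U(a, ==, b) reports both operand values when the comparison fails, where the plain ASSERT it replaces reports only the expression text. A minimal userspace sketch of the idea (this is not the SPL's actual macro implementation, just an illustration):

#include <stdio.h>
#include <stdlib.h>

/*
 * Illustrative stand-in for ASSERT3U: on failure it prints both
 * operand values, which a bare ASSERT(a == b) cannot do.
 */
#define ASSERT3U(lhs, op, rhs)						\
	do {								\
		unsigned long long _l = (lhs), _r = (rhs);		\
		if (!(_l op _r)) {					\
			fprintf(stderr, "%s (%llu) %s %s (%llu)\n",	\
			    #lhs, _l, #op, #rhs, _r);			\
			abort();					\
		}							\
	} while (0)

int main(void)
{
	unsigned long long lsize = 4096, db_size = 4096;

	ASSERT3U(lsize, ==, db_size);	/* passes silently */
	return (0);
}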
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index e77ebeca5..ec78ebbdc 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -24,6 +24,7 @@
* Copyright 2014 HybridCluster. All rights reserved.
*/
+#include <sys/dbuf.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
@@ -304,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
- bonuslen, DNODE_MIN_SIZE, tx));
+ bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
}
int
dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
- dmu_tx_t *tx)
+ boolean_t keep_spill, dmu_tx_t *tx)
{
dnode_t *dn;
int dn_slots = dnodesize >> DNODE_SHIFT;
@@ -327,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
if (err)
return (err);
- dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
+ dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
+ keep_spill, tx);
+
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
+int
+dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ int err;
+
+ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
+ FTAG, &dn);
+ if (err)
+ return (err);
+
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ dbuf_rm_spill(dn, tx);
+ dnode_rm_spill(dn, tx);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (err);
@@ -489,6 +513,7 @@ EXPORT_SYMBOL(dmu_object_claim);
EXPORT_SYMBOL(dmu_object_claim_dnsize);
EXPORT_SYMBOL(dmu_object_reclaim);
EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
+EXPORT_SYMBOL(dmu_object_rm_spill);
EXPORT_SYMBOL(dmu_object_free);
EXPORT_SYMBOL(dmu_object_next);
EXPORT_SYMBOL(dmu_object_zapify);
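The new dmu_object_rm_spill() above gives the receive path a way to free a spill block that no longer exists on the send side. A hedged userspace model of its locking pattern follows; the toy types and the flag's bit value are assumptions for illustration, not ZFS definitions:

#include <pthread.h>
#include <stdio.h>

#define	DNODE_FLAG_SPILL_BLKPTR	(1 << 2)	/* assumed bit value */

struct toy_dnode {
	pthread_rwlock_t dn_struct_rwlock;
	unsigned int dn_flags;
};

/*
 * Models the shape of dmu_object_rm_spill(): take the dnode's struct
 * lock as writer and, only if the dnode actually references a spill
 * block, tear it down. Here that is just clearing the flag; the real
 * code calls dbuf_rm_spill() and dnode_rm_spill().
 */
static int
toy_rm_spill(struct toy_dnode *dn)
{
	pthread_rwlock_wrlock(&dn->dn_struct_rwlock);
	if (dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
		dn->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
		printf("spill block removed\n");
	}
	pthread_rwlock_unlock(&dn->dn_struct_rwlock);
	return (0);
}

int main(void)
{
	struct toy_dnode dn = {
		PTHREAD_RWLOCK_INITIALIZER, DNODE_FLAG_SPILL_BLKPTR };

	return (toy_rm_spill(&dn));
}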
diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
index fc5d47f5f..976b1bd46 100644
--- a/module/zfs/dmu_recv.c
+++ b/module/zfs/dmu_recv.c
@@ -274,6 +274,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* embedded data is incompatible with encryption and raw recv */
if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
return (SET_ERROR(EINVAL));
+
+ /* raw receives require spill block allocation flag */
+ if (!(flags & DRR_FLAG_SPILL_BLOCK))
+ return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
} else {
dsflags |= DS_HOLD_FLAG_DECRYPT;
}
@@ -615,8 +619,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
tofs, recv_clone_name);
- if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0)
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ /* raw receives require spill block allocation flag */
+ if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK))
+ return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
+ } else {
dsflags |= DS_HOLD_FLAG_DECRYPT;
+ }
if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
/* %recv does not exist; continue in tofs */
@@ -764,6 +773,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
return (SET_ERROR(EINVAL));
}
+ if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)
+ drc->drc_spill = B_TRUE;
+
drba.drba_origin = origin;
drba.drba_cookie = drc;
drba.drba_cred = CRED();
@@ -835,7 +847,8 @@ struct receive_writer_arg {
/* A map from guid to dataset to help handle dedup'd streams. */
avl_tree_t *guid_to_ds_map;
boolean_t resumable;
- boolean_t raw;
+ boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */
+ boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
uint64_t last_object;
uint64_t last_offset;
uint64_t max_object; /* highest object ID referenced in stream */
@@ -1151,10 +1164,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_raw_bonuslen)
return (SET_ERROR(EINVAL));
} else {
- if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 ||
- drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 ||
- drro->drr_nblkptr != 0)
+ /*
+ * The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN
+ * record indicates this by setting DRR_FLAG_SPILL_BLOCK.
+ */
+ if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) ||
+ (!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) {
+ return (SET_ERROR(EINVAL));
+ }
+
+ if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 ||
+ drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) {
return (SET_ERROR(EINVAL));
+ }
}
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
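In the non-raw branch above, exactly one object-record flag is now tolerated, DRR_OBJECT_SPILL, and only when the stream's DRR_BEGIN record advertised DRR_FLAG_SPILL_BLOCK. A self-contained model of that check; the bit value is an assumption in the single-bit style of zfs_ioctl.h, not a verified constant:

#include <assert.h>
#include <stdbool.h>

#define	DRR_OBJECT_SPILL	(1 << 0)	/* assumed bit value */
#define	DRR_OBJECT_HAS_SPILL(f)	(((f) & DRR_OBJECT_SPILL) != 0)

/*
 * Returns true when a non-raw object record's flags are acceptable:
 * no unknown bits, and DRR_OBJECT_SPILL only if the stream's
 * DRR_BEGIN record negotiated DRR_FLAG_SPILL_BLOCK.
 */
static bool
object_flags_valid(unsigned int drr_flags, bool stream_spill)
{
	if (drr_flags & ~DRR_OBJECT_SPILL)
		return (false);
	if (!stream_spill && DRR_OBJECT_HAS_SPILL(drr_flags))
		return (false);
	return (true);
}

int main(void)
{
	assert(object_flags_valid(0, false));
	assert(object_flags_valid(DRR_OBJECT_SPILL, true));
	assert(!object_flags_valid(DRR_OBJECT_SPILL, false));
	return (0);
}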
@@ -1312,7 +1334,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
if (object == DMU_NEW_OBJECT) {
- /* currently free, want to be allocated */
+ /* Currently free, wants to be allocated */
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
@@ -1321,11 +1343,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
- /* currently allocated, but with different properties */
+ /* Currently allocated, but with different properties */
err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
- dn_slots << DNODE_SHIFT, tx);
+ dn_slots << DNODE_SHIFT, rwa->spill ?
+ DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
+ } else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) {
+ /*
+ * Currently allocated, the existing version of this object
+ * may reference a spill block that is no longer allocated
+ * at the source and needs to be freed.
+ */
+ err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx);
}
if (err != 0) {
@@ -1665,6 +1695,17 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
return (SET_ERROR(EINVAL));
+ /*
+ * This is an unmodified spill block which was added to the stream
+ * to resolve an issue with incorrectly removing spill blocks. It
+ * should be ignored by current versions of the code which support
+ * the DRR_FLAG_SPILL_BLOCK flag.
+ */
+ if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
+ dmu_return_arcbuf(abuf);
+ return (0);
+ }
+
if (rwa->raw) {
if (!DMU_OT_IS_VALID(drrs->drr_type) ||
drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
@@ -1699,9 +1740,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
return (err);
}
- if (db_spill->db_size < drrs->drr_length)
+ /*
+ * Spill blocks may both grow and shrink. When a change in size
+ * occurs, any existing dbuf must be updated to match the logical
+ * size of the provided arc_buf_t.
+ */
+ if (db_spill->db_size != drrs->drr_length) {
+ dmu_buf_will_fill(db_spill, tx);
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
+ }
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
@@ -2575,6 +2623,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
rwa->byteswap = drc->drc_byteswap;
rwa->resumable = drc->drc_resumable;
rwa->raw = drc->drc_raw;
+ rwa->spill = drc->drc_spill;
rwa->os->os_raw_receive = drc->drc_raw;
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
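Taken together, the receive-side changes reduce to a small decision for an already-allocated object: when DRR_FLAG_SPILL_BLOCK was negotiated, the per-object DRR_OBJECT_SPILL bit determines whether the spill block survives (via the keep_spill argument to dmu_object_reclaim_dnsize()) or is freed (via dmu_object_rm_spill()); without it, the old ambiguous behavior remains. A pure-logic sketch of that table, with illustrative names:

#include <stdbool.h>
#include <stdio.h>

enum spill_action { SPILL_LEGACY, SPILL_KEEP, SPILL_REMOVE };

/*
 * Models the receive_object() outcome for an existing object: legacy
 * streams carry no spill information, so nothing definitive can be
 * decided; new streams either keep or remove the spill block.
 */
static enum spill_action
decide_spill(bool stream_spill, bool obj_has_spill)
{
	if (!stream_spill)
		return (SPILL_LEGACY);
	return (obj_has_spill ? SPILL_KEEP : SPILL_REMOVE);
}

int main(void)
{
	static const char *names[] = { "legacy", "keep", "remove" };

	for (int s = 0; s <= 1; s++)
		for (int o = 0; o <= 1; o++)
			printf("stream_spill=%d obj_spill=%d -> %s\n",
			    s, o, names[decide_spill(s, o)]);
	return (0);
}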
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index ad64d666b..a6ff5ce3e 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -64,6 +64,8 @@ int zfs_send_corrupt_data = B_FALSE;
int zfs_send_queue_length = SPA_MAXBLOCKSIZE;
/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
int zfs_send_set_freerecords_bit = B_TRUE;
+/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
+int zfs_send_unmodified_spill_blocks = B_TRUE;
/*
* Use this to override the recordsize calculation for fast zfs send estimates.
@@ -99,6 +101,8 @@ typedef struct dump_bytes_io {
int dbi_len;
} dump_bytes_io_t;
+static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
+
static void
dump_bytes_cb(void *arg)
{
@@ -436,6 +440,12 @@ dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
+ /* See comment in dump_dnode() for full details */
+ if (zfs_send_unmodified_spill_blocks &&
+ (bp->blk_birth <= dsp->dsa_fromtxg)) {
+ drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
+ }
+
/* handle raw send fields */
if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
ASSERT(BP_IS_PROTECTED(bp));
@@ -587,6 +597,14 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
}
}
+ /*
+ * DRR_OBJECT_SPILL is set for every dnode which references a
+ * spill block. This allows the receiving pool to definitively
+ * determine when a spill block should be kept or freed.
+ */
+ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
+ drro->drr_flags |= DRR_OBJECT_SPILL;
+
if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
return (SET_ERROR(EINTR));
@@ -594,8 +612,34 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
return (SET_ERROR(EINTR));
+
+ /*
+ * Send DRR_SPILL records for unmodified spill blocks. This is useful
+ * because changing certain attributes of the object (e.g. blocksize)
+ * can cause old versions of ZFS to incorrectly remove a spill block.
+ * Including these records in the stream forces an up to date version
+ * to always be written ensuring they're never lost. Current versions
+ * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
+ * ignore these unmodified spill blocks.
+ */
+ if (zfs_send_unmodified_spill_blocks &&
+ (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
+ (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
+ struct send_block_record record;
+
+ bzero(&record, sizeof (struct send_block_record));
+ record.eos_marker = B_FALSE;
+ record.bp = *DN_SPILL_BLKPTR(dnp);
+ SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
+ object, 0, DMU_SPILL_BLKID);
+
+ if (do_dump(dsp, &record) != 0)
+ return (SET_ERROR(EINTR));
+ }
+
if (dsp->dsa_err != 0)
return (SET_ERROR(EINTR));
+
return (0);
}
@@ -1036,6 +1080,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
/* raw send implies compressok */
if (compressok || rawok)
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
+
if (rawok && os->os_encrypted)
featureflags |= DMU_BACKUP_FEATURE_RAW;
@@ -1064,6 +1109,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
if (zfs_send_set_freerecords_bit)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
+ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
+
if (ancestor_zb != NULL) {
drr->drr_u.drr_begin.drr_fromguid =
ancestor_zb->zbm_guid;
@@ -1084,6 +1131,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_os = os;
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+ dsp->dsa_fromtxg = fromtxg;
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_featureflags = featureflags;
dsp->dsa_resume_object = resumeobj;
@@ -1552,4 +1600,8 @@ MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data");
module_param(zfs_send_queue_length, int, 0644);
MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length");
+
+module_param(zfs_send_unmodified_spill_blocks, int, 0644);
+MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks,
+ "Send unmodified spill blocks");
#endif
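On the send side, a spill block is considered unmodified for an incremental stream when its birth txg is at or before the incremental source txg, and only then is DRR_SPILL_UNMODIFIED set, subject to the new tunable. A minimal sketch of that predicate; field and variable names mirror the diff but the txg values are illustrative:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool zfs_send_unmodified_spill_blocks = true;	/* mirrors the tunable */

/*
 * A spill block born at or before the incremental "from" txg has not
 * changed since the base snapshot, so dump_spill() tags its record
 * DRR_SPILL_UNMODIFIED, and receivers that understand
 * DRR_FLAG_SPILL_BLOCK may drop it.
 */
static bool
spill_is_unmodified(uint64_t blk_birth, uint64_t fromtxg)
{
	return (zfs_send_unmodified_spill_blocks && blk_birth <= fromtxg);
}

int main(void)
{
	assert(spill_is_unmodified(100, 250));	/* unchanged since base */
	assert(!spill_is_unmodified(300, 250));	/* modified after base */
	return (0);
}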
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 78a90f68f..38ec646ba 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -660,7 +660,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
void
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots,
+ boolean_t keep_spill, dmu_tx_t *tx)
{
int nblkptr;
@@ -710,7 +711,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = bonustype;
if (dn->dn_nblkptr != nblkptr)
dn->dn_next_nblkptr[tx->tx_txg & TXG_MASK] = nblkptr;
- if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) {
dbuf_rm_spill(dn, tx);
dnode_rm_spill(dn, tx);
}
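At the bottom of the stack, dnode_reallocate() now frees an existing spill block only when the caller did not pass keep_spill. The predicate in isolation, with the same assumed flag value as the earlier sketches:

#include <assert.h>
#include <stdbool.h>

#define	DNODE_FLAG_SPILL_BLKPTR	(1 << 2)	/* assumed bit value */

/*
 * True when dnode_reallocate() should free an existing spill block:
 * the dnode references one and the caller did not ask to keep it.
 */
static bool
should_rm_spill(unsigned int dn_flags, bool keep_spill)
{
	return ((dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0 && !keep_spill);
}

int main(void)
{
	assert(should_rm_spill(DNODE_FLAG_SPILL_BLKPTR, false));
	assert(!should_rm_spill(DNODE_FLAG_SPILL_BLKPTR, true));
	assert(!should_rm_spill(0, false));
	return (0);
}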