aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/sys/dmu.h3
-rw-r--r--include/sys/dmu_impl.h1
-rw-r--r--include/sys/dmu_recv.h1
-rw-r--r--include/sys/dnode.h7
-rw-r--r--include/sys/fs/zfs.h1
-rw-r--r--include/sys/zfs_ioctl.h28
-rw-r--r--lib/libzfs/libzfs_sendrecv.c7
-rw-r--r--man/man5/zfs-module-parameters.516
-rw-r--r--module/zfs/dbuf.c2
-rw-r--r--module/zfs/dmu_object.c31
-rw-r--r--module/zfs/dmu_recv.c67
-rw-r--r--module/zfs/dmu_send.c52
-rw-r--r--module/zfs/dnode.c5
-rw-r--r--tests/runfiles/linux.run4
-rw-r--r--tests/zfs-tests/tests/functional/rsend/Makefile.am1
-rw-r--r--tests/zfs-tests/tests/functional/rsend/rsend.kshlib23
-rwxr-xr-xtests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh19
-rwxr-xr-xtests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh17
-rwxr-xr-xtests/zfs-tests/tests/functional/rsend/send_spill_block.ksh155
19 files changed, 398 insertions, 42 deletions
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 93d05aac4..88c836171 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -420,7 +420,8 @@ int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object,
dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype,
- int bonuslen, int dnodesize, dmu_tx_t *txp);
+ int bonuslen, int dnodesize, boolean_t keep_spill, dmu_tx_t *tx);
+int dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx);
/*
* Free an object from this objset.
diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h
index 03a63077f..5e1901da4 100644
--- a/include/sys/dmu_impl.h
+++ b/include/sys/dmu_impl.h
@@ -265,6 +265,7 @@ typedef struct dmu_sendarg {
objset_t *dsa_os;
zio_cksum_t dsa_zc;
uint64_t dsa_toguid;
+ uint64_t dsa_fromtxg;
int dsa_err;
dmu_pendop_t dsa_pending_op;
uint64_t dsa_featureflags;
diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h
index 90002026b..ffa89249d 100644
--- a/include/sys/dmu_recv.h
+++ b/include/sys/dmu_recv.h
@@ -48,6 +48,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_resumable;
boolean_t drc_raw;
boolean_t drc_clone;
+ boolean_t drc_spill;
struct avl_tree *drc_guid_to_ds_map;
nvlist_t *drc_keynvl;
zio_cksum_t drc_cksum;
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index accbe6945..c60258bbc 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -267,8 +267,8 @@ typedef struct dnode_phys {
};
} dnode_phys_t;
-#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \
- (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
+#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \
+ (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)))
struct dnode {
/*
@@ -420,7 +420,8 @@ void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots,
+ boolean_t keep_spill, dmu_tx_t *tx);
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 8a532ec7e..3bcefdbfd 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1317,6 +1317,7 @@ typedef enum {
ZFS_ERR_WRONG_PARENT,
ZFS_ERR_FROM_IVSET_GUID_MISSING,
ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
+ ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
} zfs_errno_t;
/*
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index bb5b48c91..a883c3358 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -101,7 +101,7 @@ typedef enum drr_headertype {
/* flag #18 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
-/* flag #21 is reserved for a Delphix feature */
+/* flag #21 is reserved for the redacted send/receive feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
@@ -131,7 +131,7 @@ typedef enum dmu_send_resume_token_version {
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * | reserved | feature-flags |C|S|
+ * | reserved | feature-flags |C|S|
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* The low order two bits indicate the header type: SUBSTREAM (0x1)
@@ -160,16 +160,38 @@ typedef enum dmu_send_resume_token_version {
* cannot necessarily be received as a clone correctly.
*/
#define DRR_FLAG_FREERECORDS (1<<2)
+/*
+ * When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL
+ * and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream.
+ *
+ * When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have
+ * DRR_OBJECT_SPILL set if and only if they should have a spill block
+ * (either an existing one, or a new one in the send stream). When clear
+ * the object does not have a spill block and any existing spill block
+ * should be freed.
+ *
+ * Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will
+ * have DRR_SPILL_UNMODIFIED set if and only if they were included for
+ * backward compatibility purposes, and can be safely ignored by new versions
+ * of zfs receive. Previous versions of ZFS which do not understand the
+ * DRR_FLAG_SPILL_BLOCK flag will process this record and recreate any missing
+ * spill blocks.
+ */
+#define DRR_FLAG_SPILL_BLOCK (1<<3)
/*
* flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
* DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/
-#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
+#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for SPILL records */
#define DRR_RAW_BYTESWAP (1<<1)
+#define DRR_OBJECT_SPILL (1<<2) /* OBJECT record has a spill block */
+#define DRR_SPILL_UNMODIFIED (1<<2) /* SPILL record for unmodified block */
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
+#define DRR_OBJECT_HAS_SPILL(flags) ((flags) & DRR_OBJECT_SPILL)
+#define DRR_SPILL_IS_UNMODIFIED(flags) ((flags) & DRR_SPILL_UNMODIFIED)
/* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 2c2eca8db..f69a46430 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -4466,6 +4466,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
"of raw encrypted send streams."));
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
+ case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Spill block flag missing for raw send.\n"
+ "The zfs software on the sending system must "
+ "be updated."));
+ (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+ break;
case EBUSY:
if (hastoken) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index ad6cd4e94..5c49670f1 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -2340,6 +2340,21 @@ Use \fB1\fR for yes and \fB0\fR for no (default).
.sp
.ne 2
.na
+\fBzfs_send_unmodified_spill_blocks\fR (int)
+.ad
+.RS 12n
+Include unmodified spill blocks in the send stream. Under certain circumstances
+previous versions of ZFS could incorrectly remove the spill block from an
+existing object. Including unmodified copies of the spill blocks creates a
+backwards compatible stream which will recreate a spill block if it was
+incorrectly removed.
+.sp
+Use \fB1\fR for yes (default) and \fB0\fR for no.
+.RE
+
+.sp
+.ne 2
+.na
\fBzfs_send_queue_length\fR (int)
.ad
.RS 12n
@@ -2355,7 +2370,6 @@ Default value: \fB16,777,216\fR.
\fBzfs_recv_queue_length\fR (int)
.ad
.RS 12n
-.sp
The maximum number of bytes allowed in the \fBzfs receive\fR queue. This value
must be at least twice the maximum block size in use.
.sp
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index d52a520fa..07e616f6f 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(db->db_level == 0);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
- ASSERT(arc_buf_lsize(buf) == db->db.db_size);
+ ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
ASSERT(tx->tx_txg != 0);
arc_return_buf(buf, db);
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index e77ebeca5..ec78ebbdc 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -24,6 +24,7 @@
* Copyright 2014 HybridCluster. All rights reserved.
*/
+#include <sys/dbuf.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
@@ -304,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
- bonuslen, DNODE_MIN_SIZE, tx));
+ bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
}
int
dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
- dmu_tx_t *tx)
+ boolean_t keep_spill, dmu_tx_t *tx)
{
dnode_t *dn;
int dn_slots = dnodesize >> DNODE_SHIFT;
@@ -327,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
if (err)
return (err);
- dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
+ dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
+ keep_spill, tx);
+
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
+int
+dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ int err;
+
+ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
+ FTAG, &dn);
+ if (err)
+ return (err);
+
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ dbuf_rm_spill(dn, tx);
+ dnode_rm_spill(dn, tx);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (err);
@@ -489,6 +513,7 @@ EXPORT_SYMBOL(dmu_object_claim);
EXPORT_SYMBOL(dmu_object_claim_dnsize);
EXPORT_SYMBOL(dmu_object_reclaim);
EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
+EXPORT_SYMBOL(dmu_object_rm_spill);
EXPORT_SYMBOL(dmu_object_free);
EXPORT_SYMBOL(dmu_object_next);
EXPORT_SYMBOL(dmu_object_zapify);
diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
index fc5d47f5f..976b1bd46 100644
--- a/module/zfs/dmu_recv.c
+++ b/module/zfs/dmu_recv.c
@@ -274,6 +274,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* embedded data is incompatible with encryption and raw recv */
if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
return (SET_ERROR(EINVAL));
+
+ /* raw receives require spill block allocation flag */
+ if (!(flags & DRR_FLAG_SPILL_BLOCK))
+ return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
} else {
dsflags |= DS_HOLD_FLAG_DECRYPT;
}
@@ -615,8 +619,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
tofs, recv_clone_name);
- if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0)
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ /* raw receives require spill block allocation flag */
+ if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK))
+ return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
+ } else {
dsflags |= DS_HOLD_FLAG_DECRYPT;
+ }
if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
/* %recv does not exist; continue in tofs */
@@ -764,6 +773,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
return (SET_ERROR(EINVAL));
}
+ if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)
+ drc->drc_spill = B_TRUE;
+
drba.drba_origin = origin;
drba.drba_cookie = drc;
drba.drba_cred = CRED();
@@ -835,7 +847,8 @@ struct receive_writer_arg {
/* A map from guid to dataset to help handle dedup'd streams. */
avl_tree_t *guid_to_ds_map;
boolean_t resumable;
- boolean_t raw;
+ boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */
+ boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
uint64_t last_object;
uint64_t last_offset;
uint64_t max_object; /* highest object ID referenced in stream */
@@ -1151,10 +1164,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_raw_bonuslen)
return (SET_ERROR(EINVAL));
} else {
- if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 ||
- drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 ||
- drro->drr_nblkptr != 0)
+ /*
+ * The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN
+ * record indicates this by setting DRR_FLAG_SPILL_BLOCK.
+ */
+ if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) ||
+ (!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) {
+ return (SET_ERROR(EINVAL));
+ }
+
+ if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 ||
+ drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) {
return (SET_ERROR(EINVAL));
+ }
}
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
@@ -1312,7 +1334,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
if (object == DMU_NEW_OBJECT) {
- /* currently free, want to be allocated */
+ /* Currently free, wants to be allocated */
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
@@ -1321,11 +1343,19 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
- /* currently allocated, but with different properties */
+ /* Currently allocated, but with different properties */
err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
- dn_slots << DNODE_SHIFT, tx);
+ dn_slots << DNODE_SHIFT, rwa->spill ?
+ DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
+ } else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) {
+ /*
+ * Currently allocated, the existing version of this object
+ * may reference a spill block that is no longer allocated
+ * at the source and needs to be freed.
+ */
+ err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx);
}
if (err != 0) {
@@ -1665,6 +1695,17 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
return (SET_ERROR(EINVAL));
+ /*
+ * This is an unmodified spill block which was added to the stream
+ * to resolve an issue with incorrectly removing spill blocks. It
+ * should be ignored by current versions of the code which support
+ * the DRR_FLAG_SPILL_BLOCK flag.
+ */
+ if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
+ dmu_return_arcbuf(abuf);
+ return (0);
+ }
+
if (rwa->raw) {
if (!DMU_OT_IS_VALID(drrs->drr_type) ||
drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
@@ -1699,9 +1740,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
return (err);
}
- if (db_spill->db_size < drrs->drr_length)
+ /*
+ * Spill blocks may both grow and shrink. When a change in size
+ * occurs any existing dbuf must be updated to match the logical
+ * size of the provided arc_buf_t.
+ */
+ if (db_spill->db_size != drrs->drr_length) {
+ dmu_buf_will_fill(db_spill, tx);
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
+ }
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
@@ -2575,6 +2623,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
rwa->byteswap = drc->drc_byteswap;
rwa->resumable = drc->drc_resumable;
rwa->raw = drc->drc_raw;
+ rwa->spill = drc->drc_spill;
rwa->os->os_raw_receive = drc->drc_raw;
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index ad64d666b..a6ff5ce3e 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -64,6 +64,8 @@ int zfs_send_corrupt_data = B_FALSE;
int zfs_send_queue_length = SPA_MAXBLOCKSIZE;
/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
int zfs_send_set_freerecords_bit = B_TRUE;
+/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
+int zfs_send_unmodified_spill_blocks = B_TRUE;
/*
* Use this to override the recordsize calculation for fast zfs send estimates.
@@ -99,6 +101,8 @@ typedef struct dump_bytes_io {
int dbi_len;
} dump_bytes_io_t;
+static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
+
static void
dump_bytes_cb(void *arg)
{
@@ -436,6 +440,12 @@ dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
+ /* See comment in dump_dnode() for full details */
+ if (zfs_send_unmodified_spill_blocks &&
+ (bp->blk_birth <= dsp->dsa_fromtxg)) {
+ drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
+ }
+
/* handle raw send fields */
if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
ASSERT(BP_IS_PROTECTED(bp));
@@ -587,6 +597,14 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
}
}
+ /*
+ * DRR_OBJECT_SPILL is set for every dnode which references a
+ * spill block. This allows the receiving pool to definitively
+ * determine when a spill block should be kept or freed.
+ */
+ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
+ drro->drr_flags |= DRR_OBJECT_SPILL;
+
if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
return (SET_ERROR(EINTR));
@@ -594,8 +612,34 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
return (SET_ERROR(EINTR));
+
+ /*
+ * Send DRR_SPILL records for unmodified spill blocks. This is useful
+ * because changing certain attributes of the object (e.g. blocksize)
+ * can cause old versions of ZFS to incorrectly remove a spill block.
+ * Including these records in the stream forces an up to date version
+ * to always be written ensuring they're never lost. Current versions
+ * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
+ * ignore these unmodified spill blocks.
+ */
+ if (zfs_send_unmodified_spill_blocks &&
+ (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
+ (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
+ struct send_block_record record;
+
+ bzero(&record, sizeof (struct send_block_record));
+ record.eos_marker = B_FALSE;
+ record.bp = *DN_SPILL_BLKPTR(dnp);
+ SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
+ object, 0, DMU_SPILL_BLKID);
+
+ if (do_dump(dsp, &record) != 0)
+ return (SET_ERROR(EINTR));
+ }
+
if (dsp->dsa_err != 0)
return (SET_ERROR(EINTR));
+
return (0);
}
@@ -1036,6 +1080,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
/* raw send implies compressok */
if (compressok || rawok)
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
+
if (rawok && os->os_encrypted)
featureflags |= DMU_BACKUP_FEATURE_RAW;
@@ -1064,6 +1109,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
if (zfs_send_set_freerecords_bit)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
+ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
+
if (ancestor_zb != NULL) {
drr->drr_u.drr_begin.drr_fromguid =
ancestor_zb->zbm_guid;
@@ -1084,6 +1131,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_os = os;
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+ dsp->dsa_fromtxg = fromtxg;
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_featureflags = featureflags;
dsp->dsa_resume_object = resumeobj;
@@ -1552,4 +1600,8 @@ MODULE_PARM_DESC(zfs_send_corrupt_data, "Allow sending corrupt data");
module_param(zfs_send_queue_length, int, 0644);
MODULE_PARM_DESC(zfs_send_queue_length, "Maximum send queue length");
+
+module_param(zfs_send_unmodified_spill_blocks, int, 0644);
+MODULE_PARM_DESC(zfs_send_unmodified_spill_blocks,
+ "Send unmodified spill blocks");
#endif
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 78a90f68f..38ec646ba 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -660,7 +660,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
void
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots,
+ boolean_t keep_spill, dmu_tx_t *tx)
{
int nblkptr;
@@ -710,7 +711,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = bonustype;
if (dn->dn_nblkptr != nblkptr)
dn->dn_next_nblkptr[tx->tx_txg & TXG_MASK] = nblkptr;
- if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) {
dbuf_rm_spill(dn, tx);
dnode_rm_spill(dn, tx);
}
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 746d42a22..8219cf42b 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -807,8 +807,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy',
'send_encrypted_props', 'send_encrypted_truncated_files',
'send_freeobjects', 'send_realloc_dnode_size', 'send_realloc_files',
- 'send_realloc_encrypted_files', 'send_holds', 'send_hole_birth',
- 'send_mixed_raw', 'send-wDR_encrypted_zvol']
+ 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
+ 'send_hole_birth', 'send_mixed_raw', 'send-wDR_encrypted_zvol']
tags = ['functional', 'rsend']
[tests/functional/scrub_mirror]
diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
index 8669a51fb..585018ac2 100644
--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
+++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
@@ -44,6 +44,7 @@ dist_pkgdata_SCRIPTS = \
send_realloc_dnode_size.ksh \
send_realloc_files.ksh \
send_realloc_encrypted_files.ksh \
+ send_spill_block.ksh \
send_holds.ksh \
send_hole_birth.ksh \
send_mixed_raw.ksh \
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
index 2ef6775e6..521a1c7eb 100644
--- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
+++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
@@ -30,6 +30,7 @@
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
. $STF_SUITE/tests/functional/rsend/rsend.cfg
#
@@ -518,9 +519,13 @@ function churn_files
value=$((RANDOM % 5))
if [ $value -eq 0 -a $xattrs -ne 0 ]; then
attrname="testattr$((RANDOM % 3))"
+ attrlen="$(((RANDOM % 1000) + 1))"
+ attrvalue="$(random_string VALID_NAME_CHAR \
+ $attrlen)"
attr -qr $attrname $file_name || \
log_fail "Failed to remove $attrname"
- attr -qs $attrname -V TestValue $file_name || \
+ attr -qs $attrname \
+ -V "$attrvalue" $file_name || \
log_fail "Failed to set $attrname"
elif [ $value -eq 1 ]; then
dd if=/dev/urandom of=$file_name \
@@ -548,9 +553,12 @@ function churn_files
if [ $xattrs -ne 0 ]; then
for j in {0..2}; do
attrname="testattr$j"
- attr -qs $attrname -V TestValue \
- $file_name || log_fail \
- "Failed to set $attrname"
+ attrlen="$(((RANDOM % 1000) + 1))"
+ attrvalue="$(random_string \
+ VALID_NAME_CHAR $attrlen)"
+ attr -qs $attrname \
+ -V "$attrvalue" $file_name || \
+ log_fail "Failed to set $attrname"
done
fi
fi
@@ -791,10 +799,11 @@ function rand_set_prop
log_must eval "zfs set $prop='$value' $dtst"
}
-# Generate a recursive checksum of a filesystems contents. Only file
-# data is included in the checksum (no meta data, or xattrs).
+# Generate a recursive checksum of a filesystem which includes the file
+# contents and any associated xattrs.
function recursive_cksum
{
- find $1 -type f -exec sha256sum {} \; | \
+ find $1 -type f -exec sh -c 'sha256sum {}; getfattr \
+ --absolute-names --only-values -d {} | sha256sum' \; | \
sort -k 2 | awk '{ print $1 }' | sha256sum
}
diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh
index 0649beaa3..3c3de86d9 100755
--- a/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh
@@ -65,7 +65,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
# Set atime=off to prevent the recursive_cksum from modifying newfs.
log_must zfs set atime=off $POOL/newfs
-for i in {1..5}; do
+# Due to reduced performance on debug kernels use fewer files by default.
+if is_kmemleak; then
+ nr_files=100
+ passes=2
+else
+ nr_files=1000
+ passes=3
+fi
+
+for i in {1..$passes}; do
# Randomly modify several dataset properties in order to generate
# more interesting incremental send streams.
rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
@@ -76,12 +85,8 @@ for i in {1..5}; do
# Churn the filesystem in such a way that we're likely to be both
# allocating and reallocating objects in the incremental stream.
- #
- # Disable xattrs until the following spill block issue is resolved:
- # https://github.com/openzfs/openzfs/pull/705
- #
- log_must churn_files 1000 524288 $POOL/fs 0
- expected_cksum=$(recursive_cksum /$fs)
+ log_must churn_files $nr_files 524288 $POOL/fs
+ expected_cksum=$(recursive_cksum /$POOL/fs)
# Create a snapshot and use it to send an incremental stream.
this_snap=$((last_snap + 1))
diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh
index 80464e05e..4b89a73d8 100755
--- a/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_files.ksh
@@ -35,6 +35,8 @@
# e) Destroy the incremental stream and old snapshot.
#
+verify_runnable "both"
+
log_assert "Verify incremental receive handles reallocation"
function cleanup
@@ -56,7 +58,16 @@ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
# Set atime=off to prevent the recursive_cksum from modifying newfs.
log_must zfs set atime=off $POOL/newfs
-for i in {1..5}; do
+# Due to reduced performance on debug kernels use fewer files by default.
+if is_kmemleak; then
+ nr_files=100
+ passes=2
+else
+ nr_files=1000
+ passes=3
+fi
+
+for i in {1..$passes}; do
# Randomly modify several dataset properties in order to generate
# more interesting incremental send streams.
rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
@@ -67,8 +78,8 @@ for i in {1..5}; do
# Churn the filesystem in such a way that we're likely to be both
# allocating and reallocating objects in the incremental stream.
- log_must churn_files 1000 524288 $POOL/fs
- expected_cksum=$(recursive_cksum /$fs)
+ log_must churn_files $nr_files 524288 $POOL/fs
+ expected_cksum=$(recursive_cksum /$POOL/fs)
# Create a snapshot and use it to send an incremental stream.
this_snap=$((last_snap + 1))
diff --git a/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh b/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
new file mode 100755
index 000000000..9de732e22
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
@@ -0,0 +1,155 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify spill blocks are correctly preserved.
+#
+# Strategy:
+# 1) Create a set of files each containing some file data.
+# 2) Add enough xattrs to the file to require a spill block.
+# 3) Snapshot and send these files to a new dataset.
+# 4) Modify the files and spill blocks in a variety of ways.
+# 5) Send the changes using an incremental send stream.
+# 6) Verify that all the xattrs (and thus the spill block) were
+# preserved when receiving the incremental stream.
+#
+
+verify_runnable "both"
+
+log_assert "Verify spill blocks are correctly preserved"
+
+function cleanup
+{
+ rm -f $BACKDIR/fs@*
+ destroy_dataset $POOL/fs "-rR"
+ destroy_dataset $POOL/newfs "-rR"
+}
+
+attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
+
+log_onexit cleanup
+
+log_must zfs create $POOL/fs
+log_must zfs set xattr=sa $POOL/fs
+log_must zfs set dnodesize=legacy $POOL/fs
+log_must zfs set recordsize=128k $POOL/fs
+
+# Create 40 files each with a spill block containing xattrs. Each file
+# will be modified in a different way to validate the incremental receive.
+for i in {1..40}; do
+ file="/$POOL/fs/file$i"
+
+ log_must mkfile 16384 $file
+ for j in {1..20}; do
+ log_must attr -qs "testattr$j" -V "$attrvalue" $file
+ done
+done
+
+# Snapshot the pool and send it to the new dataset.
+log_must zfs snapshot $POOL/fs@snap1
+log_must eval "zfs send -e $POOL/fs@snap1 >$BACKDIR/fs@snap1"
+log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap1"
+
+#
+# Modify file[1-6]'s contents but not the spill blocks.
+#
+# file1 - Increase record size; single block
+# file2 - Increase record size; multiple blocks
+# file3 - Truncate file to zero size; single block
+# file4 - Truncate file to smaller size; single block
+# file5 - Truncate file to much larger size; add holes
+# file6 - Truncate file to embedded size; embedded data
+#
+log_must mkfile 32768 /$POOL/fs/file1
+log_must mkfile 1048576 /$POOL/fs/file2
+log_must truncate -s 0 /$POOL/fs/file3
+log_must truncate -s 8192 /$POOL/fs/file4
+log_must truncate -s 1073741824 /$POOL/fs/file5
+log_must truncate -s 50 /$POOL/fs/file6
+
+#
+# Modify file[11-16]'s contents and their spill blocks.
+#
+# file11 - Increase record size; single block
+# file12 - Increase record size; multiple blocks
+# file13 - Truncate file to zero size; single block
+# file14 - Truncate file to smaller size; single block
+# file15 - Truncate file to much larger size; add holes
+# file16 - Truncate file to embedded size; embedded data
+#
+log_must mkfile 32768 /$POOL/fs/file11
+log_must mkfile 1048576 /$POOL/fs/file12
+log_must truncate -s 0 /$POOL/fs/file13
+log_must truncate -s 8192 /$POOL/fs/file14
+log_must truncate -s 1073741824 /$POOL/fs/file15
+log_must truncate -s 50 /$POOL/fs/file16
+
+for i in {11..20}; do
+ log_must attr -qr testattr1 /$POOL/fs/file$i
+done
+
+#
+# Modify file[21-26]'s contents and remove their spill blocks.
+#
+# file21 - Increase record size; single block
+# file22 - Increase record size; multiple blocks
+# file23 - Truncate file to zero size; single block
+# file24 - Truncate file to smaller size; single block
+# file25 - Truncate file to much larger size; add holes
+# file26 - Truncate file to embedded size; embedded data
+#
+log_must mkfile 32768 /$POOL/fs/file21
+log_must mkfile 1048576 /$POOL/fs/file22
+log_must truncate -s 0 /$POOL/fs/file23
+log_must truncate -s 8192 /$POOL/fs/file24
+log_must truncate -s 1073741824 /$POOL/fs/file25
+log_must truncate -s 50 /$POOL/fs/file26
+
+for i in {21..30}; do
+ for j in {1..20}; do
+ log_must attr -qr testattr$j /$POOL/fs/file$i
+ done
+done
+
+#
+# Modify file[31-40]'s spill blocks but not the file contents.
+#
+for i in {31..40}; do
+ file="/$POOL/fs/file$i"
+ log_must attr -qr testattr$(((RANDOM % 20) + 1)) $file
+ log_must attr -qs testattr$(((RANDOM % 20) + 1)) -V "$attrvalue" $file
+done
+
+# Calculate the expected recursive checksum for the source.
+expected_cksum=$(recursive_cksum /$POOL/fs)
+
+# Snapshot the pool and send the incremental snapshot.
+log_must zfs snapshot $POOL/fs@snap2
+log_must eval "zfs send -e -i $POOL/fs@snap1 $POOL/fs@snap2 >$BACKDIR/fs@snap2"
+log_must eval "zfs recv -F $POOL/newfs < $BACKDIR/fs@snap2"
+
+# Validate the received copy using the received recursive checksum.
+actual_cksum=$(recursive_cksum /$POOL/newfs)
+if [[ "$expected_cksum" != "$actual_cksum" ]]; then
+ log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
+fi
+
+log_pass "Verify spill blocks are correctly preserved"