-rw-r--r--  cmd/zhack/zhack.c                                            4
-rw-r--r--  include/sys/dmu.h                                           13
-rw-r--r--  include/sys/dmu_objset.h                                     2
-rw-r--r--  include/sys/fs/zfs.h                                         1
-rw-r--r--  include/sys/zfs_ioctl.h                                     18
-rw-r--r--  include/sys/zfs_quota.h                                     29
-rw-r--r--  lib/libzfs/libzfs_sendrecv.c                                 6
-rw-r--r--  module/os/freebsd/zfs/zfs_vfsops.c                           2
-rw-r--r--  module/os/linux/zfs/zfs_vfsops.c                             2
-rw-r--r--  module/zfs/dmu_objset.c                                     49
-rw-r--r--  module/zfs/dmu_recv.c                                      410
-rw-r--r--  module/zfs/zfs_quota.c                                      61
-rw-r--r--  tests/runfiles/common.run                                    2
-rw-r--r--  tests/zfs-tests/tests/functional/rsend/Makefile.am           1
-rwxr-xr-x  tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh   65
15 files changed, 500 insertions(+), 165 deletions(-)
diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c
index bb974133d..4d958fe43 100644
--- a/cmd/zhack/zhack.c
+++ b/cmd/zhack/zhack.c
@@ -103,8 +103,8 @@ fatal(spa_t *spa, void *tag, const char *fmt, ...)
/* ARGSUSED */
static int
-space_delta_cb(dmu_object_type_t bonustype, void *data,
- uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
+space_delta_cb(dmu_object_type_t bonustype, const void *data,
+ zfs_file_info_t *zoi)
{
/*
* Is it a valid type of object to track?
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 139f3cbdf..5174bdc45 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -1013,10 +1013,17 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
-typedef int objset_used_cb_t(dmu_object_type_t bonustype,
- void *bonus, uint64_t *userp, uint64_t *groupp, uint64_t *projectp);
+typedef struct zfs_file_info {
+ uint64_t zfi_user;
+ uint64_t zfi_group;
+ uint64_t zfi_project;
+ uint64_t zfi_generation;
+} zfs_file_info_t;
+
+typedef int file_info_cb_t(dmu_object_type_t bonustype, const void *data,
+ struct zfs_file_info *zoi);
extern void dmu_objset_register_type(dmu_objset_type_t ost,
- objset_used_cb_t *cb);
+ file_info_cb_t *cb);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
extern void *dmu_objset_get_user(objset_t *os);
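A note on the interface change above: the callback now returns its results through a zfs_file_info_t rather than three separate user/group/project out-pointers, which is what allows the zfi_generation field to be added without changing every caller again. Below is a minimal sketch of a consumer of this API, assuming the same ENOENT ("not a tracked object type") and EEXIST ("ids unchanged") conventions that zpl_get_file_info() follows later in this change; the callback name here is hypothetical:

/* Hypothetical callback; the real ZPL implementation is zpl_get_file_info(). */
static int
my_get_file_info(dmu_object_type_t bonustype, const void *data,
    zfs_file_info_t *zfi)
{
	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
		return (SET_ERROR(ENOENT));	/* not a tracked object type */
	if (data == NULL)
		return (SET_ERROR(EEXIST));	/* ids aren't changing */
	/* ...parse the bonus buffer into zfi... */
	zfi->zfi_user = 0;
	zfi->zfi_group = 0;
	zfi->zfi_project = ZFS_DEFAULT_PROJID;
	zfi->zfi_generation = 0;
	return (0);
}

/* Registered once at filesystem-module init: */
dmu_objset_register_type(DMU_OST_ZFS, my_get_file_info);

dmu_get_file_info(), added below in dmu_objset.c, dispatches to whichever callback was registered for the objset's type.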
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index 9b6614e98..a77131ef1 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -254,6 +254,8 @@ boolean_t dmu_objset_projectquota_enabled(objset_t *os);
boolean_t dmu_objset_projectquota_present(objset_t *os);
boolean_t dmu_objset_projectquota_upgradable(objset_t *os);
void dmu_objset_id_quota_upgrade(objset_t *os);
+int dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype,
+ const void *data, zfs_file_info_t *zfi);
int dmu_fsname(const char *snapname, char *buf);
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index ecdfd42d0..575a4af51 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1336,6 +1336,7 @@ typedef enum {
ZFS_ERR_EXPORT_IN_PROGRESS,
ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR,
ZFS_ERR_STREAM_TRUNCATED,
+ ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH,
} zfs_errno_t;
/*
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index d4ffe70bb..78d33deda 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -107,6 +107,22 @@ typedef enum drr_headertype {
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
/* flag #25 is reserved for the ZSTD compression feature */
#define DMU_BACKUP_FEATURE_HOLDS (1 << 26)
+/*
+ * The SWITCH_TO_LARGE_BLOCKS feature indicates that we can receive
+ * incremental LARGE_BLOCKS streams (those with WRITE records of >128KB) even
+ * if the previous send did not use LARGE_BLOCKS, and thus its large blocks
+ * were split into multiple 128KB WRITE records. (See
+ * flush_write_batch_impl() and receive_object()). Older software that does
+ * not support this flag may encounter a bug when switching to large blocks,
+ * which causes files to incorrectly be zeroed.
+ *
+ * This flag is currently not set on any send streams. In the future, we
+ * intend for incremental send streams of snapshots that have large blocks to
+ * use LARGE_BLOCKS by default, and these streams will also have the
+ * SWITCH_TO_LARGE_BLOCKS feature set. This ensures that streams from the
+ * default use of "zfs send" won't encounter the bug mentioned above.
+ */
+#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
/*
* Mask of all supported backup features
@@ -116,7 +132,7 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
- DMU_BACKUP_FEATURE_REDACTED)
+ DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
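Why the mask update matters: a receiver refuses any stream whose feature flags include bits it does not recognize, so adding the new flag to DMU_BACKUP_FEATURE_MASK is what marks it as understood. A sketch of that gate using the macros from this header (the placement is illustrative; the real check runs when the stream's begin record is validated in dmu_recv.c):

uint64_t featureflags =
    DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);

/* Any flag bit outside DMU_BACKUP_FEATURE_MASK makes the stream unsupported. */
if (!DMU_STREAM_SUPPORTED(featureflags))
	return (SET_ERROR(ENOTSUP));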
diff --git a/include/sys/zfs_quota.h b/include/sys/zfs_quota.h
index ec4dc8f16..b215b8dd0 100644
--- a/include/sys/zfs_quota.h
+++ b/include/sys/zfs_quota.h
@@ -24,23 +24,22 @@
#include <sys/dmu.h>
#include <sys/fs/zfs.h>
-#include <sys/zfs_vfsops.h>
-extern int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
- uint64_t *userp, uint64_t *groupp, uint64_t *projectp);
+struct zfsvfs;
+struct zfs_file_info;
-extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
- const char *domain, uint64_t rid, uint64_t *valuep);
-extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
- uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
-extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
- const char *domain, uint64_t rid, uint64_t quota);
+extern int zpl_get_file_info(dmu_object_type_t,
+ const void *, struct zfs_file_info *);
-extern boolean_t zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
- uint64_t id);
-extern boolean_t zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
- uint64_t id);
-extern boolean_t zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
- uint64_t id);
+extern int zfs_userspace_one(struct zfsvfs *, zfs_userquota_prop_t,
+ const char *, uint64_t, uint64_t *);
+extern int zfs_userspace_many(struct zfsvfs *, zfs_userquota_prop_t,
+ uint64_t *, void *, uint64_t *);
+extern int zfs_set_userquota(struct zfsvfs *, zfs_userquota_prop_t,
+ const char *, uint64_t, uint64_t);
+
+extern boolean_t zfs_id_overobjquota(struct zfsvfs *, uint64_t, uint64_t);
+extern boolean_t zfs_id_overblockquota(struct zfsvfs *, uint64_t, uint64_t);
+extern boolean_t zfs_id_overquota(struct zfsvfs *, uint64_t, uint64_t);
#endif
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 39b5c6df1..3fffc426c 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -4802,6 +4802,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
ioctl_err == ECKSUM);
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
+ case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "incremental send stream requires -L "
+ "(--large-block), to match previous receive."));
+ (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+ break;
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to receive this stream."));
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index 3c37d3faa..317773e44 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -2202,7 +2202,7 @@ zfs_init(void)
*/
zfs_vnodes_adjust();
- dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
+ dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 28ea34a00..ea5971b0c 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -2131,7 +2131,7 @@ zfs_init(void)
{
zfsctl_init();
zfs_znode_init();
- dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
+ dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
register_filesystem(&zpl_fs_type);
}
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 16b93a4fe..d305cee40 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1728,19 +1728,29 @@ dmu_objset_is_dirty(objset_t *os, uint64_t txg)
return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
}
-static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES];
void
-dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+dmu_objset_register_type(dmu_objset_type_t ost, file_info_cb_t *cb)
{
- used_cbs[ost] = cb;
+ file_cbs[ost] = cb;
+}
+
+int
+dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype, const void *data,
+ zfs_file_info_t *zfi)
+{
+ file_info_cb_t *cb = file_cbs[os->os_phys->os_type];
+ if (cb == NULL)
+ return (EINVAL);
+ return (cb(bonustype, data, zfi));
}
boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
- used_cbs[os->os_phys->os_type] != NULL &&
+ file_cbs[os->os_phys->os_type] != NULL &&
DMU_USERUSED_DNODE(os) != NULL);
}
@@ -1754,7 +1764,7 @@ dmu_objset_userobjused_enabled(objset_t *os)
boolean_t
dmu_objset_projectquota_enabled(objset_t *os)
{
- return (used_cbs[os->os_phys->os_type] != NULL &&
+ return (file_cbs[os->os_phys->os_type] != NULL &&
DMU_PROJECTUSED_DNODE(os) != NULL &&
spa_feature_is_enabled(os->os_spa, SPA_FEATURE_PROJECT_QUOTA));
}
@@ -2089,9 +2099,6 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
objset_t *os = dn->dn_objset;
void *data = NULL;
dmu_buf_impl_t *db = NULL;
- uint64_t *user = NULL;
- uint64_t *group = NULL;
- uint64_t *project = NULL;
int flags = dn->dn_id_flags;
int error;
boolean_t have_spill = B_FALSE;
@@ -2145,23 +2152,23 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
return;
}
- if (before) {
- ASSERT(data);
- user = &dn->dn_olduid;
- group = &dn->dn_oldgid;
- project = &dn->dn_oldprojid;
- } else if (data) {
- user = &dn->dn_newuid;
- group = &dn->dn_newgid;
- project = &dn->dn_newprojid;
- }
-
/*
* Must always call the callback in case the object
* type has changed and that type isn't an object type to track
*/
- error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
- user, group, project);
+ zfs_file_info_t zfi;
+ error = file_cbs[os->os_phys->os_type](dn->dn_bonustype, data, &zfi);
+
+ if (before) {
+ ASSERT(data);
+ dn->dn_olduid = zfi.zfi_user;
+ dn->dn_oldgid = zfi.zfi_group;
+ dn->dn_oldprojid = zfi.zfi_project;
+ } else if (data) {
+ dn->dn_newuid = zfi.zfi_user;
+ dn->dn_newgid = zfi.zfi_group;
+ dn->dn_newprojid = zfi.zfi_project;
+ }
/*
* Preserve existing uid/gid when the callback can't determine
diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
index 29fbe854d..17ebeb682 100644
--- a/module/zfs/dmu_recv.c
+++ b/module/zfs/dmu_recv.c
@@ -104,6 +104,7 @@ struct receive_writer_arg {
boolean_t resumable;
boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */
boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
+ boolean_t full; /* this is a full send stream */
uint64_t last_object;
uint64_t last_offset;
uint64_t max_object; /* highest object ID referenced in stream */
@@ -333,6 +334,21 @@ redact_check(dmu_recv_begin_arg_t *drba, dsl_dataset_t *origin)
return (ret);
}
+/*
+ * If we previously received a stream with --large-block, we don't support
+ * receiving an incremental on top of it without --large-block. This avoids
+ * forcing a read-modify-write or trying to re-aggregate a string of WRITE
+ * records.
+ */
+static int
+recv_check_large_blocks(dsl_dataset_t *ds, uint64_t featureflags)
+{
+ if (dsl_dataset_feature_is_active(ds, SPA_FEATURE_LARGE_BLOCKS) &&
+ !(featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS))
+ return (SET_ERROR(ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH));
+ return (0);
+}
+
static int
recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
uint64_t fromguid, uint64_t featureflags)
@@ -445,6 +461,12 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
return (SET_ERROR(EINVAL));
}
+ error = recv_check_large_blocks(snap, featureflags);
+ if (error != 0) {
+ dsl_dataset_rele(snap, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele(snap, FTAG);
} else {
/* if full, then must be forced */
@@ -479,7 +501,6 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
}
return (0);
-
}
/*
@@ -725,6 +746,13 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
}
}
+ error = recv_check_large_blocks(ds, featureflags);
+ if (error != 0) {
+ dsl_dataset_rele_flags(origin, dsflags, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele_flags(origin, dsflags, FTAG);
}
@@ -1050,6 +1078,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
}
}
}
+
+ error = recv_check_large_blocks(ds, drc->drc_featureflags);
+ if (error != 0) {
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (0);
}
@@ -1289,14 +1324,251 @@ save_resume_state(struct receive_writer_arg *rwa,
rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
}
+static int
+receive_object_is_same_generation(objset_t *os, uint64_t object,
+ dmu_object_type_t old_bonus_type, dmu_object_type_t new_bonus_type,
+ const void *new_bonus, boolean_t *samegenp)
+{
+ zfs_file_info_t zoi;
+ int err;
+
+ dmu_buf_t *old_bonus_dbuf;
+ err = dmu_bonus_hold(os, object, FTAG, &old_bonus_dbuf);
+ if (err != 0)
+ return (err);
+ err = dmu_get_file_info(os, old_bonus_type, old_bonus_dbuf->db_data,
+ &zoi);
+ dmu_buf_rele(old_bonus_dbuf, FTAG);
+ if (err != 0)
+ return (err);
+ uint64_t old_gen = zoi.zfi_generation;
+
+ err = dmu_get_file_info(os, new_bonus_type, new_bonus, &zoi);
+ if (err != 0)
+ return (err);
+ uint64_t new_gen = zoi.zfi_generation;
+
+ *samegenp = (old_gen == new_gen);
+ return (0);
+}
+
+static int
+receive_handle_existing_object(const struct receive_writer_arg *rwa,
+ const struct drr_object *drro, const dmu_object_info_t *doi,
+ const void *bonus_data,
+ uint64_t *object_to_hold, uint32_t *new_blksz)
+{
+ uint32_t indblksz = drro->drr_indblkshift ?
+ 1ULL << drro->drr_indblkshift : 0;
+ int nblkptr = deduce_nblkptr(drro->drr_bonustype,
+ drro->drr_bonuslen);
+ uint8_t dn_slots = drro->drr_dn_slots != 0 ?
+ drro->drr_dn_slots : DNODE_MIN_SLOTS;
+ boolean_t do_free_range = B_FALSE;
+ int err;
+
+ *object_to_hold = drro->drr_object;
+
+ /* nblkptr should be bounded by the bonus size and type */
+ if (rwa->raw && nblkptr != drro->drr_nblkptr)
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * After the previous send stream, the sending system may
+ * have freed this object, and then happened to re-allocate
+ * this object number in a later txg. In this case, we are
+ * receiving a different logical file, and the block size may
+ * appear to be different. i.e. we may have a different
+ * block size for this object than what the send stream says.
+ * In this case we need to remove the object's contents,
+ * so that its structure can be changed and then its contents
+ * entirely replaced by subsequent WRITE records.
+ *
+ * If this is a -L (--large-block) incremental stream, and
+ * the previous stream was not -L, the block size may appear
+ * to increase. i.e. we may have a smaller block size for
+ * this object than what the send stream says. In this case
+ * we need to keep the object's contents and block size
+ * intact, so that we don't lose parts of the object's
+ * contents that are not changed by this incremental send
+ * stream.
+ *
+ * We can distinguish between the two above cases by using
+ * the ZPL's generation number (see
+ * receive_object_is_same_generation()). However, we only
+ * want to rely on the generation number when absolutely
+ * necessary, because with raw receives, the generation is
+ * encrypted. We also want to minimize dependence on the
+ * ZPL, so that other types of datasets can also be received
+	 * (e.g. ZVOLs, although note that ZVOLs currently do not
+ * reallocate their objects or change their structure).
+ * Therefore, we check a number of different cases where we
+ * know it is safe to discard the object's contents, before
+ * using the ZPL's generation number to make the above
+ * distinction.
+ */
+ if (drro->drr_blksz != doi->doi_data_block_size) {
+ if (rwa->raw) {
+ /*
+ * RAW streams always have large blocks, so
+ * we are sure that the data is not needed
+			 * due to changing --large-block to be on,
+			 * which is fortunate since the bonus buffer
+ * (which contains the ZPL generation) is
+ * encrypted, and the key might not be
+ * loaded.
+ */
+ do_free_range = B_TRUE;
+ } else if (rwa->full) {
+ /*
+ * This is a full send stream, so it always
+ * replaces what we have. Even if the
+ * generation numbers happen to match, this
+ * can not actually be the same logical file.
+ * This is relevant when receiving a full
+ * send as a clone.
+ */
+ do_free_range = B_TRUE;
+ } else if (drro->drr_type !=
+ DMU_OT_PLAIN_FILE_CONTENTS ||
+ doi->doi_type != DMU_OT_PLAIN_FILE_CONTENTS) {
+ /*
+ * PLAIN_FILE_CONTENTS are the only type of
+ * objects that have ever been stored with
+ * large blocks, so we don't need the special
+ * logic below. ZAP blocks can shrink (when
+ * there's only one block), so we don't want
+ * to hit the error below about block size
+ * only increasing.
+ */
+ do_free_range = B_TRUE;
+ } else if (doi->doi_max_offset <=
+ doi->doi_data_block_size) {
+ /*
+ * There is only one block. We can free it,
+ * because its contents will be replaced by a
+ * WRITE record. This can not be the no-L ->
+ * -L case, because the no-L case would have
+ * resulted in multiple blocks. If we
+ * supported -L -> no-L, it would not be safe
+ * to free the file's contents. Fortunately,
+ * that is not allowed (see
+ * recv_check_large_blocks()).
+ */
+ do_free_range = B_TRUE;
+ } else {
+ boolean_t is_same_gen;
+ err = receive_object_is_same_generation(rwa->os,
+ drro->drr_object, doi->doi_bonus_type,
+ drro->drr_bonustype, bonus_data, &is_same_gen);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+
+ if (is_same_gen) {
+ /*
+ * This is the same logical file, and
+ * the block size must be increasing.
+ * It could only decrease if
+ * --large-block was changed to be
+ * off, which is checked in
+ * recv_check_large_blocks().
+ */
+ if (drro->drr_blksz <=
+ doi->doi_data_block_size)
+ return (SET_ERROR(EINVAL));
+ /*
+ * We keep the existing blocksize and
+ * contents.
+ */
+ *new_blksz =
+ doi->doi_data_block_size;
+ } else {
+ do_free_range = B_TRUE;
+ }
+ }
+ }
+
+ /* nblkptr can only decrease if the object was reallocated */
+ if (nblkptr < doi->doi_nblkptr)
+ do_free_range = B_TRUE;
+
+ /* number of slots can only change on reallocation */
+ if (dn_slots != doi->doi_dnodesize >> DNODE_SHIFT)
+ do_free_range = B_TRUE;
+
+ /*
+ * For raw sends we also check a few other fields to
+ * ensure we are preserving the objset structure exactly
+	 * as it was on the send side:
+ * - A changed indirect block size
+ * - A smaller nlevels
+ */
+ if (rwa->raw) {
+ if (indblksz != doi->doi_metadata_block_size)
+ do_free_range = B_TRUE;
+ if (drro->drr_nlevels < doi->doi_indirection)
+ do_free_range = B_TRUE;
+ }
+
+ if (do_free_range) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ 0, DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * The dmu does not currently support decreasing nlevels
+ * or changing the number of dnode slots on an object. For
+ * non-raw sends, this does not matter and the new object
+ * can just use the previous one's nlevels. For raw sends,
+ * however, the structure of the received dnode (including
+ * nlevels and dnode slots) must match that of the send
+ * side. Therefore, instead of using dmu_object_reclaim(),
+ * we must free the object completely and call
+	 * dmu_object_claim_dnsize().
+ */
+ if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) ||
+ dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
+ err = dmu_free_long_object(rwa->os, drro->drr_object);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ *object_to_hold = DMU_NEW_OBJECT;
+ }
+
+ /*
+ * For raw receives, free everything beyond the new incoming
+ * maxblkid. Normally this would be done with a DRR_FREE
+ * record that would come after this DRR_OBJECT record is
+ * processed. However, for raw receives we manually set the
+ * maxblkid from the drr_maxblkid and so we must first free
+ * everything above that blkid to ensure the DMU is always
+ * consistent with itself. We will never free the first block
+ * of the object here because a maxblkid of 0 could indicate
+ * an object with a single block or one with no blocks. This
+ * free may be skipped when dmu_free_long_range() was called
+ * above since it covers the entire object's contents.
+ */
+ if (rwa->raw && *object_to_hold != DMU_NEW_OBJECT && !do_free_range) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ (drro->drr_maxblkid + 1) * doi->doi_data_block_size,
+ DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
+ return (0);
+}
+
noinline static int
receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
void *data)
{
dmu_object_info_t doi;
dmu_tx_t *tx;
- uint64_t object;
int err;
+ uint32_t new_blksz = drro->drr_blksz;
uint8_t dn_slots = drro->drr_dn_slots != 0 ?
drro->drr_dn_slots : DNODE_MIN_SLOTS;
@@ -1360,86 +1632,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
* Raw receives will also check that the indirect structure of the
* dnode hasn't changed.
*/
+ uint64_t object_to_hold;
if (err == 0) {
- uint32_t indblksz = drro->drr_indblkshift ?
- 1ULL << drro->drr_indblkshift : 0;
- int nblkptr = deduce_nblkptr(drro->drr_bonustype,
- drro->drr_bonuslen);
- boolean_t did_free = B_FALSE;
-
- object = drro->drr_object;
-
- /* nblkptr should be bounded by the bonus size and type */
- if (rwa->raw && nblkptr != drro->drr_nblkptr)
- return (SET_ERROR(EINVAL));
-
- /*
- * Check for indicators that the object was freed and
- * reallocated. For all sends, these indicators are:
- * - A changed block size
- * - A smaller nblkptr
- * - A changed dnode size
- * For raw sends we also check a few other fields to
- * ensure we are preserving the objset structure exactly
- * as it was on the receive side:
- * - A changed indirect block size
- * - A smaller nlevels
- */
- if (drro->drr_blksz != doi.doi_data_block_size ||
- nblkptr < doi.doi_nblkptr ||
- dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
- (rwa->raw &&
- (indblksz != doi.doi_metadata_block_size ||
- drro->drr_nlevels < doi.doi_indirection))) {
- err = dmu_free_long_range(rwa->os, drro->drr_object,
- 0, DMU_OBJECT_END);
- if (err != 0)
- return (SET_ERROR(EINVAL));
- else
- did_free = B_TRUE;
- }
-
- /*
- * The dmu does not currently support decreasing nlevels
- * or changing the number of dnode slots on an object. For
- * non-raw sends, this does not matter and the new object
- * can just use the previous one's nlevels. For raw sends,
- * however, the structure of the received dnode (including
- * nlevels and dnode slots) must match that of the send
- * side. Therefore, instead of using dmu_object_reclaim(),
- * we must free the object completely and call
- * dmu_object_claim_dnsize() instead.
- */
- if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
- dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
- err = dmu_free_long_object(rwa->os, drro->drr_object);
- if (err != 0)
- return (SET_ERROR(EINVAL));
-
- txg_wait_synced(dmu_objset_pool(rwa->os), 0);
- object = DMU_NEW_OBJECT;
- }
-
- /*
- * For raw receives, free everything beyond the new incoming
- * maxblkid. Normally this would be done with a DRR_FREE
- * record that would come after this DRR_OBJECT record is
- * processed. However, for raw receives we manually set the
- * maxblkid from the drr_maxblkid and so we must first free
- * everything above that blkid to ensure the DMU is always
- * consistent with itself. We will never free the first block
- * of the object here because a maxblkid of 0 could indicate
- * an object with a single block or one with no blocks. This
- * free may be skipped when dmu_free_long_range() was called
- * above since it covers the entire object's contents.
- */
- if (rwa->raw && object != DMU_NEW_OBJECT && !did_free) {
- err = dmu_free_long_range(rwa->os, drro->drr_object,
- (drro->drr_maxblkid + 1) * doi.doi_data_block_size,
- DMU_OBJECT_END);
- if (err != 0)
- return (SET_ERROR(EINVAL));
- }
+ err = receive_handle_existing_object(rwa, drro, &doi, data,
+ &object_to_hold, &new_blksz);
} else if (err == EEXIST) {
/*
* The object requested is currently an interior slot of a
@@ -1454,10 +1650,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
return (SET_ERROR(EINVAL));
/* object was freed and we are about to allocate a new one */
- object = DMU_NEW_OBJECT;
+ object_to_hold = DMU_NEW_OBJECT;
} else {
/* object is free and we are about to allocate a new one */
- object = DMU_NEW_OBJECT;
+ object_to_hold = DMU_NEW_OBJECT;
}
/*
@@ -1492,27 +1688,27 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
tx = dmu_tx_create(rwa->os);
- dmu_tx_hold_bonus(tx, object);
- dmu_tx_hold_write(tx, object, 0, 0);
+ dmu_tx_hold_bonus(tx, object_to_hold);
+ dmu_tx_hold_write(tx, object_to_hold, 0, 0);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
- if (object == DMU_NEW_OBJECT) {
+ if (object_to_hold == DMU_NEW_OBJECT) {
/* Currently free, wants to be allocated */
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
- drro->drr_type, drro->drr_blksz,
+ drro->drr_type, new_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
dn_slots << DNODE_SHIFT, tx);
} else if (drro->drr_type != doi.doi_type ||
- drro->drr_blksz != doi.doi_data_block_size ||
+ new_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
/* Currently allocated, but with different properties */
err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
- drro->drr_type, drro->drr_blksz,
+ drro->drr_type, new_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
dn_slots << DNODE_SHIFT, rwa->spill ?
DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
@@ -1578,6 +1774,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
* For non-new objects block size and indirect block
* shift cannot change and nlevels can only increase.
*/
+ ASSERT3U(new_blksz, ==, drro->drr_blksz);
VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
drro->drr_blksz, drro->drr_indblkshift, tx));
VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
@@ -1707,6 +1904,40 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
DRR_WRITE_PAYLOAD_SIZE(drrw));
}
+ /*
+ * If we are receiving an incremental large-block stream into
+ * a dataset that previously did a non-large-block receive,
+ * the WRITE record may be larger than the object's block
+ * size. dmu_assign_arcbuf_by_dnode() handles this as long
+ * as the arcbuf is not compressed, so decompress it here if
+ * necessary.
+ */
+ if (drrw->drr_logical_size != dn->dn_datablksz &&
+ arc_get_compression(abuf) != ZIO_COMPRESS_OFF) {
+ ASSERT3U(drrw->drr_logical_size, >, dn->dn_datablksz);
+ zbookmark_phys_t zb = {
+ .zb_objset = dmu_objset_id(rwa->os),
+ .zb_object = rwa->last_object,
+ .zb_level = 0,
+ .zb_blkid =
+ drrw->drr_offset >> dn->dn_datablkshift,
+ };
+
+ /*
+ * The size of loaned arc bufs is counted in
+ * arc_loaned_bytes. When we untransform
+ * (decompress) the buf, its size increases. To
+ * ensure that arc_loaned_bytes remains accurate, we
+ * need to return (un-loan) the buf (with its
+ * compressed size) and then re-loan it (with its
+ * new, uncompressed size).
+ */
+ arc_return_buf(abuf, FTAG);
+ VERIFY0(arc_untransform(abuf, dmu_objset_spa(rwa->os),
+ &zb, B_FALSE));
+ arc_loan_inuse_buf(abuf, FTAG);
+ }
+
err = dmu_assign_arcbuf_by_dnode(dn,
drrw->drr_offset, abuf, tx);
if (err != 0) {
@@ -2710,6 +2941,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
rwa->resumable = drc->drc_resumable;
rwa->raw = drc->drc_raw;
rwa->spill = drc->drc_spill;
+ rwa->full = (drc->drc_drr_begin->drr_u.drr_begin.drr_fromguid == 0);
rwa->os->os_raw_receive = drc->drc_raw;
list_create(&rwa->write_batch, sizeof (struct receive_record_arg),
offsetof(struct receive_record_arg, node.bqn_node));
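To summarize the dmu_recv.c changes above: when an existing object's block size disagrees with the incoming DRR_OBJECT record, receive_handle_existing_object() must decide whether the mismatch means the object was reallocated (so its contents can be freed) or whether this is a no-L to -L transition on the same file (so its contents and block size must be kept). Distilled into a standalone predicate, the decision reads roughly as follows; this is an illustrative condensation of the code above, not a verbatim excerpt:

/*
 * Sketch: may we free the object's contents when
 * drro->drr_blksz != doi->doi_data_block_size?
 */
static boolean_t
can_free_on_blksz_mismatch(const struct receive_writer_arg *rwa,
    const struct drr_object *drro, const dmu_object_info_t *doi,
    boolean_t is_same_gen)
{
	if (rwa->raw)
		return (B_TRUE);	/* raw streams are always large-block */
	if (rwa->full)
		return (B_TRUE);	/* a full stream replaces everything */
	if (drro->drr_type != DMU_OT_PLAIN_FILE_CONTENTS ||
	    doi->doi_type != DMU_OT_PLAIN_FILE_CONTENTS)
		return (B_TRUE);	/* only plain files ever had large blocks */
	if (doi->doi_max_offset <= doi->doi_data_block_size)
		return (B_TRUE);	/* single block, about to be rewritten */
	/* Same generation: same file growing its block size (no-L -> -L). */
	return (!is_same_gen);
}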
diff --git a/module/zfs/zfs_quota.c b/module/zfs/zfs_quota.c
index 6c83f79c9..e61db5c7a 100644
--- a/module/zfs/zfs_quota.c
+++ b/module/zfs/zfs_quota.c
@@ -39,21 +39,17 @@
#include <sys/zfs_znode.h>
int
-zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
- uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
+zpl_get_file_info(dmu_object_type_t bonustype, const void *data,
+ zfs_file_info_t *zoi)
{
- sa_hdr_phys_t sa;
- sa_hdr_phys_t *sap = data;
- uint64_t flags;
- int hdrsize;
- boolean_t swap = B_FALSE;
-
/*
* Is it a valid type of object to track?
*/
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
return (SET_ERROR(ENOENT));
+ zoi->zfi_project = ZFS_DEFAULT_PROJID;
+
/*
* If we have a NULL data pointer
* then assume the id's aren't changing and
@@ -64,52 +60,55 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
return (SET_ERROR(EEXIST));
if (bonustype == DMU_OT_ZNODE) {
- znode_phys_t *znp = data;
- *userp = znp->zp_uid;
- *groupp = znp->zp_gid;
- *projectp = ZFS_DEFAULT_PROJID;
+ const znode_phys_t *znp = data;
+ zoi->zfi_user = znp->zp_uid;
+ zoi->zfi_group = znp->zp_gid;
+ zoi->zfi_generation = znp->zp_gen;
return (0);
}
+ const sa_hdr_phys_t *sap = data;
if (sap->sa_magic == 0) {
/*
* This should only happen for newly created files
* that haven't had the znode data filled in yet.
*/
- *userp = 0;
- *groupp = 0;
- *projectp = ZFS_DEFAULT_PROJID;
+ zoi->zfi_user = 0;
+ zoi->zfi_group = 0;
+ zoi->zfi_generation = 0;
return (0);
}
- sa = *sap;
+ sa_hdr_phys_t sa = *sap;
+ boolean_t swap = B_FALSE;
if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
sa.sa_magic = SA_MAGIC;
sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
swap = B_TRUE;
- } else {
- VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
}
+ VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
- hdrsize = sa_hdrsize(&sa);
+ int hdrsize = sa_hdrsize(&sa);
VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
- *userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET));
- *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET));
- flags = *((uint64_t *)((uintptr_t)data + hdrsize + SA_FLAGS_OFFSET));
+ uintptr_t data_after_hdr = (uintptr_t)data + hdrsize;
+ zoi->zfi_user = *((uint64_t *)(data_after_hdr + SA_UID_OFFSET));
+ zoi->zfi_group = *((uint64_t *)(data_after_hdr + SA_GID_OFFSET));
+ zoi->zfi_generation = *((uint64_t *)(data_after_hdr + SA_GEN_OFFSET));
+ uint64_t flags = *((uint64_t *)(data_after_hdr + SA_FLAGS_OFFSET));
if (swap)
flags = BSWAP_64(flags);
- if (flags & ZFS_PROJID)
- *projectp = *((uint64_t *)((uintptr_t)data + hdrsize +
- SA_PROJID_OFFSET));
- else
- *projectp = ZFS_DEFAULT_PROJID;
+ if (flags & ZFS_PROJID) {
+ zoi->zfi_project =
+ *((uint64_t *)(data_after_hdr + SA_PROJID_OFFSET));
+ }
if (swap) {
- *userp = BSWAP_64(*userp);
- *groupp = BSWAP_64(*groupp);
- *projectp = BSWAP_64(*projectp);
+ zoi->zfi_user = BSWAP_64(zoi->zfi_user);
+ zoi->zfi_group = BSWAP_64(zoi->zfi_group);
+ zoi->zfi_project = BSWAP_64(zoi->zfi_project);
+ zoi->zfi_generation = BSWAP_64(zoi->zfi_generation);
}
return (0);
}
@@ -468,7 +467,7 @@ zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
zfs_id_overobjquota(zfsvfs, usedobj, id));
}
-EXPORT_SYMBOL(zfs_space_delta_cb);
+EXPORT_SYMBOL(zpl_get_file_info);
EXPORT_SYMBOL(zfs_userspace_one);
EXPORT_SYMBOL(zfs_userspace_many);
EXPORT_SYMBOL(zfs_set_userquota);
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index cbad90ad1..d8c109eb7 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -781,7 +781,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
'send-c_mixed_compression', 'send-c_stream_size_estimate',
'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
- 'send-c_recv_dedup', 'send_encrypted_hierarchy',
+ 'send-c_recv_dedup', 'send-L_toggle', 'send_encrypted_hierarchy',
'send_encrypted_props', 'send_encrypted_truncated_files',
'send_freeobjects', 'send_realloc_files',
'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
index 7728a6481..ab3a1c6c3 100644
--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
+++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
@@ -41,6 +41,7 @@ dist_pkgdata_SCRIPTS = \
send-c_volume.ksh \
send-c_zstreamdump.ksh \
send-cpL_varied_recsize.ksh \
+ send-L_toggle.ksh \
send_freeobjects.ksh \
send_partial_dataset.ksh \
send_realloc_dnode_size.ksh \
diff --git a/tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh b/tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh
new file mode 100755
index 000000000..483efcc60
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/rsend/send-L_toggle.ksh
@@ -0,0 +1,65 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify that send -L can be changed to on in an incremental.
+# Verify that send -L can not be turned off in an incremental.
+#
+
+function cleanup
+{
+ log_must_busy zfs destroy -r $TESTPOOL/fs
+ log_must_busy zfs destroy -r $TESTPOOL/recv
+}
+
+verify_runnable "both"
+
+log_assert "Verify toggling send -L works as expected"
+log_onexit cleanup
+
+log_must zfs create -o compression=on -o recordsize=1m $TESTPOOL/fs
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/fs/file bs=1024 count=1500
+
+log_must zfs snapshot $TESTPOOL/fs@snap
+
+log_must dd if=/dev/urandom of=/$TESTPOOL/fs/file bs=1024 count=1500 conv=notrunc seek=2048
+
+log_must zfs snapshot $TESTPOOL/fs@snap2
+
+log_must zfs create $TESTPOOL/recv
+
+log_must zfs send -c $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/noL-noL
+log_must zfs send -c -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/noL-noL
+log_must diff /$TESTPOOL/fs/file /$TESTPOOL/recv/noL-noL/file
+
+log_must zfs send -c -L $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/L-L
+log_must zfs send -c -L -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/L-L
+log_must diff /$TESTPOOL/fs/file /$TESTPOOL/recv/L-L/file
+
+log_must zfs send -c $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/noL-L
+log_must zfs send -c -L -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/noL-L
+log_must diff /$TESTPOOL/fs/file /$TESTPOOL/recv/noL-L/file
+
+log_must zfs send -c -L $TESTPOOL/fs@snap | zfs recv $TESTPOOL/recv/L-noL
+log_mustnot zfs send -c -i @snap $TESTPOOL/fs@snap2 | zfs recv $TESTPOOL/recv/L-noL
+log_must diff /$TESTPOOL/fs/.zfs/snapshot/snap/file /$TESTPOOL/recv/L-noL/file
+
+log_pass "Verify toggling send -L works as expected"