path: root/module/zfs
author     Matthew Ahrens <[email protected]>  2020-06-09 10:41:01 -0700
committer  GitHub <[email protected]>          2020-06-09 10:41:01 -0700
commit     7bcb7f0840d1857370dd1f9ee0ad48f9b7939dfd (patch)
tree       5582990412f2058fe8b796dbe240205bba027dd0 /module/zfs
parent     6722be2823b5ef39d647e440541806c72b3dbf9b (diff)
File incorrectly zeroed when receiving incremental stream that toggles -L
Background:

By increasing the recordsize property above the default of 128KB, a
filesystem may have "large" blocks.  By default, a send stream of such a
filesystem does not contain large WRITE records, instead it decreases
objects' block sizes to 128KB and splits the large blocks into 128KB
blocks, allowing the large-block filesystem to be received by a system
that does not support the `large_blocks` feature.  A send stream
generated by `zfs send -L` (or `--large-block`) preserves the large
block size on the receiving system, by using large WRITE records.

When receiving an incremental send stream for a filesystem with large
blocks, if the send stream's -L flag was toggled, a bug is encountered
in which the file's contents are incorrectly zeroed out.  The contents
of any blocks that were not modified by this send stream will be lost.
"Toggled" means that the previous send used `-L`, but this incremental
does not use `-L` (-L to no-L); or that the previous send did not use
`-L`, but this incremental does use `-L` (no-L to -L).

Changes:

This commit addresses the problem with several changes to the semantics
of zfs send/receive:

1. "-L to no-L" incrementals are rejected.  If the previous send used
`-L`, but this incremental does not use `-L`, the `zfs receive` will
fail with this error message:

    incremental send stream requires -L (--large-block), to match
    previous receive.

2. "no-L to -L" incrementals are handled correctly, preserving the
smaller (128KB) block size of any already-received files that used
large blocks on the sending system but were split by `zfs send` without
the `-L` flag.

3. A new send stream format flag is added, `SWITCH_TO_LARGE_BLOCKS`.
This feature indicates that we can correctly handle "no-L to -L"
incrementals.  This flag is currently not set on any send streams.  In
the future, we intend for incremental send streams of snapshots that
have large blocks to use `-L` by default, and these streams will also
have the `SWITCH_TO_LARGE_BLOCKS` feature set.  This ensures that
streams from the default use of `zfs send` won't encounter the bug
mentioned above, because they can't be received by software with the
bug.

Implementation notes:

To facilitate accessing the ZPL's generation number,
`zfs_space_delta_cb()` has been renamed to `zpl_get_file_info()` and
restructured to fill in a struct with ZPL-specific info including owner
and generation.

In the "no-L to -L" case, if this is a compressed send stream (from
`zfs send -cL`), large WRITE records that are being written to small
(128KB) blocksize files need to be decompressed so that they can be
written split up into multiple blocks.  The zio pipeline will recompress
each smaller block individually.

A new test case, `send-L_toggle`, is added, which tests the "no-L to
-L" case and verifies that we get an error for the "-L to no-L" case.

Reviewed-by: Paul Dagnelie <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matthew Ahrens <[email protected]>
Closes #6224
Closes #10383
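A minimal sketch of the toggle scenarios above, assuming a source
filesystem with recordsize larger than 128KB; the pool, dataset, and
snapshot names (tank/fs, tank/recv, @snapN) are placeholders, not part
of this commit:

    # Full send without -L: large blocks are split into 128KB WRITE
    # records, so the receiver does not need the large_blocks feature.
    $ zfs send tank/fs@snap1 | zfs receive tank/recv

    # "no-L to -L": with this commit, the incremental is received
    # correctly; files already received in split form keep their
    # smaller (128KB) block size.
    $ zfs send -L -i @snap1 tank/fs@snap2 | zfs receive tank/recv

    # "-L to no-L": once a -L stream has been received, a later
    # incremental without -L is rejected, failing with the error:
    #   incremental send stream requires -L (--large-block), to
    #   match previous receive.
    $ zfs send -i @snap2 tank/fs@snap3 | zfs receive tank/recv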
Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/dmu_objset.c  |  49
-rw-r--r--  module/zfs/dmu_recv.c    | 410
-rw-r--r--  module/zfs/zfs_quota.c   |  61
3 files changed, 379 insertions(+), 141 deletions(-)
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 16b93a4fe..d305cee40 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1728,19 +1728,29 @@ dmu_objset_is_dirty(objset_t *os, uint64_t txg)
return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
}
-static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES];
void
-dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+dmu_objset_register_type(dmu_objset_type_t ost, file_info_cb_t *cb)
{
- used_cbs[ost] = cb;
+ file_cbs[ost] = cb;
+}
+
+int
+dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype, const void *data,
+ zfs_file_info_t *zfi)
+{
+ file_info_cb_t *cb = file_cbs[os->os_phys->os_type];
+ if (cb == NULL)
+ return (EINVAL);
+ return (cb(bonustype, data, zfi));
}
boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
- used_cbs[os->os_phys->os_type] != NULL &&
+ file_cbs[os->os_phys->os_type] != NULL &&
DMU_USERUSED_DNODE(os) != NULL);
}
@@ -1754,7 +1764,7 @@ dmu_objset_userobjused_enabled(objset_t *os)
boolean_t
dmu_objset_projectquota_enabled(objset_t *os)
{
- return (used_cbs[os->os_phys->os_type] != NULL &&
+ return (file_cbs[os->os_phys->os_type] != NULL &&
DMU_PROJECTUSED_DNODE(os) != NULL &&
spa_feature_is_enabled(os->os_spa, SPA_FEATURE_PROJECT_QUOTA));
}
@@ -2089,9 +2099,6 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
objset_t *os = dn->dn_objset;
void *data = NULL;
dmu_buf_impl_t *db = NULL;
- uint64_t *user = NULL;
- uint64_t *group = NULL;
- uint64_t *project = NULL;
int flags = dn->dn_id_flags;
int error;
boolean_t have_spill = B_FALSE;
@@ -2145,23 +2152,23 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
return;
}
- if (before) {
- ASSERT(data);
- user = &dn->dn_olduid;
- group = &dn->dn_oldgid;
- project = &dn->dn_oldprojid;
- } else if (data) {
- user = &dn->dn_newuid;
- group = &dn->dn_newgid;
- project = &dn->dn_newprojid;
- }
-
/*
* Must always call the callback in case the object
* type has changed and that type isn't an object type to track
*/
- error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
- user, group, project);
+ zfs_file_info_t zfi;
+ error = file_cbs[os->os_phys->os_type](dn->dn_bonustype, data, &zfi);
+
+ if (before) {
+ ASSERT(data);
+ dn->dn_olduid = zfi.zfi_user;
+ dn->dn_oldgid = zfi.zfi_group;
+ dn->dn_oldprojid = zfi.zfi_project;
+ } else if (data) {
+ dn->dn_newuid = zfi.zfi_user;
+ dn->dn_newgid = zfi.zfi_group;
+ dn->dn_newprojid = zfi.zfi_project;
+ }
/*
* Preserve existing uid/gid when the callback can't determine
diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
index 29fbe854d..17ebeb682 100644
--- a/module/zfs/dmu_recv.c
+++ b/module/zfs/dmu_recv.c
@@ -104,6 +104,7 @@ struct receive_writer_arg {
boolean_t resumable;
boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */
boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */
+ boolean_t full; /* this is a full send stream */
uint64_t last_object;
uint64_t last_offset;
uint64_t max_object; /* highest object ID referenced in stream */
@@ -333,6 +334,21 @@ redact_check(dmu_recv_begin_arg_t *drba, dsl_dataset_t *origin)
return (ret);
}
+/*
+ * If we previously received a stream with --large-block, we don't support
+ * receiving an incremental on top of it without --large-block. This avoids
+ * forcing a read-modify-write or trying to re-aggregate a string of WRITE
+ * records.
+ */
+static int
+recv_check_large_blocks(dsl_dataset_t *ds, uint64_t featureflags)
+{
+ if (dsl_dataset_feature_is_active(ds, SPA_FEATURE_LARGE_BLOCKS) &&
+ !(featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS))
+ return (SET_ERROR(ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH));
+ return (0);
+}
+
static int
recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
uint64_t fromguid, uint64_t featureflags)
@@ -445,6 +461,12 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
return (SET_ERROR(EINVAL));
}
+ error = recv_check_large_blocks(snap, featureflags);
+ if (error != 0) {
+ dsl_dataset_rele(snap, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele(snap, FTAG);
} else {
/* if full, then must be forced */
@@ -479,7 +501,6 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
}
return (0);
-
}
/*
@@ -725,6 +746,13 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
}
}
+ error = recv_check_large_blocks(ds, featureflags);
+ if (error != 0) {
+ dsl_dataset_rele_flags(origin, dsflags, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele_flags(origin, dsflags, FTAG);
}
@@ -1050,6 +1078,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
}
}
}
+
+ error = recv_check_large_blocks(ds, drc->drc_featureflags);
+ if (error != 0) {
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (0);
}
@@ -1289,14 +1324,251 @@ save_resume_state(struct receive_writer_arg *rwa,
rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
}
+static int
+receive_object_is_same_generation(objset_t *os, uint64_t object,
+ dmu_object_type_t old_bonus_type, dmu_object_type_t new_bonus_type,
+ const void *new_bonus, boolean_t *samegenp)
+{
+ zfs_file_info_t zoi;
+ int err;
+
+ dmu_buf_t *old_bonus_dbuf;
+ err = dmu_bonus_hold(os, object, FTAG, &old_bonus_dbuf);
+ if (err != 0)
+ return (err);
+ err = dmu_get_file_info(os, old_bonus_type, old_bonus_dbuf->db_data,
+ &zoi);
+ dmu_buf_rele(old_bonus_dbuf, FTAG);
+ if (err != 0)
+ return (err);
+ uint64_t old_gen = zoi.zfi_generation;
+
+ err = dmu_get_file_info(os, new_bonus_type, new_bonus, &zoi);
+ if (err != 0)
+ return (err);
+ uint64_t new_gen = zoi.zfi_generation;
+
+ *samegenp = (old_gen == new_gen);
+ return (0);
+}
+
+static int
+receive_handle_existing_object(const struct receive_writer_arg *rwa,
+ const struct drr_object *drro, const dmu_object_info_t *doi,
+ const void *bonus_data,
+ uint64_t *object_to_hold, uint32_t *new_blksz)
+{
+ uint32_t indblksz = drro->drr_indblkshift ?
+ 1ULL << drro->drr_indblkshift : 0;
+ int nblkptr = deduce_nblkptr(drro->drr_bonustype,
+ drro->drr_bonuslen);
+ uint8_t dn_slots = drro->drr_dn_slots != 0 ?
+ drro->drr_dn_slots : DNODE_MIN_SLOTS;
+ boolean_t do_free_range = B_FALSE;
+ int err;
+
+ *object_to_hold = drro->drr_object;
+
+ /* nblkptr should be bounded by the bonus size and type */
+ if (rwa->raw && nblkptr != drro->drr_nblkptr)
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * After the previous send stream, the sending system may
+ * have freed this object, and then happened to re-allocate
+ * this object number in a later txg. In this case, we are
+ * receiving a different logical file, and the block size may
+ * appear to be different. i.e. we may have a different
+ * block size for this object than what the send stream says.
+ * In this case we need to remove the object's contents,
+ * so that its structure can be changed and then its contents
+ * entirely replaced by subsequent WRITE records.
+ *
+ * If this is a -L (--large-block) incremental stream, and
+ * the previous stream was not -L, the block size may appear
+ * to increase. i.e. we may have a smaller block size for
+ * this object than what the send stream says. In this case
+ * we need to keep the object's contents and block size
+ * intact, so that we don't lose parts of the object's
+ * contents that are not changed by this incremental send
+ * stream.
+ *
+ * We can distinguish between the two above cases by using
+ * the ZPL's generation number (see
+ * receive_object_is_same_generation()). However, we only
+ * want to rely on the generation number when absolutely
+ * necessary, because with raw receives, the generation is
+ * encrypted. We also want to minimize dependence on the
+ * ZPL, so that other types of datasets can also be received
+ * (e.g. ZVOLs, although note that ZVOLs currently do not
+ * reallocate their objects or change their structure).
+ * Therefore, we check a number of different cases where we
+ * know it is safe to discard the object's contents, before
+ * using the ZPL's generation number to make the above
+ * distinction.
+ */
+ if (drro->drr_blksz != doi->doi_data_block_size) {
+ if (rwa->raw) {
+ /*
+ * RAW streams always have large blocks, so
+ * we are sure that the data is not needed
+ * due to changing --large-block to be on.
+ * Which is fortunate since the bonus buffer
+ * (which contains the ZPL generation) is
+ * encrypted, and the key might not be
+ * loaded.
+ */
+ do_free_range = B_TRUE;
+ } else if (rwa->full) {
+ /*
+ * This is a full send stream, so it always
+ * replaces what we have. Even if the
+ * generation numbers happen to match, this
+ * can not actually be the same logical file.
+ * This is relevant when receiving a full
+ * send as a clone.
+ */
+ do_free_range = B_TRUE;
+ } else if (drro->drr_type !=
+ DMU_OT_PLAIN_FILE_CONTENTS ||
+ doi->doi_type != DMU_OT_PLAIN_FILE_CONTENTS) {
+ /*
+ * PLAIN_FILE_CONTENTS are the only type of
+ * objects that have ever been stored with
+ * large blocks, so we don't need the special
+ * logic below. ZAP blocks can shrink (when
+ * there's only one block), so we don't want
+ * to hit the error below about block size
+ * only increasing.
+ */
+ do_free_range = B_TRUE;
+ } else if (doi->doi_max_offset <=
+ doi->doi_data_block_size) {
+ /*
+ * There is only one block. We can free it,
+ * because its contents will be replaced by a
+ * WRITE record. This can not be the no-L ->
+ * -L case, because the no-L case would have
+ * resulted in multiple blocks. If we
+ * supported -L -> no-L, it would not be safe
+ * to free the file's contents. Fortunately,
+ * that is not allowed (see
+ * recv_check_large_blocks()).
+ */
+ do_free_range = B_TRUE;
+ } else {
+ boolean_t is_same_gen;
+ err = receive_object_is_same_generation(rwa->os,
+ drro->drr_object, doi->doi_bonus_type,
+ drro->drr_bonustype, bonus_data, &is_same_gen);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+
+ if (is_same_gen) {
+ /*
+ * This is the same logical file, and
+ * the block size must be increasing.
+ * It could only decrease if
+ * --large-block was changed to be
+ * off, which is checked in
+ * recv_check_large_blocks().
+ */
+ if (drro->drr_blksz <=
+ doi->doi_data_block_size)
+ return (SET_ERROR(EINVAL));
+ /*
+ * We keep the existing blocksize and
+ * contents.
+ */
+ *new_blksz =
+ doi->doi_data_block_size;
+ } else {
+ do_free_range = B_TRUE;
+ }
+ }
+ }
+
+ /* nblkptr can only decrease if the object was reallocated */
+ if (nblkptr < doi->doi_nblkptr)
+ do_free_range = B_TRUE;
+
+ /* number of slots can only change on reallocation */
+ if (dn_slots != doi->doi_dnodesize >> DNODE_SHIFT)
+ do_free_range = B_TRUE;
+
+ /*
+ * For raw sends we also check a few other fields to
+ * ensure we are preserving the objset structure exactly
+ * as it was on the receive side:
+ * - A changed indirect block size
+ * - A smaller nlevels
+ */
+ if (rwa->raw) {
+ if (indblksz != doi->doi_metadata_block_size)
+ do_free_range = B_TRUE;
+ if (drro->drr_nlevels < doi->doi_indirection)
+ do_free_range = B_TRUE;
+ }
+
+ if (do_free_range) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ 0, DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * The dmu does not currently support decreasing nlevels
+ * or changing the number of dnode slots on an object. For
+ * non-raw sends, this does not matter and the new object
+ * can just use the previous one's nlevels. For raw sends,
+ * however, the structure of the received dnode (including
+ * nlevels and dnode slots) must match that of the send
+ * side. Therefore, instead of using dmu_object_reclaim(),
+ * we must free the object completely and call
+ * dmu_object_claim_dnsize() instead.
+ */
+ if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) ||
+ dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
+ err = dmu_free_long_object(rwa->os, drro->drr_object);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ *object_to_hold = DMU_NEW_OBJECT;
+ }
+
+ /*
+ * For raw receives, free everything beyond the new incoming
+ * maxblkid. Normally this would be done with a DRR_FREE
+ * record that would come after this DRR_OBJECT record is
+ * processed. However, for raw receives we manually set the
+ * maxblkid from the drr_maxblkid and so we must first free
+ * everything above that blkid to ensure the DMU is always
+ * consistent with itself. We will never free the first block
+ * of the object here because a maxblkid of 0 could indicate
+ * an object with a single block or one with no blocks. This
+ * free may be skipped when dmu_free_long_range() was called
+ * above since it covers the entire object's contents.
+ */
+ if (rwa->raw && *object_to_hold != DMU_NEW_OBJECT && !do_free_range) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ (drro->drr_maxblkid + 1) * doi->doi_data_block_size,
+ DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
+ return (0);
+}
+
noinline static int
receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
void *data)
{
dmu_object_info_t doi;
dmu_tx_t *tx;
- uint64_t object;
int err;
+ uint32_t new_blksz = drro->drr_blksz;
uint8_t dn_slots = drro->drr_dn_slots != 0 ?
drro->drr_dn_slots : DNODE_MIN_SLOTS;
@@ -1360,86 +1632,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
* Raw receives will also check that the indirect structure of the
* dnode hasn't changed.
*/
+ uint64_t object_to_hold;
if (err == 0) {
- uint32_t indblksz = drro->drr_indblkshift ?
- 1ULL << drro->drr_indblkshift : 0;
- int nblkptr = deduce_nblkptr(drro->drr_bonustype,
- drro->drr_bonuslen);
- boolean_t did_free = B_FALSE;
-
- object = drro->drr_object;
-
- /* nblkptr should be bounded by the bonus size and type */
- if (rwa->raw && nblkptr != drro->drr_nblkptr)
- return (SET_ERROR(EINVAL));
-
- /*
- * Check for indicators that the object was freed and
- * reallocated. For all sends, these indicators are:
- * - A changed block size
- * - A smaller nblkptr
- * - A changed dnode size
- * For raw sends we also check a few other fields to
- * ensure we are preserving the objset structure exactly
- * as it was on the receive side:
- * - A changed indirect block size
- * - A smaller nlevels
- */
- if (drro->drr_blksz != doi.doi_data_block_size ||
- nblkptr < doi.doi_nblkptr ||
- dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
- (rwa->raw &&
- (indblksz != doi.doi_metadata_block_size ||
- drro->drr_nlevels < doi.doi_indirection))) {
- err = dmu_free_long_range(rwa->os, drro->drr_object,
- 0, DMU_OBJECT_END);
- if (err != 0)
- return (SET_ERROR(EINVAL));
- else
- did_free = B_TRUE;
- }
-
- /*
- * The dmu does not currently support decreasing nlevels
- * or changing the number of dnode slots on an object. For
- * non-raw sends, this does not matter and the new object
- * can just use the previous one's nlevels. For raw sends,
- * however, the structure of the received dnode (including
- * nlevels and dnode slots) must match that of the send
- * side. Therefore, instead of using dmu_object_reclaim(),
- * we must free the object completely and call
- * dmu_object_claim_dnsize() instead.
- */
- if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
- dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
- err = dmu_free_long_object(rwa->os, drro->drr_object);
- if (err != 0)
- return (SET_ERROR(EINVAL));
-
- txg_wait_synced(dmu_objset_pool(rwa->os), 0);
- object = DMU_NEW_OBJECT;
- }
-
- /*
- * For raw receives, free everything beyond the new incoming
- * maxblkid. Normally this would be done with a DRR_FREE
- * record that would come after this DRR_OBJECT record is
- * processed. However, for raw receives we manually set the
- * maxblkid from the drr_maxblkid and so we must first free
- * everything above that blkid to ensure the DMU is always
- * consistent with itself. We will never free the first block
- * of the object here because a maxblkid of 0 could indicate
- * an object with a single block or one with no blocks. This
- * free may be skipped when dmu_free_long_range() was called
- * above since it covers the entire object's contents.
- */
- if (rwa->raw && object != DMU_NEW_OBJECT && !did_free) {
- err = dmu_free_long_range(rwa->os, drro->drr_object,
- (drro->drr_maxblkid + 1) * doi.doi_data_block_size,
- DMU_OBJECT_END);
- if (err != 0)
- return (SET_ERROR(EINVAL));
- }
+ err = receive_handle_existing_object(rwa, drro, &doi, data,
+ &object_to_hold, &new_blksz);
} else if (err == EEXIST) {
/*
* The object requested is currently an interior slot of a
@@ -1454,10 +1650,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
return (SET_ERROR(EINVAL));
/* object was freed and we are about to allocate a new one */
- object = DMU_NEW_OBJECT;
+ object_to_hold = DMU_NEW_OBJECT;
} else {
/* object is free and we are about to allocate a new one */
- object = DMU_NEW_OBJECT;
+ object_to_hold = DMU_NEW_OBJECT;
}
/*
@@ -1492,27 +1688,27 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
tx = dmu_tx_create(rwa->os);
- dmu_tx_hold_bonus(tx, object);
- dmu_tx_hold_write(tx, object, 0, 0);
+ dmu_tx_hold_bonus(tx, object_to_hold);
+ dmu_tx_hold_write(tx, object_to_hold, 0, 0);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
- if (object == DMU_NEW_OBJECT) {
+ if (object_to_hold == DMU_NEW_OBJECT) {
/* Currently free, wants to be allocated */
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
- drro->drr_type, drro->drr_blksz,
+ drro->drr_type, new_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
dn_slots << DNODE_SHIFT, tx);
} else if (drro->drr_type != doi.doi_type ||
- drro->drr_blksz != doi.doi_data_block_size ||
+ new_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
/* Currently allocated, but with different properties */
err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
- drro->drr_type, drro->drr_blksz,
+ drro->drr_type, new_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
dn_slots << DNODE_SHIFT, rwa->spill ?
DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
@@ -1578,6 +1774,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
* For non-new objects block size and indirect block
* shift cannot change and nlevels can only increase.
*/
+ ASSERT3U(new_blksz, ==, drro->drr_blksz);
VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
drro->drr_blksz, drro->drr_indblkshift, tx));
VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
@@ -1707,6 +1904,40 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
DRR_WRITE_PAYLOAD_SIZE(drrw));
}
+ /*
+ * If we are receiving an incremental large-block stream into
+ * a dataset that previously did a non-large-block receive,
+ * the WRITE record may be larger than the object's block
+ * size. dmu_assign_arcbuf_by_dnode() handles this as long
+ * as the arcbuf is not compressed, so decompress it here if
+ * necessary.
+ */
+ if (drrw->drr_logical_size != dn->dn_datablksz &&
+ arc_get_compression(abuf) != ZIO_COMPRESS_OFF) {
+ ASSERT3U(drrw->drr_logical_size, >, dn->dn_datablksz);
+ zbookmark_phys_t zb = {
+ .zb_objset = dmu_objset_id(rwa->os),
+ .zb_object = rwa->last_object,
+ .zb_level = 0,
+ .zb_blkid =
+ drrw->drr_offset >> dn->dn_datablkshift,
+ };
+
+ /*
+ * The size of loaned arc bufs is counted in
+ * arc_loaned_bytes. When we untransform
+ * (decompress) the buf, its size increases. To
+ * ensure that arc_loaned_bytes remains accurate, we
+ * need to return (un-loan) the buf (with its
+ * compressed size) and then re-loan it (with its
+ * new, uncompressed size).
+ */
+ arc_return_buf(abuf, FTAG);
+ VERIFY0(arc_untransform(abuf, dmu_objset_spa(rwa->os),
+ &zb, B_FALSE));
+ arc_loan_inuse_buf(abuf, FTAG);
+ }
+
err = dmu_assign_arcbuf_by_dnode(dn,
drrw->drr_offset, abuf, tx);
if (err != 0) {
@@ -2710,6 +2941,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
rwa->resumable = drc->drc_resumable;
rwa->raw = drc->drc_raw;
rwa->spill = drc->drc_spill;
+ rwa->full = (drc->drc_drr_begin->drr_u.drr_begin.drr_fromguid == 0);
rwa->os->os_raw_receive = drc->drc_raw;
list_create(&rwa->write_batch, sizeof (struct receive_record_arg),
offsetof(struct receive_record_arg, node.bqn_node));
diff --git a/module/zfs/zfs_quota.c b/module/zfs/zfs_quota.c
index 6c83f79c9..e61db5c7a 100644
--- a/module/zfs/zfs_quota.c
+++ b/module/zfs/zfs_quota.c
@@ -39,21 +39,17 @@
#include <sys/zfs_znode.h>
int
-zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
- uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
+zpl_get_file_info(dmu_object_type_t bonustype, const void *data,
+ zfs_file_info_t *zoi)
{
- sa_hdr_phys_t sa;
- sa_hdr_phys_t *sap = data;
- uint64_t flags;
- int hdrsize;
- boolean_t swap = B_FALSE;
-
/*
* Is it a valid type of object to track?
*/
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
return (SET_ERROR(ENOENT));
+ zoi->zfi_project = ZFS_DEFAULT_PROJID;
+
/*
* If we have a NULL data pointer
* then assume the id's aren't changing and
@@ -64,52 +60,55 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
return (SET_ERROR(EEXIST));
if (bonustype == DMU_OT_ZNODE) {
- znode_phys_t *znp = data;
- *userp = znp->zp_uid;
- *groupp = znp->zp_gid;
- *projectp = ZFS_DEFAULT_PROJID;
+ const znode_phys_t *znp = data;
+ zoi->zfi_user = znp->zp_uid;
+ zoi->zfi_group = znp->zp_gid;
+ zoi->zfi_generation = znp->zp_gen;
return (0);
}
+ const sa_hdr_phys_t *sap = data;
if (sap->sa_magic == 0) {
/*
* This should only happen for newly created files
* that haven't had the znode data filled in yet.
*/
- *userp = 0;
- *groupp = 0;
- *projectp = ZFS_DEFAULT_PROJID;
+ zoi->zfi_user = 0;
+ zoi->zfi_group = 0;
+ zoi->zfi_generation = 0;
return (0);
}
- sa = *sap;
+ sa_hdr_phys_t sa = *sap;
+ boolean_t swap = B_FALSE;
if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
sa.sa_magic = SA_MAGIC;
sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
swap = B_TRUE;
- } else {
- VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
}
+ VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
- hdrsize = sa_hdrsize(&sa);
+ int hdrsize = sa_hdrsize(&sa);
VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
- *userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET));
- *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET));
- flags = *((uint64_t *)((uintptr_t)data + hdrsize + SA_FLAGS_OFFSET));
+ uintptr_t data_after_hdr = (uintptr_t)data + hdrsize;
+ zoi->zfi_user = *((uint64_t *)(data_after_hdr + SA_UID_OFFSET));
+ zoi->zfi_group = *((uint64_t *)(data_after_hdr + SA_GID_OFFSET));
+ zoi->zfi_generation = *((uint64_t *)(data_after_hdr + SA_GEN_OFFSET));
+ uint64_t flags = *((uint64_t *)(data_after_hdr + SA_FLAGS_OFFSET));
if (swap)
flags = BSWAP_64(flags);
- if (flags & ZFS_PROJID)
- *projectp = *((uint64_t *)((uintptr_t)data + hdrsize +
- SA_PROJID_OFFSET));
- else
- *projectp = ZFS_DEFAULT_PROJID;
+ if (flags & ZFS_PROJID) {
+ zoi->zfi_project =
+ *((uint64_t *)(data_after_hdr + SA_PROJID_OFFSET));
+ }
if (swap) {
- *userp = BSWAP_64(*userp);
- *groupp = BSWAP_64(*groupp);
- *projectp = BSWAP_64(*projectp);
+ zoi->zfi_user = BSWAP_64(zoi->zfi_user);
+ zoi->zfi_group = BSWAP_64(zoi->zfi_group);
+ zoi->zfi_project = BSWAP_64(zoi->zfi_project);
+ zoi->zfi_generation = BSWAP_64(zoi->zfi_generation);
}
return (0);
}
@@ -468,7 +467,7 @@ zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
zfs_id_overobjquota(zfsvfs, usedobj, id));
}
-EXPORT_SYMBOL(zfs_space_delta_cb);
+EXPORT_SYMBOL(zpl_get_file_info);
EXPORT_SYMBOL(zfs_userspace_one);
EXPORT_SYMBOL(zfs_userspace_many);
EXPORT_SYMBOL(zfs_set_userquota);