summaryrefslogtreecommitdiffstats
path: root/module/zfs/dmu_send.c
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2018-04-17 11:06:54 -0700
committerBrian Behlendorf <[email protected]>2018-04-17 11:06:54 -0700
commit0c03d21ac99ebdbefe65c319fc3712928c40af78 (patch)
tree5ec1e603a374a089fcd5b1c5b673974a5e687c1b /module/zfs/dmu_send.c
parentb40d45bc6c0e908c4d1086ac8ac3dcd1f7178d2a (diff)
assertion in arc_release() during encrypted receive
In the existing code, when doing a raw (encrypted) zfs receive, we call arc_convert_to_raw() from open context. This creates a race condition between arc_release()/arc_change_state() and writing out the block from syncing context (arc_write_ready/done()). This change makes it so that when we are doing a raw (encrypted) zfs receive, we save the crypt parameters (salt, iv, mac) of dnode blocks in the dbuf_dirty_record_t, and call arc_convert_to_raw() from syncing context when writing out the block of dnodes. Additionally, we can eliminate dr_raw and associated setters, and instead know that dnode blocks are always raw when doing a zfs receive (see the new field os_raw_receive). Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Tom Caputi <[email protected]> Signed-off-by: Matthew Ahrens <[email protected]> Closes #7424 Closes #7429
Diffstat (limited to 'module/zfs/dmu_send.c')
-rw-r--r--module/zfs/dmu_send.c114
1 files changed, 46 insertions, 68 deletions
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index a007e96ba..6c535e541 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -2174,6 +2174,7 @@ struct receive_writer_arg {
uint64_t bytes_read; /* bytes read when current record created */
/* Encryption parameters for the last received DRR_OBJECT_RANGE */
+ boolean_t or_crypt_params_present;
uint64_t or_firstobj;
uint64_t or_numslots;
uint8_t or_salt[ZIO_DATA_SALT_LEN];
@@ -2507,23 +2508,16 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
if (rwa->raw && nblkptr != drro->drr_nblkptr)
return (SET_ERROR(EINVAL));
- if (rwa->raw &&
- (drro->drr_blksz != doi.doi_data_block_size ||
+ if (drro->drr_blksz != doi.doi_data_block_size ||
nblkptr < doi.doi_nblkptr ||
- indblksz != doi.doi_metadata_block_size ||
- drro->drr_nlevels < doi.doi_indirection ||
- drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT)) {
- err = dmu_free_long_range_raw(rwa->os,
+ drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
+ (rwa->raw &&
+ (indblksz != doi.doi_metadata_block_size ||
+ drro->drr_nlevels < doi.doi_indirection))) {
+ err = dmu_free_long_range(rwa->os,
drro->drr_object, 0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
- } else if (drro->drr_blksz != doi.doi_data_block_size ||
- nblkptr < doi.doi_nblkptr ||
- drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
- err = dmu_free_long_range(rwa->os, drro->drr_object,
- 0, DMU_OBJECT_END);
- if (err != 0)
- return (SET_ERROR(EINVAL));
}
/*
@@ -2538,13 +2532,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
*/
if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
- if (rwa->raw) {
- err = dmu_free_long_object_raw(rwa->os,
- drro->drr_object);
- } else {
- err = dmu_free_long_object(rwa->os,
- drro->drr_object);
- }
+ err = dmu_free_long_object(rwa->os, drro->drr_object);
if (err != 0)
return (SET_ERROR(EINVAL));
@@ -2586,10 +2574,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
else if (err != 0)
return (err);
- if (rwa->raw)
- err = dmu_free_long_object_raw(rwa->os, slot);
- else
- err = dmu_free_long_object(rwa->os, slot);
+ err = dmu_free_long_object(rwa->os, slot);
if (err != 0)
return (err);
@@ -2630,26 +2615,38 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
return (SET_ERROR(EINVAL));
}
- if (rwa->raw) {
+ if (rwa->or_crypt_params_present) {
/*
- * Convert the buffer associated with this range of dnodes
- * to a raw buffer. This ensures that it will be written out
- * as a raw buffer when we fill in the dnode object. Since we
- * are committing this tx now, it is possible for the dnode
- * block to end up on-disk with the incorrect MAC. Despite
- * this, the dataset is marked as inconsistent so no other
- * code paths (apart from scrubs) will attempt to read this
- * data. Scrubs will not be effected by this either since
- * scrubs only read raw data and do not attempt to check
- * the MAC.
+ * Set the crypt params for the buffer associated with this
+ * range of dnodes. This causes the blkptr_t to have the
+ * same crypt params (byteorder, salt, iv, mac) as on the
+ * sending side.
+ *
+ * Since we are committing this tx now, it is possible for
+ * the dnode block to end up on-disk with the incorrect MAC,
+ * if subsequent objects in this block are received in a
+ * different txg. However, since the dataset is marked as
+ * inconsistent, no code paths will do a non-raw read (or
+ * decrypt the block / verify the MAC). The receive code and
+ * scrub code can safely do raw reads and verify the
+ * checksum. They don't need to verify the MAC.
*/
- err = dmu_convert_mdn_block_to_raw(rwa->os, rwa->or_firstobj,
- rwa->or_byteorder, rwa->or_salt, rwa->or_iv, rwa->or_mac,
- tx);
+ dmu_buf_t *db = NULL;
+ uint64_t offset = rwa->or_firstobj * DNODE_MIN_SIZE;
+
+ err = dmu_buf_hold_by_dnode(DMU_META_DNODE(rwa->os),
+ offset, FTAG, &db, DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
if (err != 0) {
dmu_tx_commit(tx);
return (SET_ERROR(EINVAL));
}
+
+ dmu_buf_set_crypt_params(db, rwa->or_byteorder,
+ rwa->or_salt, rwa->or_iv, rwa->or_mac, tx);
+
+ dmu_buf_rele(db, FTAG);
+
+ rwa->or_crypt_params_present = B_FALSE;
}
dmu_object_set_checksum(rwa->os, drro->drr_object,
@@ -2726,10 +2723,7 @@ receive_freeobjects(struct receive_writer_arg *rwa,
else if (err != 0)
return (err);
- if (rwa->raw)
- err = dmu_free_long_object_raw(rwa->os, obj);
- else
- err = dmu_free_long_object(rwa->os, obj);
+ err = dmu_free_long_object(rwa->os, obj);
if (err != 0)
return (err);
@@ -2781,9 +2775,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
return (err);
}
- if (rwa->raw)
- VERIFY0(dmu_object_dirty_raw(rwa->os, drrw->drr_object, tx));
-
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
dmu_object_byteswap_t byteswap =
@@ -2870,7 +2861,6 @@ receive_write_byref(struct receive_writer_arg *rwa,
}
if (rwa->raw) {
- VERIFY0(dmu_object_dirty_raw(rwa->os, drrwbr->drr_object, tx));
dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
drrwbr->drr_offset, dbp, tx);
} else {
@@ -2971,13 +2961,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
dmu_tx_abort(tx);
return (err);
}
-
- if (rwa->raw) {
- VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx));
- dmu_buf_will_change_crypt_params(db_spill, tx);
- } else {
- dmu_buf_will_dirty(db_spill, tx);
- }
+ dmu_buf_will_dirty(db_spill, tx);
if (db_spill->db_size < drrs->drr_length)
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
@@ -3016,13 +3000,8 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
if (drrf->drr_object > rwa->max_object)
rwa->max_object = drrf->drr_object;
- if (rwa->raw) {
- err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
- drrf->drr_offset, drrf->drr_length);
- } else {
- err = dmu_free_long_range(rwa->os, drrf->drr_object,
- drrf->drr_offset, drrf->drr_length);
- }
+ err = dmu_free_long_range(rwa->os, drrf->drr_object,
+ drrf->drr_offset, drrf->drr_length);
return (err);
}
@@ -3062,9 +3041,10 @@ receive_object_range(struct receive_writer_arg *rwa,
/*
* The DRR_OBJECT_RANGE handling must be deferred to receive_object()
- * so that the encryption parameters are set with each object that is
- * written into that block.
+ * so that the block of dnodes is not written out when it's empty,
+ * and converted to a HOLE BP.
*/
+ rwa->or_crypt_params_present = B_TRUE;
rwa->or_firstobj = drror->drr_firstobj;
rwa->or_numslots = drror->drr_numslots;
bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
@@ -3090,6 +3070,7 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
* after we stopped receiving the dataset.
*/
txg_wait_synced(ds->ds_dir->dd_pool, 0);
+ ds->ds_objset->os_raw_receive = B_FALSE;
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
@@ -3841,6 +3822,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
rwa->byteswap = drc->drc_byteswap;
rwa->resumable = drc->drc_resumable;
rwa->raw = drc->drc_raw;
+ rwa->os->os_raw_receive = drc->drc_raw;
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
TS_RUN, minclsyspri);
@@ -3903,12 +3885,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
int next_err = 0;
while (next_err == 0) {
- if (drc->drc_raw) {
- free_err = dmu_free_long_object_raw(rwa->os,
- obj);
- } else {
- free_err = dmu_free_long_object(rwa->os, obj);
- }
+ free_err = dmu_free_long_object(rwa->os, obj);
if (free_err != 0 && free_err != ENOENT)
break;
@@ -4037,6 +4014,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
tx, "snap=%s", drc->drc_tosnap);
+ drc->drc_ds->ds_objset->os_raw_receive = B_FALSE;
if (!drc->drc_newfs) {
dsl_dataset_t *origin_head;