author    | Matthew Ahrens <[email protected]>   | 2018-04-17 11:06:54 -0700
committer | Brian Behlendorf <[email protected]> | 2018-04-17 11:06:54 -0700
commit    | 0c03d21ac99ebdbefe65c319fc3712928c40af78 (patch)
tree      | 5ec1e603a374a089fcd5b1c5b673974a5e687c1b /module/zfs/dmu_send.c
parent    | b40d45bc6c0e908c4d1086ac8ac3dcd1f7178d2a (diff)
assertion in arc_release() during encrypted receive
In the existing code, when doing a raw (encrypted) zfs receive,
we call arc_convert_to_raw() from open context. This creates a
race condition between arc_release()/arc_change_state() and
writing out the block from syncing context (arc_write_ready/done()).
This change makes it so that when we are doing a raw (encrypted)
zfs receive, we save the crypt parameters (salt, iv, mac) of dnode
blocks in the dbuf_dirty_record_t, and call arc_convert_to_raw()
from syncing context when writing out the block of dnodes.
Additionally, we can eliminate dr_raw and associated setters, and
instead know that dnode blocks are always raw when doing a zfs
receive (see the new field os_raw_receive).
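
Note: the dmu_send.c diff below only consumes the new mechanism; the dirty-record plumbing lives in the companion dbuf.c/arc.c changes, which are not shown on this page. As a rough sketch of the idea (the struct and helper names below are illustrative stand-ins, not the actual ZFS symbols), the dirty record for a dnode block can carry the received crypt parameters plus a flag, filled in from open context:

/*
 * Illustrative sketch, not the verbatim ZFS code: stash the raw-receive
 * crypt params (byteorder, salt, iv, mac) on the dirty record in open
 * context, so the conversion to a raw ARC buffer can be deferred to
 * syncing context.
 */
typedef struct raw_recv_params {
	boolean_t rp_has_raw_params;	/* params were supplied */
	boolean_t rp_byteorder;		/* byteorder of the send side */
	uint8_t rp_salt[ZIO_DATA_SALT_LEN];
	uint8_t rp_iv[ZIO_DATA_IV_LEN];
	uint8_t rp_mac[ZIO_DATA_MAC_LEN];
} raw_recv_params_t;

/* Open context: record the params from the last DRR_OBJECT_RANGE. */
static void
raw_recv_params_set(raw_recv_params_t *rp, boolean_t byteorder,
    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac)
{
	rp->rp_has_raw_params = B_TRUE;
	rp->rp_byteorder = byteorder;
	bcopy(salt, rp->rp_salt, ZIO_DATA_SALT_LEN);
	bcopy(iv, rp->rp_iv, ZIO_DATA_IV_LEN);
	bcopy(mac, rp->rp_mac, ZIO_DATA_MAC_LEN);
}

In the diff below, dmu_buf_set_crypt_params() records exactly this kind of state for the meta-dnode buffer that covers the received object range.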
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tom Caputi <[email protected]>
Signed-off-by: Matthew Ahrens <[email protected]>
Closes #7424
Closes #7429
Diffstat (limited to 'module/zfs/dmu_send.c')
-rw-r--r-- | module/zfs/dmu_send.c | 114
1 file changed, 46 insertions, 68 deletions
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index a007e96ba..6c535e541 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -2174,6 +2174,7 @@ struct receive_writer_arg {
 	uint64_t bytes_read; /* bytes read when current record created */
 
 	/* Encryption parameters for the last received DRR_OBJECT_RANGE */
+	boolean_t or_crypt_params_present;
 	uint64_t or_firstobj;
 	uint64_t or_numslots;
 	uint8_t or_salt[ZIO_DATA_SALT_LEN];
@@ -2507,23 +2508,16 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		if (rwa->raw && nblkptr != drro->drr_nblkptr)
 			return (SET_ERROR(EINVAL));
 
-		if (rwa->raw &&
-		    (drro->drr_blksz != doi.doi_data_block_size ||
+		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr ||
-		    indblksz != doi.doi_metadata_block_size ||
-		    drro->drr_nlevels < doi.doi_indirection ||
-		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT)) {
-			err = dmu_free_long_range_raw(rwa->os,
+		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
+		    (rwa->raw &&
+		    (indblksz != doi.doi_metadata_block_size ||
+		    drro->drr_nlevels < doi.doi_indirection))) {
+			err = dmu_free_long_range(rwa->os,
 			    drro->drr_object, 0, DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
-		} else if (drro->drr_blksz != doi.doi_data_block_size ||
-		    nblkptr < doi.doi_nblkptr ||
-		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
-			err = dmu_free_long_range(rwa->os, drro->drr_object,
-			    0, DMU_OBJECT_END);
-			if (err != 0)
-				return (SET_ERROR(EINVAL));
 		}
 
 		/*
@@ -2538,13 +2532,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		 */
 		if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
 		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
-			if (rwa->raw) {
-				err = dmu_free_long_object_raw(rwa->os,
-				    drro->drr_object);
-			} else {
-				err = dmu_free_long_object(rwa->os,
-				    drro->drr_object);
-			}
+			err = dmu_free_long_object(rwa->os, drro->drr_object);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
 
@@ -2586,10 +2574,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 			else if (err != 0)
 				return (err);
 
-			if (rwa->raw)
-				err = dmu_free_long_object_raw(rwa->os, slot);
-			else
-				err = dmu_free_long_object(rwa->os, slot);
+			err = dmu_free_long_object(rwa->os, slot);
 			if (err != 0)
 				return (err);
 
@@ -2630,26 +2615,38 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 			return (SET_ERROR(EINVAL));
 	}
 
-	if (rwa->raw) {
+	if (rwa->or_crypt_params_present) {
 		/*
-		 * Convert the buffer associated with this range of dnodes
-		 * to a raw buffer. This ensures that it will be written out
-		 * as a raw buffer when we fill in the dnode object. Since we
-		 * are committing this tx now, it is possible for the dnode
-		 * block to end up on-disk with the incorrect MAC. Despite
-		 * this, the dataset is marked as inconsistent so no other
-		 * code paths (apart from scrubs) will attempt to read this
-		 * data. Scrubs will not be effected by this either since
-		 * scrubs only read raw data and do not attempt to check
-		 * the MAC.
+		 * Set the crypt params for the buffer associated with this
+		 * range of dnodes. This causes the blkptr_t to have the
+		 * same crypt params (byteorder, salt, iv, mac) as on the
+		 * sending side.
+		 *
+		 * Since we are committing this tx now, it is possible for
+		 * the dnode block to end up on-disk with the incorrect MAC,
+		 * if subsequent objects in this block are received in a
+		 * different txg. However, since the dataset is marked as
+		 * inconsistent, no code paths will do a non-raw read (or
+		 * decrypt the block / verify the MAC). The receive code and
+		 * scrub code can safely do raw reads and verify the
+		 * checksum. They don't need to verify the MAC.
 		 */
-		err = dmu_convert_mdn_block_to_raw(rwa->os, rwa->or_firstobj,
-		    rwa->or_byteorder, rwa->or_salt, rwa->or_iv, rwa->or_mac,
-		    tx);
+		dmu_buf_t *db = NULL;
+		uint64_t offset = rwa->or_firstobj * DNODE_MIN_SIZE;
+
+		err = dmu_buf_hold_by_dnode(DMU_META_DNODE(rwa->os),
+		    offset, FTAG, &db, DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
 		if (err != 0) {
 			dmu_tx_commit(tx);
 			return (SET_ERROR(EINVAL));
 		}
+
+		dmu_buf_set_crypt_params(db, rwa->or_byteorder,
+		    rwa->or_salt, rwa->or_iv, rwa->or_mac, tx);
+
+		dmu_buf_rele(db, FTAG);
+
+		rwa->or_crypt_params_present = B_FALSE;
 	}
 
 	dmu_object_set_checksum(rwa->os, drro->drr_object,
@@ -2726,10 +2723,7 @@ receive_freeobjects(struct receive_writer_arg *rwa,
 		else if (err != 0)
 			return (err);
 
-		if (rwa->raw)
-			err = dmu_free_long_object_raw(rwa->os, obj);
-		else
-			err = dmu_free_long_object(rwa->os, obj);
+		err = dmu_free_long_object(rwa->os, obj);
 		if (err != 0)
 			return (err);
 
@@ -2781,9 +2775,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
 		return (err);
 	}
 
-	if (rwa->raw)
-		VERIFY0(dmu_object_dirty_raw(rwa->os, drrw->drr_object, tx));
-
 	if (rwa->byteswap && !arc_is_encrypted(abuf) &&
 	    arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
 		dmu_object_byteswap_t byteswap =
@@ -2870,7 +2861,6 @@ receive_write_byref(struct receive_writer_arg *rwa,
 	}
 
 	if (rwa->raw) {
-		VERIFY0(dmu_object_dirty_raw(rwa->os, drrwbr->drr_object, tx));
 		dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
 		    drrwbr->drr_offset, dbp, tx);
 	} else {
@@ -2971,13 +2961,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
 		dmu_tx_abort(tx);
 		return (err);
 	}
-
-	if (rwa->raw) {
-		VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx));
-		dmu_buf_will_change_crypt_params(db_spill, tx);
-	} else {
-		dmu_buf_will_dirty(db_spill, tx);
-	}
+	dmu_buf_will_dirty(db_spill, tx);
 
 	if (db_spill->db_size < drrs->drr_length)
 		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
@@ -3016,13 +3000,8 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
 	if (drrf->drr_object > rwa->max_object)
 		rwa->max_object = drrf->drr_object;
 
-	if (rwa->raw) {
-		err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
-		    drrf->drr_offset, drrf->drr_length);
-	} else {
-		err = dmu_free_long_range(rwa->os, drrf->drr_object,
-		    drrf->drr_offset, drrf->drr_length);
-	}
+	err = dmu_free_long_range(rwa->os, drrf->drr_object,
+	    drrf->drr_offset, drrf->drr_length);
 
 	return (err);
 }
@@ -3062,9 +3041,10 @@ receive_object_range(struct receive_writer_arg *rwa,
 
 	/*
 	 * The DRR_OBJECT_RANGE handling must be deferred to receive_object()
-	 * so that the encryption parameters are set with each object that is
-	 * written into that block.
+	 * so that the block of dnodes is not written out when it's empty,
+	 * and converted to a HOLE BP.
 	 */
+	rwa->or_crypt_params_present = B_TRUE;
 	rwa->or_firstobj = drror->drr_firstobj;
 	rwa->or_numslots = drror->drr_numslots;
 	bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
@@ -3090,6 +3070,7 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
 	 * after we stopped receiving the dataset.
 	 */
 	txg_wait_synced(ds->ds_dir->dd_pool, 0);
+	ds->ds_objset->os_raw_receive = B_FALSE;
 
 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 	if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
@@ -3841,6 +3822,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	rwa->byteswap = drc->drc_byteswap;
 	rwa->resumable = drc->drc_resumable;
 	rwa->raw = drc->drc_raw;
+	rwa->os->os_raw_receive = drc->drc_raw;
 
 	(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
 	    TS_RUN, minclsyspri);
@@ -3903,12 +3885,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 		int next_err = 0;
 
 		while (next_err == 0) {
-			if (drc->drc_raw) {
-				free_err = dmu_free_long_object_raw(rwa->os,
-				    obj);
-			} else {
-				free_err = dmu_free_long_object(rwa->os, obj);
-			}
+			free_err = dmu_free_long_object(rwa->os, obj);
 			if (free_err != 0 && free_err != ENOENT)
 				break;
 
@@ -4037,6 +4014,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
 
 	spa_history_log_internal_ds(drc->drc_ds, "finish receiving", tx,
 	    "snap=%s", drc->drc_tosnap);
+	drc->drc_ds->ds_objset->os_raw_receive = B_FALSE;
 
 	if (!drc->drc_newfs) {
 		dsl_dataset_t *origin_head;
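
Note on the syncing-context half of the fix, which lives in arc.c/dbuf.c rather than in this file's diff: when the block of dnodes is written out, the saved params are used to convert its ARC buffer to raw, as the commit message describes. A hedged sketch, reusing the raw_recv_params_t sketch above; the field layout and the arc_convert_to_raw() argument order are assumptions, not taken from this diff:

/*
 * Hedged sketch of the syncing-context consumer: if raw-receive crypt
 * params were saved for this dnode block, convert its ARC buffer to raw
 * before it is written.
 */
static void
dnode_block_convert_if_raw(objset_t *os, raw_recv_params_t *rp,
    arc_buf_t *buf)
{
	if (!rp->rp_has_raw_params)
		return;

	/* Syncing context: no racing arc_release()/arc_change_state(). */
	arc_convert_to_raw(buf, dmu_objset_id(os), rp->rp_byteorder,
	    DMU_OT_DNODE, rp->rp_salt, rp->rp_iv, rp->rp_mac);
}

Because os_raw_receive is set for the whole receive, the dnode write path knows these blocks are raw without the per-object dr_raw dirtying the old code needed.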