Diffstat (limited to 'module/zfs')
-rw-r--r--	module/zfs/dbuf.c	|  59
-rw-r--r--	module/zfs/dmu.c	| 119
-rw-r--r--	module/zfs/dmu_objset.c	|   9
-rw-r--r--	module/zfs/dmu_send.c	| 114

4 files changed, 92 insertions(+), 209 deletions(-)
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index b0ae6cc72..bc03317a2 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1538,7 +1538,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
 	dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
 	dr->dt.dl.dr_nopwrite = B_FALSE;
-	dr->dt.dl.dr_raw = B_FALSE;
+	dr->dt.dl.dr_has_raw_params = B_FALSE;

 	/*
 	 * Release the already-written buffer, so we leave it in
@@ -2211,15 +2211,26 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)

 /*
  * This function is effectively the same as dmu_buf_will_dirty(), but
- * indicates the caller expects raw encrypted data in the db. It will
- * also set the raw flag on the created dirty record.
+ * indicates the caller expects raw encrypted data in the db, and provides
+ * the crypt params (byteorder, salt, iv, mac) which should be stored in the
+ * blkptr_t when this dbuf is written. This is only used for blocks of
+ * dnodes, during raw receive.
  */
 void
-dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
+dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 	dbuf_dirty_record_t *dr;

+	/*
+	 * dr_has_raw_params is only processed for blocks of dnodes
+	 * (see dbuf_sync_dnode_leaf_crypt()).
+	 */
+	ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
+	ASSERT3U(db->db_level, ==, 0);
+	ASSERT(db->db_objset->os_raw_receive);
+
 	dmu_buf_will_dirty_impl(db_fake,
 	    DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
@@ -2229,8 +2240,12 @@ dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
 	ASSERT3P(dr, !=, NULL);
 	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
-	dr->dt.dl.dr_raw = B_TRUE;
-	db->db_objset->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
+
+	dr->dt.dl.dr_has_raw_params = B_TRUE;
+	dr->dt.dl.dr_byteorder = byteorder;
+	bcopy(salt, dr->dt.dl.dr_salt, ZIO_DATA_SALT_LEN);
+	bcopy(iv, dr->dt.dl.dr_iv, ZIO_DATA_IV_LEN);
+	bcopy(mac, dr->dt.dl.dr_mac, ZIO_DATA_MAC_LEN);
 }

 #pragma weak dmu_buf_fill_done = dbuf_fill_done
@@ -2341,7 +2356,6 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
 	ASSERT(db->db_buf != NULL);

 	if (dr != NULL && dr->dr_txg == tx->tx_txg) {
 		ASSERT(dr->dt.dl.dr_data == db->db_buf);
-		IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw);

 		if (!arc_released(db->db_buf)) {
 			ASSERT(dr->dt.dl.dr_override_state ==
@@ -3452,20 +3466,23 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
 }

 /*
- * Ensure the dbuf's data is untransformed if the associated dirty
- * record requires it. This is used by dbuf_sync_leaf() to ensure
- * that a dnode block is decrypted before we write new data to it.
- * For raw writes we assert that the buffer is already encrypted.
+ * When syncing out a block of dnodes, adjust the block to deal with
+ * encryption. Normally, we make sure the block is decrypted before writing
+ * it. If we have crypt params, then we are writing a raw (encrypted) block,
+ * from a raw receive. In this case, set the ARC buf's crypt params so
+ * that the BP will be filled with the correct byteorder, salt, iv, and mac.
+ */
 static void
-dbuf_check_crypt(dbuf_dirty_record_t *dr)
+dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr)
 {
 	int err;
 	dmu_buf_impl_t *db = dr->dr_dbuf;

 	ASSERT(MUTEX_HELD(&db->db_mtx));
+	ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
+	ASSERT3U(db->db_level, ==, 0);

-	if (!dr->dt.dl.dr_raw && arc_is_encrypted(db->db_buf)) {
+	if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) {
 		zbookmark_phys_t zb;

 		/*
@@ -3481,12 +3498,12 @@ dbuf_check_crypt(dbuf_dirty_record_t *dr)
 		    &zb, B_TRUE);
 		if (err)
 			panic("Invalid dnode block MAC");
-	} else if (dr->dt.dl.dr_raw) {
-		/*
-		 * Writing raw encrypted data requires the db's arc buffer
-		 * to be converted to raw by the caller.
-		 */
-		ASSERT(arc_is_encrypted(db->db_buf));
+	} else if (dr->dt.dl.dr_has_raw_params) {
+		(void) arc_release(dr->dt.dl.dr_data, db);
+		arc_convert_to_raw(dr->dt.dl.dr_data,
+		    dmu_objset_id(db->db_objset),
+		    dr->dt.dl.dr_byteorder, DMU_OT_DNODE,
+		    dr->dt.dl.dr_salt, dr->dt.dl.dr_iv, dr->dt.dl.dr_mac);
 	}
 }
@@ -3667,7 +3684,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 	 * or decrypted, depending on what we are writing to it this txg.
 	 */
 	if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
-		dbuf_check_crypt(dr);
+		dbuf_prepare_encrypted_dnode_leaf(dr);

 	if (db->db_state != DB_NOFILL &&
 	    dn->dn_object != DMU_META_DNODE_OBJECT &&
@@ -4336,7 +4353,7 @@ EXPORT_SYMBOL(dbuf_free_range);
 EXPORT_SYMBOL(dbuf_new_size);
 EXPORT_SYMBOL(dbuf_release_bp);
 EXPORT_SYMBOL(dbuf_dirty);
-EXPORT_SYMBOL(dmu_buf_will_change_crypt_params);
+EXPORT_SYMBOL(dmu_buf_set_crypt_params);
 EXPORT_SYMBOL(dmu_buf_will_dirty);
 EXPORT_SYMBOL(dmu_buf_will_not_fill);
 EXPORT_SYMBOL(dmu_buf_will_fill);
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 0352393dc..2d2586a8c 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -765,7 +765,7 @@ dmu_objset_zfs_unmounting(objset_t *os)

 static int
 dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
-    uint64_t length, boolean_t raw)
+    uint64_t length)
 {
 	uint64_t object_size;
 	int err;
@@ -848,19 +848,6 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
 		    uint64_t, dmu_tx_get_txg(tx));
 		dnode_free_range(dn, chunk_begin, chunk_len, tx);

-		/* if this is a raw free, mark the dirty record as such */
-		if (raw) {
-			dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;
-
-			while (dr != NULL && dr->dr_txg > tx->tx_txg)
-				dr = dr->dr_next;
-			if (dr != NULL && dr->dr_txg == tx->tx_txg) {
-				dr->dt.dl.dr_raw = B_TRUE;
-				dn->dn_objset->os_next_write_raw
-				    [tx->tx_txg & TXG_MASK] = B_TRUE;
-			}
-		}
-
 		dmu_tx_commit(tx);

 		length -= chunk_len;
@@ -878,7 +865,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
 	err = dnode_hold(os, object, FTAG, &dn);
 	if (err != 0)
 		return (err);
-	err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE);
+	err = dmu_free_long_range_impl(os, dn, offset, length);

 	/*
 	 * It is important to zero out the maxblkid when freeing the entire
@@ -893,37 +880,8 @@ dmu_free_long_range(objset_t *os, uint64_t object,
 	return (err);
 }

-/*
- * This function is equivalent to dmu_free_long_range(), but also
- * marks the new dirty record as a raw write.
- */
 int
-dmu_free_long_range_raw(objset_t *os, uint64_t object,
-    uint64_t offset, uint64_t length)
-{
-	dnode_t *dn;
-	int err;
-
-	err = dnode_hold(os, object, FTAG, &dn);
-	if (err != 0)
-		return (err);
-	err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE);
-
-	/*
-	 * It is important to zero out the maxblkid when freeing the entire
-	 * file, so that (a) subsequent calls to dmu_free_long_range_impl()
-	 * will take the fast path, and (b) dnode_reallocate() can verify
-	 * that the entire file has been freed.
-	 */
-	if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
-		dn->dn_maxblkid = 0;
-
-	dnode_rele(dn, FTAG);
-	return (err);
-}
-
-static int
-dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
+dmu_free_long_object(objset_t *os, uint64_t object)
 {
 	dmu_tx_t *tx;
 	int err;
@@ -938,8 +896,6 @@ dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
 	dmu_tx_mark_netfree(tx);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err == 0) {
-		if (raw)
-			err = dmu_object_dirty_raw(os, object, tx);
 		if (err == 0)
 			err = dmu_object_free(os, object, tx);
@@ -952,19 +908,6 @@
 }

 int
-dmu_free_long_object(objset_t *os, uint64_t object)
-{
-	return (dmu_free_long_object_impl(os, object, B_FALSE));
-}
-
-int
-dmu_free_long_object_raw(objset_t *os, uint64_t object)
-{
-	return (dmu_free_long_object_impl(os, object, B_TRUE));
-}
-
-
-int
 dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, dmu_tx_t *tx)
 {
@@ -1666,41 +1609,6 @@ dmu_return_arcbuf(arc_buf_t *buf)
 	arc_buf_destroy(buf, FTAG);
 }

-int
-dmu_convert_mdn_block_to_raw(objset_t *os, uint64_t firstobj,
-    boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
-    const uint8_t *mac, dmu_tx_t *tx)
-{
-	int ret;
-	dmu_buf_t *handle = NULL;
-	dmu_buf_impl_t *db = NULL;
-	uint64_t offset = firstobj * DNODE_MIN_SIZE;
-	uint64_t dsobj = dmu_objset_id(os);
-
-	ret = dmu_buf_hold_by_dnode(DMU_META_DNODE(os), offset, FTAG, &handle,
-	    DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
-	if (ret != 0)
-		return (ret);
-
-	dmu_buf_will_change_crypt_params(handle, tx);
-
-	db = (dmu_buf_impl_t *)handle;
-	ASSERT3P(db->db_buf, !=, NULL);
-	ASSERT3U(dsobj, !=, 0);
-
-	/*
-	 * This technically violates the assumption the dmu code makes
-	 * that dnode blocks are only released in syncing context.
-	 */
-	(void) arc_release(db->db_buf, db);
-	arc_convert_to_raw(db->db_buf, dsobj, byteorder, DMU_OT_DNODE,
-	    salt, iv, mac);
-
-	dmu_buf_rele(handle, FTAG);
-
-	return (0);
-}
-
 void
 dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
     dmu_buf_t *handle, dmu_tx_t *tx)
@@ -2223,25 +2131,6 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
 }

 /*
- * Dirty an object and set the dirty record's raw flag. This is used
- * when writing raw data to an object that will not effect the
- * encryption parameters, specifically during raw receives.
- */
-int
-dmu_object_dirty_raw(objset_t *os, uint64_t object, dmu_tx_t *tx)
-{
-	dnode_t *dn;
-	int err;
-
-	err = dnode_hold(os, object, FTAG, &dn);
-	if (err)
-		return (err);
-	dmu_buf_will_change_crypt_params((dmu_buf_t *)dn->dn_dbuf, tx);
-	dnode_rele(dn, FTAG);
-	return (err);
-}
-
-/*
  * When the "redundant_metadata" property is set to "most", only indirect
  * blocks of this level and higher will have an additional ditto block.
  */
@@ -2623,9 +2512,7 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
 EXPORT_SYMBOL(dmu_prefetch);
 EXPORT_SYMBOL(dmu_free_range);
 EXPORT_SYMBOL(dmu_free_long_range);
-EXPORT_SYMBOL(dmu_free_long_range_raw);
 EXPORT_SYMBOL(dmu_free_long_object);
-EXPORT_SYMBOL(dmu_free_long_object_raw);
 EXPORT_SYMBOL(dmu_read);
 EXPORT_SYMBOL(dmu_read_by_dnode);
 EXPORT_SYMBOL(dmu_write);
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index a44f485b7..f1813015a 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1663,11 +1663,12 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 	dmu_write_policy(os, NULL, 0, 0, &zp);

 	/*
-	 * If we are either claiming the ZIL or doing a raw receive write out
-	 * the os_phys_buf raw. Neither of these actions will effect the MAC
-	 * at this point.
+	 * If we are either claiming the ZIL or doing a raw receive, write
+	 * out the os_phys_buf raw. Neither of these actions will affect the
+	 * MAC at this point.
 	 */
-	if (os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
+	if (os->os_raw_receive ||
+	    os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
 		ASSERT(os->os_encrypted);
 		os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
 		arc_convert_to_raw(os->os_phys_buf,
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index a007e96ba..6c535e541 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -2174,6 +2174,7 @@ struct receive_writer_arg {
 	uint64_t bytes_read; /* bytes read when current record created */

 	/* Encryption parameters for the last received DRR_OBJECT_RANGE */
+	boolean_t or_crypt_params_present;
 	uint64_t or_firstobj;
 	uint64_t or_numslots;
 	uint8_t or_salt[ZIO_DATA_SALT_LEN];
@@ -2507,23 +2508,16 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		if (rwa->raw && nblkptr != drro->drr_nblkptr)
 			return (SET_ERROR(EINVAL));

-		if (rwa->raw &&
-		    (drro->drr_blksz != doi.doi_data_block_size ||
+		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr ||
-		    indblksz != doi.doi_metadata_block_size ||
-		    drro->drr_nlevels < doi.doi_indirection ||
-		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT)) {
-			err = dmu_free_long_range_raw(rwa->os,
+		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
+		    (rwa->raw &&
+		    (indblksz != doi.doi_metadata_block_size ||
+		    drro->drr_nlevels < doi.doi_indirection))) {
+			err = dmu_free_long_range(rwa->os,
 			    drro->drr_object, 0, DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
-		} else if (drro->drr_blksz != doi.doi_data_block_size ||
-		    nblkptr < doi.doi_nblkptr ||
-		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
-			err = dmu_free_long_range(rwa->os, drro->drr_object,
-			    0, DMU_OBJECT_END);
-			if (err != 0)
-				return (SET_ERROR(EINVAL));
 		}

 		/*
@@ -2538,13 +2532,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		 */
 		if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
 		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
-			if (rwa->raw) {
-				err = dmu_free_long_object_raw(rwa->os,
-				    drro->drr_object);
-			} else {
-				err = dmu_free_long_object(rwa->os,
-				    drro->drr_object);
-			}
+			err = dmu_free_long_object(rwa->os, drro->drr_object);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));

@@ -2586,10 +2574,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		else if (err != 0)
 			return (err);

-		if (rwa->raw)
-			err = dmu_free_long_object_raw(rwa->os, slot);
-		else
-			err = dmu_free_long_object(rwa->os, slot);
+		err = dmu_free_long_object(rwa->os, slot);
 		if (err != 0)
 			return (err);

@@ -2630,26 +2615,38 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		return (SET_ERROR(EINVAL));
 	}

-	if (rwa->raw) {
+	if (rwa->or_crypt_params_present) {
 		/*
-		 * Convert the buffer associated with this range of dnodes
-		 * to a raw buffer. This ensures that it will be written out
-		 * as a raw buffer when we fill in the dnode object. Since we
-		 * are committing this tx now, it is possible for the dnode
-		 * block to end up on-disk with the incorrect MAC. Despite
-		 * this, the dataset is marked as inconsistent so no other
-		 * code paths (apart from scrubs) will attempt to read this
-		 * data. Scrubs will not be effected by this either since
-		 * scrubs only read raw data and do not attempt to check
-		 * the MAC.
+		 * Set the crypt params for the buffer associated with this
+		 * range of dnodes. This causes the blkptr_t to have the
+		 * same crypt params (byteorder, salt, iv, mac) as on the
+		 * sending side.
+		 *
+		 * Since we are committing this tx now, it is possible for
+		 * the dnode block to end up on-disk with the incorrect MAC,
+		 * if subsequent objects in this block are received in a
+		 * different txg. However, since the dataset is marked as
+		 * inconsistent, no code paths will do a non-raw read (or
+		 * decrypt the block / verify the MAC). The receive code and
+		 * scrub code can safely do raw reads and verify the
+		 * checksum. They don't need to verify the MAC.
 		 */
-		err = dmu_convert_mdn_block_to_raw(rwa->os, rwa->or_firstobj,
-		    rwa->or_byteorder, rwa->or_salt, rwa->or_iv, rwa->or_mac,
-		    tx);
+		dmu_buf_t *db = NULL;
+		uint64_t offset = rwa->or_firstobj * DNODE_MIN_SIZE;
+
+		err = dmu_buf_hold_by_dnode(DMU_META_DNODE(rwa->os),
+		    offset, FTAG, &db, DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
 		if (err != 0) {
 			dmu_tx_commit(tx);
 			return (SET_ERROR(EINVAL));
 		}
+
+		dmu_buf_set_crypt_params(db, rwa->or_byteorder,
+		    rwa->or_salt, rwa->or_iv, rwa->or_mac, tx);
+
+		dmu_buf_rele(db, FTAG);
+
+		rwa->or_crypt_params_present = B_FALSE;
 	}

 	dmu_object_set_checksum(rwa->os, drro->drr_object,
@@ -2726,10 +2723,7 @@ receive_freeobjects(struct receive_writer_arg *rwa,
 		else if (err != 0)
 			return (err);

-		if (rwa->raw)
-			err = dmu_free_long_object_raw(rwa->os, obj);
-		else
-			err = dmu_free_long_object(rwa->os, obj);
+		err = dmu_free_long_object(rwa->os, obj);
 		if (err != 0)
 			return (err);

@@ -2781,9 +2775,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
 		return (err);
 	}

-	if (rwa->raw)
-		VERIFY0(dmu_object_dirty_raw(rwa->os, drrw->drr_object, tx));
-
 	if (rwa->byteswap && !arc_is_encrypted(abuf) &&
 	    arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
 		dmu_object_byteswap_t byteswap =
@@ -2870,7 +2861,6 @@ receive_write_byref(struct receive_writer_arg *rwa,
 	}

 	if (rwa->raw) {
-		VERIFY0(dmu_object_dirty_raw(rwa->os, drrwbr->drr_object, tx));
 		dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
 		    drrwbr->drr_offset, dbp, tx);
 	} else {
@@ -2971,13 +2961,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
 		dmu_tx_abort(tx);
 		return (err);
 	}
-
-	if (rwa->raw) {
-		VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx));
-		dmu_buf_will_change_crypt_params(db_spill, tx);
-	} else {
-		dmu_buf_will_dirty(db_spill, tx);
-	}
+	dmu_buf_will_dirty(db_spill, tx);

 	if (db_spill->db_size < drrs->drr_length)
 		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
@@ -3016,13 +3000,8 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
 	if (drrf->drr_object > rwa->max_object)
 		rwa->max_object = drrf->drr_object;

-	if (rwa->raw) {
-		err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
-		    drrf->drr_offset, drrf->drr_length);
-	} else {
-		err = dmu_free_long_range(rwa->os, drrf->drr_object,
-		    drrf->drr_offset, drrf->drr_length);
-	}
+	err = dmu_free_long_range(rwa->os, drrf->drr_object,
+	    drrf->drr_offset, drrf->drr_length);

 	return (err);
 }
@@ -3062,9 +3041,10 @@ receive_object_range(struct receive_writer_arg *rwa,

 	/*
 	 * The DRR_OBJECT_RANGE handling must be deferred to receive_object()
-	 * so that the encryption parameters are set with each object that is
-	 * written into that block.
+	 * so that the block of dnodes is not written out when it's empty,
+	 * and converted to a HOLE BP.
 	 */
+	rwa->or_crypt_params_present = B_TRUE;
 	rwa->or_firstobj = drror->drr_firstobj;
 	rwa->or_numslots = drror->drr_numslots;
 	bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
@@ -3090,6 +3070,7 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
 	 * after we stopped receiving the dataset.
 	 */
 	txg_wait_synced(ds->ds_dir->dd_pool, 0);
+	ds->ds_objset->os_raw_receive = B_FALSE;

 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 	if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
@@ -3841,6 +3822,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	rwa->byteswap = drc->drc_byteswap;
 	rwa->resumable = drc->drc_resumable;
 	rwa->raw = drc->drc_raw;
+	rwa->os->os_raw_receive = drc->drc_raw;

 	(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
 	    TS_RUN, minclsyspri);
@@ -3903,12 +3885,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 		int next_err = 0;

 		while (next_err == 0) {
-			if (drc->drc_raw) {
-				free_err = dmu_free_long_object_raw(rwa->os,
-				    obj);
-			} else {
-				free_err = dmu_free_long_object(rwa->os, obj);
-			}
+			free_err = dmu_free_long_object(rwa->os, obj);
 			if (free_err != 0 && free_err != ENOENT)
 				break;
@@ -4037,6 +4014,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
 	spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
 	    tx, "snap=%s", drc->drc_tosnap);
+	drc->drc_ds->ds_objset->os_raw_receive = B_FALSE;

 	if (!drc->drc_newfs) {
 		dsl_dataset_t *origin_head;
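Taken together, the commit replaces the per-dirty-record dr_raw flag with an explicit handoff: receive_object_range() stashes the crypt params from the send stream in the receive_writer_arg, receive_object() attaches them to the dnode block's dirty record via dmu_buf_set_crypt_params() in open context, and dbuf_prepare_encrypted_dnode_leaf() consumes dr_has_raw_params in syncing context, converting the ARC buf to raw just before the block of dnodes is written. The standalone sketch below models that two-phase handoff; it is illustrative only, using simplified stand-in types and lengths rather than ZFS code or the ZFS API.

/*
 * Standalone model of the two-phase flow added by this commit (not ZFS
 * code; all names and sizes here are simplified stand-ins).  Open context
 * only records the crypt params on the dirty record; syncing context
 * applies them to the buffer just before the block of dnodes is written.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SALT_LEN	8
#define IV_LEN		12
#define MAC_LEN		16

/* Stand-in for the leaf state of dbuf_dirty_record_t. */
typedef struct dirty_leaf {
	bool	dr_has_raw_params;
	bool	dr_byteorder;
	uint8_t	dr_salt[SALT_LEN];
	uint8_t	dr_iv[IV_LEN];
	uint8_t	dr_mac[MAC_LEN];
} dirty_leaf_t;

/* Open context: analogous to dmu_buf_set_crypt_params(). */
static void
set_crypt_params(dirty_leaf_t *dl, bool byteorder, const uint8_t *salt,
    const uint8_t *iv, const uint8_t *mac)
{
	dl->dr_has_raw_params = true;
	dl->dr_byteorder = byteorder;
	memcpy(dl->dr_salt, salt, SALT_LEN);
	memcpy(dl->dr_iv, iv, IV_LEN);
	memcpy(dl->dr_mac, mac, MAC_LEN);
}

/* Syncing context: analogous to dbuf_prepare_encrypted_dnode_leaf(). */
static void
prepare_leaf(const dirty_leaf_t *dl)
{
	if (dl->dr_has_raw_params) {
		/* Raw receive: stamp the stored params onto the buffer. */
		printf("convert to raw: byteorder=%d\n", dl->dr_byteorder);
	} else {
		/* Normal path: make sure the block is decrypted. */
		printf("decrypt before write\n");
	}
}

int
main(void)
{
	uint8_t salt[SALT_LEN] = {0}, iv[IV_LEN] = {0}, mac[MAC_LEN] = {0};
	dirty_leaf_t dl = {0};

	set_crypt_params(&dl, true, salt, iv, mac);	/* open context */
	prepare_leaf(&dl);				/* syncing context */
	return (0);
}

The design point is that open context now only records parameters on the dirty record, while the buffer itself is released and converted in syncing context. The removed dmu_convert_mdn_block_to_raw() instead called arc_release() on a dnode block in open context, which its own comment flagged as violating the DMU's assumption that dnode blocks are only released in syncing context.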