From ae76f45cda0e0857f99e53959cf71c7a5d66bd8b Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Wed, 8 Nov 2017 14:12:59 -0500 Subject: Encryption Stability and On-Disk Format Fixes The on-disk format for encrypted datasets protects not only the encrypted and authenticated blocks themselves, but also the order and interpretation of these blocks. In order to make this work while maintaining the ability to do raw sends, each indirect bp maintains a secure checksum of all the MACs in the block below it along with a few other fields that determine how the data is interpreted. Unfortunately, the current on-disk format erroneously includes some fields which are not portable and thus cannot support raw sends. It is not possible to easily work around this issue due to a separate and much smaller bug which causes indirect blocks for encrypted dnodes to not be compressed, which conflicts with the previous bug. In addition, the current code generates incompatible on-disk formats on big-endian and little-endian systems due to an issue with how block pointers are authenticated. Finally, raw send streams do not currently include dn_maxblkid when sending either the metadnode or normal dnodes, even though it is needed in order to ensure that we are correctly maintaining the portable objset MAC. This patch zeroes out the offending fields when computing the bp MAC and ensures that these MACs are always calculated in little-endian order (regardless of the host system's byte order). This patch also registers an errata for the old on-disk format, which we detect by adding a "version" field to newly created DSL Crypto Keys. We allow datasets without a version (version 0) to be mounted read-only so that they can easily be migrated. We also now include dn_maxblkid in raw send streams to ensure the MAC can be maintained correctly. This patch also contains minor bug fixes and cleanups. 
Reviewed-by: Jorgen Lundman Reviewed-by: Brian Behlendorf Reviewed by: Matthew Ahrens Signed-off-by: Tom Caputi Closes #6845 Closes #6864 Closes #7052 --- module/zfs/arc.c | 15 ++- module/zfs/dmu.c | 22 +++- module/zfs/dmu_objset.c | 10 ++ module/zfs/dmu_send.c | 32 ++++- module/zfs/dnode.c | 6 + module/zfs/dnode_sync.c | 12 ++ module/zfs/dsl_crypt.c | 91 +++++++++++-- module/zfs/dsl_dir.c | 7 + module/zfs/zfs_vfsops.c | 12 +- module/zfs/zio.c | 50 ++++--- module/zfs/zio_crypt.c | 343 +++++++++++++++++++++++++++++++----------------- module/zfs/zvol.c | 7 +- 12 files changed, 449 insertions(+), 158 deletions(-) (limited to 'module/zfs') diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 45b0abe7f..2f3fe9771 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1229,6 +1229,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) arc_buf_hdr_t *hdr = vbuf; bzero(hdr, HDR_FULL_SIZE); + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); refcount_create(&hdr->b_l1hdr.b_refcnt); mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -3246,9 +3247,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata); IMPLY(alloc_rdata, HDR_PROTECTED(hdr)); - if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - if (alloc_rdata) { size = HDR_GET_PSIZE(hdr); ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); @@ -6751,6 +6749,17 @@ arc_write_ready(zio_t *zio) ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); ASSERT(HDR_PROTECTED(hdr)); + if (BP_SHOULD_BYTESWAP(bp)) { + if (BP_GET_LEVEL(bp) > 0) { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64; + } else { + hdr->b_l1hdr.b_byteswap = + DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); + } + } else { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + } + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; zio_crypt_decode_params_bp(bp, 
hdr->b_crypt_hdr.b_salt, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 56740ae37..20ed3ebff 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2029,6 +2029,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, return (err); } +int +dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(dn, maxblkid, tx, B_FALSE); + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + return (0); +} + void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx) @@ -2214,8 +2231,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) dedup = B_FALSE; } - if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET) + if (level <= 0 && + (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) { compress = ZIO_COMPRESS_EMPTY; + } } zp->zp_compress = compress; @@ -2488,6 +2507,7 @@ EXPORT_SYMBOL(dmu_object_size_from_db); EXPORT_SYMBOL(dmu_object_dnsize_from_db); EXPORT_SYMBOL(dmu_object_set_nlevels); EXPORT_SYMBOL(dmu_object_set_blocksize); +EXPORT_SYMBOL(dmu_object_set_maxblkid); EXPORT_SYMBOL(dmu_object_set_checksum); EXPORT_SYMBOL(dmu_object_set_compress); EXPORT_SYMBOL(dmu_write_policy); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index e596b70e9..2b069b6ce 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -663,6 +663,9 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, return (SET_ERROR(EINVAL)); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { return (SET_ERROR(EROFS)); + } else if (!readonly && decrypt && + dsl_dir_incompatible_encryption_version(ds->ds_dir)) { + return (SET_ERROR(EROFS)); } /* if we are decrypting, we can now check MACs in os->os_phys_buf */ @@ -2635,6 +2638,13 @@ dmu_objset_find(char *name, int func(const char *, 
void *), void *arg, return (error); } +boolean_t +dmu_objset_incompatible_encryption_version(objset_t *os) +{ + return (dsl_dir_incompatible_encryption_version( + os->os_dsl_dataset->ds_dir)); +} + void dmu_objset_set_user(objset_t *os, void *user_ptr) { diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 09d79742b..63a4f98bf 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -570,6 +570,7 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, drro->drr_flags |= DRR_RAW_BYTESWAP; /* needed for reconstructing dnp on recv side */ + drro->drr_maxblkid = dnp->dn_maxblkid; drro->drr_indblkshift = dnp->dn_indblkshift; drro->drr_nlevels = dnp->dn_nlevels; drro->drr_nblkptr = dnp->dn_nblkptr; @@ -2294,6 +2295,7 @@ byteswap_record(dmu_replay_record_t *drr) DO32(drr_object.drr_bonuslen); DO32(drr_object.drr_raw_bonuslen); DO64(drr_object.drr_toguid); + DO64(drr_object.drr_maxblkid); break; case DRR_FREEOBJECTS: DO64(drr_freeobjects.drr_firstobj); @@ -2478,11 +2480,17 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, if (rwa->raw && nblkptr != drro->drr_nblkptr) return (SET_ERROR(EINVAL)); - if (drro->drr_blksz != doi.doi_data_block_size || + if (rwa->raw && + (drro->drr_blksz != doi.doi_data_block_size || nblkptr < doi.doi_nblkptr || - (rwa->raw && - (indblksz != doi.doi_metadata_block_size || - drro->drr_nlevels < doi.doi_indirection))) { + indblksz != doi.doi_metadata_block_size || + drro->drr_nlevels < doi.doi_indirection)) { + err = dmu_free_long_range_raw(rwa->os, + drro->drr_object, 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); + } else if (drro->drr_blksz != doi.doi_data_block_size || + nblkptr < doi.doi_nblkptr) { err = dmu_free_long_range(rwa->os, drro->drr_object, 0, DMU_OBJECT_END); if (err != 0) @@ -2538,6 +2546,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_blksz, drro->drr_indblkshift, tx)); VERIFY0(dmu_object_set_nlevels(rwa->os, 
drro->drr_object, drro->drr_nlevels, tx)); + VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object, + drro->drr_maxblkid, tx)); } if (data != NULL) { @@ -2839,9 +2849,12 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, dmu_tx_abort(tx); return (err); } - dmu_buf_will_dirty(db_spill, tx); - if (rwa->raw) + if (rwa->raw) { VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx)); + dmu_buf_will_change_crypt_params(db_spill, tx); + } else { + dmu_buf_will_dirty(db_spill, tx); + } if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, @@ -3772,7 +3785,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, int next_err = 0; while (next_err == 0) { - free_err = dmu_free_long_object(rwa->os, obj); + if (drc->drc_raw) { + free_err = dmu_free_long_object_raw(rwa->os, + obj); + } else { + free_err = dmu_free_long_object(rwa->os, obj); + } if (free_err != 0 && free_err != ENOENT) break; diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index c1fbf3c3b..544e736d8 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -134,6 +134,7 @@ dnode_cons(void *arg, void *unused, int kmflag) bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); + bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_link_init(&dn->dn_dirty_link[i]); @@ -193,6 +194,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_bonuslen[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); } ASSERT0(dn->dn_allocated_txg); @@ -602,6 +604,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); ASSERT(!list_link_active(&dn->dn_dirty_link[i])); 
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); @@ -767,6 +770,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) sizeof (odn->dn_next_bonuslen)); bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], sizeof (odn->dn_next_blksz)); + bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0], + sizeof (odn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); @@ -1751,6 +1756,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) goto out; dn->dn_maxblkid = blkid; + dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = blkid; /* * Compute the number of levels necessary to support the new maxblkid. diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 2ec729a6f..09437993a 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -519,6 +519,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; dn->dn_next_indblkshift[txgoff] = 0; dn->dn_next_blksz[txgoff] = 0; + dn->dn_next_maxblkid[txgoff] = 0; /* ASSERT(blkptrs are zero); */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); @@ -718,6 +719,17 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; } + /* + * This must be done after dnode_sync_free_range() + * and dnode_increase_indirection(). 
+ */ + if (dn->dn_next_maxblkid[txgoff]) { + mutex_enter(&dn->dn_mtx); + dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff]; + dn->dn_next_maxblkid[txgoff] = 0; + mutex_exit(&dn->dn_mtx); + } + if (dn->dn_next_nblkptr[txgoff]) { /* this should only happen on a realloc */ ASSERT(dn->dn_allocated_txg == tx->tx_txg); diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 59562d194..cb13d2cdc 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -347,7 +347,7 @@ spa_keystore_fini(spa_keystore_t *sk) rw_destroy(&sk->sk_dk_lock); } -int +static int dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) { if (dd->dd_crypto_obj == 0) @@ -357,6 +357,34 @@ dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj)); } +int +dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version) +{ + *version = 0; + + if (dd->dd_crypto_obj == 0) + return (SET_ERROR(ENOENT)); + + /* version 0 is implied by ENOENT */ + (void) zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_VERSION, 8, 1, version); + + return (0); +} + +boolean_t +dsl_dir_incompatible_encryption_version(dsl_dir_t *dd) +{ + int ret; + uint64_t version = 0; + + ret = dsl_dir_get_encryption_version(dd, &version); + if (ret != 0) + return (B_FALSE); + + return (version != ZIO_CRYPT_KEY_CURRENT_VERSION); +} + static int spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj, void *tag, dsl_wrapping_key_t **wkey_out) @@ -514,7 +542,7 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out) { int ret; - uint64_t crypt = 0, guid = 0; + uint64_t crypt = 0, guid = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -556,12 +584,15 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, if (ret != 0) goto error; + /* the initial on-disk format for encryption did 
not have a version */ + (void) zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + /* * Unwrap the keys. If there is an error return EACCES to indicate * an authentication failure. */ - ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, guid, raw_keydata, - raw_hmac_keydata, iv, mac, &dck->dck_key); + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid, + raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); if (ret != 0) { ret = SET_ERROR(EACCES); goto error; @@ -1883,7 +1914,7 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) dsl_dataset_t *ds = NULL; uint8_t *buf = NULL; uint_t len; - uint64_t intval, guid, nlevels, blksz, ibs, nblkptr; + uint64_t intval, guid, nlevels, blksz, ibs, nblkptr, maxblkid, version; boolean_t is_passphrase = B_FALSE; ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds); @@ -1952,6 +1983,17 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) goto error; } + /* + * We don't support receiving old on-disk formats. The version 0 + * implementation protected several fields in an objset that were + * not always portable during a raw receive. As a result, we call + * the old version an on-disk errata #3. 
+ */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + ret = SET_ERROR(ENOTSUP); + goto error; + } ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len); if (ret != 0 || len != ZIO_OBJSET_MAC_LEN) { @@ -2028,6 +2070,12 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) goto error; } + ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + ret = dmu_objset_from_ds(ds, &os); if (ret != 0) goto error; @@ -2078,8 +2126,9 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) uint8_t *keydata, *hmac_keydata, *iv, *mac, *portable_mac; uint_t len; uint64_t rddobj, one = 1; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; uint64_t crypt, guid, keyformat, iters, salt; - uint64_t compress, checksum, nlevels, blksz, ibs; + uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid; char *keylocation = "prompt"; VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); @@ -2108,6 +2157,7 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels"); blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz"); ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift"); + maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid"); /* if we haven't created an objset for the ds yet, do that now */ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); @@ -2132,6 +2182,11 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) /* set metadnode compression and checksum */ mdn->dn_compress = compress; mdn->dn_checksum = checksum; + + rw_enter(&mdn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(mdn, maxblkid, tx, B_FALSE); + rw_exit(&mdn->dn_struct_rwlock); + dsl_dataset_dirty(ds, tx); /* if this is a new dataset setup the DSL Crypto Key. 
*/ @@ -2146,6 +2201,9 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION, + sizeof (uint64_t), 1, &version, tx)); dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE; @@ -2209,7 +2267,8 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) dsl_dir_t *rdd = NULL; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; - uint64_t crypt = 0, guid = 0, format = 0, iters = 0, salt = 0; + uint64_t crypt = 0, guid = 0, format = 0; + uint64_t iters = 0, salt = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -2254,6 +2313,17 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) if (ret != 0) goto error; + /* + * We don't support raw sends of legacy on-disk formats. See the + * comment in dsl_crypto_recv_key_check() for details. + */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + ret = SET_ERROR(ENOTSUP); + goto error; + } + /* * Lookup wrapping key properties. 
An early version of the code did * not correctly add these values to the wrapping key or the DSL @@ -2293,6 +2363,7 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, guid); + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, raw_keydata, MASTER_KEY_MAX_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, @@ -2312,6 +2383,7 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); + fnvlist_add_uint64(nvl, "mdn_maxblkid", mdn->dn_maxblkid); *nvl_out = nvl; return (0); @@ -2332,7 +2404,8 @@ dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) { dsl_crypto_key_t dck; - uint64_t one = 1; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; + uint64_t one = 1ULL; ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); @@ -2349,6 +2422,8 @@ dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dsl_crypto_key_sync(&dck, tx); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, + DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); zio_crypt_key_destroy(&dck.dck_key); bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 68791fe74..96e8dd62e 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -187,6 +188,12 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, VERIFY0(zap_lookup(dp->dp_meta_objset, ddobj, DD_FIELD_CRYPTO_KEY_OBJ, 
sizeof (uint64_t), 1, &dd->dd_crypto_obj)); + + /* check for on-disk format errata */ + if (dsl_dir_incompatible_encryption_version(dd)) { + dp->dp_spa->spa_errata = + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + } } mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 7286773d9..18b4ec3d6 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1100,6 +1100,15 @@ static int zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) { int error; + boolean_t readonly = zfs_is_readonly(zfsvfs); + + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. + */ + if (!readonly && + dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) + return (SET_ERROR(EROFS)); error = zfs_register_callbacks(zfsvfs->z_vfs); if (error) @@ -1113,13 +1122,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) * operations out since we closed the ZIL. */ if (mounting) { - boolean_t readonly; - /* * During replay we remove the read only flag to * allow replays to succeed. 
*/ - readonly = zfs_is_readonly(zfsvfs); if (readonly != 0) readonly_changed_cb(zfsvfs, B_FALSE); else diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 263c77e4a..37259ad8e 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -402,6 +402,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) int ret; void *tmp; blkptr_t *bp = zio->io_bp; + spa_t *spa = zio->io_spa; + uint64_t dsobj = zio->io_bookmark.zb_objset; uint64_t lsize = BP_GET_LSIZE(bp); dmu_object_type_t ot = BP_GET_TYPE(bp); uint8_t salt[ZIO_DATA_SALT_LEN]; @@ -460,13 +462,12 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) */ if (BP_IS_AUTHENTICATED(bp)) { if (ot == DMU_OT_OBJSET) { - ret = spa_do_crypt_objset_mac_abd(B_FALSE, zio->io_spa, - zio->io_bookmark.zb_objset, zio->io_abd, size, - BP_SHOULD_BYTESWAP(bp)); + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, + dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp)); } else { zio_crypt_decode_mac_bp(bp, mac); - ret = spa_do_crypt_mac_abd(B_FALSE, zio->io_spa, - zio->io_bookmark.zb_objset, zio->io_abd, size, mac); + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, + zio->io_abd, size, mac); } abd_copy(data, zio->io_abd, size); @@ -486,9 +487,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) zio_crypt_decode_mac_bp(bp, mac); } - ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, zio->io_bookmark.zb_objset, - bp, bp->blk_birth, size, data, zio->io_abd, iv, mac, salt, - &no_crypt); + ret = spa_do_crypt_abd(B_FALSE, spa, dsobj, bp, bp->blk_birth, + size, data, zio->io_abd, iv, mac, salt, &no_crypt); if (no_crypt) abd_copy(data, zio->io_abd, size); @@ -509,7 +509,7 @@ error: ret = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, - zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0); + spa, NULL, &zio->io_bookmark, zio, 0, 0); } } else { zio->io_error = ret; @@ -3729,6 +3729,7 @@ zio_encrypt(zio_t *zio) spa_t *spa = zio->io_spa; blkptr_t *bp = zio->io_bp; uint64_t psize = 
BP_GET_PSIZE(bp); + uint64_t dsobj = zio->io_bookmark.zb_objset; dmu_object_type_t ot = BP_GET_TYPE(bp); void *enc_buf = NULL; abd_t *eabd = NULL; @@ -3752,10 +3753,27 @@ zio_encrypt(zio_t *zio) /* if we are doing raw encryption set the provided encryption params */ if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + ASSERT0(BP_GET_LEVEL(bp)); BP_SET_CRYPT(bp, B_TRUE); BP_SET_BYTEORDER(bp, zp->zp_byteorder); if (ot != DMU_OT_OBJSET) zio_crypt_encode_mac_bp(bp, zp->zp_mac); + + /* dnode blocks must be written out in the provided byteorder */ + if (zp->zp_byteorder != ZFS_HOST_BYTEORDER && + ot == DMU_OT_DNODE) { + void *bswap_buf = zio_buf_alloc(psize); + abd_t *babd = abd_get_from_buf(bswap_buf, psize); + + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + abd_copy_to_buf(bswap_buf, zio->io_abd, psize); + dmu_ot_byteswap[DMU_OT_BYTESWAP(ot)].ob_func(bswap_buf, + psize); + + abd_take_ownership_of_buf(babd, B_TRUE); + zio_push_transform(zio, babd, psize, psize, NULL); + } + if (DMU_OT_IS_ENCRYPTED(ot)) zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); return (ZIO_PIPELINE_CONTINUE); @@ -3779,17 +3797,16 @@ zio_encrypt(zio_t *zio) ASSERT0(DMU_OT_IS_ENCRYPTED(ot)); ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); BP_SET_CRYPT(bp, B_TRUE); - VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, - zio->io_bookmark.zb_objset, zio->io_abd, psize, - BP_SHOULD_BYTESWAP(bp))); + VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, BP_SHOULD_BYTESWAP(bp))); return (ZIO_PIPELINE_CONTINUE); } /* unencrypted object types are only authenticated with a MAC */ if (!DMU_OT_IS_ENCRYPTED(ot)) { BP_SET_CRYPT(bp, B_TRUE); - VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, - zio->io_bookmark.zb_objset, zio->io_abd, psize, mac)); + VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, mac)); zio_crypt_encode_mac_bp(bp, mac); return (ZIO_PIPELINE_CONTINUE); } @@ -3823,8 +3840,8 @@ zio_encrypt(zio_t *zio) } /* Perform the encryption. 
This should not fail */ - VERIFY0(spa_do_crypt_abd(B_TRUE, spa, zio->io_bookmark.zb_objset, bp, - zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt)); + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, dsobj, bp, zio->io_txg, + psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt)); /* encode encryption metadata into the bp */ if (ot == DMU_OT_INTENT_LOG) { @@ -4154,7 +4171,6 @@ zio_done(zio_t *zio) if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { - ASSERT(!BP_SHOULD_BYTESWAP(zio->io_bp)); ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); ASSERT(BP_COUNT_GANG(zio->io_bp) == 0 || diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index 5ffa1e8b0..823e6b8d6 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -187,6 +187,12 @@ (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; +typedef struct blkptr_auth_buf { + uint64_t bab_prop; /* blk_prop - portable mask */ + uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */ + uint64_t bab_pad; /* reserved for future use */ +} blkptr_auth_buf_t; + zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { {"", ZC_TYPE_NONE, 0, "inherit"}, {"", ZC_TYPE_NONE, 0, "on"}, @@ -275,6 +281,7 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; + key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); @@ -472,10 +479,10 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, { int ret; uio_t puio, cuio; + uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint64_t crypt = key->zk_crypt; - uint64_t le_guid = LE_64(key->zk_guid); - uint_t enc_len, keydata_len; + uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, 
CRYPTO_KEY_RAW); @@ -500,6 +507,22 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + /* + * Although we don't support writing to the old format, we do + * support rewrapping the key so that the user can move and + * quarantine datasets on the old format. + */ + if (key->zk_version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(key->zk_guid); + } else { + ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(key->zk_guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(key->zk_version); + } + enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_iovcnt = 2; @@ -510,7 +533,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, /* encrypt the keys and store the resulting ciphertext and mac */ ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t)); + &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -521,16 +544,16 @@ error: } int -zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, - uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, - zio_crypt_key_t *key) +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key) { int ret; crypto_mechanism_t mech; uio_t puio, cuio; + uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; - uint_t enc_len, keydata_len; - uint64_t le_guid = LE_64(guid); + uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); @@ -550,6 +573,17 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = 
WRAPPING_MAC_LEN; + if (version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(guid); + } else { + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(version); + } + enc_len = keydata_len + SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_segflg = UIO_SYSSPACE; @@ -560,7 +594,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, /* decrypt the keys and store the result in the output buffers */ ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t)); + &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -602,6 +636,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; + key->zk_version = version; key->zk_guid = guid; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); @@ -700,19 +735,32 @@ zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, * byte strings, which normally means that these strings would not need to deal * with byteswapping at all. However, both blkptr_t and zil_header_t may be * byteswapped by lower layers and so we must "undo" that byteswap here upon - * decoding. + * decoding and encoding in a non-native byteorder. These functions require + * that the byteorder bit is correct before being called. 
*/ void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) { + uint64_t val64; uint32_t val32; ASSERT(BP_IS_ENCRYPTED(bp)); - bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); - bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); - bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); - BP_SET_IV2(bp, val32); + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); + bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, val32); + } else { + bcopy(salt, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); + + bcopy(iv, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); + + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, BSWAP_32(val32)); + } } void @@ -751,12 +799,22 @@ zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) { + uint64_t val64; + ASSERT(BP_USES_CRYPT(bp)); ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); - bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); - bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], - sizeof (uint64_t)); + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], + sizeof (uint64_t)); + } else { + bcopy(mac, &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[2] = BSWAP_64(val64); + + bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[3] = BSWAP_64(val64); + } } void @@ -841,55 +899,107 @@ zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) abd_return_buf(src_abd, src, datalen); } +/* + * This function decides what fields from blk_prop are included in + * the on-disk various MAC algorithms. 
+ */ static void -zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp) +zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) { - BP_SET_DEDUP(bp, 0); - BP_SET_CHECKSUM(bp, 0); + /* + * Version 0 did not properly zero out all non-portable fields + * as it should have done. We maintain this code so that we can + * do read-only imports of pools on this version. + */ + if (version == 0) { + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + return; + } + + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + + /* + * The hole_birth feature might set these fields even if this bp + * is a hole. We zero them out here to guarantee that raw sends + * will function with or without the feature. + */ + if (BP_IS_HOLE(bp)) { + bp->blk_prop = 0ULL; + return; + } /* - * psize cannot be set to zero or it will trigger asserts, but the - * value doesn't really matter as long as it is constant. + * At L0 we want to verify these fields to ensure that data blocks + * cannot be reinterpreted. For instance, we do not want an attacker + * to trick us into returning raw lz4 compressed data to the user + * by modifying the compression bits. At higher levels, we cannot + * enforce this policy since raw sends do not convey any information + * about indirect blocks, so these values might be different on the + * receive side. Fortunately, this does not open any new attack + * vectors, since any alterations that can be made to a higher level + * bp must still verify the correct order of the layer below it. */ - BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + if (BP_GET_LEVEL(bp) != 0) { + BP_SET_BYTEORDER(bp, 0); + BP_SET_COMPRESS(bp, 0); + + /* + * psize cannot be set to zero or it will trigger + * asserts, but the value doesn't really matter as + * long as it is constant. 
+ */ + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + } + + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); } -static int -zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, - blkptr_t *bp) +static void +zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, + blkptr_auth_buf_t *bab, uint_t *bab_len) { - int ret; - crypto_data_t cd; - uint64_t le_blkprop; blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - cd.cd_format = CRYPTO_DATA_RAW; - cd.cd_offset = 0; if (should_bswap) byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - le_blkprop = (ZFS_HOST_BYTEORDER) ? - tmpbp.blk_prop : BSWAP_64(tmpbp.blk_prop); + zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); - cd.cd_length = sizeof (uint64_t); - cd.cd_raw.iov_base = (char *)&le_blkprop; - cd.cd_raw.iov_len = cd.cd_length; + /* + * We always MAC blk_prop in LE to ensure portability. This + * must be done after decoding the mac, since the endianness + * will get zero'd out here. 
+ */ + zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); + bab->bab_prop = LE_64(tmpbp.blk_prop); + bab->bab_pad = 0ULL; + + /* version 0 did not include the padding */ + *bab_len = sizeof (blkptr_auth_buf_t); + if (version == 0) + *bab_len -= sizeof (uint64_t); +} - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } +static int +zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + int ret; + uint_t bab_len; + blkptr_auth_buf_t bab; + crypto_data_t cd; - zio_crypt_decode_mac_bp(&tmpbp, mac); - cd.cd_length = ZIO_DATA_MAC_LEN; - cd.cd_raw.iov_base = (char *)mac; + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + cd.cd_length = bab_len; + cd.cd_raw.iov_base = (char *)&bab; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); @@ -905,60 +1015,32 @@ error: } static void -zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, boolean_t should_bswap, - blkptr_t *bp) +zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) { - blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + uint_t bab_len; + blkptr_auth_buf_t bab; - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - zio_crypt_decode_mac_bp(&tmpbp, mac); - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - SHA2Update(ctx, &tmpbp.blk_prop, sizeof (uint64_t)); - SHA2Update(ctx, mac, ZIO_DATA_MAC_LEN); + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + SHA2Update(ctx, &bab, bab_len); } static void -zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, +zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t 
version, boolean_t should_bswap, blkptr_t *bp) { - uint_t crypt_len; - blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - zio_crypt_decode_mac_bp(&tmpbp, mac); - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + uint_t bab_len; + blkptr_auth_buf_t bab; - crypt_len = sizeof (uint64_t); - bcopy(&tmpbp.blk_prop, *aadp, crypt_len); - *aadp += crypt_len; - *aad_len += crypt_len; - - crypt_len = ZIO_DATA_MAC_LEN; - bcopy(mac, *aadp, crypt_len); - *aadp += crypt_len; - *aad_len += crypt_len; + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + bcopy(&bab, *aadp, bab_len); + *aadp += bab_len; + *aad_len += bab_len; } static int -zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, - dnode_phys_t *dnp) +zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, dnode_phys_t *dnp) { int ret, i; dnode_phys_t *adnp; @@ -992,14 +1074,14 @@ zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, } for (i = 0; i < dnp->dn_nblkptr; i++) { - ret = zio_crypt_bp_do_hmac_updates(ctx, + ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, &dnp->dn_blkptr[i]); if (ret != 0) goto error; } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - ret = zio_crypt_bp_do_hmac_updates(ctx, + ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, DN_SPILL_BLKPTR(dnp)); if (ret != 0) goto error; @@ -1095,8 +1177,8 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, } /* add in fields from the metadnode */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_meta_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_meta_dnode); if (ret) goto error; @@ -1149,13 
+1231,13 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, } /* add in fields from the user accounting dnodes */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_userused_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_userused_dnode); if (ret) goto error; - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_groupused_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_groupused_dnode); if (ret) goto error; @@ -1194,9 +1276,9 @@ zio_crypt_destroy_uio(uio_t *uio) * checksum, and psize bits. For an explanation of the purpose of this, see * the comment block on object set authentication. */ -int -zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, - uint_t datalen, boolean_t byteswap, uint8_t *cksum) +static int +zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, + uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) { blkptr_t *bp; int i, epb = datalen >> SPA_BLKPTRSHIFT; @@ -1206,7 +1288,8 @@ zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, /* checksum all of the MACs from the layer below */ SHA2Init(SHA512, &ctx); for (i = 0, bp = buf; i < epb; i++, bp++) { - zio_crypt_bp_do_indrect_checksum_updates(&ctx, byteswap, bp); + zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, + byteswap, bp); } SHA2Final(digestbuf, &ctx); @@ -1222,10 +1305,34 @@ zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, } int -zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, +zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { + int ret; + /* + * Unfortunately, callers of this function will not always have + * easy access to the on-disk format version. 
This info is + * normally found in the DSL Crypto Key, but the checksum-of-MACs + * is expected to be verifiable even when the key isn't loaded. + * Here, instead of doing a ZAP lookup for the version for each + * zio, we simply try both existing formats. + */ + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, + datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); + if (ret == ECKSUM) { + ASSERT(!generate); + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, + buf, datalen, 0, byteswap, cksum); + } + + return (ret); +} + +int +zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ int ret; void *buf; @@ -1439,10 +1546,10 @@ error: * Special case handling routine for encrypting / decrypting dnode blocks. */ static int -zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, - uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, - boolean_t *no_crypt) +zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) { int ret; uint_t nr_src, nr_dst, crypt_len; @@ -1544,12 +1651,12 @@ zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, for (j = 0; j < dnp->dn_nblkptr; j++) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - byteswap, &dnp->dn_blkptr[j]); + version, byteswap, &dnp->dn_blkptr[j]); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - byteswap, DN_SPILL_BLKPTR(dnp)); + version, byteswap, DN_SPILL_BLKPTR(dnp)); } /* @@ -1682,9 +1789,9 @@ error: * data (AAD) for the encryption modes. 
*/ static int -zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uint8_t *mac, - uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, +zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; @@ -1700,9 +1807,9 @@ zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, no_crypt); break; case DMU_OT_DNODE: - ret = zio_crypt_init_uios_dnode(encrypt, plainbuf, cipherbuf, - datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, - no_crypt); + ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, + cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, + auth_len, no_crypt); break; default: ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, @@ -1754,9 +1861,9 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, bzero(&cuio, sizeof (uio_t)); /* create uios for encryption */ - ret = zio_crypt_init_uios(encrypt, ot, plainbuf, cipherbuf, datalen, - byteswap, mac, &puio, &cuio, &enc_len, &authbuf, &auth_len, - no_crypt); + ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, + cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, + &authbuf, &auth_len, no_crypt); if (ret != 0) return (ret); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 6ea822467..572018d75 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1406,7 +1406,12 @@ zvol_open(struct block_device *bdev, fmode_t flag) goto out_mutex; } - if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) { + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. 
+ */ + if ((flag & FMODE_WRITE) && ((zv->zv_flags & ZVOL_RDONLY) || + dmu_objset_incompatible_encryption_version(zv->zv_objset))) { error = -EROFS; goto out_open_count; } -- cgit v1.2.3