diff options
author | Tom Caputi <[email protected]> | 2017-11-08 14:12:59 -0500 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-02-02 11:37:16 -0800 |
commit | ae76f45cda0e0857f99e53959cf71c7a5d66bd8b (patch) | |
tree | e1c3cabe0971272785a442d6fb627bf6ebd07148 | |
parent | 4c46b99d24a6e71b3c72462c11cb051d0930ad60 (diff) |
Encryption Stability and On-Disk Format Fixes
The on-disk format for encrypted datasets protects not only
the encrypted and authenticated blocks themselves, but also
the order and interpretation of these blocks. In order to
make this work while maintaining the ability to do raw
sends, the indirect bps maintain a secure checksum of all
the MACs in the block below it along with a few other
fields that determine how the data is interpreted.
Unfortunately, the current on-disk format erroneously
includes some fields which are not portable and thus cannot
support raw sends. It is not possible to easily work around
this issue due to a separate and much smaller bug which
causes indirect blocks for encrypted dnodes to not be
compressed, which conflicts with the previous bug. In
addition, the current code generates incompatible on-disk
formats on big endian and little endian systems due to an
issue with how block pointers are authenticated. Finally,
raw send streams do not currently include dn_maxblkid when
sending both the metadnode and normal dnodes which are
needed in order to ensure that we are correctly maintaining
the portable objset MAC.
This patch zero's out the offending fields when computing
the bp MAC and ensures that these MACs are always
calculated in little endian order (regardless of the host
system's byte order). This patch also registers an errata
for the old on-disk format, which we detect by adding a
"version" field to newly created DSL Crypto Keys. We allow
datasets without a version (version 0) to only be mounted
for read so that they can easily be migrated. We also now
include dn_maxblkid in raw send streams to ensure the MAC
can be maintained correctly.
This patch also contains minor bug fixes and cleanups.
Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #6845
Closes #6864
Closes #7052
31 files changed, 787 insertions, 187 deletions
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index fa1a5d9fd..22735ae38 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -342,8 +342,8 @@ get_usage(zfs_help_t idx) return (gettext("\tunload-key [-r] " "<-a | filesystem|volume>\n")); case HELP_CHANGE_KEY: - return (gettext("\tchange-key [-l] [-o keyformat=<value>]" - "\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]" + return (gettext("\tchange-key [-l] [-o keyformat=<value>]\n" + "\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]\n" "\t <filesystem|volume>\n" "\tchange-key -i [-l] <filesystem|volume>\n")); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index f8d3a75bd..273453f00 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -2123,6 +2123,15 @@ show_import(nvlist_t *config) "updating.\n")); break; + case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: + (void) printf(gettext(" action: Existing " + "encrypted datasets contain an on-disk " + "incompatibility, which\n\tneeds to be " + "corrected. Backup these datasets to new " + "encrypted datasets\n\tand destroy the " + "old ones.\n")); + break; + default: /* * All errata must contain an action message. @@ -6501,6 +6510,17 @@ status_callback(zpool_handle_t *zhp, void *data) "run 'zpool scrub'.\n")); break; + case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: + (void) printf(gettext("\tExisting encrypted datasets " + "contain an on-disk incompatibility\n\twhich " + "needs to be corrected.\n")); + (void) printf(gettext("action: To correct the issue " + "backup existing encrypted datasets to new\n\t" + "encrypted datasets and destroy the old ones. " + "'zfs mount -o ro' can\n\tbe used to temporarily " + "mount existing encrypted datasets readonly.\n")); + break; + default: /* * All errata which allow the pool to be imported diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index f7bba4c8c..4c33e0a5a 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -443,6 +443,8 @@ main(int argc, char *argv[]) drro->drr_raw_bonuslen = BSWAP_32(drro->drr_raw_bonuslen); drro->drr_toguid = BSWAP_64(drro->drr_toguid); + drro->drr_maxblkid = + BSWAP_64(drro->drr_maxblkid); } payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); @@ -451,7 +453,8 @@ main(int argc, char *argv[]) (void) printf("OBJECT object = %llu type = %u " "bonustype = %u blksz = %u bonuslen = %u " "dn_slots = %u raw_bonuslen = %u " - "flags = %u indblkshift = %u nlevels = %u " + "flags = %u maxblkid = %llu " + "indblkshift = %u nlevels = %u " "nblkptr = %u\n", (u_longlong_t)drro->drr_object, drro->drr_type, @@ -461,6 +464,7 @@ main(int argc, char *argv[]) drro->drr_dn_slots, drro->drr_raw_bonuslen, drro->drr_flags, + (u_longlong_t)drro->drr_maxblkid, drro->drr_indblkshift, drro->drr_nlevels, drro->drr_nblkptr); diff --git a/contrib/dracut/90zfs/zfs-load-key.sh b/contrib/dracut/90zfs/zfs-load-key.sh new file mode 100644 index 000000000..d86763fcc --- /dev/null +++ b/contrib/dracut/90zfs/zfs-load-key.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# This script only gets executed on systemd systems, see mount-zfs.sh for non-systemd systems + +# import the libs now that we know the pool imported +[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh +[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh +. "$dracutlib" + +# load the kernel command line vars +[ -z "$root" ] && root=$(getarg root=) +# If root is not ZFS= or zfs: or rootfstype is not zfs then we are not supposed to handle it. +[ "${root##zfs:}" = "${root}" -a "${root##ZFS=}" = "${root}" -a "$rootfstype" != "zfs" ] && exit 0 + +# There is a race between the zpool import and the pre-mount hooks, so we wait for a pool to be imported +while true; do + zpool list -H | grep -q -v '^$' && break + [[ $(systemctl is-failed zfs-import-cache.service) == 'failed' ]] && exit 1 + [[ $(systemctl is-failed zfs-import-scan.service) == 'failed' ]] && exit 1 + sleep 0.1s +done + +# run this after import as zfs-import-cache/scan service is confirmed good +if [[ "${root}" = "zfs:AUTO" ]] ; then + root=$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}') +else + root="${root##zfs:}" + root="${root##ZFS=}" +fi + +# if pool encryption is active and the zfs command understands '-o encryption' +if [[ $(zpool list -H -o feature@encryption $(echo "${root}" | awk -F\/ '{print $1}')) == 'active' ]]; then + # check if root dataset has encryption enabled + if $(zfs list -H -o encryption "${root}" | grep -q -v off); then + # figure out where the root dataset has its key, the keylocation should not be none + while true; do + if [[ $(zfs list -H -o keylocation "${root}") == 'none' ]]; then + root=$(echo -n "${root}" | awk 'BEGIN{FS=OFS="/"}{NF--; print}') + [[ "${root}" == '' ]] && exit 1 + else + break + fi + done + # decrypt them + TRY_COUNT=5 + while [ $TRY_COUNT != 0 ]; do + zfs load-key "$root" <<< $(systemd-ask-password "Encrypted ZFS password for ${root}: ") + [[ $? == 0 ]] && break + ((TRY_COUNT-=1)) + done + fi +fi diff --git a/include/sys/dmu.h b/include/sys/dmu.h index ffc070726..5553667c3 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -438,6 +438,13 @@ int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx); /* + * Manually set the maxblkid on a dnode. This will adjust nlevels accordingly + * to accommodate the change. + */ +int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx); + +/* * Set the checksum property on a dnode. The new checksum algorithm will * apply to all newly written blocks; existing blocks will not be affected. */ diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 11b8fc625..f3013ad13 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -217,6 +217,7 @@ boolean_t dmu_objset_userobjused_enabled(objset_t *os); boolean_t dmu_objset_userobjspace_upgradable(objset_t *os); void dmu_objset_userobjspace_upgrade(objset_t *os); boolean_t dmu_objset_userobjspace_present(objset_t *os); +boolean_t dmu_objset_incompatible_encryption_version(objset_t *os); int dmu_fsname(const char *snapname, char *buf); diff --git a/include/sys/dnode.h b/include/sys/dnode.h index e5e39b18c..a2bef9d2c 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -221,6 +221,13 @@ typedef struct dnode_phys { uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ + /* + * Both dn_pad2 and dn_pad3 are protected by the block's MAC. This + * allows us to protect any fields that might be added here in the + * future. In either case, developers will want to check + * zio_crypt_init_uios_dnode() to ensure the new field is being + * protected properly. + */ uint64_t dn_pad3[4]; /* @@ -301,6 +308,7 @@ struct dnode { uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */ uint16_t dn_next_bonuslen[TXG_SIZE]; uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */ + uint64_t dn_next_maxblkid[TXG_SIZE]; /* next maxblkid in bytes */ /* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */ uint32_t dn_dbufs_count; /* count of dn_dbufs */ diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index 6fb91f67d..d0c789035 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -39,7 +39,7 @@ #define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" #define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ" #define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT" - +#define DSL_CRYPTO_KEY_VERSION "DSL_CRYPTO_VERSION" /* * In-memory representation of a wrapping key. One of these structs will exist @@ -169,6 +169,7 @@ int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv); int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation); +boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd); void spa_keystore_init(spa_keystore_t *sk); void spa_keystore_fini(spa_keystore_t *sk); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 6b1c3bb56..611279d6b 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -891,6 +891,7 @@ typedef enum zpool_errata { ZPOOL_ERRATA_NONE, ZPOOL_ERRATA_ZOL_2094_SCRUB, ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY, + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION, } zpool_errata_t; /* diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 6924280c4..827f619d9 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -219,10 +219,12 @@ typedef struct dmu_replay_record { uint8_t drr_flags; uint32_t drr_raw_bonuslen; uint64_t drr_toguid; - /* only nonzero for raw streams */ + /* only (possibly) nonzero for raw streams */ uint8_t drr_indblkshift; uint8_t drr_nlevels; uint8_t drr_nblkptr; + uint8_t drr_pad[5]; + uint64_t drr_maxblkid; /* bonus content follows */ } drr_object; struct drr_freeobjects { diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h index 9cf9a17c2..57b4c1e7c 100644 --- a/include/sys/zio_crypt.h +++ b/include/sys/zio_crypt.h @@ -36,6 +36,8 @@ struct zbookmark_phys; #define MASTER_KEY_MAX_LEN 32 #define SHA512_HMAC_KEYLEN 64 +#define ZIO_CRYPT_KEY_CURRENT_VERSION 1ULL + typedef enum zio_crypt_type { ZC_TYPE_NONE = 0, ZC_TYPE_CCM, @@ -64,6 +66,9 @@ typedef struct zio_crypt_key { /* encryption algorithm */ uint64_t zk_crypt; + /* on-disk format version */ + uint64_t zk_version; + /* GUID for uniquely identifying this key. Not encrypted on disk. */ uint64_t zk_guid; @@ -104,9 +109,9 @@ int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); -int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, - uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, - zio_crypt_key_t *key); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key); int zio_crypt_generate_iv(uint8_t *ivbuf); int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *ivbuf, uint8_t *salt); diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c index b1fac2f62..6ccee740f 100644 --- a/lib/libzfs/libzfs_crypto.c +++ b/lib/libzfs/libzfs_crypto.c @@ -1054,7 +1054,7 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation) } try_again: - /* fetching and deriving the key are correctible errors. set the flag */ + /* fetching and deriving the key are correctable errors. set the flag */ correctible = B_TRUE; /* get key material from key format and location */ @@ -1110,22 +1110,25 @@ try_again: error: zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); - if (key_material != NULL) + if (key_material != NULL) { free(key_material); - if (key_data != NULL) + key_material = NULL; + } + if (key_data != NULL) { free(key_data); + key_data = NULL; + } /* * Here we decide if it is ok to allow the user to retry entering their * key. The can_retry flag will be set if the user is entering their - * key from an interactive prompt. The correctible flag will only be - * set if an error that occured could be corrected by retrying. Both + * key from an interactive prompt. The correctable flag will only be + * set if an error that occurred could be corrected by retrying. Both * flags are needed to allow the user to attempt key entry again */ - if (can_retry && correctible && attempts <= MAX_KEY_PROMPT_ATTEMPTS) { - attempts++; + attempts++; + if (can_retry && correctible && attempts < MAX_KEY_PROMPT_ATTEMPTS) goto try_again; - } return (ret); } diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index 320783523..f900ac723 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -352,6 +352,15 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) return (ZPOOL_STATUS_REMOVED_DEV); /* + * Informational errata available. + */ + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata); + if (errata) { + *erratap = errata; + return (ZPOOL_STATUS_ERRATA); + } + + /* * Outdated, but usable, version */ if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) @@ -382,15 +391,6 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) } } - /* - * Informational errata available. - */ - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata); - if (errata) { - *erratap = errata; - return (ZPOOL_STATUS_ERRATA); - } - return (ZPOOL_STATUS_OK); } diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 45b0abe7f..2f3fe9771 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1229,6 +1229,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) arc_buf_hdr_t *hdr = vbuf; bzero(hdr, HDR_FULL_SIZE); + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); refcount_create(&hdr->b_l1hdr.b_refcnt); mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -3246,9 +3247,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata); IMPLY(alloc_rdata, HDR_PROTECTED(hdr)); - if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - if (alloc_rdata) { size = HDR_GET_PSIZE(hdr); ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); @@ -6751,6 +6749,17 @@ arc_write_ready(zio_t *zio) ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); ASSERT(HDR_PROTECTED(hdr)); + if (BP_SHOULD_BYTESWAP(bp)) { + if (BP_GET_LEVEL(bp) > 0) { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64; + } else { + hdr->b_l1hdr.b_byteswap = + DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); + } + } else { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + } + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 56740ae37..20ed3ebff 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2029,6 +2029,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, return (err); } +int +dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(dn, maxblkid, tx, B_FALSE); + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + return (0); +} + void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx) @@ -2214,8 +2231,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) dedup = B_FALSE; } - if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET) + if (level <= 0 && + (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) { compress = ZIO_COMPRESS_EMPTY; + } } zp->zp_compress = compress; @@ -2488,6 +2507,7 @@ EXPORT_SYMBOL(dmu_object_size_from_db); EXPORT_SYMBOL(dmu_object_dnsize_from_db); EXPORT_SYMBOL(dmu_object_set_nlevels); EXPORT_SYMBOL(dmu_object_set_blocksize); +EXPORT_SYMBOL(dmu_object_set_maxblkid); EXPORT_SYMBOL(dmu_object_set_checksum); EXPORT_SYMBOL(dmu_object_set_compress); EXPORT_SYMBOL(dmu_write_policy); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index e596b70e9..2b069b6ce 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -663,6 +663,9 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, return (SET_ERROR(EINVAL)); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { return (SET_ERROR(EROFS)); + } else if (!readonly && decrypt && + dsl_dir_incompatible_encryption_version(ds->ds_dir)) { + return (SET_ERROR(EROFS)); } /* if we are decrypting, we can now check MACs in os->os_phys_buf */ @@ -2635,6 +2638,13 @@ dmu_objset_find(char *name, int func(const char *, void *), void *arg, return (error); } +boolean_t +dmu_objset_incompatible_encryption_version(objset_t *os) +{ + return (dsl_dir_incompatible_encryption_version( + os->os_dsl_dataset->ds_dir)); +} + void dmu_objset_set_user(objset_t *os, void *user_ptr) { diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 09d79742b..63a4f98bf 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -570,6 +570,7 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, drro->drr_flags |= DRR_RAW_BYTESWAP; /* needed for reconstructing dnp on recv side */ + drro->drr_maxblkid = dnp->dn_maxblkid; drro->drr_indblkshift = dnp->dn_indblkshift; drro->drr_nlevels = dnp->dn_nlevels; drro->drr_nblkptr = dnp->dn_nblkptr; @@ -2294,6 +2295,7 @@ byteswap_record(dmu_replay_record_t *drr) DO32(drr_object.drr_bonuslen); DO32(drr_object.drr_raw_bonuslen); DO64(drr_object.drr_toguid); + DO64(drr_object.drr_maxblkid); break; case DRR_FREEOBJECTS: DO64(drr_freeobjects.drr_firstobj); @@ -2478,11 +2480,17 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, if (rwa->raw && nblkptr != drro->drr_nblkptr) return (SET_ERROR(EINVAL)); - if (drro->drr_blksz != doi.doi_data_block_size || + if (rwa->raw && + (drro->drr_blksz != doi.doi_data_block_size || nblkptr < doi.doi_nblkptr || - (rwa->raw && - (indblksz != doi.doi_metadata_block_size || - drro->drr_nlevels < doi.doi_indirection))) { + indblksz != doi.doi_metadata_block_size || + drro->drr_nlevels < doi.doi_indirection)) { + err = dmu_free_long_range_raw(rwa->os, + drro->drr_object, 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); + } else if (drro->drr_blksz != doi.doi_data_block_size || + nblkptr < doi.doi_nblkptr) { err = dmu_free_long_range(rwa->os, drro->drr_object, 0, DMU_OBJECT_END); if (err != 0) @@ -2538,6 +2546,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_blksz, drro->drr_indblkshift, tx)); VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object, drro->drr_nlevels, tx)); + VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object, + drro->drr_maxblkid, tx)); } if (data != NULL) { @@ -2839,9 +2849,12 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, dmu_tx_abort(tx); return (err); } - dmu_buf_will_dirty(db_spill, tx); - if (rwa->raw) + if (rwa->raw) { VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx)); + dmu_buf_will_change_crypt_params(db_spill, tx); + } else { + dmu_buf_will_dirty(db_spill, tx); + } if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, @@ -3772,7 +3785,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, int next_err = 0; while (next_err == 0) { - free_err = dmu_free_long_object(rwa->os, obj); + if (drc->drc_raw) { + free_err = dmu_free_long_object_raw(rwa->os, + obj); + } else { + free_err = dmu_free_long_object(rwa->os, obj); + } if (free_err != 0 && free_err != ENOENT) break; diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index c1fbf3c3b..544e736d8 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -134,6 +134,7 @@ dnode_cons(void *arg, void *unused, int kmflag) bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); + bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_link_init(&dn->dn_dirty_link[i]); @@ -193,6 +194,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_bonuslen[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); } ASSERT0(dn->dn_allocated_txg); @@ -602,6 +604,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); @@ -767,6 +770,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) sizeof (odn->dn_next_bonuslen)); bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], sizeof (odn->dn_next_blksz)); + bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0], + sizeof (odn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); @@ -1751,6 +1756,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) goto out; dn->dn_maxblkid = blkid; + dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = blkid; /* * Compute the number of levels necessary to support the new maxblkid. diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 2ec729a6f..09437993a 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -519,6 +519,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; dn->dn_next_indblkshift[txgoff] = 0; dn->dn_next_blksz[txgoff] = 0; + dn->dn_next_maxblkid[txgoff] = 0; /* ASSERT(blkptrs are zero); */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); @@ -718,6 +719,17 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; } + /* + * This must be done after dnode_sync_free_range() + * and dnode_increase_indirection(). + */ + if (dn->dn_next_maxblkid[txgoff]) { + mutex_enter(&dn->dn_mtx); + dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff]; + dn->dn_next_maxblkid[txgoff] = 0; + mutex_exit(&dn->dn_mtx); + } + if (dn->dn_next_nblkptr[txgoff]) { /* this should only happen on a realloc */ ASSERT(dn->dn_allocated_txg == tx->tx_txg); diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 59562d194..cb13d2cdc 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -347,7 +347,7 @@ spa_keystore_fini(spa_keystore_t *sk) rw_destroy(&sk->sk_dk_lock); } -int +static int dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) { if (dd->dd_crypto_obj == 0) @@ -357,6 +357,34 @@ dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj)); } +int +dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version) +{ + *version = 0; + + if (dd->dd_crypto_obj == 0) + return (SET_ERROR(ENOENT)); + + /* version 0 is implied by ENOENT */ + (void) zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_VERSION, 8, 1, version); + + return (0); +} + +boolean_t +dsl_dir_incompatible_encryption_version(dsl_dir_t *dd) +{ + int ret; + uint64_t version = 0; + + ret = dsl_dir_get_encryption_version(dd, &version); + if (ret != 0) + return (B_FALSE); + + return (version != ZIO_CRYPT_KEY_CURRENT_VERSION); +} + static int spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj, void *tag, dsl_wrapping_key_t **wkey_out) @@ -514,7 +542,7 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out) { int ret; - uint64_t crypt = 0, guid = 0; + uint64_t crypt = 0, guid = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -556,12 +584,15 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, if (ret != 0) goto error; + /* the initial on-disk format for encryption did not have a version */ + (void) zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + /* * Unwrap the keys. If there is an error return EACCES to indicate * an authentication failure. */ - ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, guid, raw_keydata, - raw_hmac_keydata, iv, mac, &dck->dck_key); + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid, + raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); if (ret != 0) { ret = SET_ERROR(EACCES); goto error; @@ -1883,7 +1914,7 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) dsl_dataset_t *ds = NULL; uint8_t *buf = NULL; uint_t len; - uint64_t intval, guid, nlevels, blksz, ibs, nblkptr; + uint64_t intval, guid, nlevels, blksz, ibs, nblkptr, maxblkid, version; boolean_t is_passphrase = B_FALSE; ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds); @@ -1952,6 +1983,17 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) goto error; } + /* + * We don't support receiving old on-disk formats. The version 0 + * implementation protected several fields in an objset that were + * not always portable during a raw receive. As a result, we call + * the old version an on-disk errata #3. + */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + ret = SET_ERROR(ENOTSUP); + goto error; + } ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len); if (ret != 0 || len != ZIO_OBJSET_MAC_LEN) { @@ -2028,6 +2070,12 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) goto error; } + ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + ret = dmu_objset_from_ds(ds, &os); if (ret != 0) goto error; @@ -2078,8 +2126,9 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) uint8_t *keydata, *hmac_keydata, *iv, *mac, *portable_mac; uint_t len; uint64_t rddobj, one = 1; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; uint64_t crypt, guid, keyformat, iters, salt; - uint64_t compress, checksum, nlevels, blksz, ibs; + uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid; char *keylocation = "prompt"; VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); @@ -2108,6 +2157,7 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels"); blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz"); ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift"); + maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid"); /* if we haven't created an objset for the ds yet, do that now */ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); @@ -2132,6 +2182,11 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) /* set metadnode compression and checksum */ mdn->dn_compress = compress; mdn->dn_checksum = checksum; + + rw_enter(&mdn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(mdn, maxblkid, tx, B_FALSE); + rw_exit(&mdn->dn_struct_rwlock); + dsl_dataset_dirty(ds, tx); /* if this is a new dataset setup the DSL Crypto Key. */ @@ -2146,6 +2201,9 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION, + sizeof (uint64_t), 1, &version, tx)); dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE; @@ -2209,7 +2267,8 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) dsl_dir_t *rdd = NULL; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; - uint64_t crypt = 0, guid = 0, format = 0, iters = 0, salt = 0; + uint64_t crypt = 0, guid = 0, format = 0; + uint64_t iters = 0, salt = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -2255,6 +2314,17 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) goto error; /* + * We don't support raw sends of legacy on-disk formats. See the + * comment in dsl_crypto_recv_key_check() for details. + */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + ret = SET_ERROR(ENOTSUP); + goto error; + } + + /* * Lookup wrapping key properties. An early version of the code did * not correctly add these values to the wrapping key or the DSL * Crypto Key on disk for non encryption roots, so to be safe we @@ -2293,6 +2363,7 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, guid); + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, raw_keydata, MASTER_KEY_MAX_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, @@ -2312,6 +2383,7 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); + fnvlist_add_uint64(nvl, "mdn_maxblkid", mdn->dn_maxblkid); *nvl_out = nvl; return (0); @@ -2332,7 +2404,8 @@ dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) { dsl_crypto_key_t dck; - uint64_t one = 1; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; + uint64_t one = 1ULL; ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); @@ -2349,6 +2422,8 @@ dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dsl_crypto_key_sync(&dck, tx); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, + DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); zio_crypt_key_destroy(&dck.dck_key); bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 68791fe74..96e8dd62e 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -37,6 +37,7 @@ #include <sys/dsl_deleg.h> #include <sys/dmu_impl.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/metaslab.h> #include <sys/zap.h> #include <sys/zio.h> @@ -187,6 +188,12 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, VERIFY0(zap_lookup(dp->dp_meta_objset, ddobj, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj)); + + /* check for on-disk format errata */ + if (dsl_dir_incompatible_encryption_version(dd)) { + dp->dp_spa->spa_errata = + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + } } mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 7286773d9..18b4ec3d6 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1100,6 +1100,15 @@ static int zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) { int error; + boolean_t readonly = zfs_is_readonly(zfsvfs); + + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. + */ + if (!readonly && + dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) + return (SET_ERROR(EROFS)); error = zfs_register_callbacks(zfsvfs->z_vfs); if (error) @@ -1113,13 +1122,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) * operations out since we closed the ZIL. */ if (mounting) { - boolean_t readonly; - /* * During replay we remove the read only flag to * allow replays to succeed. */ - readonly = zfs_is_readonly(zfsvfs); if (readonly != 0) readonly_changed_cb(zfsvfs, B_FALSE); else diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 263c77e4a..37259ad8e 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -402,6 +402,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) int ret; void *tmp; blkptr_t *bp = zio->io_bp; + spa_t *spa = zio->io_spa; + uint64_t dsobj = zio->io_bookmark.zb_objset; uint64_t lsize = BP_GET_LSIZE(bp); dmu_object_type_t ot = BP_GET_TYPE(bp); uint8_t salt[ZIO_DATA_SALT_LEN]; @@ -460,13 +462,12 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) */ if (BP_IS_AUTHENTICATED(bp)) { if (ot == DMU_OT_OBJSET) { - ret = spa_do_crypt_objset_mac_abd(B_FALSE, zio->io_spa, - zio->io_bookmark.zb_objset, zio->io_abd, size, - BP_SHOULD_BYTESWAP(bp)); + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, + dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp)); } else { zio_crypt_decode_mac_bp(bp, mac); - ret = spa_do_crypt_mac_abd(B_FALSE, zio->io_spa, - zio->io_bookmark.zb_objset, zio->io_abd, size, mac); + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, + zio->io_abd, size, mac); } abd_copy(data, zio->io_abd, size); @@ -486,9 +487,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) zio_crypt_decode_mac_bp(bp, mac); } - ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, zio->io_bookmark.zb_objset, - bp, bp->blk_birth, size, data, zio->io_abd, iv, mac, salt, - &no_crypt); + ret = spa_do_crypt_abd(B_FALSE, spa, dsobj, bp, bp->blk_birth, + size, data, zio->io_abd, iv, mac, salt, &no_crypt); if (no_crypt) abd_copy(data, zio->io_abd, size); @@ -509,7 +509,7 @@ error: ret = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, - zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0); + spa, NULL, &zio->io_bookmark, zio, 0, 0); } } else { zio->io_error = ret; @@ -3729,6 +3729,7 @@ zio_encrypt(zio_t *zio) spa_t *spa = zio->io_spa; blkptr_t *bp = zio->io_bp; uint64_t psize = BP_GET_PSIZE(bp); + uint64_t dsobj = zio->io_bookmark.zb_objset; dmu_object_type_t ot = BP_GET_TYPE(bp); void *enc_buf = NULL; abd_t *eabd = NULL; @@ -3752,10 +3753,27 @@ zio_encrypt(zio_t *zio) /* if we are doing raw encryption set the provided encryption params */ if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + ASSERT0(BP_GET_LEVEL(bp)); BP_SET_CRYPT(bp, B_TRUE); BP_SET_BYTEORDER(bp, zp->zp_byteorder); if (ot != DMU_OT_OBJSET) zio_crypt_encode_mac_bp(bp, zp->zp_mac); + + /* dnode blocks must be written out in the provided byteorder */ + if (zp->zp_byteorder != ZFS_HOST_BYTEORDER && + ot == DMU_OT_DNODE) { + void *bswap_buf = zio_buf_alloc(psize); + abd_t *babd = abd_get_from_buf(bswap_buf, psize); + + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + abd_copy_to_buf(bswap_buf, zio->io_abd, psize); + dmu_ot_byteswap[DMU_OT_BYTESWAP(ot)].ob_func(bswap_buf, + psize); + + abd_take_ownership_of_buf(babd, B_TRUE); + zio_push_transform(zio, babd, psize, psize, NULL); + } + if (DMU_OT_IS_ENCRYPTED(ot)) zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); return (ZIO_PIPELINE_CONTINUE); @@ -3779,17 +3797,16 @@ zio_encrypt(zio_t *zio) ASSERT0(DMU_OT_IS_ENCRYPTED(ot)); ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); BP_SET_CRYPT(bp, B_TRUE); - VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, - zio->io_bookmark.zb_objset, zio->io_abd, psize, - BP_SHOULD_BYTESWAP(bp))); + VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, BP_SHOULD_BYTESWAP(bp))); return (ZIO_PIPELINE_CONTINUE); } /* unencrypted object types are only authenticated with a MAC */ if (!DMU_OT_IS_ENCRYPTED(ot)) { BP_SET_CRYPT(bp, B_TRUE); - VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, - zio->io_bookmark.zb_objset, zio->io_abd, psize, mac)); + VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, mac)); zio_crypt_encode_mac_bp(bp, mac); return (ZIO_PIPELINE_CONTINUE); } @@ -3823,8 +3840,8 @@ zio_encrypt(zio_t *zio) } /* Perform the encryption. This should not fail */ - VERIFY0(spa_do_crypt_abd(B_TRUE, spa, zio->io_bookmark.zb_objset, bp, - zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt)); + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, dsobj, bp, zio->io_txg, + psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt)); /* encode encryption metadata into the bp */ if (ot == DMU_OT_INTENT_LOG) { @@ -4154,7 +4171,6 @@ zio_done(zio_t *zio) if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { - ASSERT(!BP_SHOULD_BYTESWAP(zio->io_bp)); ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); ASSERT(BP_COUNT_GANG(zio->io_bp) == 0 || diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index 5ffa1e8b0..823e6b8d6 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -187,6 +187,12 @@ (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; +typedef struct blkptr_auth_buf { + uint64_t bab_prop; /* blk_prop - portable mask */ + uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */
+ uint64_t bab_pad; /* reserved for future use */ +} blkptr_auth_buf_t; + zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { {"", ZC_TYPE_NONE, 0, "inherit"}, {"", ZC_TYPE_NONE, 0, "on"}, @@ -275,6 +281,7 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; + key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); @@ -472,10 +479,10 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, { int ret; uio_t puio, cuio; + uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint64_t crypt = key->zk_crypt; - uint64_t le_guid = LE_64(key->zk_guid); - uint_t enc_len, keydata_len; + uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); @@ -500,6 +507,22 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + /* + * Although we don't support writing to the old format, we do + * support rewrapping the key so that the user can move and + * quarantine datasets on the old format. + */ + if (key->zk_version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(key->zk_guid); + } else { + ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(key->zk_guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(key->zk_version); + } + enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_iovcnt = 2; @@ -510,7 +533,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, /* encrypt the keys and store the resulting ciphertext and mac */ ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t)); + &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -521,16 +544,16 @@ error: } int -zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, - uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, - zio_crypt_key_t *key) +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key) { int ret; crypto_mechanism_t mech; uio_t puio, cuio; + uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; - uint_t enc_len, keydata_len; - uint64_t le_guid = LE_64(guid); + uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); @@ -550,6 +573,17 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + if (version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(guid); + } else { + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(version); + } + enc_len = keydata_len + SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_segflg = UIO_SYSSPACE; @@ -560,7 +594,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, /* decrypt the keys and store the result in the output buffers */ ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t)); + &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -602,6 +636,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; + key->zk_version = version; key->zk_guid = guid; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); @@ -700,19 +735,32 @@ zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, * byte strings, which normally means that these strings would not need to deal * with byteswapping at all. However, both blkptr_t and zil_header_t may be * byteswapped by lower layers and so we must "undo" that byteswap here upon - * decoding. + * decoding and encoding in a non-native byteorder. These functions require + * that the byteorder bit is correct before being called. */ void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) { + uint64_t val64; uint32_t val32; ASSERT(BP_IS_ENCRYPTED(bp)); - bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); - bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); - bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); - BP_SET_IV2(bp, val32); + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); + bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, val32); + } else { + bcopy(salt, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); + + bcopy(iv, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); + + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, BSWAP_32(val32)); + } } void @@ -751,12 +799,22 @@ zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) { + uint64_t val64; + ASSERT(BP_USES_CRYPT(bp)); ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); - bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); - bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], - sizeof (uint64_t)); + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], + sizeof (uint64_t)); + } else { + bcopy(mac, &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[2] = BSWAP_64(val64); + + bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[3] = BSWAP_64(val64); + } } void @@ -841,55 +899,107 @@ zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) abd_return_buf(src_abd, src, datalen); } +/* + * This function decides what fields from blk_prop are included in + * the on-disk various MAC algorithms. + */ static void -zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp) +zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) { - BP_SET_DEDUP(bp, 0); - BP_SET_CHECKSUM(bp, 0); + /* + * Version 0 did not properly zero out all non-portable fields + * as it should have done. We maintain this code so that we can + * do read-only imports of pools on this version. + */ + if (version == 0) { + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + return; + } + + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + + /* + * The hole_birth feature might set these fields even if this bp + * is a hole. We zero them out here to guarantee that raw sends + * will function with or without the feature. + */ + if (BP_IS_HOLE(bp)) { + bp->blk_prop = 0ULL; + return; + } /* - * psize cannot be set to zero or it will trigger asserts, but the - * value doesn't really matter as long as it is constant. + * At L0 we want to verify these fields to ensure that data blocks + * can not be reinterpretted. For instance, we do not want an attacker + * to trick us into returning raw lz4 compressed data to the user + * by modifying the compression bits. At higher levels, we cannot + * enforce this policy since raw sends do not convey any information + * about indirect blocks, so these values might be different on the + * receive side. Fortunately, this does not open any new attack + * vectors, since any alterations that can be made to a higher level + * bp must still verify the correct order of the layer below it. */ - BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + if (BP_GET_LEVEL(bp) != 0) { + BP_SET_BYTEORDER(bp, 0); + BP_SET_COMPRESS(bp, 0); + + /* + * psize cannot be set to zero or it will trigger + * asserts, but the value doesn't really matter as + * long as it is constant. + */ + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + } + + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); } -static int -zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, - blkptr_t *bp) +static void +zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, + blkptr_auth_buf_t *bab, uint_t *bab_len) { - int ret; - crypto_data_t cd; - uint64_t le_blkprop; blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - cd.cd_format = CRYPTO_DATA_RAW; - cd.cd_offset = 0; if (should_bswap) byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - le_blkprop = (ZFS_HOST_BYTEORDER) ? - tmpbp.blk_prop : BSWAP_64(tmpbp.blk_prop); + zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); - cd.cd_length = sizeof (uint64_t); - cd.cd_raw.iov_base = (char *)&le_blkprop; - cd.cd_raw.iov_len = cd.cd_length; + /* + * We always MAC blk_prop in LE to ensure portability. This + * must be done after decoding the mac, since the endianness + * will get zero'd out here. + */ + zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); + bab->bab_prop = LE_64(tmpbp.blk_prop); + bab->bab_pad = 0ULL; + + /* version 0 did not include the padding */ + *bab_len = sizeof (blkptr_auth_buf_t); + if (version == 0) + *bab_len -= sizeof (uint64_t); +} - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } +static int +zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + int ret; + uint_t bab_len; + blkptr_auth_buf_t bab; + crypto_data_t cd; - zio_crypt_decode_mac_bp(&tmpbp, mac); - cd.cd_length = ZIO_DATA_MAC_LEN; - cd.cd_raw.iov_base = (char *)mac; + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + cd.cd_length = bab_len; + cd.cd_raw.iov_base = (char *)&bab; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); @@ -905,60 +1015,32 @@ error: } static void -zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, boolean_t should_bswap, - blkptr_t *bp) +zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) { - blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + uint_t bab_len; + blkptr_auth_buf_t bab; - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - zio_crypt_decode_mac_bp(&tmpbp, mac); - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - SHA2Update(ctx, &tmpbp.blk_prop, sizeof (uint64_t)); - SHA2Update(ctx, mac, ZIO_DATA_MAC_LEN); + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + SHA2Update(ctx, &bab, bab_len); } static void -zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, +zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { - uint_t crypt_len; - blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - zio_crypt_decode_mac_bp(&tmpbp, mac); - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + uint_t bab_len; + blkptr_auth_buf_t bab; - crypt_len = sizeof (uint64_t); - bcopy(&tmpbp.blk_prop, *aadp, crypt_len); - *aadp += crypt_len; - *aad_len += crypt_len; - - crypt_len = ZIO_DATA_MAC_LEN; - bcopy(mac, *aadp, crypt_len); - *aadp += crypt_len; - *aad_len += crypt_len; + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + bcopy(&bab, *aadp, bab_len); + *aadp += bab_len; + *aad_len += bab_len; } static int -zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, - dnode_phys_t *dnp) +zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, dnode_phys_t *dnp) { int ret, i; dnode_phys_t *adnp; @@ -992,14 +1074,14 @@ zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, } for (i = 0; i < dnp->dn_nblkptr; i++) { - ret = zio_crypt_bp_do_hmac_updates(ctx, + ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, &dnp->dn_blkptr[i]); if (ret != 0) goto error; } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - ret = zio_crypt_bp_do_hmac_updates(ctx, + ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, DN_SPILL_BLKPTR(dnp)); if (ret != 0) goto error; @@ -1095,8 +1177,8 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, } /* add in fields from the metadnode */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_meta_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_meta_dnode); if (ret) goto error; @@ -1149,13 +1231,13 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, } /* add in fields from the user accounting dnodes */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_userused_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_userused_dnode); if (ret) goto error; - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_groupused_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_groupused_dnode); if (ret) goto error; @@ -1194,9 +1276,9 @@ zio_crypt_destroy_uio(uio_t *uio) * checksum, and psize bits. For an explanation of the purpose of this, see * the comment block on object set authentication. */ -int -zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, - uint_t datalen, boolean_t byteswap, uint8_t *cksum) +static int +zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, + uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) { blkptr_t *bp; int i, epb = datalen >> SPA_BLKPTRSHIFT; @@ -1206,7 +1288,8 @@ zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, /* checksum all of the MACs from the layer below */ SHA2Init(SHA512, &ctx); for (i = 0, bp = buf; i < epb; i++, bp++) { - zio_crypt_bp_do_indrect_checksum_updates(&ctx, byteswap, bp); + zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, + byteswap, bp); } SHA2Final(digestbuf, &ctx); @@ -1222,10 +1305,34 @@ zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, } int -zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, +zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { + int ret; + /* + * Unfortunately, callers of this function will not always have + * easy access to the on-disk format version. This info is + * normally found in the DSL Crypto Key, but the checksum-of-MACs + * is expected to be verifiable even when the key isn't loaded. + * Here, instead of doing a ZAP lookup for the version for each + * zio, we simply try both existing formats. + */ + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, + datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); + if (ret == ECKSUM) { + ASSERT(!generate); + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, + buf, datalen, 0, byteswap, cksum); + } + + return (ret); +} + +int +zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ int ret; void *buf; @@ -1439,10 +1546,10 @@ error: * Special case handling routine for encrypting / decrypting dnode blocks. */ static int -zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, - uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, - boolean_t *no_crypt) +zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) { int ret; uint_t nr_src, nr_dst, crypt_len; @@ -1544,12 +1651,12 @@ zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, for (j = 0; j < dnp->dn_nblkptr; j++) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - byteswap, &dnp->dn_blkptr[j]); + version, byteswap, &dnp->dn_blkptr[j]); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - byteswap, DN_SPILL_BLKPTR(dnp)); + version, byteswap, DN_SPILL_BLKPTR(dnp)); } /* @@ -1682,9 +1789,9 @@ error: * data (AAD) for the encryption modes. */ static int -zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uint8_t *mac, - uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, +zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; @@ -1700,9 +1807,9 @@ zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, no_crypt); break; case DMU_OT_DNODE: - ret = zio_crypt_init_uios_dnode(encrypt, plainbuf, cipherbuf, - datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, - no_crypt); + ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, + cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, + auth_len, no_crypt); break; default: ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, @@ -1754,9 +1861,9 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, bzero(&cuio, sizeof (uio_t)); /* create uios for encryption */ - ret = zio_crypt_init_uios(encrypt, ot, plainbuf, cipherbuf, datalen, - byteswap, mac, &puio, &cuio, &enc_len, &authbuf, &auth_len, - no_crypt); + ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, + cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, + &authbuf, &auth_len, no_crypt); if (ret != 0) return (ret); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 6ea822467..572018d75 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1406,7 +1406,12 @@ zvol_open(struct block_device *bdev, fmode_t flag) goto out_mutex; } - if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) { + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. + */ + if ((flag & FMODE_WRITE) && ((zv->zv_flags & ZVOL_RDONLY) || + dmu_objset_incompatible_encryption_version(zv->zv_objset))) { error = -EROFS; goto out_open_count; } diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index b8b898892..a62dd352c 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -337,7 +337,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'zpool_import_features_003_pos','zpool_import_missing_001_pos', 'zpool_import_missing_002_pos', 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos', - 'zpool_import_encrypted', 'zpool_import_encrypted_load'] + 'zpool_import_encrypted', 'zpool_import_encrypted_load', + 'zpool_import_errata3'] tags = ['functional', 'cli_root', 'zpool_import'] [tests/functional/cli_root/zpool_labelclear] @@ -655,7 +656,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', - 'send-c_recv_dedup', 'send_encrypted_heirarchy', 'send_freeobjects'] + 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy', + 'send_freeobjects'] tags = ['functional', 'rsend'] [tests/functional/scrub_mirror] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am index 94031b9a7..6200af247 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am @@ -27,10 +27,12 @@ dist_pkgdata_SCRIPTS = \ zpool_import_missing_003_pos.ksh \ zpool_import_rename_001_pos.ksh \ zpool_import_encrypted.ksh \ - zpool_import_encrypted_load.ksh + zpool_import_encrypted_load.ksh \ + zpool_import_errata3.ksh BLOCKFILES = \ - unclean_export.dat.bz2 + unclean_export.dat.bz2 \ + cryptv0.dat.bz2 dist_pkgdata_DATA = $(BLOCKFILES) EXTRA_DIST = $(BLOCKFILES) diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 b/tests/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 Binary files differnew file mode 100644 index 000000000..1c625c2c4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh new file mode 100755 index 000000000..67e4caee8 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with Errata #3 while preventing +# the user from performing read write operations +# +# STRATEGY: +# 1. Import a pre-packaged pool with Errata #3 +# 2. Attempt to write to the effected datasets +# 3. Attempt to read from the effected datasets +# 4. Attempt to perform a raw send of the effected datasets +# 5. Perform a regular send of the datasets under a new encryption root +# 6. Verify the new datasets can be read from and written to +# 7. Destroy the old effected datasets +# 8. Reimport the pool and verify that the errata is no longer present +# + +verify_runnable "global" + +POOL_NAME=cryptv0 +POOL_FILE=cryptv0.dat + +function uncompress_pool +{ + log_note "Creating pool from $POOL_FILE" + log_must bzcat \ + $STF_SUITE/tests/functional/cli_root/zpool_import/$POOL_FILE.bz2 \ + > /$TESTPOOL/$POOL_FILE + return 0 +} + +function cleanup +{ + poolexists $POOL_NAME && log_must zpool destroy $POOL_NAME + [[ -e /$TESTPOOL/$POOL_FILE ]] && rm /$TESTPOOL/$POOL_FILE + return 0 +} +log_onexit cleanup + +log_assert "Verify that Errata 3 is properly handled" + +uncompress_pool +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_must eval "zpool status | grep -q Errata" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testfs" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testvol" + +log_mustnot zfs mount $POOL_NAME/testfs +log_must zfs mount -o ro $POOL_NAME/testfs + +old_mntpnt=$(get_prop mountpoint $POOL_NAME/testfs) +log_must eval "ls $old_mntpnt | grep -q testfile" +block_device_wait +log_mustnot dd if=/dev/zero of=/dev/zvol/$POOL_NAME/testvol bs=512 count=1 +log_must dd if=/dev/zvol/$POOL_NAME/testvol of=/dev/null bs=512 count=1 +log_must eval "echo 'password' | zfs create \ + -o encryption=on -o keyformat=passphrase -o keylocation=prompt \ + cryptv0/encroot" +log_mustnot eval "zfs send -w $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/encroot/testfs" +log_mustnot eval "zfs send -w $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/encroot/testvol" + +log_must eval "zfs send $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/encroot/testfs" +log_must eval "zfs send $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/encroot/testvol" +block_device_wait +log_must dd if=/dev/zero of=/dev/zvol/$POOL_NAME/encroot/testvol bs=512 count=1 +new_mntpnt=$(get_prop mountpoint $POOL_NAME/encroot/testfs) +log_must eval "ls $new_mntpnt | grep -q testfile" +log_must zfs destroy -r $POOL_NAME/testfs +log_must zfs destroy -r $POOL_NAME/testvol + +log_must zpool export $POOL_NAME +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_mustnot eval "zpool status | grep -q Errata" +log_pass "Errata 3 is properly handled" diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am index 8833d1d76..6598b1c3d 100644 --- a/tests/zfs-tests/tests/functional/rsend/Makefile.am +++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am @@ -23,6 +23,7 @@ dist_pkgdata_SCRIPTS = \ rsend_021_pos.ksh \ rsend_022_pos.ksh \ rsend_024_pos.ksh \ + send_encrypted_files.ksh \ send_encrypted_heirarchy.ksh \ send-cD.ksh \ send-c_embedded_blocks.ksh \ diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh new file mode 100644 index 000000000..20f3788d5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh @@ -0,0 +1,101 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# DESCRIPTION: +# +# +# STRATEGY: +# 1. Create a new encrypted filesystem +# 2. Add an empty file to the filesystem +# 3. Add a small 512 byte file to the filesystem +# 4. Add a larger 32M file to the filesystem +# 5. Add a large sparse file to the filesystem +# 6. Add a file truncated to 4M to the filesystem +# 7. Add a sparse file with metadata compression disabled to the filesystem +# 8. Add and remove 1000 empty files to the filesystem +# 9. Snapshot the filesystem +# 10. Send and receive the filesystem, ensuring that it can be mounted +# + +verify_runnable "both" + +function set_metadata_compression_disabled # <0|1> +{ + echo $1 > /sys/module/zfs/parameters/zfs_mdcomp_disable +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/recv && \ + log_must zfs destroy -r $TESTPOOL/recv + [[ -f $keyfile ]] && log_must rm $keyfile + [[ -f $sendfile ]] && log_must rm $sendfile +} +log_onexit cleanup + +log_assert "Verify 'zfs send -w' works with many different file layouts" + +typeset keyfile=/$TESTPOOL/pkey +typeset sendfile=/$TESTPOOL/sendfile + +log_must eval "echo 'password' > $keyfile" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$keyfile $TESTPOOL/$TESTFS2 + +log_must touch /$TESTPOOL/$TESTFS2/empty +log_must mkfile 512 /$TESTPOOL/$TESTFS2/small +log_must mkfile 32M /$TESTPOOL/$TESTFS2/full +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/sparse \ + bs=512 count=1 seek=10G >/dev/null 2>&1 +log_must mkfile 32M /$TESTPOOL/$TESTFS2/truncated +log_must truncate -s 4M /$TESTPOOL/$TESTFS2/truncated +sync + +log_must set_metadata_compression_disabled 1 +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/no_mdcomp \ + count=1 bs=512 seek=10G >/dev/null 2>&1 +sync +log_must set_metadata_compression_disabled 0 + +log_must mkdir -p /$TESTPOOL/$TESTFS2/dir +for i in {1..1000}; do + log_must mkfile 512 /$TESTPOOL/$TESTFS2/dir/file-$i +done +sync + +for i in {1..1000}; do + log_must rm /$TESTPOOL/$TESTFS2/dir/file-$i +done +sync + +log_must zfs snapshot $TESTPOOL/$TESTFS2@now +log_must eval "zfs send -wR $TESTPOOL/$TESTFS2@now > $sendfile" + +log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile" +log_must zfs load-key $TESTPOOL/recv + +log_must zfs mount -a + +log_pass "Verified 'zfs send -w' works with many different file layouts" |